diff --git a/BUILD.NOTES b/BUILD.NOTES index 0fd59bb35c6f885793c623ea3588a39134fcb858..574945fb050cdd0a2fa056caf7928fbcdc483600 100644 --- a/BUILD.NOTES +++ b/BUILD.NOTES @@ -37,7 +37,12 @@ Linux cluster (See BlueGene and AIX specific notes below for some differences). files, but not to code. - this is a prerelease (Release = 0.preX) 2. Tag the repository with the appropriate name for the new version. - > git tag -a slurm-2-3-0-0-pre5 -m "create tag v2.3.0-pre5" + Note the first three digits are the version number. For a proper release, + the last digit is "1" (except for a rebuild without code changes which + could be "2"). For pre-releases, the last digit should be "0" followed by + "pre#" or "rc#". + > git tag -a slurm-2-6-7-1 -m "create tag v2.6.7" OR + > git tag -a slurm-2-7-0-0pre5 -m "create tag v2.7.0-pre5" > git push --tags 3. Use the rpm make target to create the new RPMs. This requires a .rpmmacros (.rpmrc for newer versions of rpmbuild) file containing: diff --git a/DISCLAIMER b/DISCLAIMER index cd27e2782780a20998a15cea30a60a6653504b37..c893df27673d3b820a29125731f459e1b9bad42c 100644 --- a/DISCLAIMER +++ b/DISCLAIMER @@ -1,15 +1,16 @@ SLURM was produced at Lawrence Livermore National Laboratory in collaboration with various organizations. +Copyright (C) 2012-2013 Los Alamos National Security, LLC. Copyright (C) 2011 Trinity Centre for High Performance Computing -Copyright (C) 2010-2011 SchedMD LLC -Copyright (C) 2009 CEA/DAM/DIF +Copyright (C) 2010-2013 SchedMD LLC +Copyright (C) 2009-2013 CEA/DAM/DIF Copyright (C) 2009-2011 Centro Svizzero di Calcolo Scientifico (CSCS) Copyright (C) 2008-2011 Lawrence Livermore National Security Copyright (C) 2008 Vijay Ramasubramanian Copyright (C) 2007-2008 Red Hat, Inc. 
-Copyright (C) 2007-2009 National University of Defense Technology, China -Copyright (C) 2007-2011 Bull +Copyright (C) 2007-2013 National University of Defense Technology, China +Copyright (C) 2007-2013 Bull Copyright (C) 2005-2008 Hewlett-Packard Development Company, L.P. Copyright (C) 2004-2009, Marcus Holland-Moritz Copyright (C) 2002-2007 The Regents of the University of California @@ -110,7 +111,7 @@ Anne-Marie Wunderlin (Bull) CODE-OCEC-09-009. All rights reserved. This file is part of SLURM, a resource management program. -For details, see <http://www.schedmd.com/slurmdocs/>. +For details, see <http://slurm.schedmd.com/>. Please also read the supplied file: DISCLAIMER. SLURM is free software; you can redistribute it and/or modify it under diff --git a/META b/META index 913a3616010c62797171003696c6baf7b5d0cf97..dc089faf40ae7b85699d92c19cd7561fe5f1ef6c 100644 --- a/META +++ b/META @@ -1,11 +1,11 @@ Api_age: 0 - Api_current: 25 + Api_current: 26 Api_revision: 0 Major: 2 Meta: 1 - Micro: 7 - Minor: 5 + Micro: 4 + Minor: 6 Name: slurm Release: 1 Release_tags: dist - Version: 2.5.7 + Version: 2.6.4 diff --git a/Makefile.in b/Makefile.in index a5581f370122b496030a8d52fda89e71b1dff839..82b815073540fa0f0651ce3b6e022afd79a52989 100644 --- a/Makefile.in +++ b/Makefile.in @@ -66,6 +66,7 @@ DIST_COMMON = $(am__configure_deps) $(pkginclude_HEADERS) \ INSTALL NEWS ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -83,6 +84,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -91,11 +93,13 @@ 
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -227,6 +231,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -247,6 +253,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -256,6 +265,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -263,6 +274,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -297,6 +317,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = 
@OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -324,6 +347,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/NEWS b/NEWS index 29a313317b4b91d545fbcba3fbce232f056a2788..7dde504975179a93c734b394a392d59d7e694659 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,387 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in Slurm 2.6.4 +======================== + -- Fixed sh5util to print its usage. + -- Corrected commit f9a3c7e4e8ec. + -- Honor ntasks-per-node option with exclusive node allocations. + -- sched/backfill - Prevent invalid memory reference if bf_continue option is + configured and slurm is reconfigured during one of the sleep cycles or if + there are any changes to the partition configuration or if the normal + scheduler runs and starts a job that the backfill scheduler is actively + working on. + -- Update man pages information about acct-freq and JobAcctGatherFrequency + to reflect only the latest supported format. + -- Minor document update to include note about PrivateData=Usage for the + slurm.conf when using the DBD. + -- Expand information reported with DebugFlags=backfill. + -- Initiate jobs pending to run in a reservation as soon as the reservation + becomes active. + -- Purged expired reservation even if it has pending jobs. + -- Corrections to calculation of a pending job's expected start time. + -- Remove some vestigial logic treating job priority of 1 as a special case. + -- Memory freeing up to avoid minor memory leaks at close of daemons + -- Updated documentation to give correct units being displayed. 
+ -- Report AccountingStorageBackupHost with "scontrol show config". + -- init scripts ignore quotes around Pid file name specifications. + -- Fixed typo about command case in quickstart.html. + -- task/cgroup - handle new cpuset files, similar to commit c4223940. + -- Replace the tempname() function call with mkstemp(). + -- Fix for --cpu_bind=map_cpu/mask_cpu/map_ldom/mask_ldom plus + --mem_bind=map_mem/mask_mem options, broken in 2.6.2. + -- Restore default behavior of allocating cores to jobs on a cyclic basis + across the sockets unless SelectTypeParameters=CR_CORE_DEFAULT_DIST_BLOCK + or user specifies other distribution options. + -- Enforce JobRequeue configuration parameter on node failure. Previously + always requeued the job. + -- acct_gather_energy/ipmi - Add delay before retry on read error. + -- select/cons_res with GRES and multiple threads per core, fix possible + infinite loop. + -- proctrack/cgroup - Add cgroup create retry logic in case one step is + starting at the same time as another step is ending and the logic to create + and delete cgroups overlaps. + -- Improve setting of job wait "Reason" field. + -- Correct sbatch documentation and job_submit/pbs plugin "%j" is job ID, + not "%J" (which is job_id.step_id). + -- Improvements to sinfo performance, especially for large numbers of + partitions. + -- SlurmdDebug - Permit changes to slurmd debug level with "scontrol reconfig" + -- smap - Avoid invalid memory reference with hidden nodes. + -- Fix sacctmgr modify qos set preempt+/-=. + -- BLUEGENE - fix issue where node count wasn't set up correctly when srun + performs the allocation, regression in 2.6.3. + -- Add support for dependencies of job array elements (e.g. + "sbatch --depend=afterok:123_4 ...") or all elements of a job array (e.g. + "sbatch --depend=afterok:123 ...").
+ -- Add support for new options in sbatch qsub wrapper: + -W block=true (wait for job completion) + Clear PBS_NODEFILE environment variable + -- Fixed the MaxSubmitJobsPerUser limit in QOS which limited submissions + a job too early. + -- sched/wiki, sched/wiki2 - Fix to work with change logic introduced in + version 2.6.3 preventing Maui/Moab from starting jobs. + -- Updated the QOS limits documentation and man page. + +* Changes in Slurm 2.6.3 +======================== + -- Add support for some new #PBS options in sbatch scripts and qsub wrapper: + -l accelerator=true|false (GPU use) + -l mpiprocs=# (processors per node) + -l naccelerators=# (GPU count) + -l select=# (node count) + -l ncpus=# (task count) + -v key=value (environment variable) + -W depend=opts (job dependencies, including "on" and "before" options) + -W umask=# (set job's umask) + -- Added qalter and qrerun commands to torque package. + -- Corrections to qstat logic: job CPU count and partition time format. + -- Add job_submit/pbs plugin to translate PBS job dependency options to the + extent possible (no support for PBS "before" options) and set some PBS + environment variables. + -- Add spank/pbs plugin to set a bunch of PBS environment variables. + -- Backported sh5util from master to 2.6 as there are some important + bugfixes and the new item extraction feature. + -- select/cons_res - Correct MaxCPUsPerNode partition constraint for CR_Socket. + -- scontrol - for setdebugflags command, avoid parsing "-flagname" as an + scontrol command line option. + -- Fix issue with step accounting if a job is requeued. + -- Close file descriptors on exec of prolog, epilog, etc. + -- Fix issue when a user has held a job and then sets the begin time + into the future. + -- Scontrol - Enable changing a job's stdout file. + -- Fix issues where memory or node count of a srun job is altered while the + srun is pending.
The step creation would use the old values and possibly + hang srun since the step wouldn't be able to be created in the modified + allocation. + -- Add support for new SchedulerParameters value of "bf_max_job_part", the + maximum depth the backfill scheduler should go in any single partition. + -- acct_gather/infiniband plugin - Correct packets_in/out values. + -- BLUEGENE - Don't ignore a conn-type request from the user. + -- BGQ - Force a request on a Q for a MESH to be a TORUS in a dimension that + can only be a TORUS (1). + -- Change max message length from 100MB to 1GB before generating "Insane + message length" error. + -- sched/backfill - Prevent possible memory corruption due to use of + bf_continue option and long running scheduling cycle (pending jobs could + have been cancelled and purged). + -- CRAY - fix AcceleratorAllocation depth correctly for basil 1.3 + -- Created the environment variable SLURM_JOB_NUM_NODES for srun jobs and + updated the srun man page. + -- BLUEGENE/CRAY - Don't set env variables that pertain to a node when Slurm + isn't doing the launching. + -- gres/gpu and gres/mic - Do not treat the existence of an empty gres.conf + file as a fatal error. + -- Fixed for if hours are specified as 0 the time days-0:min specification + is not parsed correctly. + -- switch/nrt - Fix for memory leak. + -- Subtract the PMII_COMMANDLEN_SIZE in contribs/pmi2/pmi2_api.c to prevent + certain implementation of snprintf() to segfault. + +* Changes in Slurm 2.6.2 +======================== + -- Fix issue with reconfig and GrpCPURunMins + -- Fix of wrong node/job state problem after reconfig + -- Allow users who are coordinators update their own limits in the accounts + they are coordinators over. + -- BackupController - Make sure we have a connection to the DBD first thing + to avoid it thinking we don't have a cluster name. + -- Correct value of min_nodes returned by loading job information to consider + the job's task count and maximum CPUs per node. 
+ -- If running jobacct_gather/none fix issue on unpacking step completion. + -- Reservation with CoreCnt: Avoid possible invalid memory reference. + -- sjstat - Add man page when generating rpms. + -- Make sure GrpCPURunMins is added when creating a user, account or QOS with + sacctmgr. + -- Fix for invalid memory reference due to multiple free calls caused by + job arrays submitted to multiple partitions. + -- Enforce --ntasks-per-socket=1 job option when allocating by socket. + -- Validate permissions of key directories at slurmctld startup. Report + anything that is world writable. + -- Improve GRES support for CPU topology. Previous logic would pick CPUs then + reject jobs that can not match GRES to the allocated CPUs. New logic first + filters out CPUs that can not use the GRES, next picks CPUs for the job, + and finally picks the GRES that best match those CPUs. + -- Switch/nrt - Prevent invalid memory reference when allocating single adapter + per node of specific adapter type + -- CRAY - Make Slurm work with CLE 5.1.1 + -- Fix segfault if submitting to multiple partitions and holding the job. + -- Use MAXPATHLEN instead of the hardcoded value 1024 for maximum file path + lengths. + -- If OverTimeLimit is defined do not declare failed those jobs that ended + in the OverTimeLimit interval. + +* Changes in Slurm 2.6.1 +======================== + -- slurmdbd - Allow job derived ec and comments to be modified by non-root + users. + -- Fix issue with job name being truncated to 24 chars when sending a mail + message. + -- Fix minor issues with spec file, missing files and including files + erroneously on a bluegene system. + -- sacct - fix --name and --partition options when using + accounting_storage/filetxt. + -- squeue - Remove extra whitespace of default printout. + -- BGQ - added head ppcfloor as an include dir when building. + -- BGQ - Better debug messages in runjob_mux plugin. + -- PMI2 Updated the Makefile.am to build a versioned library. 
+ -- CRAY - Fix srun --mem_bind=local option with launch/aprun. + -- PMI2 Corrected buffer size computation in the pmi2_api.c module. + -- GRES accounting data wrong in database: gres_alloc, gres_req, and gres_used + fields were empty if the job was not started immediately. + -- Fix sbatch and srun task count logic when --ntasks-per-node specified, + but no explicit task count. + -- Corrected the hdf5 profile user guide and the acct_gather.conf + documentation. + -- IPMI - Fix Math bug getting new wattage. + -- Corrected the AcctGatherProfileType documentation in slurm.conf + -- Corrected the sh5util program to print the header in the csv file + only once, set the debug messages at debug() level, make the argument + check case insensitive and avoid printing duplicate \n. + -- If cannot collect energy values send message to the controller + to drain the node and log error to slurmd log file. + -- Handle complete removal of CPURunMins time at the end of the job instead + of at multifactor poll. + -- sview - Add missing debug_flag options. + -- PGSQL - Notes about Postgres functionality being removed in the next + version of Slurm. + -- MYSQL - fix issue when rolling up usage and events happened when a cluster + was down (slurmctld not running) during that time period. + -- sched/wiki2 - Insure that Moab gets current CPU load information. + -- Prevent infinite loop in parsing configuration if including file containing + one blank line. + -- Fix pack and unpack between 2.6 and 2.5. + -- Fix job state recovery logic in which a job's accounting frequency was + not set. This would result in a value of 65534 seconds being used (the + equivalent of NO_VAL in uint16_t), which could result in the job being + requeued or aborted. + -- Validate a job's accounting frequency at submission time rather than + waiting for its initiation to possibly fail. + -- Fix CPURunMins if a job is requeued from a failed launch.
+ -- Fix in accounting_storage/filetxt to correct start times which sometimes + could end up before the job started. + -- Fix issue with potentially referencing past an array in parse_time() + -- CRAY - fix issue with accelerators on a cray when parsing BASIL 1.3 XML. + -- Fix issue with a 2.5 slurmstepd locking up when talking to a 2.6 slurmd. + -- Add argument to priority plugin's priority_p_reconfig function to note + when the association and QOS used_cpu_run_secs field has been reset. + +* Changes in Slurm 2.6.0 +======================== + -- Fix it so bluegene and serial systems don't get warnings over new NODEDATA + enum. + -- When a job is aborted send a message for any tasks that have completed. + -- Correction to memory per CPU calculation on system with threads and + allocating cores or sockets. + -- Requeue batch job if its node reboots (used to abort the job). + -- Enlarge maximum size of srun's hostlist file. + -- IPMI - Fix first poll to get correct consumed_energy for a step. + -- Correction to job state recovery logic that could result in assert failure. + -- Record partial step accounting record if allocated nodes fail abnormally. + -- Accounting - fix issue where PrivateData=jobs or users could potentially + show information to users that had no associations on the system. + -- Make PrivateData in slurmdbd.conf case insensitive. + -- sacct/sstat - Add format option ConsumedEnergyRaw to print full energy + values. + +* Changes in Slurm 2.6.0rc2 +=========================== + -- HDF5 - Fix issue with Ubuntu where HDF5 development headers are + overwritten by the parallel versions thus making it so we need handle + both cases. + -- ACCT_GATHER - handle suspending correctly for polling threads. + -- Make SLURM_DISTRIBUTION env var hold both types of distribution if + specified. + -- Remove hardcoded /usr/local from slurm.spec.
+ -- Modify slurmctld locking to improve performance under heavy load with + very large numbers of batch job submissions or job cancellations. + -- sstat - Fix issue where if -j wasn't given allow last argument to be checked + for as the job/step id. + -- IPMI - fix adjustment on poll when using EnergyIPMICalcAdjustment. + +* Changes in Slurm 2.6.0rc1 +=========================== + -- Added helper script for launching symmetric and MIC-only MPI tasks within + SLURM (in contribs/mic/mpirun-mic). + -- Change maximum delay for state save from 2 secs to 5 secs. Make timeout + configurable at build time by defining SAVE_MAX_WAIT. + -- Modify slurmctld data structure locking to interleave read and write + locks rather than always favor write locks over read locks. + -- Added sacct format option of "ALL" to print all fields. + -- Deprecate the SchedulerParameters value of "interval" use "bf_interval" + instead as documented. + -- Add acct_gather_profile/hdf5 to profile jobs with hdf5 + -- Added MaxCPUsPerNode partition configuration parameter. This can be + especially useful to schedule systems with GPUs. + -- Permit "scontrol reboot_node" for nodes in MAINT reservation. + -- Added "PriorityFlags" value of "SMALL_RELATIVE_TO_TIME". If set, the job's + size component will be based upon not the job size alone, but the job's + size divided by its time limit. + -- Added sbatch option "--ignore-pbs" to ignore "#PBS" options in the batch + script. + -- Rename slurm_step_ctx_params_t field from "mem_per_cpu" to "pn_min_memory". + Job step now accepts memory specification in either per-cpu or per-node + basis. + -- Add ability to specify host repetition count in the srun hostfile (e.g. + "host1*2" is equivalent to "host1,host1"). + +* Changes in Slurm 2.6.0pre3 +============================ + -- Add milliseconds to default log message header (both RFC 5424 and ISO 8601 + time formats). Disable milliseconds logging using the configure + parameter "--disable-log-time-msec".
Default time format changes to + ISO 8601 (without time zone information). Specify "--enable-rfc5424time" + to restore the time zone information. + -- Add username (%u) to the filename pattern in the batch script. + -- Added options for front end nodes of AllowGroups, AllowUsers, DenyGroups, + and DenyUsers. + -- Fix sched/backfill logic to initiate jobs with maximum time limit over the + partition limit, but the minimum time limit permits it to start. + -- gres/gpu - Fix for gres.conf file with multiple files on a single line + using a slurm expression (e.g. "File=/dev/nvidia[0-1]"). + -- Replaced ipmi.conf with generic acct_gather.conf file for all acct_gather + plugins. For those doing development to use this follow the model set + forth in the acct_gather_energy_ipmi plugin. + -- Added more options to update a step's information + -- Add DebugFlags=ThreadID which will print the thread id of the calling + thread. + -- CRAY - Allocate whole node (CPUs) in reservation despite what the + user requests. We have found any srun/aprun afterwards will work on a + subset of resources. + +* Changes in Slurm 2.6.0pre2 +============================ + -- Do not purge inactive interactive jobs that lack a port to ping (added + for MR+ operation). + -- Advanced reservations with hostname and core counts now supports asymmetric + reservations (e.g. specific different core count for each node). + -- Added slurmctld/dynalloc plugin for MapReduce+ support. + -- Added "DynAllocPort" configuration parameter. + -- Added partition parameter of SelectTypeParameters to override system-wide + value. + -- Added cr_type to partition_info data structure. + -- Added allocated memory to node information available (within the existing + select_nodeinfo field of the node_info_t data structure). Added Allocated + Memory to node information displayed by sview and scontrol commands. + -- Make sched/backfill the default scheduling plugin rather than sched/builtin + (FIFO).
+ -- Added support for a job having different priorities in different partitions. + -- Added new SchedulerParameters configuration parameter of "bf_continue" + which permits the backfill scheduler to continue considering jobs for + backfill scheduling after yielding locks even if new jobs have been + submitted. This can result in lower priority jobs being backfill + scheduled instead of newly arrived higher priority jobs, but will permit + more queued jobs to be considered for backfill scheduling. + -- Added support to purge reservation records from accounting. + -- Cray - Add support for Basil 1.3 + +* Changes in SLURM 2.6.0pre1 +============================ + -- Add "state" field to job step information reported by scontrol. + -- Notify srun to retry step creation upon completion of other job steps + rather than polling. This results in much faster throughput for job step + execution with --exclusive option. + -- Added "ResvEpilog" and "ResvProlog" configuration parameters to execute a + program at the beginning and end of each reservation. + -- Added "slurm_load_job_user" function. This is a variation of + "slurm_load_jobs", but accepts a user ID argument, potentially resulting + in substantial performance improvement for "squeue --user=ID" + -- Added "slurm_load_node_single" function. This is a variation of + "slurm_load_nodes", but accepts a node name argument, potentially resulting + in substantial performance improvement for "sinfo --nodes=NAME". + -- Added "HealthCheckNodeState" configuration parameter to identify node states + on which HealthCheckProgram should be executed. + -- Remove sacct --dump --formatted-dump options which were deprecated in + 2.5. + -- Added support for job arrays (phase 1 of effort). See "man sbatch" option + -a/--array for details.
+ -- Add new AccountStorageEnforce options of 'nojobs' and 'nosteps' which will + allow the use of accounting features like associations, qos and limits but + not keep track of jobs or steps in accounting. + -- Cray - Add new cray.conf parameter of "AlpsEngine" to specify the + communication protocol to be used for ALPS/BASIL. + -- select/cons_res plugin: Correction to CPU allocation count logic in for + cores without hyperthreading. + -- Added new SelectTypeParameter value of "CR_ALLOCATE_FULL_SOCKET". + -- Added PriorityFlags value of "TICKET_BASED" and merged priority/multifactor2 + plugin into priority/multifactor plugin. + -- Add "KeepAliveTime" configuration parameter controlling how long sockets + used for srun/slurmstepd communications are kept alive after disconnect. + -- Added SLURM_SUBMIT_HOST to salloc, sbatch and srun job environment. + -- Added SLURM_ARRAY_TASK_ID to environment of job array. + -- Added squeue --array/-r option to optimize output for job arrays. + -- Added "SlurmctldPlugstack" configuration parameter for generic stack of + slurmctld daemon plugins. + -- Removed contribs/arrayrun tool. Use native support for job arrays. + -- Modify default installation locations for RPMs to match "make install": + _prefix /usr/local + _slurm_sysconfdir %{_prefix}/etc/slurm + _mandir %{_prefix}/share/man + _infodir %{_prefix}/share/info + -- Add acct_gather_energy/ipmi which works off freeipmi for energy gathering + * Changes in Slurm 2.5.8 ======================== + -- Fix for slurmctld segfault on NULL front-end reason field. + -- Avoid gres step allocation errors when a job shrinks in size due to either + down nodes or explicit resizing. Generated slurmctld errors of this type: + "step_test ... gres_bit_alloc is NULL" + -- Fix bug that would leak memory and over-write the AllowGroups field if on + "scontrol reconfig" when AllowNodes is manually changed using scontrol. + -- Get html/man files to install in correct places with rpms. 
+ -- Remove --program-prefix from spec file since it appears to be added by + default and appeared to break other things. + -- Updated the automake min version in autogen.sh to be correct. + -- Select/cons_res - Correct total CPU count allocated to a job with + --exclusive and --cpus-per-task options + -- switch/nrt - Don't allocate network resources unless job step has 2+ nodes. + -- select/cons_res - Avoid extraneous "oversubscribe" error messages. + -- Reorder get config logic to avoid deadlock. + -- Enforce QOS MaxCPUsMin limit when job submission contains no user-specified + time limit. + -- EpilogSlurmctld pthread is passed required arguments rather than a pointer + to the job record, which under some conditions could be purged and result + in an invalid memory reference. * Changes in Slurm 2.5.7 ======================== @@ -22,7 +401,7 @@ documents those changes that are of interest to users and admins. -- POE - Fix logic binding tasks to CPUs. -- sview - Fix race condition where new information could of slipped past the node tab and we didn't notice. - -- Accounting - Fix an invalid memory read when slurmctld sends data about + -- Accounting - Fix an invalid memory read when slurmctld sends data about start job to slurmdbd. -- If a prolog or epilog failure occurs, drain the node rather than setting it down and killing all of its jobs. @@ -210,6 +589,7 @@ documents those changes that are of interest to users and admins. -- Fix race condition in job dependency logic which can result in invalid memory reference. + * Changes in SLURM 2.5.2 ======================== -- Fix advanced reservation recovery logic when upgrading from version 2.4. 
diff --git a/README.rst b/README.rst index 8a07ce8d924820fe087a76aaa5f960a3dcfc24cb..3e41922769b65c2bf38da422e892711c40cfac6c 100644 --- a/README.rst +++ b/README.rst @@ -66,12 +66,12 @@ COMPILING AND INSTALLING THE DISTRIBUTION ----------------------------------------- Please see the instructions at - http://www.schedmd.com/slurmdocs/quickstart_admin.html + http://slurm.schedmd.com/quickstart_admin.html Extensive documentation is available from our home page at - http://www.schedmd.com/slurmdocs/slurm.html + http://slurm.schedmd.com/slurm.html PROBLEMS -------- If you experience problems compiling, installing, or running SLURM, see - http://www.schedmd.com/slurmdocs/help.html + http://slurm.schedmd.com/help.html diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 38105e11b1568278a94bddc19e27e2fbb9667b89..f7f8a72b99a6e93f025e4e0ee6fd721cdd97fa8f 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,10 +1,10 @@ -RELEASE NOTES FOR SLURM VERSION 2.5 -8 November 2012 +RELEASE NOTES FOR SLURM VERSION 2.6 +21 March 2013 IMPORTANT NOTE: If using the slurmdbd (SLURM DataBase Daemon) you must update this first. -The 2.5 slurmdbd will work with SLURM daemons of version 2.3 and above. +The 2.6 slurmdbd will work with SLURM daemons of version 2.4 and above. You will not need to update all clusters at the same time, but it is very important to update slurmdbd first and having it running before updating any other clusters making use of it. No real harm will come from updating @@ -18,116 +18,232 @@ innodb_buffer_pool_size=64M under the [mysqld] reference in the my.cnf file and restarting the mysqld. This is needed when converting large tables over to the new database schema. -SLURM can be upgraded from version 2.3 or 2.4 to version 2.5 without loss of +SLURM can be upgraded from version 2.4 or 2.5 to version 2.6 without loss of jobs or other state information. Upgrading directly from an earlier version of SLURM will result in loss of state information. 
HIGHLIGHTS ========== -- Major performance improvements for high-throughput computing. -- Added "boards" count to node information and "boards_per_node" to job request - and job information. Optimize resource allocation to minimize number of - boards used by a job. -- Added support for IBM Parallel Environment (PE) including the launching of - jobs using either the srun or poe command. -- Add support for advanced reservation for specific cores rather than whole - nodes. -- Added srun option "--cpu-freq" to enable user control over the job's CPU - frequency and thus it's power consumption. -- Added priority/multifactor2 plugin supporting ticket based shares. -- Added gres/mic plugin supporting Intel Many Integrated Core (MIC) processors. -- Added launch plugin to support srun interface to launch tasks using different - methods like IBM's poe or Cray's aprun. + - Added support for job arrays, which increases performance and ease of use + for sets of similar jobs. This may necessitate changes in prolog and/or + epilog scripts due to change in the job ID format, which is now of the form + "<job_id>_<index>" for job arrays. + http://slurm.schedmd.com/job_array.html + - Added support for job profiling to periodically capture each task's CPU use, + memory use, power consumption, Lustre use and Infiniband network use. + http://slurm.schedmd.com/hdf5_profile_user_guide.html + - Added support for generic external sensor plugins which can be used to + capture temperature and power consumption data. + http://slurm.schedmd.com/ext_sensorsplugins.html + http://slurm.schedmd.com/ext_sensors.conf.html + - Added mpi/pmi2 plugin with much more scalable performance for MPI + implementations using PMI communications interface. + http://slurm.schedmd.com/mpi_guide.html#mpich2 + - Added prolog and epilog support for advanced reservations. + - Much faster throughput for job step execution with --exclusive option. 
The + srun process is notified when resources become available rather than + periodic polling. + - Added "MaxCPUsPerNode" partition configuration parameter. This can be + especially useful to schedule GPUs. For example a node can be associated + with two Slurm partitions (e.g. "cpu" and "gpu") and the partition/queue + "cpu" could be limited to only a subset of the node's CPUs, ensuring that + one or more CPUs would be available to jobs in the "gpu" partition/queue. + - Advanced reservations with hostname and core counts now support asymmetric + reservations (e.g. specific different core count for each node). + - Added slurmctld/dynalloc plugin for MapReduce+ support. New versions of + OpenMPI and MapReduce are required to enable this functionality. + http://slurm.schedmd.com/dynalloc.html + - Make sched/backfill the default scheduling plugin rather than sched/builtin + (FIFO). CONFIGURATION FILE CHANGES (see "man slurm.conf" for details) ============================================================= -- Added node configuration parameter of "Boards". -- Added DebugFlag option of "Switch" to log switch plugin details. -- Added "AcctGatherEnergy" configuration parameter to identify the plugin - to be used to gather energy consumption data for jobs. -- When running with multiple slurmd daemons per node, enable specifying a - range of ports on a single line of the node configuration in slurm.conf. -- New SelectType plugin of "serial" provides highly optimized throughput for - serial (single CPU) jobs. -- New SwitchType plugin of "nrt" provides support for IBM Network Resource - Table API. -- Added configuration option of "LaunchType" to control the mechanism used for - launching application tasks. Available plugins include "slurm" (native SLURM - mode), "runjob" (for use with IBM BlueGene/Q systems) and "poe" (for use with - IBM Parallel Environment).
+ - Added "HealthCheckNodeState" configuration parameter identifying node states + on which HealthCheckProgram should be executed. + - Added "MaxArraySize" configuration parameter specifying maximum job array + size. + - Added "ResvEpilog" and "ResvProlog" configuration parameters to execute a + program at the beginning and end of a reservation. + - Added new cray.conf parameter of "AlpsEngine" to specify the communication + protocol to be used for ALPS/BASIL. + - Added new SelectTypeParameter value of "CR_ALLOCATE_FULL_SOCKET". + - Added PriorityFlags value of "TICKET_BASED" and merged priority/multifactor2 + plugin into priority/multifactor plugin. + - Added "KeepAliveTime" controlling how long sockets used for srun/slurmstepd + communications are kept alive after disconnect. + - Added "SlurmctldPlugstack" configuration parameter for generic stack of + slurmctld daemon plugins. Only the plugin's init and fini functions are + called. + - Added "DynAllocPort" configuration parameter for use by slurmctld/dynalloc + plugin. + - Added new "SchedulerParameters" configuration parameter of "bf_continue" + which permits the backfill scheduler to continue considering jobs for + backfill scheduling after yielding locks even if new jobs have been + submitted. This can result in lower priority jobs being backfill + scheduled instead of newly arrived higher priority jobs, but will permit + more queued jobs to be considered for backfill scheduling. + - Added options for front end nodes of "AllowGroups", "AllowUsers", + "DenyGroups", and "DenyUsers". + - Added "PriorityFlags" value of "SMALL_RELATIVE_TO_TIME". If set, the job's + size component will be based upon not the job size alone, but the job's + size divided by its time limit.
+ +DBD CONFIGURATION FILE CHANGES (see "man slurmdbd.conf" for details) +==================================================================== + - Added "ArchiveResvs" and "PurgeResvAfter" options to be able to handle old + reservations in the database. COMMAND CHANGES (see man pages for details) =========================================== -- Added sinfo option of "-T" to print reservation information. -- Added LicensesUsed field to output of "scontrol show configuration" command. - Output is of the form "name:used/total". -- Add reservation flag of "Part_Nodes" to allocate all nodes in a partition to - a reservation and automatically change the reservation when nodes are - added to or removed from the reservation. -- sinfo partition field size will be set the the length of the longest - partition name by default. -- Deprecation of sacct --dump --fdump. This will go away in 2.6 completely. + - Added step "State" field to scontrol output. + - Added "--array" option to sbatch for job array support. + - Enlarged default JOBID and STEPID field sizes in squeue output to better + support job arrays. For job arrays, the job ID is no longer a single number + but has the format "JOBID_TASKID" while a step ID format is now + "JOBID_TASKID.STEPID". + - Modified squeue output field options for job arrays: + %i is now of the form <base_job_id>_<array_index> + %F is the <base_job_id> + %K is the <array_index> + %A is the <job_id>, which is unique for each element of a job array + - Fully removed deprecated sacct --dump --fdump options. + - Added partition "SelectTypeParameters" field to scontrol output. + - Added Allocated Memory to node information displayed by sview and scontrol + commands. + - Added username (%u) to the filename pattern for srun and sbatch commands. + - Added sacct format option of "ALL" to print all fields. OTHER CHANGES ============= + - Added PMI2 client library. 
Refer to the documentation here: + http://slurm.schedmd.com/mpi_guide.html#mpich2 + - Added SLURM_SUBMIT_HOST to salloc, sbatch and srun job environment. + - Added SLURM_ARRAY_JOB_ID and SLURM_ARRAY_TASK_ID to environment of job + array. + - Added milliseconds to default log message header (both RFC 5424 and ISO 8601 + time formats). Disable milliseconds logging using the configure + parameter "--disable-log-time-msec". Default time format changes to + ISO 8601 (without time zone information). Specify "--enable-rfc5424time" + to restore the time zone information. + - Added sbatch option "--ignore-pbs" to ignore "#PBS" options in the batch + script. API CHANGES =========== Changed members of the following structs ======================================== -Added boards_per_node to job_info and job_desc_msg_t. -Added acct_gather_energy_t, boards and cpu_load to node_info_t. -Added step_signal to slurm_step_launch_callbacks_t - for signaling steps that - are perhaps not running as srun. -Added acct_gather_energy_type, acct_gather_node_freq launch_type, licenses, - and licenses_used to slurm_ctl_conf_t -Added ntasks_per_board, boards_per_node, sockets_per_board to slurm_job_info_t -Added ntasks_per_board, boards_per_node job_desc_msg_t -Added cpu_freq to slurm_step_ctx_params_t -Added cpu_freq to slurm_step_launch_params_t -Added cpu_freq to job_step_info_t -Added (*step_signal) to slurm_step_launch_callbacks_t -Added core_cnt to reserve_info_t -Added core_cnt to resv_desc_msg_t -Added actual_boards to slurmd_status_t -Added act_cpufreq, consumed_energy, and req_cpufreq to slurmdb_stats_t + - Added "time_t poll_time" to acct_gather_energy_t. + - Changed "acctg_freq" from uint16_t to char * in job_desc_msg_t. + - Added "char *array_inx" field to job_desc_msg_t. + - Added "void *array_bitmap" field to job_desc_msg_t. + - Added "uint32_t profile" field to job_desc_msg_t. + - Added "uint32_t array_job_id" field to slurm_job_info_t.
+ - Added "uint32_t array_task_id" field to slurm_job_info_t. + - Added "uint32_t profile" field to slurm_job_info_t. + - Added "time_t end_time" field to step_update_request_msg_t. + - Added "uint32_t exit_code" field to step_update_request_msg_t. + - Added "jobacctinfo_t *jobacct" field to step_update_request_msg_t. + - Added "char *name" field to step_update_request_msg_t. + - Added "time_t start_time" field to step_update_request_msg_t. + - Rename "mem_per_cpu" to "pn_min_memory" in slurm_step_ctx_params_t. + - Added "uint32_t profile" field to slurm_step_ctx_params_t. + - Added "uint32_t profile" field to slurm_step_launch_params_t. + - Changed "acctg_freq" from uint16_t to char * in slurm_step_launch_params_t. + - Added "uint32_t array_job_id" field to job_step_info_t. + - Added "uint32_t array_task_id" field to job_step_info_t. + - Added "uint16_t state" field to job_step_info_t. + - Added "ext_sensors_data_t *ext_sensors" field to node_info_t. + - Added "char *allow_groups" field to front_end_info_t. + - Added "char *allow_users" field to front_end_info_t. + - Added "char *deny_groups" field to front_end_info_t. + - Added "char *deny_users" field to front_end_info_t. + - Added "uint16_t cr_type" field to partition_info_t. + - Added "uint32_t max_cpus_per_node" field to partition_info_t. + - Changed "core_cnt" from uint32_t to uint32_t* in resv_desc_msg_t. + - Added "char *acct_gather_profile_type" field to slurm_ctl_conf_t. + - Added "char *acct_gather_infiniband_type" field to slurm_ctl_conf_t. + - Added "char *acct_gather_filesystem_type" field to slurm_ctl_conf_t. + - Added "uint16_t dynalloc_port" field to slurm_ctl_conf_t. + - Added "char *ext_sensors_type" field to slurm_ctl_conf_t. + - Added "uint16_t ext_sensors_freq" field to slurm_ctl_conf_t. + - Added "uint16_t health_check_node_state" field to slurm_ctl_conf_t. + - Changed "job_acct_gather_freq" from uint16_t to char * in slurm_ctl_conf_t. 
+ - Added "uint16_t keep_alive_time" field to slurm_ctl_conf_t. + - Added "uint16_t max_array_sz" field to slurm_ctl_conf_t. + - Added "char *resv_epilog" field to slurm_ctl_conf_t + - Added "char *resv_prolog" field to slurm_ctl_conf_t. + - Added "char *slurmctld_plugstack" field to slurm_ctl_conf_t. Added the following struct definitions ====================================== -acct_gather_energy_t -acct_gather_node_resp_msg_t -Changed job_info_t to slurm_job_info_t since IBM PE machines have a job_info_t - structure already defined. job_info_t is defined as slurm_job_info_t on - will still work in a non IBM PE environment, but shouldn't be used in - future code. + - ext_sensors_data_t + - acct_gather_energy_req_msg_t Changed the following enums and #defines ======================================== -added #define DEBUG_FLAG_SWITCH -added #define DEBUG_FLAG_ENERGY -added #define CPU_FREQ_RANGE_FLAG -added #define CPU_FREQ_LOW -added #define CPU_FREQ_MEDIUM -added #define CPU_FREQ_HIGH -added #define CR_BOARD -added #define RESERVE_FLAG_PART_NODES -added #define RESERVE_FLAG_NO_PART_NODES -added #define RECONFIG_KEEP_PART_STAT -added enum acct_energy_type -added SELECT_JOBDATA_CONFIRMED to enum select_jobdata_type -added JOBACCT_DATA_ACT_CPUFREQ and JOBACCT_DATA_CONSUMED_ENERGY - to enum jobacct_data_type -Added CPU_BIND_TO_BOARDS to enum cpu_bind_type + - Added SELECT_NODEDATA_MEM_ALLOC to select_nodedata_type enum + - Added #define ACCT_GATHER_PROFILE_NOT_SET + - Added #define ACCT_GATHER_PROFILE_NONE + - Added #define ACCT_GATHER_PROFILE_ENERGY + - Added #define ACCT_GATHER_PROFILE_TASK + - Added #define ACCT_GATHER_PROFILE_LUSTRE + - Added #define ACCT_GATHER_PROFILE_NETWORK + - Added #define ACCT_GATHER_PROFILE_ALL + - Added JOBACCT_DATA_CONSUMED_ENERGY to jobacct_data_type enum + - Added JOBACCT_DATA_MAX_DISK_READ to jobacct_data_type enum + - Added JOBACCT_DATA_MAX_DISK_READ_ID to jobacct_data_type enum + - Added JOBACCT_DATA_TOT_DISK_READ to 
jobacct_data_type enum + - Added JOBACCT_DATA_MAX_DISK_WRITE to jobacct_data_type enum + - Added JOBACCT_DATA_MAX_DISK_WRITE_ID to jobacct_data_type enum + - Added JOBACCT_DATA_TOT_DISK_WRITE to jobacct_data_type enum + - Added ENERGY_DATA_PROFILE to acct_energy_type + - Added ENERGY_DATA_LAST_POLL to acct_energy_type + - Added #define CR_ALLOCATE_FULL_SOCKET + - Added #define PRIORITY_FLAGS_TICKET_BASED + - Added #define PRIORITY_FLAGS_SIZE_RELATIVE + - Added #define DEBUG_FLAG_EXT_SENSORS + - Added #define DEBUG_FLAG_THREADID + - Added #define DEBUG_FLAG_PROFILE + - Added #define DEBUG_FLAG_INFINIBAND + - Added #define DEBUG_FLAG_FILESYSTEM + - Added #define HEALTH_CHECK_NODE_IDLE + - Added #define HEALTH_CHECK_NODE_ALLOC + - Added #define HEALTH_CHECK_NODE_MIXED + - Added #define HEALTH_CHECK_NODE_ANY + - Added #define KILL_JOB_BATCH + - Added #define KILL_JOB_ARRAY Added the following API's ========================= -slurm_step_launch_add - added for adding tasks to steps that were - previously started. (Note: it currently has only been - tested with user managed io jobs.) -slurm_init_trigger_msg - added to initialize trigger clear/update message + - Added "slurm_step_ctx_create_timeout" function. + - Added "slurm_load_job_user" function. This is a variation of + "slurm_load_jobs", but accepts a user ID argument, potentially resulting + in substantial performance improvement for "squeue --user=ID" + - Added "slurm_xlate_job_id" function. + - Added "slurm_load_node" function. + - Added "slurm_load_node_single" function. This is a variation of + "slurm_load_nodes", but accepts a node name argument, potentially resulting + in substantial performance improvement for "sinfo --nodes=NAME". + - Added "slurm_get_node_energy" function. 
Changed the following API's =========================== -slurm_step_ctx_daemon_per_node_hack - ported to newer poe interface + +DBD API Changes +=============== + - Changed "cpu_min_taskid" from a uint16_t to a uint32_t in slurmdb_stats_t. + - Added "double disk_read_ave" field to slurmdb_stats_t. + - Added "double disk_read_max" field to slurmdb_stats_t. + - Added "uint32_t disk_read_max_nodeid" field to slurmdb_stats_t. + - Added "uint32_t disk_read_max_taskid" field to slurmdb_stats_t. + - Added "double disk_write_ave" field to slurmdb_stats_t. + - Added "double disk_write_max" field to slurmdb_stats_t. + - Added "uint32_t disk_write_max_nodeid" field to slurmdb_stats_t. + - Added "uint32_t disk_write_max_taskid" field to slurmdb_stats_t. + - Changed "pages_max_taskid" from a uint16_t to a uint32_t in slurmdb_stats_t. + - Changed "rss_max_taskid" from a uint16_t to a uint32_t in slurmdb_stats_t. + - Changed "vsize_max_taskid" from a uint16_t to a uint32_t in slurmdb_stats_t. + - Added "uint32_t purge_resv" field to slurmdb_archive_cond_t. + - Added "uint32_t req_mem" field to slurmdb_job_rec_t. 
diff --git a/aclocal.m4 b/aclocal.m4 index d507619f0fef8b11c0d5c70bbc3d9e780d7adce7..5b12c6b49b5e3626d37326658d8636e82d87704c 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1582,6 +1582,7 @@ AC_SUBST([am__untar]) ]) # _AM_PROG_TAR m4_include([auxdir/acx_pthread.m4]) +m4_include([auxdir/ax_lib_hdf5.m4]) m4_include([auxdir/libtool.m4]) m4_include([auxdir/ltoptions.m4]) m4_include([auxdir/ltsugar.m4]) @@ -1599,6 +1600,7 @@ m4_include([auxdir/x_ac_databases.m4]) m4_include([auxdir/x_ac_debug.m4]) m4_include([auxdir/x_ac_dlfcn.m4]) m4_include([auxdir/x_ac_env.m4]) +m4_include([auxdir/x_ac_freeipmi.m4]) m4_include([auxdir/x_ac_gpl_licensed.m4]) m4_include([auxdir/x_ac_hwloc.m4]) m4_include([auxdir/x_ac_iso.m4]) @@ -1607,11 +1609,13 @@ m4_include([auxdir/x_ac_man2html.m4]) m4_include([auxdir/x_ac_munge.m4]) m4_include([auxdir/x_ac_ncurses.m4]) m4_include([auxdir/x_ac_nrt.m4]) +m4_include([auxdir/x_ac_ofed.m4]) m4_include([auxdir/x_ac_pam.m4]) m4_include([auxdir/x_ac_printf_null.m4]) m4_include([auxdir/x_ac_ptrace.m4]) m4_include([auxdir/x_ac_readline.m4]) m4_include([auxdir/x_ac_rfc5424_time.m4]) +m4_include([auxdir/x_ac_rrdtool.m4]) m4_include([auxdir/x_ac_setpgrp.m4]) m4_include([auxdir/x_ac_setproctitle.m4]) m4_include([auxdir/x_ac_sgi_job.m4]) diff --git a/autogen.sh b/autogen.sh index 0587917d2abf88154226e1ba41d6305224dfb8bd..9e0b497ef9dc806d1e816533409a8ee05bd86a9f 100755 --- a/autogen.sh +++ b/autogen.sh @@ -17,8 +17,8 @@ ACMAJOR=2 ACMINOR=59 AMMAJOR=1 -AMMINOR=9 -AMPATCH=0 +AMMINOR=10 +AMPATCH=2 LTMAJOR=1 LTMINOR=5 diff --git a/auxdir/Makefile.in b/auxdir/Makefile.in index f5b99c1ddaced07768803790e87605f1e1975aee..52980ebaa0623ff92d238941d4ae05b02efcf4f4 100644 --- a/auxdir/Makefile.in +++ b/auxdir/Makefile.in @@ -56,6 +56,7 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in compile \ config.guess config.sub depcomp install-sh ltmain.sh missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + 
$(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -73,6 +74,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -81,11 +83,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -128,6 +132,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -148,6 +154,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -157,6 +166,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -164,6 +175,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ 
+HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -198,6 +218,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -225,6 +248,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/auxdir/ax_lib_hdf5.m4 b/auxdir/ax_lib_hdf5.m4 new file mode 100644 index 0000000000000000000000000000000000000000..f4ab86614b1f804ff700d1646206996ebffa5bda --- /dev/null +++ b/auxdir/ax_lib_hdf5.m4 @@ -0,0 +1,305 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_lib_hdf5.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_LIB_HDF5([serial/parallel]) +# +# DESCRIPTION +# +# This macro provides tests of the availability of HDF5 library. +# +# The optional macro argument should be either 'serial' or 'parallel'. The +# former only looks for serial HDF5 installations via h5cc. The latter +# only looks for parallel HDF5 installations via h5pcc. If the optional +# argument is omitted, serial installations will be preferred over +# parallel ones. +# +# The macro adds a --with-hdf5 option accepting one of three values: +# +# no - do not check for the HDF5 library. 
+# yes - do check for HDF5 library in standard locations. +# path - complete path to the HDF5 helper script h5cc or h5pcc. +# +# If HDF5 is successfully found, this macro calls +# +# AC_SUBST(HDF5_VERSION) +# AC_SUBST(HDF5_CC) +# AC_SUBST(HDF5_CFLAGS) +# AC_SUBST(HDF5_CPPFLAGS) +# AC_SUBST(HDF5_LDFLAGS) +# AC_SUBST(HDF5_LIBS) +# AC_SUBST(HDF5_FC) +# AC_SUBST(HDF5_FFLAGS) +# AC_SUBST(HDF5_FLIBS) +# AC_DEFINE(HAVE_HDF5) +# +# and sets with_hdf5="yes". Additionally, the macro sets +# with_hdf5_fortran="yes" if a matching Fortran wrapper script is found. +# Note that Autoconf's Fortran support is not used to perform this check. +# H5CC and H5FC will contain the appropriate serial or parallel HDF5 +# wrapper script locations. +# +# If HDF5 is disabled or not found, this macro sets with_hdf5="no" and +# with_hdf5_fortran="no". +# +# Your configuration script can test $with_hdf5 to take any further +# actions. HDF5_{C,CPP,LD}FLAGS may be used when building with C or C++. +# HDF5_F{FLAGS,LIBS} should be used when building Fortran applications.
+# +# To use the macro, one would code one of the following in "configure.ac" +# before AC_OUTPUT: +# +# 1) dnl Check for HDF5 support +# AX_LIB_HDF5() +# +# 2) dnl Check for serial HDF5 support +# AX_LIB_HDF5([serial]) +# +# 3) dnl Check for parallel HDF5 support +# AX_LIB_HDF5([parallel]) +# +# One could test $with_hdf5 for the outcome or display it as follows +# +# echo "HDF5 support: $with_hdf5" +# +# You could also for example, override the default CC in "configure.ac" to +# enforce compilation with the compiler that HDF5 uses: +# +# AX_LIB_HDF5([parallel]) +# if test "$with_hdf5" = "yes"; then +# CC="$HDF5_CC" +# else +# AC_MSG_ERROR([Unable to find HDF5, we need parallel HDF5.]) +# fi +# +# LICENSE +# +# Copyright (c) 2009 Timothy Brown <tbrown@freeshell.org> +# Copyright (c) 2010 Rhys Ulerich <rhys.ulerich@gmail.com> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 8 + +AC_DEFUN([AX_LIB_HDF5], [ + +AC_REQUIRE([AC_PROG_SED]) +AC_REQUIRE([AC_PROG_AWK]) +AC_REQUIRE([AC_PROG_GREP]) + +dnl Check first argument is one of the recognized values. +dnl Fail eagerly if is incorrect as this simplifies case statements below. +if test "m4_normalize(m4_default([$1],[]))" = "" ; then + : # Recognized value +elif test "m4_normalize(m4_default([$1],[]))" = "serial" ; then + : # Recognized value +elif test "m4_normalize(m4_default([$1],[]))" = "parallel"; then + : # Recognized value +else + AC_MSG_ERROR([ +Unrecognized value for AX[]_LIB_HDF5 within configure.ac. +If supplied, argument 1 must be either 'serial' or 'parallel'. +]) +fi + +dnl Add a default --with-hdf5 configuration option. 
+AC_ARG_WITH([hdf5], + AS_HELP_STRING( + [--with-hdf5=[yes/no/PATH]], + m4_case(m4_normalize([$1]), + [serial], [location of h5cc for serial HDF5 configuration], + [parallel], [location of h5pcc for parallel HDF5 configuration], + [location of h5cc or h5pcc for HDF5 configuration]) + ), + [if test "$withval" = "no"; then + with_hdf5="no" + elif test "$withval" = "yes"; then + with_hdf5="yes" + else + with_hdf5="yes" + H5CC="$withval" + fi], + [with_hdf5="yes"] +) + +dnl Set defaults to blank +HDF5_CC="" +HDF5_VERSION="" +HDF5_CFLAGS="" +HDF5_CPPFLAGS="" +HDF5_LDFLAGS="" +HDF5_LIBS="" +HDF5_FC="" +HDF5_FFLAGS="" +HDF5_FLIBS="" + +dnl Try and find hdf5 compiler tools and options. +if test "$with_hdf5" = "yes"; then + if test -z "$H5CC"; then + dnl Check to see if H5CC is in the path. + AC_PATH_PROGS( + [H5CC], + m4_case(m4_normalize([$1]), + [serial], [h5cc], + [parallel], [h5pcc], + [h5cc h5pcc]), + []) + else + AC_MSG_CHECKING([Using provided HDF5 C wrapper]) + AC_MSG_RESULT([$H5CC]) + fi + AC_MSG_CHECKING([for HDF5 libraries]) + if test ! -f "$H5CC" || test ! -x "$H5CC"; then + AC_MSG_RESULT([no]) + AC_MSG_WARN(m4_case(m4_normalize([$1]), + [serial], [ +Unable to locate serial HDF5 compilation helper script 'h5cc'. +Please specify --with-hdf5=<LOCATION> as the full path to h5cc. +HDF5 support is being disabled (equivalent to --with-hdf5=no). +], [parallel],[ +Unable to locate parallel HDF5 compilation helper script 'h5pcc'. +Please specify --with-hdf5=<LOCATION> as the full path to h5pcc. +HDF5 support is being disabled (equivalent to --with-hdf5=no). +], [ +Unable to locate HDF5 compilation helper scripts 'h5cc' or 'h5pcc'. +Please specify --with-hdf5=<LOCATION> as the full path to h5cc or h5pcc. +HDF5 support is being disabled (equivalent to --with-hdf5=no). 
+])) + with_hdf5="no" + with_hdf5_fortran="no" + else + dnl Get the h5cc output + HDF5_SHOW=$(eval $H5CC -show) + + dnl Get the actual compiler used + HDF5_CC=$(eval $H5CC -show | $AWK '{print $[]1}') + + dnl h5cc provides both AM_ and non-AM_ options + dnl depending on how it was compiled either one of + dnl these are empty. Lets roll them both into one. + + dnl Look for "HDF5 Version: X.Y.Z" + HDF5_VERSION=$(eval $H5CC -showconfig | $GREP 'HDF5 Version:' \ + | $AWK '{print $[]3}') + + dnl A ideal situation would be where everything we needed was + dnl in the AM_* variables. However most systems are not like this + dnl and seem to have the values in the non-AM variables. + dnl + dnl We try the following to find the flags: + dnl (1) Look for "NAME:" tags + dnl (2) Look for "H5_NAME:" tags + dnl (3) Look for "AM_NAME:" tags + dnl + HDF5_tmp_flags=$(eval $H5CC -showconfig \ + | $GREP 'FLAGS\|Extra libraries:' \ + | $AWK -F: '{printf("%s "), $[]2}' ) + + dnl Find the installation directory and append include/ + HDF5_tmp_inst=$(eval $H5CC -showconfig \ + | $GREP 'Installation point:' \ + | $AWK -F: '{print $[]2}' ) + + dnl Add this to the CPPFLAGS + HDF5_CPPFLAGS="-I${HDF5_tmp_inst}/include" + + dnl Now sort the flags out based upon their prefixes + for arg in $HDF5_SHOW $HDF5_tmp_flags ; do + case "$arg" in + -I*) echo $HDF5_CPPFLAGS | $GREP -e "$arg" 2>&1 >/dev/null \ + || HDF5_CPPFLAGS="$arg $HDF5_CPPFLAGS" + ;; + -L*) echo $HDF5_LDFLAGS | $GREP -e "$arg" 2>&1 >/dev/null \ + || HDF5_LDFLAGS="$arg $HDF5_LDFLAGS" + ;; + -l*) echo $HDF5_LIBS | $GREP -e "$arg" 2>&1 >/dev/null \ + || HDF5_LIBS="$arg $HDF5_LIBS" + ;; + esac + done + + HDF5_LIBS="$HDF5_LIBS -lhdf5" + AC_MSG_RESULT([yes (version $[HDF5_VERSION])]) + + dnl See if we can compile + ax_lib_hdf5_save_CC=$CC + ax_lib_hdf5_save_CPPFLAGS=$CPPFLAGS + ax_lib_hdf5_save_LIBS=$LIBS + ax_lib_hdf5_save_LDFLAGS=$LDFLAGS + CC=$HDF5_CC + CPPFLAGS=$HDF5_CPPFLAGS + LIBS=$HDF5_LIBS + LDFLAGS=$HDF5_LDFLAGS + 
AC_CHECK_HEADER([hdf5.h], [ac_cv_hadf5_h=yes], [ac_cv_hadf5_h=no]) + AC_CHECK_LIB([hdf5], [H5Fcreate], [ac_cv_libhdf5=yes], + [ac_cv_libhdf5=no]) + if test "$ac_cv_hadf5_h" = "no" && test "$ac_cv_libhdf5" = "no" ; then + AC_MSG_WARN([Unable to compile HDF5 test program]) + fi + dnl Look for HDF5's high level library + AC_HAVE_LIBRARY([hdf5_hl], [HDF5_LIBS="$HDF5_LIBS -lhdf5_hl"], [], []) + + CC=$ax_lib_hdf5_save_CC + LIBS=$ax_lib_hdf5_save_LIBS + LDFLAGS=$ax_lib_hdf5_save_LDFLAGS + + AC_MSG_CHECKING([for matching HDF5 Fortran wrapper]) + dnl Presume HDF5 Fortran wrapper is just a name variant from H5CC + H5FC=$(eval echo -n $H5CC | $SED -n 's/cc$/fc/p') + if test -x "$H5FC"; then + AC_MSG_RESULT([$H5FC]) + with_hdf5_fortran="yes" + AC_SUBST([H5FC]) + + dnl Again, pry any remaining -Idir/-Ldir from compiler wrapper + for arg in `$H5FC -show` + do + case "$arg" in #( + -I*) echo $HDF5_FFLAGS | $GREP -e "$arg" >/dev/null \ + || HDF5_FFLAGS="$arg $HDF5_FFLAGS" + ;;#( + -L*) echo $HDF5_FFLAGS | $GREP -e "$arg" >/dev/null \ + || HDF5_FFLAGS="$arg $HDF5_FFLAGS" + dnl HDF5 installs .mod files in with libraries, + dnl but some compilers need to find them with -I + echo $HDF5_FFLAGS | $GREP -e "-I${arg#-L}" >/dev/null \ + || HDF5_FFLAGS="-I${arg#-L} $HDF5_FFLAGS" + ;; + esac + done + + dnl Make Fortran link line by inserting Fortran libraries + for arg in $HDF5_LIBS + do + case "$arg" in #( + -lhdf5_hl) HDF5_FLIBS="$HDF5_FLIBS -lhdf5hl_fortran $arg" + ;; #( + -lhdf5) HDF5_FLIBS="$HDF5_FLIBS -lhdf5_fortran $arg" + ;; #( + *) HDF5_FLIBS="$HDF5_FLIBS $arg" + ;; + esac + done + else + AC_MSG_RESULT([no]) + with_hdf5_fortran="no" + fi + + AC_SUBST([HDF5_VERSION]) + AC_SUBST([HDF5_CC]) + AC_SUBST([HDF5_CFLAGS]) + AC_SUBST([HDF5_CPPFLAGS]) + AC_SUBST([HDF5_LDFLAGS]) + AC_SUBST([HDF5_LIBS]) + AC_SUBST([HDF5_FC]) + AC_SUBST([HDF5_FFLAGS]) + AC_SUBST([HDF5_FLIBS]) + AC_DEFINE([HAVE_HDF5], [1], [Defined if you have HDF5 support]) + fi +fi +]) diff --git a/auxdir/libtool.m4 
b/auxdir/libtool.m4 index 828104cfde21845bcd4632d75a4f63ce9fca60fb..02b4bbec520c94a9535cdb7701d4f9201a0709a9 100644 --- a/auxdir/libtool.m4 +++ b/auxdir/libtool.m4 @@ -1324,7 +1324,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - LD="${LD-ld} -m elf_i386" + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" @@ -1688,7 +1695,8 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` - if test -n "$lt_cv_sys_max_cmd_len"; then + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else @@ -2512,17 +2520,6 @@ freebsd* | dragonfly*) esac ;; -gnu*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no @@ -2639,7 +2636,7 @@ linux*oldld* | linux*aout* | linux*coff*) ;; # This must be glibc/ELF. -linux* | k*bsd*-gnu | kopensolaris*-gnu) +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no @@ -3255,10 +3252,6 @@ freebsd* | dragonfly*) fi ;; -gnu*) - lt_cv_deplibs_check_method=pass_all - ;; - haiku*) lt_cv_deplibs_check_method=pass_all ;; @@ -3297,7 +3290,7 @@ irix5* | irix6* | nonstopux*) ;; # This must be glibc/ELF. 
-linux* | k*bsd*-gnu | kopensolaris*-gnu) +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) lt_cv_deplibs_check_method=pass_all ;; @@ -4049,7 +4042,7 @@ m4_if([$1], [CXX], [ ;; esac ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # KAI C++ Compiler @@ -4348,7 +4341,7 @@ m4_if([$1], [CXX], [ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in # old Intel for x86_64 which still supported -KPIC. ecc*) @@ -6241,9 +6234,6 @@ if test "$_lt_caught_CXX_error" != yes; then _LT_TAGVAR(ld_shlibs, $1)=yes ;; - gnu*) - ;; - haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes @@ -6405,7 +6395,7 @@ if test "$_lt_caught_CXX_error" != yes; then _LT_TAGVAR(inherit_rpath, $1)=yes ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler diff --git a/auxdir/ltmain.sh b/auxdir/ltmain.sh index 0096fe6c7558166dd0921da7ae893389e7e41260..b9205eeb4f8876a2a765b18f825a34ff2a7edc08 100644 --- a/auxdir/ltmain.sh +++ b/auxdir/ltmain.sh @@ -70,7 +70,7 @@ # compiler: $LTCC # compiler flags: $LTCFLAGS # linker: $LD (gnu? 
$with_gnu_ld) -# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu2 +# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1.2ubuntu1 # automake: $automake_version # autoconf: $autoconf_version # @@ -80,7 +80,7 @@ PROGRAM=libtool PACKAGE=libtool -VERSION="2.4.2 Debian-2.4.2-1ubuntu2" +VERSION="2.4.2 Debian-2.4.2-1.2ubuntu1" TIMESTAMP="" package_revision=1.3337 diff --git a/auxdir/slurm.m4 b/auxdir/slurm.m4 index 8d1b9b6452c0aa4672a9f71f9ff511e5f98891b8..9d93ec9d12d3ee78231981776a364a5317b92c8d 100644 --- a/auxdir/slurm.m4 +++ b/auxdir/slurm.m4 @@ -65,6 +65,18 @@ AC_DEFUN([X_AC_SLURM_PORTS], AC_DEFINE_UNQUOTED(SLURMCTLD_PORT_COUNT, [$slurmctldportcount], [Define the default port count for slurmctld]) AC_SUBST(SLURMCTLD_PORT_COUNT) + + AC_MSG_CHECKING([for dynamic allocation port to be enabled]) + AC_ARG_ENABLE([dynamic-allocation], + AS_HELP_STRING([--enable-dynamic-allocation, enable dynamic allocation requests from user programs ([disabled])])) + if test "$enable_dynamic_allocation" = "yes"; then + AC_MSG_RESULT([yes]) + slurm_enable_dynamic_allocation="yes" + else + AC_MSG_RESULT([no]) + slurm_enable_dynamic_allocation="no" + fi + AM_CONDITIONAL(SLURM_ENABLE_DYNAMIC_ALLOCATION, test "$slurm_enable_dynamic_allocation" = "yes") ]) dnl @@ -99,7 +111,8 @@ AC_DEFUN([X_AC_SLURM_PROGRAM_INVOCATION_NAME], [ AC_MSG_CHECKING([for program_invocation_name]) - AC_LINK_IFELSE([AC_LANG_PROGRAM([[extern char *program_invocation_name;]], [[char *p; p = program_invocation_name; printf("%s\n", p);]])],[got_program_invocation_name=yes],[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <stdio.h> +extern char *program_invocation_name;]], [[char *p; p = program_invocation_name; printf("%s\n", p);]])],[got_program_invocation_name=yes],[ ]) AC_MSG_RESULT(${got_program_invocation_name=no}) diff --git a/auxdir/x_ac_bluegene.m4 b/auxdir/x_ac_bluegene.m4 index 941186da53ea0b96d63746e7e017205bc2b3081d..eeecb0694dfa01aecfb77e7575d75badc04511d1 100644 --- a/auxdir/x_ac_bluegene.m4 +++ 
b/auxdir/x_ac_bluegene.m4 @@ -274,7 +274,7 @@ AC_DEFUN([X_AC_BGQ], # Search for headers in the directory if test -z "$have_bg_hdr" -a -f "$bg_dir/hlcs/include/bgsched/bgsched.h" ; then have_bgq_hdr=yes - bg_includes="-I$bg_dir/hlcs/include" + bg_includes="-I$bg_dir -I$bg_dir/hlcs/include" fi if test -z "$have_bg_hdr" -a -f "$bg_dir/extlib/include/log4cxx/logger.h" ; then have_bgq_hdr=yes diff --git a/auxdir/x_ac_freeipmi.m4 b/auxdir/x_ac_freeipmi.m4 new file mode 100644 index 0000000000000000000000000000000000000000..348d703cd2458f7d8ef4f625987a11fbe0a7b8d4 --- /dev/null +++ b/auxdir/x_ac_freeipmi.m4 @@ -0,0 +1,64 @@ +##***************************************************************************** +# AUTHOR: +# Thomas Cadeau<thomas.cadeau@ext.bull.net> +# +# SYNOPSIS: +# X_AC_FREEIPMI +# +# DESCRIPTION: +# Determine if the FREEIPMI libraries exists +##***************************************************************************** + +AC_DEFUN([X_AC_FREEIPMI], +[ + _x_ac_freeipmi_dirs="/usr /usr/local" + _x_ac_freeipmi_libs="lib64 lib" + + AC_ARG_WITH( + [freeipmi], + AS_HELP_STRING(--with-freeipmi=PATH,Specify path to freeipmi installation), + [_x_ac_freeipmi_dirs="$withval $_x_ac_freeipmi_dirs"]) + + AC_CACHE_CHECK( + [for freeipmi installation], + [x_ac_cv_freeipmi_dir], + [ + for d in $_x_ac_freeipmi_dirs; do + test -d "$d" || continue + test -d "$d/include" || continue + test -f "$d/include/ipmi_monitoring.h" || continue + for bit in $_x_ac_freeipmi_libs; do + test -d "$d/$bit" || continue + _x_ac_freeipmi_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_freeipmi_libs_save="$LIBS" + LIBS="-L$d/$bit -lipmimonitoring $LIBS" + AC_TRY_LINK([#include <ipmi_monitoring.h> +#include <ipmi_monitoring_bitmasks.h>], +[int err;] +[unsigned int flag = 0;] +[return ipmi_monitoring_init (flag, &err);], +AS_VAR_SET(x_ac_cv_freeipmi_dir, $d), []) + CPPFLAGS="$_x_ac_freeipmi_cppflags_save" + LIBS="$_x_ac_freeipmi_libs_save" + test -n "$x_ac_cv_freeipmi_dir" && 
break + done + test -n "$x_ac_cv_freeipmi_dir" && break + done + ]) + + if test -z "$x_ac_cv_freeipmi_dir"; then + AC_MSG_WARN([unable to locate freeipmi installation]) + else + FREEIPMI_CPPFLAGS="-I$x_ac_cv_freeipmi_dir/include" + FREEIPMI_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_freeipmi_dir/$bit -L$x_ac_cv_freeipmi_dir/$bit" + FREEIPMI_LIBS="-lipmimonitoring" + AC_DEFINE(HAVE_FREEIPMI, 1, [Define to 1 if freeipmi library found]) + fi + + AC_SUBST(FREEIPMI_LIBS) + AC_SUBST(FREEIPMI_CPPFLAGS) + AC_SUBST(FREEIPMI_LDFLAGS) + AM_CONDITIONAL(BUILD_IPMI, test -n "$x_ac_cv_freeipmi_dir") + +]) diff --git a/auxdir/x_ac_ofed.m4 b/auxdir/x_ac_ofed.m4 new file mode 100644 index 0000000000000000000000000000000000000000..8b7975b2a17df3252de8a43f15e4abc969910970 --- /dev/null +++ b/auxdir/x_ac_ofed.m4 @@ -0,0 +1,69 @@ +##***************************************************************************** +# AUTHOR: +# Yiannis Georgiou<yiannis.georgiou@bull.net> +# +# SYNOPSIS: +# X_AC_OFED +# +# DESCRIPTION: +# Determine if the OFED related libraries exist +##***************************************************************************** + +AC_DEFUN([X_AC_OFED], +[ + _x_ac_ofed_dirs="/usr /usr/local" + _x_ac_ofed_libs="lib64 lib" + + AC_ARG_WITH( + [ofed], + AS_HELP_STRING(--with-ofed=PATH,Specify path to ofed installation), + [_x_ac_ofed_dirs="$withval $_x_ac_ofed_dirs"]) + + AC_CACHE_CHECK( + [for ofed installation], + [x_ac_cv_ofed_dir], + [ + for d in $_x_ac_ofed_dirs; do + test -d "$d" || continue + test -d "$d/include/infiniband" || continue + test -f "$d/include/infiniband/mad.h" || continue + for bit in $_x_ac_ofed_libs; do + test -d "$d/$bit" || continue + _x_ac_ofed_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_ofed_libs_save="$LIBS" + LIBS="-L$d/$bit -libmad -libumad $LIBS" + AC_LINK_IFELSE( + [AC_LANG_CALL([], mad_rpc_open_port)], + AS_VAR_SET(x_ac_cv_ofed_dir, $d), []) + AC_LINK_IFELSE( + [AC_LANG_CALL([], pma_query_via)], + [have_pma_query_via=yes], + 
[AC_MSG_RESULT(Using old libmad)]) + CPPFLAGS="$_x_ac_ofed_cppflags_save" + LIBS="$_x_ac_ofed_libs_save" + test -n "$x_ac_cv_ofed_dir" && break + done + test -n "$x_ac_cv_ofed_dir" && break + done + ]) + + if test -z "$x_ac_cv_ofed_dir"; then + AC_MSG_WARN([unable to locate ofed installation]) + else + OFED_CPPFLAGS="-I$x_ac_cv_ofed_dir/include/infiniband" + OFED_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_ofed_dir/$bit -L$x_ac_cv_ofed_dir/$bit" + OFED_LIBS="-libmad -libumad" + AC_DEFINE(HAVE_OFED, 1, [Define to 1 if ofed library found]) + if test ! -z "$have_pma_query_via" ; then + AC_DEFINE(HAVE_OFED_PMA_QUERY_VIA, 1, [Define to 1 if using code with pma_query_via]) + fi + fi + + AC_SUBST(OFED_LIBS) + AC_SUBST(OFED_CPPFLAGS) + AC_SUBST(OFED_LDFLAGS) + AM_CONDITIONAL(BUILD_OFED, test -n "$x_ac_cv_ofed_dir") + +]) + diff --git a/auxdir/x_ac_rfc5424_time.m4 b/auxdir/x_ac_rfc5424_time.m4 index 010cec41a7df76f6e452907591f3b5499ef51a3f..4a31cafa4de208bc895f0967ad16b40cf1deec2c 100644 --- a/auxdir/x_ac_rfc5424_time.m4 +++ b/auxdir/x_ac_rfc5424_time.m4 @@ -7,13 +7,14 @@ # # DESCRIPTION: # Test for RFC 5424 compliant time support. 
+# Test for time stamp resolution to the millisecond (default) or second ##***************************************************************************** AC_DEFUN([X_AC_RFC5424_TIME], [ AC_MSG_CHECKING([whether to enable RFC 5424 time format support]) AC_ARG_ENABLE( [rfc5424time], - AS_HELP_STRING(--disable-rfc5424time, disable RFC 5424 time format support), + AS_HELP_STRING(--enable-rfc5424time, enable RFC 5424 time format support), [ case "$enableval" in yes) x_ac_rfc5424time=yes ;; no) x_ac_rfc5424time=no ;; @@ -21,7 +22,7 @@ AC_DEFUN([X_AC_RFC5424_TIME], [ AC_MSG_ERROR([bad value "$enableval" for --enable-rfc5424time]) ;; esac ], - [x_ac_rfc5424time=yes] + [x_ac_rfc5424time=no] ) if test "$x_ac_rfc5424time" = yes; then @@ -30,5 +31,26 @@ AC_DEFUN([X_AC_RFC5424_TIME], [ else AC_MSG_RESULT([no]) fi + + AC_MSG_CHECKING([log timestamps to millisecond resolution]) + AC_ARG_ENABLE( + [log-time-msec], + AS_HELP_STRING(--disable-log-time-msec, log timestamps to millisecond resolution), + [ case "$enableval" in + yes) x_ac_log_time_msec=yes ;; + no) x_ac_log_time_msec=no ;; + *) AC_MSG_RESULT([doh!]) + AC_MSG_ERROR([bad value "$enableval" for --disable-log-time-msec]) ;; + esac + ], + [x_ac_log_time_msec=yes] + ) + + if test "$x_ac_log_time_msec" = yes; then + AC_MSG_RESULT([yes]) + AC_DEFINE(LOG_TIME_MSEC,,[log timestamps to millisecond resolution]) + else + AC_MSG_RESULT([no]) + fi ]) diff --git a/auxdir/x_ac_rrdtool.m4 b/auxdir/x_ac_rrdtool.m4 new file mode 100644 index 0000000000000000000000000000000000000000..923fe590244ccced0e8df58e41707e59fc4fb631 --- /dev/null +++ b/auxdir/x_ac_rrdtool.m4 @@ -0,0 +1,66 @@ +##***************************************************************************** +# AUTHOR: +# Written by Bull- Thomas Cadeau +# +# SYNOPSIS: +# X_AC_RRDTOOL +# +# DESCRIPTION: +# Determine if the RRDTOOL libraries exists +##***************************************************************************** + +AC_DEFUN([X_AC_RRDTOOL], +[ + _x_ac_rrdtool_dirs="/usr 
/usr/local" + _x_ac_rrdtool_libs="lib64 lib" + + AC_ARG_WITH([rrdtool], + AS_HELP_STRING(--with-rrdtool=PATH, + Specify path to rrdtool-devel installation), + [_x_ac_rrdtool_dirs="$withval $_x_ac_rrdtool_dirs"], + [with_rrdtool=check]) + + echo with rrdtool $with_rrdtool + echo without rrdtool $without_rrdtool + AS_IF([test "x$with_rrdtool" != "xno"], + [AC_CACHE_CHECK( + [for rrdtool installation], + [x_ac_cv_rrdtool_dir], + [ + for d in $_x_ac_rrdtool_dirs; do + test -d "$d" || continue + test -d "$d/include" || continue + test -f "$d/include/rrd.h" || continue + for bit in $_x_ac_rrdtool_libs; do + test -d "$d/$bit" || continue + _x_ac_rrdtool_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_rrdtool_libs_save="$LIBS" + LIBS="-L$d/$bit -lrrd $LIBS" + AC_TRY_LINK([#include <rrd.h>], + [rrd_value_t *rrd_data;] [rrd_info_t *rrd_info;] [ rrd_test_error();], + AS_VAR_SET(x_ac_cv_rrdtool_dir, $d), + []) + CPPFLAGS="$_x_ac_rrdtool_cppflags_save" + LIBS="$_x_ac_rrdtool_libs_save" + test -n "$x_ac_cv_rrdtool_dir" && break + done + test -n "$x_ac_cv_rrdtool_dir" && break + done + ]) + ]) + + echo x_ac_cv_rrdtool_dir $x_ac_cv_rrdtool_dir + if test -z "$x_ac_cv_rrdtool_dir"; then + AC_MSG_WARN([unable to locate rrdtool installation]) + else + RRDTOOL_CPPFLAGS="-I$x_ac_cv_rrdtool_dir/include" + RRDTOOL_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_rrdtool_dir/$bit -L$x_ac_cv_rrdtool_dir/$bit" + RRDTOOL_LIBS="-lrrd" + fi + + AC_SUBST(RRDTOOL_LIBS) + AC_SUBST(RRDTOOL_CPPFLAGS) + AC_SUBST(RRDTOOL_LDFLAGS) + AM_CONDITIONAL(BUILD_RRD, test -n "$x_ac_cv_rrdtool_dir") +]) diff --git a/config.h.in b/config.h.in index 7fd6249115ef3509c95af170dc1a6ee9e7bc423d..ad63a464f80adc431673b27bb84509a79193c832 100644 --- a/config.h.in +++ b/config.h.in @@ -36,6 +36,9 @@ /* Define to 1 if using gtk+-2.12.0 or higher */ #undef GTK2_USE_TOOLTIP +/* Make sure we get the 1.8 HDF5 API */ +#undef H5_NO_DEPRECATED_SYMBOLS + /* Define to 1 if 3-dimensional architecture */ #undef HAVE_3D @@ -118,9 
+121,15 @@ /* Define to 1 if you have the <float.h> header file. */ #undef HAVE_FLOAT_H +/* Define to 1 if freeipmi library found */ +#undef HAVE_FREEIPMI + /* Define to 1 if running slurmd on front-end only */ #undef HAVE_FRONT_END +/* Defined if you have HDF5 support */ +#undef HAVE_HDF5 + /* Define to 1 if you have the `hstrerror' function. */ #undef HAVE_HSTRERROR @@ -185,6 +194,12 @@ /* define if numa library installed */ #undef HAVE_NUMA +/* Define to 1 if ofed library found */ +#undef HAVE_OFED + +/* Define to 1 if using code with pma_query_via */ +#undef HAVE_OFED_PMA_QUERY_VIA + /* define if you have openssl. */ #undef HAVE_OPENSSL @@ -363,6 +378,9 @@ /* Define to 1 for --get-user-env to load user environment without .login */ #undef LOAD_ENV_NO_LOGIN +/* log timestamps to millisecond resolution */ +#undef LOG_TIME_MSEC + /* Define to the sub-directory in which libtool stores uninstalled libraries. */ #undef LT_OBJDIR @@ -509,12 +527,6 @@ /* Version number of package */ #undef VERSION -/* Have definition of lsd_fatal_error() */ -#undef WITH_LSD_FATAL_ERROR_FUNC - -/* Have definition of lsd_nomem_error() */ -#undef WITH_LSD_NOMEM_ERROR_FUNC - /* Define if you have pthreads. 
*/ #undef WITH_PTHREADS diff --git a/configure b/configure index c8b2420c51369e35bd701a238bfa93754d0726a7..8eb67d6602e22204896275f267128371bc5a4a4c 100755 --- a/configure +++ b/configure @@ -673,6 +673,8 @@ HAVE_NRT_FALSE HAVE_NRT_TRUE NRT_CPPFLAGS SLURM_PREFIX +SLURM_ENABLE_DYNAMIC_ALLOCATION_FALSE +SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE SLURMCTLD_PORT_COUNT SLURMDBD_PORT SLURMD_PORT @@ -707,15 +709,47 @@ GOBJECT_QUERY GLIB_GENMARSHAL GLIB_LIBS GLIB_CFLAGS +HAVE_CHECK_FALSE +HAVE_CHECK_TRUE +CHECK_LIBS +CHECK_CFLAGS HAVE_SOME_CURSES HAVE_SOME_CURSES_FALSE HAVE_SOME_CURSES_TRUE NCURSES +BUILD_RRD_FALSE +BUILD_RRD_TRUE +RRDTOOL_LDFLAGS +RRDTOOL_CPPFLAGS +RRDTOOL_LIBS SEMAPHORE_LIBS SEMAPHORE_SOURCES +BUILD_IPMI_FALSE +BUILD_IPMI_TRUE +FREEIPMI_LDFLAGS +FREEIPMI_CPPFLAGS +FREEIPMI_LIBS HWLOC_LDFLAGS HWLOC_CPPFLAGS HWLOC_LIBS +BUILD_HDF5_FALSE +BUILD_HDF5_TRUE +HDF5_FLIBS +HDF5_FFLAGS +HDF5_FC +HDF5_LIBS +HDF5_LDFLAGS +HDF5_CPPFLAGS +HDF5_CFLAGS +HDF5_CC +HDF5_VERSION +H5FC +H5CC +BUILD_OFED_FALSE +BUILD_OFED_TRUE +OFED_LDFLAGS +OFED_CPPFLAGS +OFED_LIBS PTHREAD_CFLAGS PTHREAD_LIBS PTHREAD_CC @@ -932,12 +966,17 @@ with_cpusetdir enable_pam with_pam_dir enable_rfc5424time +enable_log_time_msec enable_iso8601 enable_load_env_no_login enable_sun_const with_dimensions +with_ofed +with_hdf5 with_hwloc +with_freeipmi with_xcpu +with_rrdtool enable_glibtest enable_gtktest with_mysql_config @@ -954,6 +993,7 @@ with_slurmctld_port with_slurmd_port with_slurmdbd_port with_slurmctld_port_count +enable_dynamic_allocation with_nrth with_libnrt with_readline @@ -978,6 +1018,8 @@ CXXCPP PKG_CONFIG PKG_CONFIG_PATH PKG_CONFIG_LIBDIR +CHECK_CFLAGS +CHECK_LIBS lua_CFLAGS lua_LIBS' @@ -1614,7 +1656,8 @@ Optional Features: --disable-libtool-lock avoid locking (might break parallel builds) --enable-pam enable PAM (Pluggable Authentication Modules) support - --disable-rfc5424time disable RFC 5424 time format support + --enable-rfc5424time enable RFC 5424 time format support + 
--disable-log-time-msec log timestamps to millisecond resolution --disable-iso8601 disable ISO 8601 time format support --enable-load-env-no-login enable --get-user-env option to load user @@ -1634,6 +1677,8 @@ Optional Features: termination --disable-salloc-background disable salloc execution in the background + --enable-dynamic-allocation, enable dynamic allocation requests from user programs (disabled) + --enable-multiple-slurmd enable multiple-slurmd support @@ -1655,8 +1700,12 @@ Optional Packages: --with-pam_dir=PATH Specify path to PAM module installation --with-dimensions=N set system dimension count for generic computer system + --with-ofed=PATH Specify path to ofed installation + --with-hdf5=yes/no/PATH location of h5cc or h5pcc for HDF5 configuration --with-hwloc=PATH Specify path to hwloc installation + --with-freeipmi=PATH Specify path to freeipmi installation --with-xcpu=PATH specify path to XCPU directory + --with-rrdtool=PATH Specify path to rrdtool-devel installation --with-mysql_config=PATH Specify path to mysql_config binary --with-pg_config=PATH Specify path to pg_config binary @@ -1691,6 +1740,9 @@ Some influential environment variables: directories to add to pkg-config's search path PKG_CONFIG_LIBDIR path overriding pkg-config's built-in search path + CHECK_CFLAGS + C compiler flags for CHECK, overriding pkg-config + CHECK_LIBS linker flags for CHECK, overriding pkg-config lua_CFLAGS C compiler flags for lua, overriding pkg-config lua_LIBS linker flags for lua, overriding pkg-config @@ -5263,7 +5315,7 @@ $as_echo "$as_me: Running in BG/Q emulation mode" >&6;} # Search for headers in the directory if test -z "$have_bg_hdr" -a -f "$bg_dir/hlcs/include/bgsched/bgsched.h" ; then have_bgq_hdr=yes - bg_includes="-I$bg_dir/hlcs/include" + bg_includes="-I$bg_dir -I$bg_dir/hlcs/include" fi if test -z "$have_bg_hdr" -a -f "$bg_dir/extlib/include/log4cxx/logger.h" ; then have_bgq_hdr=yes @@ -7940,7 +7992,8 @@ else ;; *) lt_cv_sys_max_cmd_len=`(getconf 
ARG_MAX) 2> /dev/null` - if test -n "$lt_cv_sys_max_cmd_len"; then + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else @@ -8341,10 +8394,6 @@ freebsd* | dragonfly*) fi ;; -gnu*) - lt_cv_deplibs_check_method=pass_all - ;; - haiku*) lt_cv_deplibs_check_method=pass_all ;; @@ -8383,7 +8432,7 @@ irix5* | irix6* | nonstopux*) ;; # This must be glibc/ELF. -linux* | k*bsd*-gnu | kopensolaris*-gnu) +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) lt_cv_deplibs_check_method=pass_all ;; @@ -9480,7 +9529,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - LD="${LD-ld} -m elf_i386" + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" @@ -11052,7 +11108,7 @@ lt_prog_compiler_static= lt_prog_compiler_static='-non_shared' ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in # old Intel for x86_64 which still supported -KPIC. ecc*) @@ -13222,17 +13278,6 @@ freebsd* | dragonfly*) esac ;; -gnu*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no @@ -13349,7 +13394,7 @@ linux*oldld* | linux*aout* | linux*coff*) ;; # This must be glibc/ELF. 
-linux* | k*bsd*-gnu | kopensolaris*-gnu) +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no @@ -15172,9 +15217,6 @@ fi ld_shlibs_CXX=yes ;; - gnu*) - ;; - haiku*) archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' link_all_deplibs_CXX=yes @@ -15336,7 +15378,7 @@ fi inherit_rpath_CXX=yes ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler @@ -16196,7 +16238,7 @@ lt_prog_compiler_static_CXX= ;; esac ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # KAI C++ Compiler @@ -17068,17 +17110,6 @@ freebsd* | dragonfly*) esac ;; -gnu*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no @@ -17195,7 +17226,7 @@ linux*oldld* | linux*aout* | linux*coff*) ;; # This must be glibc/ELF. 
-linux* | k*bsd*-gnu | kopensolaris*-gnu) +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no @@ -17963,12 +17994,12 @@ fi for ac_header in mcheck.h values.h socket.h sys/socket.h \ - stdbool.h sys/ipc.h sys/shm.h sys/sem.h errno.h \ - stdlib.h dirent.h pthread.h sys/prctl.h \ - sysint.h inttypes.h termcap.h netdb.h sys/socket.h \ - sys/systemcfg.h ncurses.h curses.h sys/dr.h sys/vfs.h \ - pam/pam_appl.h security/pam_appl.h sys/sysctl.h \ - pty.h utmp.h \ + stdbool.h sys/ipc.h sys/shm.h sys/sem.h errno.h \ + stdlib.h dirent.h pthread.h sys/prctl.h \ + sysint.h inttypes.h termcap.h netdb.h sys/socket.h \ + sys/systemcfg.h ncurses.h curses.h sys/dr.h sys/vfs.h \ + pam/pam_appl.h security/pam_appl.h sys/sysctl.h \ + pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ sys/termios.h float.h @@ -18330,6 +18361,7 @@ $as_echo_n "checking for program_invocation_name... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ +#include <stdio.h> extern char *program_invocation_name; int main () @@ -18732,7 +18764,7 @@ $as_echo "doh!" >&6; } esac else - x_ac_rfc5424time=yes + x_ac_rfc5424time=no fi @@ -18748,6 +18780,35 @@ $as_echo "#define USE_RFC5424_TIME /**/" >>confdefs.h $as_echo "no" >&6; } fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking log timestamps to millisecond resolution" >&5 +$as_echo_n "checking log timestamps to millisecond resolution... " >&6; } + # Check whether --enable-log-time-msec was given. +if test "${enable_log_time_msec+set}" = set; then : + enableval=$enable_log_time_msec; case "$enableval" in + yes) x_ac_log_time_msec=yes ;; + no) x_ac_log_time_msec=no ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: doh!" >&5 +$as_echo "doh!" >&6; } + as_fn_error $? 
"bad value \"$enableval\" for --disable-log-time-msec" "$LINENO" 5 ;; + esac + +else + x_ac_log_time_msec=yes + +fi + + + if test "$x_ac_log_time_msec" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +$as_echo "#define LOG_TIME_MSEC /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable ISO 8601 time format support" >&5 @@ -19723,6 +19784,450 @@ $as_echo "not set" >&6; }; fi + _x_ac_ofed_dirs="/usr /usr/local" + _x_ac_ofed_libs="lib64 lib" + + +# Check whether --with-ofed was given. +if test "${with_ofed+set}" = set; then : + withval=$with_ofed; _x_ac_ofed_dirs="$withval $_x_ac_ofed_dirs" +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ofed installation" >&5 +$as_echo_n "checking for ofed installation... " >&6; } +if ${x_ac_cv_ofed_dir+:} false; then : + $as_echo_n "(cached) " >&6 +else + + for d in $_x_ac_ofed_dirs; do + test -d "$d" || continue + test -d "$d/include/infiniband" || continue + test -f "$d/include/infiniband/mad.h" || continue + for bit in $_x_ac_ofed_libs; do + test -d "$d/$bit" || continue + _x_ac_ofed_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_ofed_libs_save="$LIBS" + LIBS="-L$d/$bit -libmad -libumad $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char mad_rpc_open_port (); +int +main () +{ +return mad_rpc_open_port (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + x_ac_cv_ofed_dir=$d +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pma_query_via (); +int +main () +{ +return pma_query_via (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + have_pma_query_via=yes +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using old libmad" >&5 +$as_echo "Using old libmad" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CPPFLAGS="$_x_ac_ofed_cppflags_save" + LIBS="$_x_ac_ofed_libs_save" + test -n "$x_ac_cv_ofed_dir" && break + done + test -n "$x_ac_cv_ofed_dir" && break + done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $x_ac_cv_ofed_dir" >&5 +$as_echo "$x_ac_cv_ofed_dir" >&6; } + + if test -z "$x_ac_cv_ofed_dir"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unable to locate ofed installation" >&5 +$as_echo "$as_me: WARNING: unable to locate ofed installation" >&2;} + else + OFED_CPPFLAGS="-I$x_ac_cv_ofed_dir/include/infiniband" + OFED_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_ofed_dir/$bit -L$x_ac_cv_ofed_dir/$bit" + OFED_LIBS="-libmad -libumad" + +$as_echo "#define HAVE_OFED 1" >>confdefs.h + + if test ! -z "$have_pma_query_via" ; then + +$as_echo "#define HAVE_OFED_PMA_QUERY_VIA 1" >>confdefs.h + + fi + fi + + + + + if test -n "$x_ac_cv_ofed_dir"; then + BUILD_OFED_TRUE= + BUILD_OFED_FALSE='#' +else + BUILD_OFED_TRUE='#' + BUILD_OFED_FALSE= +fi + + + + + + + + + + +if test "" = "" ; then + : # Recognized value +elif test "" = "serial" ; then + : # Recognized value +elif test "" = "parallel"; then + : # Recognized value +else + as_fn_error $? " +Unrecognized value for AX_LIB_HDF5 within configure.ac. +If supplied, argument 1 must be either 'serial' or 'parallel'. +" "$LINENO" 5 +fi + + +# Check whether --with-hdf5 was given. 
+if test "${with_hdf5+set}" = set; then : + withval=$with_hdf5; if test "$withval" = "no"; then + with_hdf5="no" + elif test "$withval" = "yes"; then + with_hdf5="yes" + else + with_hdf5="yes" + H5CC="$withval" + fi +else + with_hdf5="yes" + +fi + + +HDF5_CC="" +HDF5_VERSION="" +HDF5_CFLAGS="" +HDF5_CPPFLAGS="" +HDF5_LDFLAGS="" +HDF5_LIBS="" +HDF5_FC="" +HDF5_FFLAGS="" +HDF5_FLIBS="" + +if test "$with_hdf5" = "yes"; then + if test -z "$H5CC"; then + for ac_prog in h5cc h5pcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_H5CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $H5CC in + [\\/]* | ?:[\\/]*) + ac_cv_path_H5CC="$H5CC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_H5CC="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +H5CC=$ac_cv_path_H5CC +if test -n "$H5CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $H5CC" >&5 +$as_echo "$H5CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$H5CC" && break +done + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking Using provided HDF5 C wrapper" >&5 +$as_echo_n "checking Using provided HDF5 C wrapper... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $H5CC" >&5 +$as_echo "$H5CC" >&6; } + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HDF5 libraries" >&5 +$as_echo_n "checking for HDF5 libraries... " >&6; } + if test ! 
-f "$H5CC" || test ! -x "$H5CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +Unable to locate HDF5 compilation helper scripts 'h5cc' or 'h5pcc'. +Please specify --with-hdf5=<LOCATION> as the full path to h5cc or h5pcc. +HDF5 support is being disabled (equivalent to --with-hdf5=no). +" >&5 +$as_echo "$as_me: WARNING: +Unable to locate HDF5 compilation helper scripts 'h5cc' or 'h5pcc'. +Please specify --with-hdf5=<LOCATION> as the full path to h5cc or h5pcc. +HDF5 support is being disabled (equivalent to --with-hdf5=no). +" >&2;} + with_hdf5="no" + with_hdf5_fortran="no" + else + HDF5_SHOW=$(eval $H5CC -show) + + HDF5_CC=$(eval $H5CC -show | $AWK '{print $1}') + + + HDF5_VERSION=$(eval $H5CC -showconfig | $GREP 'HDF5 Version:' \ + | $AWK '{print $3}') + + HDF5_tmp_flags=$(eval $H5CC -showconfig \ + | $GREP 'FLAGS\|Extra libraries:' \ + | $AWK -F: '{printf("%s "), $2}' ) + + HDF5_tmp_inst=$(eval $H5CC -showconfig \ + | $GREP 'Installation point:' \ + | $AWK -F: '{print $2}' ) + + HDF5_CPPFLAGS="-I${HDF5_tmp_inst}/include" + + for arg in $HDF5_SHOW $HDF5_tmp_flags ; do + case "$arg" in + -I*) echo $HDF5_CPPFLAGS | $GREP -e "$arg" 2>&1 >/dev/null \ + || HDF5_CPPFLAGS="$arg $HDF5_CPPFLAGS" + ;; + -L*) echo $HDF5_LDFLAGS | $GREP -e "$arg" 2>&1 >/dev/null \ + || HDF5_LDFLAGS="$arg $HDF5_LDFLAGS" + ;; + -l*) echo $HDF5_LIBS | $GREP -e "$arg" 2>&1 >/dev/null \ + || HDF5_LIBS="$arg $HDF5_LIBS" + ;; + esac + done + + HDF5_LIBS="$HDF5_LIBS -lhdf5" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (version $HDF5_VERSION)" >&5 +$as_echo "yes (version $HDF5_VERSION)" >&6; } + + ax_lib_hdf5_save_CC=$CC + ax_lib_hdf5_save_CPPFLAGS=$CPPFLAGS + ax_lib_hdf5_save_LIBS=$LIBS + ax_lib_hdf5_save_LDFLAGS=$LDFLAGS + CC=$HDF5_CC + CPPFLAGS=$HDF5_CPPFLAGS + LIBS=$HDF5_LIBS + LDFLAGS=$HDF5_LDFLAGS + ac_fn_c_check_header_mongrel "$LINENO" "hdf5.h" "ac_cv_header_hdf5_h" "$ac_includes_default" +if 
test "x$ac_cv_header_hdf5_h" = xyes; then : + ac_cv_hadf5_h=yes +else + ac_cv_hadf5_h=no +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for H5Fcreate in -lhdf5" >&5 +$as_echo_n "checking for H5Fcreate in -lhdf5... " >&6; } +if ${ac_cv_lib_hdf5_H5Fcreate+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhdf5 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char H5Fcreate (); +int +main () +{ +return H5Fcreate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_hdf5_H5Fcreate=yes +else + ac_cv_lib_hdf5_H5Fcreate=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hdf5_H5Fcreate" >&5 +$as_echo "$ac_cv_lib_hdf5_H5Fcreate" >&6; } +if test "x$ac_cv_lib_hdf5_H5Fcreate" = xyes; then : + ac_cv_libhdf5=yes +else + ac_cv_libhdf5=no +fi + + if test "$ac_cv_hadf5_h" = "no" && test "$ac_cv_libhdf5" = "no" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to compile HDF5 test program" >&5 +$as_echo "$as_me: WARNING: Unable to compile HDF5 test program" >&2;} + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lhdf5_hl" >&5 +$as_echo_n "checking for main in -lhdf5_hl... " >&6; } +if ${ac_cv_lib_hdf5_hl_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhdf5_hl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_hdf5_hl_main=yes +else + ac_cv_lib_hdf5_hl_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hdf5_hl_main" >&5 +$as_echo "$ac_cv_lib_hdf5_hl_main" >&6; } +if test "x$ac_cv_lib_hdf5_hl_main" = xyes; then : + HDF5_LIBS="$HDF5_LIBS -lhdf5_hl" +fi +ac_cv_lib_hdf5_hl=ac_cv_lib_hdf5_hl_main + + + CC=$ax_lib_hdf5_save_CC + LIBS=$ax_lib_hdf5_save_LIBS + LDFLAGS=$ax_lib_hdf5_save_LDFLAGS + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for matching HDF5 Fortran wrapper" >&5 +$as_echo_n "checking for matching HDF5 Fortran wrapper... " >&6; } + H5FC=$(eval echo -n $H5CC | $SED -n 's/cc$/fc/p') + if test -x "$H5FC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $H5FC" >&5 +$as_echo "$H5FC" >&6; } + with_hdf5_fortran="yes" + + + for arg in `$H5FC -show` + do + case "$arg" in #( + -I*) echo $HDF5_FFLAGS | $GREP -e "$arg" >/dev/null \ + || HDF5_FFLAGS="$arg $HDF5_FFLAGS" + ;;#( + -L*) echo $HDF5_FFLAGS | $GREP -e "$arg" >/dev/null \ + || HDF5_FFLAGS="$arg $HDF5_FFLAGS" + echo $HDF5_FFLAGS | $GREP -e "-I${arg#-L}" >/dev/null \ + || HDF5_FFLAGS="-I${arg#-L} $HDF5_FFLAGS" + ;; + esac + done + + for arg in $HDF5_LIBS + do + case "$arg" in #( + -lhdf5_hl) HDF5_FLIBS="$HDF5_FLIBS -lhdf5hl_fortran $arg" + ;; #( + -lhdf5) HDF5_FLIBS="$HDF5_FLIBS -lhdf5_fortran $arg" + ;; #( + *) HDF5_FLIBS="$HDF5_FLIBS $arg" + ;; + esac + done + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + with_hdf5_fortran="no" + fi + + + + + + + + + + + +$as_echo "#define HAVE_HDF5 1" >>confdefs.h + + fi +fi + + if test "$with_hdf5" = "yes"; then + BUILD_HDF5_TRUE= + BUILD_HDF5_FALSE='#' +else + BUILD_HDF5_TRUE='#' + BUILD_HDF5_FALSE= +fi + +# Some older systems (Debian/Ubuntu/...) 
configure HDF5 with +# --with-default-api-version=v16 which creates problems for slurm +# because slurm uses the 1.8 API. By defining this CPP macro we get +# the 1.8 API. + +$as_echo "#define H5_NO_DEPRECATED_SYMBOLS 1" >>confdefs.h + + + _x_ac_hwloc_dirs="/usr /usr/local" _x_ac_hwloc_libs="lib64 lib" x_ac_cv_hwloc_pci="no" @@ -19822,6 +20327,88 @@ $as_echo "#define HAVE_HWLOC_PCI 1" >>confdefs.h + _x_ac_freeipmi_dirs="/usr /usr/local" + _x_ac_freeipmi_libs="lib64 lib" + + +# Check whether --with-freeipmi was given. +if test "${with_freeipmi+set}" = set; then : + withval=$with_freeipmi; _x_ac_freeipmi_dirs="$withval $_x_ac_freeipmi_dirs" +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for freeipmi installation" >&5 +$as_echo_n "checking for freeipmi installation... " >&6; } +if ${x_ac_cv_freeipmi_dir+:} false; then : + $as_echo_n "(cached) " >&6 +else + + for d in $_x_ac_freeipmi_dirs; do + test -d "$d" || continue + test -d "$d/include" || continue + test -f "$d/include/ipmi_monitoring.h" || continue + for bit in $_x_ac_freeipmi_libs; do + test -d "$d/$bit" || continue + _x_ac_freeipmi_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_freeipmi_libs_save="$LIBS" + LIBS="-L$d/$bit -lipmimonitoring $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <ipmi_monitoring.h> +#include <ipmi_monitoring_bitmasks.h> +int +main () +{ +int err; +unsigned int flag = 0; +return ipmi_monitoring_init (flag, &err); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + x_ac_cv_freeipmi_dir=$d +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CPPFLAGS="$_x_ac_freeipmi_cppflags_save" + LIBS="$_x_ac_freeipmi_libs_save" + test -n "$x_ac_cv_freeipmi_dir" && break + done + test -n "$x_ac_cv_freeipmi_dir" && break + done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $x_ac_cv_freeipmi_dir" >&5 +$as_echo "$x_ac_cv_freeipmi_dir" >&6; } + + if test -z "$x_ac_cv_freeipmi_dir"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unable to locate freeipmi installation" >&5 +$as_echo "$as_me: WARNING: unable to locate freeipmi installation" >&2;} + else + FREEIPMI_CPPFLAGS="-I$x_ac_cv_freeipmi_dir/include" + FREEIPMI_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_freeipmi_dir/$bit -L$x_ac_cv_freeipmi_dir/$bit" + FREEIPMI_LIBS="-lipmimonitoring" + +$as_echo "#define HAVE_FREEIPMI 1" >>confdefs.h + + fi + + + + + if test -n "$x_ac_cv_freeipmi_dir"; then + BUILD_IPMI_TRUE= + BUILD_IPMI_FALSE='#' +else + BUILD_IPMI_TRUE='#' + BUILD_IPMI_FALSE= +fi + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether XCPU is enabled" >&5 $as_echo_n "checking whether XCPU is enabled... " >&6; } @@ -19910,6 +20497,89 @@ fi + _x_ac_rrdtool_dirs="/usr /usr/local" + _x_ac_rrdtool_libs="lib64 lib" + + +# Check whether --with-rrdtool was given. +if test "${with_rrdtool+set}" = set; then : + withval=$with_rrdtool; _x_ac_rrdtool_dirs="$withval $_x_ac_rrdtool_dirs" +else + with_rrdtool=check +fi + + + echo with rrdtool $with_rrdtool + echo without rrdtool $without_rrdtool + if test "x$with_rrdtool" != "xno"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for rrdtool installation" >&5 +$as_echo_n "checking for rrdtool installation... 
" >&6; } +if ${x_ac_cv_rrdtool_dir+:} false; then : + $as_echo_n "(cached) " >&6 +else + + for d in $_x_ac_rrdtool_dirs; do + test -d "$d" || continue + test -d "$d/include" || continue + test -f "$d/include/rrd.h" || continue + for bit in $_x_ac_rrdtool_libs; do + test -d "$d/$bit" || continue + _x_ac_rrdtool_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_rrdtool_libs_save="$LIBS" + LIBS="-L$d/$bit -lrrd $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <rrd.h> +int +main () +{ +rrd_value_t *rrd_data; rrd_info_t *rrd_info; rrd_test_error(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + x_ac_cv_rrdtool_dir=$d +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CPPFLAGS="$_x_ac_rrdtool_cppflags_save" + LIBS="$_x_ac_rrdtool_libs_save" + test -n "$x_ac_cv_rrdtool_dir" && break + done + test -n "$x_ac_cv_rrdtool_dir" && break + done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $x_ac_cv_rrdtool_dir" >&5 +$as_echo "$x_ac_cv_rrdtool_dir" >&6; } + +fi + + echo x_ac_cv_rrdtool_dir $x_ac_cv_rrdtool_dir + if test -z "$x_ac_cv_rrdtool_dir"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unable to locate rrdtool installation" >&5 +$as_echo "$as_me: WARNING: unable to locate rrdtool installation" >&2;} + else + RRDTOOL_CPPFLAGS="-I$x_ac_cv_rrdtool_dir/include" + RRDTOOL_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_rrdtool_dir/$bit -L$x_ac_cv_rrdtool_dir/$bit" + RRDTOOL_LIBS="-lrrd" + fi + + + + + if test -n "$x_ac_cv_rrdtool_dir"; then + BUILD_RRD_TRUE= + BUILD_RRD_FALSE='#' +else + BUILD_RRD_TRUE='#' + BUILD_RRD_FALSE= +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for initscr in -lncurses" >&5 $as_echo_n "checking for initscr in -lncurses... " >&6; } @@ -20048,6 +20718,90 @@ fi +# +# Tests for Check +# + + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for CHECK" >&5 +$as_echo_n "checking for CHECK... 
" >&6; } + +if test -n "$CHECK_CFLAGS"; then + pkg_cv_CHECK_CFLAGS="$CHECK_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"check >= 0.9.8\""; } >&5 + ($PKG_CONFIG --exists --print-errors "check >= 0.9.8") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_CHECK_CFLAGS=`$PKG_CONFIG --cflags "check >= 0.9.8" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$CHECK_LIBS"; then + pkg_cv_CHECK_LIBS="$CHECK_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"check >= 0.9.8\""; } >&5 + ($PKG_CONFIG --exists --print-errors "check >= 0.9.8") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_CHECK_LIBS=`$PKG_CONFIG --libs "check >= 0.9.8" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + CHECK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "check >= 0.9.8" 2>&1` + else + CHECK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "check >= 0.9.8" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$CHECK_PKG_ERRORS" >&5 + + ac_have_check="no" +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + ac_have_check="no" +else + CHECK_CFLAGS=$pkg_cv_CHECK_CFLAGS + CHECK_LIBS=$pkg_cv_CHECK_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + ac_have_check="yes" +fi + if test "x$ac_have_check" = "xyes"; then + HAVE_CHECK_TRUE= + HAVE_CHECK_FALSE='#' +else + HAVE_CHECK_TRUE='#' + HAVE_CHECK_FALSE= +fi + + # # Tests for GTK+ # @@ -21629,6 +22383,31 @@ _ACEOF + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dynamic allocation port to be enabled" >&5 +$as_echo_n "checking for dynamic allocation port to be enabled... " >&6; } + # Check whether --enable-dynamic-allocation was given. 
+if test "${enable_dynamic_allocation+set}" = set; then : + enableval=$enable_dynamic_allocation; +fi + + if test "$enable_dynamic_allocation" = "yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + slurm_enable_dynamic_allocation="yes" + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + slurm_enable_dynamic_allocation="no" + fi + if test "$slurm_enable_dynamic_allocation" = "yes"; then + SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE= + SLURM_ENABLE_DYNAMIC_ALLOCATION_FALSE='#' +else + SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE='#' + SLURM_ENABLE_DYNAMIC_ALLOCATION_FALSE= +fi + + if test "x$prefix" = "xNONE" ; then @@ -22495,13 +23274,6 @@ fi LIBS="$savedLIBS" -$as_echo "#define WITH_LSD_FATAL_ERROR_FUNC 1" >>confdefs.h - - -$as_echo "#define WITH_LSD_NOMEM_ERROR_FUNC 1" >>confdefs.h - - - _x_ac_blcr_dirs="/usr /usr/local /opt/freeware /opt/blcr" _x_ac_blcr_libs="lib64 lib" @@ -22606,7 +23378,7 @@ fi -ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/arrayrun/Makefile contribs/cray/Makefile contribs/cray/opt_modulefiles_slurm contribs/lua/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile doc/Makefile doc/man/Makefile doc/man/man1/Makefile doc/man/man3/Makefile doc/man/man5/Makefile doc/man/man8/Makefile doc/html/Makefile doc/html/configurator.html doc/html/configurator.easy.html etc/init.d.slurm etc/init.d.slurmdbd src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/sshare/Makefile src/salloc/Makefile src/sbatch/Makefile 
src/sattach/Makefile src/sdiag/Makefile src/sprio/Makefile src/srun/Makefile src/srun/libsrun/Makefile src/srun_cr/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/poe/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile src/plugins/gres/mic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/cgroup/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/acct_gather_energy/Makefile src/plugins/acct_gather_energy/rapl/Makefile src/plugins/acct_gather_energy/ipmi/Makefile src/plugins/acct_gather_energy/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile 
src/plugins/job_submit/Makefile src/plugins/job_submit/all_partitions/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/launch/Makefile src/plugins/launch/aprun/Makefile src/plugins/launch/poe/Makefile src/plugins/launch/runjob/Makefile src/plugins/launch/slurm/Makefile src/plugins/preempt/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/priority/multifactor2/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/cray/libalps/Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile src/plugins/select/serial/Makefile src/plugins/switch/Makefile src/plugins/switch/none/Makefile src/plugins/switch/nrt/Makefile src/plugins/switch/nrt/libpermapi/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile 
src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/mpi/pmi2/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile" +ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/cray/Makefile contribs/cray/opt_modulefiles_slurm contribs/lua/Makefile contribs/mic/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile contribs/pmi2/Makefile doc/Makefile doc/man/Makefile doc/man/man1/Makefile doc/man/man3/Makefile doc/man/man5/Makefile doc/man/man8/Makefile doc/html/Makefile doc/html/configurator.html doc/html/configurator.easy.html etc/init.d.slurm etc/init.d.slurmdbd src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/sshare/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/sdiag/Makefile src/sprio/Makefile src/srun/Makefile src/srun/libsrun/Makefile src/srun_cr/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile 
src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/poe/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/ext_sensors/Makefile src/plugins/ext_sensors/rrd/Makefile src/plugins/ext_sensors/none/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile src/plugins/gres/mic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/cgroup/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/acct_gather_energy/Makefile src/plugins/acct_gather_energy/rapl/Makefile src/plugins/acct_gather_energy/ipmi/Makefile src/plugins/acct_gather_energy/none/Makefile src/plugins/acct_gather_infiniband/Makefile src/plugins/acct_gather_infiniband/ofed/Makefile src/plugins/acct_gather_infiniband/none/Makefile src/plugins/acct_gather_filesystem/Makefile src/plugins/acct_gather_filesystem/lustre/Makefile src/plugins/acct_gather_filesystem/none/Makefile src/plugins/acct_gather_profile/Makefile src/plugins/acct_gather_profile/hdf5/Makefile 
src/plugins/acct_gather_profile/hdf5/sh5util/Makefile src/plugins/acct_gather_profile/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/all_partitions/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/job_submit/pbs/Makefile src/plugins/job_submit/require_timelimit/Makefile src/plugins/launch/Makefile src/plugins/launch/aprun/Makefile src/plugins/launch/poe/Makefile src/plugins/launch/runjob/Makefile src/plugins/launch/slurm/Makefile src/plugins/preempt/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/cray/libalps/Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile 
src/plugins/select/serial/Makefile src/plugins/slurmctld/Makefile src/plugins/slurmctld/dynalloc/Makefile src/plugins/switch/Makefile src/plugins/switch/none/Makefile src/plugins/switch/nrt/Makefile src/plugins/switch/nrt/libpermapi/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/mpi/pmi2/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile" cat >confcache <<\_ACEOF @@ -22815,10 +23587,30 @@ if test -z "${HAVE_UNSETENV_TRUE}" && test -z "${HAVE_UNSETENV_FALSE}"; then as_fn_error $? "conditional \"HAVE_UNSETENV\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${BUILD_OFED_TRUE}" && test -z "${BUILD_OFED_FALSE}"; then + as_fn_error $? "conditional \"BUILD_OFED\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${BUILD_HDF5_TRUE}" && test -z "${BUILD_HDF5_FALSE}"; then + as_fn_error $? "conditional \"BUILD_HDF5\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${BUILD_IPMI_TRUE}" && test -z "${BUILD_IPMI_FALSE}"; then + as_fn_error $? "conditional \"BUILD_IPMI\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${BUILD_RRD_TRUE}" && test -z "${BUILD_RRD_FALSE}"; then + as_fn_error $? "conditional \"BUILD_RRD\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${HAVE_SOME_CURSES_TRUE}" && test -z "${HAVE_SOME_CURSES_FALSE}"; then as_fn_error $? "conditional \"HAVE_SOME_CURSES\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_CHECK_TRUE}" && test -z "${HAVE_CHECK_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CHECK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${BUILD_SVIEW_TRUE}" && test -z "${BUILD_SVIEW_FALSE}"; then as_fn_error $? "conditional \"BUILD_SVIEW\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -22851,6 +23643,10 @@ if test -z "${DEBUG_MODULES_TRUE}" && test -z "${DEBUG_MODULES_FALSE}"; then as_fn_error $? "conditional \"DEBUG_MODULES\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE}" && test -z "${SLURM_ENABLE_DYNAMIC_ALLOCATION_FALSE}"; then + as_fn_error $? "conditional \"SLURM_ENABLE_DYNAMIC_ALLOCATION\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${HAVE_NRT_TRUE}" && test -z "${HAVE_NRT_FALSE}"; then as_fn_error $? "conditional \"HAVE_NRT\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 @@ -23866,10 +24662,10 @@ do "config.xml") CONFIG_FILES="$CONFIG_FILES config.xml" ;; "auxdir/Makefile") CONFIG_FILES="$CONFIG_FILES auxdir/Makefile" ;; "contribs/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/Makefile" ;; - "contribs/arrayrun/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/arrayrun/Makefile" ;; "contribs/cray/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/cray/Makefile" ;; "contribs/cray/opt_modulefiles_slurm") CONFIG_FILES="$CONFIG_FILES contribs/cray/opt_modulefiles_slurm" ;; "contribs/lua/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/lua/Makefile" ;; + "contribs/mic/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/mic/Makefile" ;; "contribs/pam/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/pam/Makefile" ;; "contribs/perlapi/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/perlapi/Makefile" ;; "contribs/perlapi/libslurm/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/perlapi/libslurm/Makefile" ;; @@ -23881,6 +24677,7 @@ do "contribs/phpext/slurm_php/config.m4") CONFIG_FILES="$CONFIG_FILES contribs/phpext/slurm_php/config.m4" ;; "contribs/sjobexit/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/sjobexit/Makefile" ;; "contribs/slurmdb-direct/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/slurmdb-direct/Makefile" ;; + "contribs/pmi2/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/pmi2/Makefile" ;; "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; "doc/man/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/Makefile" ;; "doc/man/man1/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/man1/Makefile" ;; @@ -23947,6 +24744,9 @@ do "src/plugins/crypto/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/crypto/Makefile" ;; "src/plugins/crypto/munge/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/crypto/munge/Makefile" ;; "src/plugins/crypto/openssl/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/crypto/openssl/Makefile" ;; + "src/plugins/ext_sensors/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/ext_sensors/Makefile" ;; + 
"src/plugins/ext_sensors/rrd/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/ext_sensors/rrd/Makefile" ;; + "src/plugins/ext_sensors/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/ext_sensors/none/Makefile" ;; "src/plugins/gres/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/gres/Makefile" ;; "src/plugins/gres/gpu/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/gres/gpu/Makefile" ;; "src/plugins/gres/nic/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/gres/nic/Makefile" ;; @@ -23960,6 +24760,16 @@ do "src/plugins/acct_gather_energy/rapl/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_energy/rapl/Makefile" ;; "src/plugins/acct_gather_energy/ipmi/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_energy/ipmi/Makefile" ;; "src/plugins/acct_gather_energy/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_energy/none/Makefile" ;; + "src/plugins/acct_gather_infiniband/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_infiniband/Makefile" ;; + "src/plugins/acct_gather_infiniband/ofed/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_infiniband/ofed/Makefile" ;; + "src/plugins/acct_gather_infiniband/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_infiniband/none/Makefile" ;; + "src/plugins/acct_gather_filesystem/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_filesystem/Makefile" ;; + "src/plugins/acct_gather_filesystem/lustre/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_filesystem/lustre/Makefile" ;; + "src/plugins/acct_gather_filesystem/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_filesystem/none/Makefile" ;; + "src/plugins/acct_gather_profile/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/Makefile" ;; + "src/plugins/acct_gather_profile/hdf5/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/hdf5/Makefile" ;; + 
"src/plugins/acct_gather_profile/hdf5/sh5util/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/hdf5/sh5util/Makefile" ;; + "src/plugins/acct_gather_profile/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/none/Makefile" ;; "src/plugins/jobcomp/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/Makefile" ;; "src/plugins/jobcomp/filetxt/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/filetxt/Makefile" ;; "src/plugins/jobcomp/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/none/Makefile" ;; @@ -23973,6 +24783,8 @@ do "src/plugins/job_submit/logging/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/job_submit/logging/Makefile" ;; "src/plugins/job_submit/lua/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/job_submit/lua/Makefile" ;; "src/plugins/job_submit/partition/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/job_submit/partition/Makefile" ;; + "src/plugins/job_submit/pbs/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/job_submit/pbs/Makefile" ;; + "src/plugins/job_submit/require_timelimit/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/job_submit/require_timelimit/Makefile" ;; "src/plugins/launch/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/launch/Makefile" ;; "src/plugins/launch/aprun/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/launch/aprun/Makefile" ;; "src/plugins/launch/poe/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/launch/poe/Makefile" ;; @@ -23985,7 +24797,6 @@ do "src/plugins/priority/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/priority/Makefile" ;; "src/plugins/priority/basic/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/priority/basic/Makefile" ;; "src/plugins/priority/multifactor/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/priority/multifactor/Makefile" ;; - "src/plugins/priority/multifactor2/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/priority/multifactor2/Makefile" ;; "src/plugins/proctrack/Makefile") 
CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/Makefile" ;; "src/plugins/proctrack/aix/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/aix/Makefile" ;; "src/plugins/proctrack/cgroup/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/cgroup/Makefile" ;; @@ -24012,6 +24823,8 @@ do "src/plugins/select/cray/libemulate/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/select/cray/libemulate/Makefile" ;; "src/plugins/select/linear/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/select/linear/Makefile" ;; "src/plugins/select/serial/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/select/serial/Makefile" ;; + "src/plugins/slurmctld/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/slurmctld/Makefile" ;; + "src/plugins/slurmctld/dynalloc/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/slurmctld/dynalloc/Makefile" ;; "src/plugins/switch/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/switch/Makefile" ;; "src/plugins/switch/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/switch/none/Makefile" ;; "src/plugins/switch/nrt/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/switch/nrt/Makefile" ;; diff --git a/configure.ac b/configure.ac index 12ac71134e6e078fa24d22be6996b777162b9aad..cdb24a60a509e5b74970cefc3d21a637c77d1f1f 100644 --- a/configure.ac +++ b/configure.ac @@ -73,9 +73,9 @@ dnl aliases for plugins. dnl case "$host" in *-*-aix*) AC_DEFINE(USE_ALIAS, 0, - [Define slurm_ prefix function aliases for plugins]) ;; + [Define slurm_ prefix function aliases for plugins]) ;; *darwin*) AC_DEFINE(USE_ALIAS, 0, - [Define slurm_ prefix function aliases for plugins]) ;; + [Define slurm_ prefix function aliases for plugins]) ;; *) AC_DEFINE(USE_ALIAS, 1, [Define slurm_ prefix function aliases for plugins]) ;; esac @@ -119,12 +119,12 @@ AC_SEARCH_LIBS([kstat_open], [kstat]) dnl Checks for header files. 
dnl AC_CHECK_HEADERS(mcheck.h values.h socket.h sys/socket.h \ - stdbool.h sys/ipc.h sys/shm.h sys/sem.h errno.h \ - stdlib.h dirent.h pthread.h sys/prctl.h \ - sysint.h inttypes.h termcap.h netdb.h sys/socket.h \ - sys/systemcfg.h ncurses.h curses.h sys/dr.h sys/vfs.h \ - pam/pam_appl.h security/pam_appl.h sys/sysctl.h \ - pty.h utmp.h \ + stdbool.h sys/ipc.h sys/shm.h sys/sem.h errno.h \ + stdlib.h dirent.h pthread.h sys/prctl.h \ + sysint.h inttypes.h termcap.h netdb.h sys/socket.h \ + sys/systemcfg.h ncurses.h curses.h sys/dr.h sys/vfs.h \ + pam/pam_appl.h security/pam_appl.h sys/sysctl.h \ + pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ sys/termios.h float.h @@ -221,14 +221,33 @@ X_AC_SUN_CONST X_AC_DIMENSIONS X_AC_CFLAGS +X_AC_OFED + +AX_LIB_HDF5() +AM_CONDITIONAL(BUILD_HDF5, test "$with_hdf5" = "yes") +# Some older systems (Debian/Ubuntu/...) configure HDF5 with +# --with-default-api-version=v16 which creates problems for slurm +# because slurm uses the 1.8 API. By defining this CPP macro we get +# the 1.8 API. 
+AC_DEFINE([H5_NO_DEPRECATED_SYMBOLS], [1], [Make sure we get the 1.8 HDF5 API]) + X_AC_HWLOC +X_AC_FREEIPMI X_AC_XCPU X_AC_SLURM_SEMAPHORE +X_AC_RRDTOOL X_AC_NCURSES AM_CONDITIONAL(HAVE_SOME_CURSES, test "x$ac_have_some_curses" = "xyes") AC_SUBST(HAVE_SOME_CURSES) +# +# Tests for Check +# + +PKG_CHECK_MODULES([CHECK], [check >= 0.9.8], [ac_have_check="yes"], [ac_have_check="no"]) +AM_CONDITIONAL(HAVE_CHECK, test "x$ac_have_check" = "xyes") + # # Tests for GTK+ # @@ -376,10 +395,6 @@ AC_CHECK_LIB(util, openpty, [UTIL_LIBS="-lutil"], []) AC_SUBST(UTIL_LIBS) LIBS="$savedLIBS" -dnl Add LSD-Tools defines: -AC_DEFINE(WITH_LSD_FATAL_ERROR_FUNC, 1, [Have definition of lsd_fatal_error()]) -AC_DEFINE(WITH_LSD_NOMEM_ERROR_FUNC, 1, [Have definition of lsd_nomem_error()]) - dnl dnl Check for compilation of SLURM with BLCR support: dnl @@ -400,10 +415,10 @@ AC_CONFIG_FILES([Makefile config.xml auxdir/Makefile contribs/Makefile - contribs/arrayrun/Makefile contribs/cray/Makefile contribs/cray/opt_modulefiles_slurm contribs/lua/Makefile + contribs/mic/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile @@ -415,6 +430,7 @@ AC_CONFIG_FILES([Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile + contribs/pmi2/Makefile doc/Makefile doc/man/Makefile doc/man/man1/Makefile @@ -481,6 +497,9 @@ AC_CONFIG_FILES([Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile + src/plugins/ext_sensors/Makefile + src/plugins/ext_sensors/rrd/Makefile + src/plugins/ext_sensors/none/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile @@ -494,6 +513,16 @@ AC_CONFIG_FILES([Makefile src/plugins/acct_gather_energy/rapl/Makefile src/plugins/acct_gather_energy/ipmi/Makefile src/plugins/acct_gather_energy/none/Makefile + src/plugins/acct_gather_infiniband/Makefile + src/plugins/acct_gather_infiniband/ofed/Makefile + 
src/plugins/acct_gather_infiniband/none/Makefile + src/plugins/acct_gather_filesystem/Makefile + src/plugins/acct_gather_filesystem/lustre/Makefile + src/plugins/acct_gather_filesystem/none/Makefile + src/plugins/acct_gather_profile/Makefile + src/plugins/acct_gather_profile/hdf5/Makefile + src/plugins/acct_gather_profile/hdf5/sh5util/Makefile + src/plugins/acct_gather_profile/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile @@ -507,6 +536,8 @@ AC_CONFIG_FILES([Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile + src/plugins/job_submit/pbs/Makefile + src/plugins/job_submit/require_timelimit/Makefile src/plugins/launch/Makefile src/plugins/launch/aprun/Makefile src/plugins/launch/poe/Makefile @@ -519,7 +550,6 @@ AC_CONFIG_FILES([Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile - src/plugins/priority/multifactor2/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cgroup/Makefile @@ -546,6 +576,8 @@ AC_CONFIG_FILES([Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile src/plugins/select/serial/Makefile + src/plugins/slurmctld/Makefile + src/plugins/slurmctld/dynalloc/Makefile src/plugins/switch/Makefile src/plugins/switch/none/Makefile src/plugins/switch/nrt/Makefile diff --git a/contribs/Makefile.am b/contribs/Makefile.am index 45c13a11930869c4f038b4390077f18dab156579..26fd7721acf8166bc92a7dcbafb83bc39543cec0 100644 --- a/contribs/Makefile.am +++ b/contribs/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = arrayrun cray lua pam perlapi torque sjobexit slurmdb-direct +SUBDIRS = cray lua pam perlapi torque sjobexit slurmdb-direct pmi2 EXTRA_DIST = \ env_cache_builder.c \ diff --git a/contribs/Makefile.in b/contribs/Makefile.in index 
ef7115fb47004100b2aecb80e753d7e535c7adb1..40049f2e7a4159fac166a8805fc7d9fd90488cfb 100644 --- a/contribs/Makefile.in +++ b/contribs/Makefile.in @@ -55,6 +55,7 @@ subdir = contribs DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ 
GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -352,7 +378,7 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = arrayrun cray lua pam perlapi torque sjobexit slurmdb-direct +SUBDIRS = cray lua pam perlapi torque sjobexit slurmdb-direct pmi2 EXTRA_DIST = \ env_cache_builder.c \ make.slurm.patch \ diff --git a/contribs/README b/contribs/README index 6921c7ee0983c3b8edc6b170dc6893958fb7fa27..9d4cc5b1415b1e81a7eb3af30ee2e82821da6f9c 100644 --- a/contribs/README +++ b/contribs/README @@ -7,12 +7,6 @@ Subdirectories contain the source-code for the various contributations for SLURM as their documentation. 
A quick description of the subdirectories of the SLURM contribs distribution follows: - arrayrun [Adds support for array jobs] - README - Description of the arrayrun tool and its use - arrayrun - Command used to submit job arrays - arrayrun_worker - Back-end to the arrayrun command responsible for - spawning the jobs in the array - cray [Tools for use on Cray systems] etc_init_d_munge - /etc/init.d/munge script for use with Munge etc_sysconfig_slurm - /etc/sysconfig/slurm for Cray XT/XE systems @@ -73,6 +67,8 @@ of the SLURM contribs distribution follows: existing SLURM allocation. Outside of a SLURM allocation, make's behavior will be unchanged. Designed for GNU make-3.81. + mic [Tools for use on Intel MIC processors] + mpich1.slurm.patch [ Patch to mpich1/p4 library for SLURM job task launch ] For SLURM based job initiations (from srun command), get the parameters from environment variables as needed. This allows for a truly parallel @@ -91,6 +87,10 @@ of the SLURM contribs distribution follows: API to SLURM using php. Not a complete API, but offers quite a few interfaces to existing SLURM proper APIs. + pmi2 [ PMI2 client library ] + User applications can link with this library to use Slurm's mpi/pmi2 + plugin. + ptrace.patch [ Linux Kernel patch required to for TotalView use ] 0. This has been fixed on most recent Linux kernels. Older versions of Linux may need this patch support TotalView. @@ -136,3 +136,4 @@ of the SLURM contribs distribution follows: torque/ [ Wrapper Scripts for Torque migration to SLURM ] Helpful scripts to make transition to SLURM easier from PBS or Torque. These scripts are easily updatable if there is functionality missing. 
+ NOTE: For the showq command, see https://github.com/pedmon/slurm_showq diff --git a/contribs/arrayrun/Makefile.am b/contribs/arrayrun/Makefile.am deleted file mode 100644 index 2b280966df39a8d3944bc76b449f01829424ac0c..0000000000000000000000000000000000000000 --- a/contribs/arrayrun/Makefile.am +++ /dev/null @@ -1,4 +0,0 @@ -EXTRA_DIST = \ - arrayrun \ - arrayrun_worker \ - README diff --git a/contribs/arrayrun/README b/contribs/arrayrun/README deleted file mode 100644 index 3615d053e6ead7f0e0b6dc32e1a69923c5cff2f3..0000000000000000000000000000000000000000 --- a/contribs/arrayrun/README +++ /dev/null @@ -1,132 +0,0 @@ --*- text -*- $Id: README.arrayrun,v 1.2 2011/06/28 11:21:27 bhm Exp $ - -Overview -======== - -Arrayrun is an attempt to simulate arrayjobs as found in SGE and PBS. It -works very similarly to mpirun: - - arrayrun [-r] taskids [sbatch arguments] YourCommand [arguments] - -In principle, arrayrun does - - TASK_ID=id sbatch [sbatch arguments] YourCommand [arguments] - -for each id in the 'taskids' specification. 'taskids' is a comma separated -list of integers, ranges of integers (first-last) or ranges with step size -(first-last:step). If -r is specified, arrayrun will restart a job that has -failed. To avoid endless loops, a job is only restarted once, and a maximum -of 10 (configurable) jobs will be restarted. - -The idea is to submit a master job that calls arrayrun to start the jobs, -for instance - - $ cat workerScript - #!/bin/sh - #SBATCH --account=YourProject - #SBATCH --time=1:0:0 - #SBATCH --mem-per-cpu=1G - - DATASET=dataset.$TASK_ID - OUTFILE=result.$TASK_ID - cd $SCRATCH - YourProgram $DATASET > $OUTFILE - # end of workerScript - - $ cat submitScript - #!/bin/sh - #SBATCH --account=YourProject - #SBATCH --time=50:0:0 - #SBATCH --mem-per-cpu=100M - - arrayrun 1-200 workerScript - # end of submitScript - - $ sbatch submitScript - -The --time specification in the master script must be long enough for all -jobs to finish. 
- -Alternatively, arrayrun can be run on the command line of a login or master -node. - -If the master job is cancelled, or the arrayrun process is killed, it tries -to scancel all running or pending jobs before it exits. - -Arrayrun tries not to flood the queue with jobs. It works by submitting a -limited number of jobs, sleeping a while, checking the status of its jobs, -and iterating, until all jobs have finished. All limits and times are -configurable (see below). It also tries to handle all errors in a graceful -manner. - - -Installation and configuration -============================== - -There are two files, arrayrun (to be called by users) and arrayrun_worker -(exec'ed or srun'ed by arrayrun, to make scancel work). - -arrayrun should be placed somewhere on the $PATH. arrayrun_worker can be -place anywhere. Both files should be accessible from all nodes. - -There are quite a few configuration variables, so arrayrun can be tuned to -work under different policies and work loads. - -Configuration variables in arrayrun: - -- WORKER: the location of arrayrun_worker - -Configuration variables in arrayrun_worker: - -- $maxJobs: The maximal number of jobs arrayrun will allow in the - queue at any time -- $maxIdleJobs: The maximal number of _pending_ jobs arrayrun will allow - in the queue at any time -- $maxBurst: The maximal number of jobs submitted at a time -- $pollSeconds: How many seconds to sleep between each iteration -- $maxFails: The maximal number of errors to accept when submitting a - job -- $retrySleep: The number of seconds to sleep between each retry when - submitting a job -- $doubleCheckSleep: The number of seconds to sleep after a failed sbatch - before runnung squeue to double check whether the job - was submitted or not. -- $maxRestarts: The maximal number of restarts all in all -- $sbatch: The full path of the sbatch command to use - - -Notes and caveats -================= - -Arrayrun is an attempt to simulate array jobs. 
As such, it is not -perfect or foolproof. Here are a couple of caveats. - -- Sometimes, arrayrun fails to scancel all jobs when it is itself cancelled - -- When arrayrun is run as a master job, it consumes one CPU for the whole - duration of the job. Also, the --time limit must be long enough. This can - be avoided by running arrayrun interactively on a master/login node (in - which case running it under screen is probably a good idea). - -- Arrayrun does (currently) not checkpoint, so if an arrayrun is restarted, - it starts from scratch with the first taskid. - -We welcome any suggestions for improvements or additional functionality! - - -Copyright -========= - -Copyright 2009,2010,2011 Bjørn-Helge Mevik <b.h.mevik@usit.uio.no> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License version 2 as -published by the Free Software Foundation. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License version 2 for more details. - -A copy of the GPL v. 2 text is available here: -http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt diff --git a/contribs/arrayrun/arrayrun b/contribs/arrayrun/arrayrun deleted file mode 100644 index 944e5f3987a66baebab50152da26caa75d8c2d2f..0000000000000000000000000000000000000000 --- a/contribs/arrayrun/arrayrun +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -### Simulate an array job -### $Id: arrayrun,v 1.6 2011/02/10 11:57:53 root Exp $ - -### Copyright 2009,2010 Bjørn-Helge Mevik <b.h.mevik@usit.uio.no> -### -### This program is free software; you can redistribute it and/or modify -### it under the terms of the GNU General Public License version 2 as -### published by the Free Software Foundation. 
-### -### This program is distributed in the hope that it will be useful, -### but WITHOUT ANY WARRANTY; without even the implied warranty of -### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -### GNU General Public License version 2 for more details. -### -### A copy of the GPL v. 2 text is available here: -### http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt - - -## Debugging -#set -x - -### Configuration: -## The work horse: -WORKER=/site/lib/arrayrun_worker - -## Documentation: -function usage () { - echo "Run many instances of the same job or command in the queue system. -The instances are submitted via sbatch, and each get their own value -of the environment variable TASK_ID. This can be used to select which -intput or output file to use, etc. - -Usage: - arrayrun [-r] taskids [sbatch arguments] command [arguments] - arrayrun [-h | --help] - -Arguments: - '-r': Restart a job if it fails. For security reasons, each job is - restarted only once, and no more than 5 jobs will be restarted. - 'taskids': Run 'command' with TASK_ID set to the values specified in - 'taskids'. 'taskids' is a comma separated list of integers, - ranges of integers (first-last) or ranges with step size - (first-last:step). For instance - 1-5 means 1, 2, 3, 4, 5 - 1,4,6 means 1, 4, 6 - 10-20:5 means 10, 15, 20 - 1-5,15,100-150:25 means 1, 2, 3, 4, 5, 15, 100, 125, 150 - Note: spaces, negative number or decimal numbers are not allowed. - 'sbatch arguments': Any command line arguments for the implied sbatch. This - is most useful when 'command' is not a job script. - 'command': The command or job script to run. If it is a job script, it can - contain #SBATCH lines in addition to or instead of the 'sbatch - arguments'. - 'arguments': Any arguments for 'command'. - '-h', '--help' (or no arguments): Display this help." -} - -if [ $# == 0 -o "$1" == '--help' -o "$1" == '-h' ]; then - usage - exit 0 -fi - -if [ -n "$SLURM_JOB_ID" ]; then - ## Started in a job script. 
Run with srun to make "scancel" work - exec srun --ntasks=1 $WORKER "$@" -else - exec $WORKER "$@" -fi diff --git a/contribs/arrayrun/arrayrun_worker b/contribs/arrayrun/arrayrun_worker deleted file mode 100644 index 8107c72d4644cfef6bd0cdd1c043d835fb9a5b72..0000000000000000000000000000000000000000 --- a/contribs/arrayrun/arrayrun_worker +++ /dev/null @@ -1,255 +0,0 @@ -#!/usr/bin/perl -### Simulate an array job -- work horse script -### $Id: arrayrun_worker,v 1.30 2011/04/27 08:58:25 root Exp $ - -### Copyright 2009,2010,2011 Bjørn-Helge Mevik <b.h.mevik@usit.uio.no> -### -### This program is free software; you can redistribute it and/or modify -### it under the terms of the GNU General Public License version 2 as -### published by the Free Software Foundation. -### -### This program is distributed in the hope that it will be useful, -### but WITHOUT ANY WARRANTY; without even the implied warranty of -### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -### GNU General Public License version 2 for more details. -### -### A copy of the GPL v. 2 text is available here: -### http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt - - -### Note: This script is meant to be run by 'arrayrun'; do not -### run this script directly. 
- -use strict; -use List::Util qw/min/; -use Time::HiRes qw/sleep/; - -## Debug: -use warnings; -use constant DEBUG => 1; -$| = 1 if DEBUG; - -## Configuration: -my $maxJobs = 100; # Max total number of jobs in queue -my $maxIdleJobs = 10; # Max number of pending jobs in queue -my $maxBurst = 10; # Max number of jobs to submit at a time -my $pollSeconds = 180; # How many seconds to sleep between each poll -my $maxFails = 300; # Max errors to accept when submitting a job -my $retrySleep = 300; # Seconds to sleep between each retry -my $doubleCheckSleep = 30; # Seconds to sleep before double checking -my $maxRestarts = 10; # Max number of restarts all in all -my $sbatch = "/site/bin/sbatch";# Which sbatch command to use - -## Parse command line -my $restart = 0; -if (@ARGV && $ARGV[0] eq '-r') { - $restart = 1; - shift @ARGV; -} -my $jobSpec = shift @ARGV or die "Too few arguments\n"; -my @commandLine = @ARGV or die "Too few arguments\n"; -my @jobArray; -foreach (split /,/, $jobSpec) { - if (/^(\d+)$/) { - push @jobArray, $1; - } elsif (/^(\d+)[-:](\d+)$/) { - push @jobArray, $1 .. 
$2; - } elsif (/^(\d+)[-:](\d+):(\d+)$/) { - for (my $i = $1; $i <= $2; $i += $3) { - push @jobArray, $i; - } - } else { - die "Unknown TASK_ID specification: '$_'\n"; - } -} -die "No TASK_IDs specified\n" unless (@jobArray); - -print "TASK_IDs to submit: ", join(",", @jobArray), " -Command line: @commandLine\n" if DEBUG; -print "Will restart failed jobs\n" if DEBUG && $restart; - -## Setup -my $mainid = $ENV{'SLURM_JOB_ID'} || $ENV{'SLURM_JOBID'} || 'null'; -my $runids = []; # List of IDs of running jobs -my $pendids = []; # List of IDs of pending jobs -my $testids = []; # List of IDs to test -my %taskid; # TASK_ID for all submitted jobs -my @restartedTasks; # TASK_ID of all restarted jobs -my @tmp = (localtime())[5,4,3]; -my $starttime = sprintf "%d-%02d-%02d", $tmp[0] + 1900, $tmp[1] + 1, $tmp[2]; - -print "Main job id: $mainid\nStart time: $starttime\n" if DEBUG; - -## Trap signals such that any running sub jobs are cancelled if the -## main job is cancelled or times out. -sub clean_up { - print "Caught signal. Cleaning up...\n" if DEBUG; - ## Cancel any subjobs: - if (@{$runids} || @{$pendids} || @{$testids}) { - print "Cancelling @{$runids} @{$pendids} @{$testids}\n" if DEBUG; - system("echo scancel @{$runids} @{$pendids} @{$testids}"); - system("scancel @{$runids} @{$pendids} @{$testids}"); - print "Cancelled @{$runids} @{$pendids} @{$testids}\n" if DEBUG; - } - exit 0; -} -$SIG{'TERM'} = 'clean_up'; # scancel/timeout -$SIG{'INT'} = 'clean_up'; # ^C in interactive use - - -## Submit a job with fail resilience: -sub submit_job { - my $jobName = shift; - (my $commandLine = shift) || die "Job script not specified\n"; - my $id; - my $nFails = 0; - my $success = 0; - until ($success) { - my $fail = 0; - $id = `$sbatch --job-name=$jobName $commandLine 2>&1`; - if ($? == 0) { - chomp($id); - print " Result from submit: $id" if DEBUG; - if ($id =~ s/.*Submitted batch job //) { - $success = 1; - } - } else { - warn " sbatch failed with error code '$?' 
(output: '", - $id || '', "'): $!\n"; - $nFails++; - } - until ($success || $fail || $nFails > $maxFails) { - ## Double check that the job did not start - warn " Problem with submitting/checking job. Checking with squeue in a while.\n"; - sleep $doubleCheckSleep - 5 + int(rand(11)); - $id = `squeue -h -o '%i %j' -u $ENV{USER}`; - if ($? == 0) { - chomp($id); - print " Result from squeue: $id" if DEBUG; - if ($id =~ s/ $jobName//) { - warn "Job '$jobName' seems to have been started as jobid '$id'. Using that id.\n"; - $success = 1; - } else { - warn "Job '$jobName' did not start.\n"; - $fail = 1; - } - } else { - $nFails++; - } - } - unless ($success) { - if ($nFails <= $maxFails) { - warn " Could not submit job. Trying again in a while.\n"; - sleep $retrySleep - 5 + int(rand(11)); - } else { - die " Cannot submit job. Giving up after $nFails errors.\n"; - } - } - } - print " => job ID $id\n" if DEBUG; - $id; -} - - -## Check the given jobs, and return lists of the ones still running/waiting: -sub check_queue { - print scalar localtime, ": Checking queue...\n" if DEBUG; - my $queueids = `squeue -h -o '%i %t' 2>&1`; - if ($? != 0) { - print "squeue failed with error code '$?',\nmessage: $queueids\nI will assume all jobs are still running/waiting\n"; - return; - } - my $testids = [ @{$runids}, @{$pendids} ]; - print "Number of jobs to check: ", scalar @{$testids}, "\n" if DEBUG; - sleep 10 + rand; # Sleep to allow requeued jobs to get back - # in queue. - $runids = []; - $pendids = []; - foreach my $id (@{$testids}) { - if ($queueids =~ /$id (\w+)/) { - if ($1 eq "PD") { - print " Job $id is still waiting\n" if DEBUG; - push @{$pendids}, $id; - } else { - print " Job $id is still running\n" if DEBUG; - push @{$runids}, $id; - } - } else { - print " Job $id has finished:\n" if DEBUG; - my @sacctres = `sacct -o jobid,start,end,maxvmsize,maxrss,state,exitcode -S $starttime -j $id 2>&1`; - if ($? 
!= 0) { - print " sacct failed with error code '$?',\n message: ", - @sacctres, " I will assume job $id finished successfully\n"; - } else { - print join(" ", @sacctres); - if (grep /^[ ]*$id[ ]+.*RUNNING/, @sacctres) { - print " Job seems to be still running, after all.\n" if DEBUG; - push @{$runids}, $id; - } elsif ($restart && !grep /^[ ]*$id[ ]+.*COMPLETED[ ]+0:0/, @sacctres) { - print " Job failed. "; - if (@restartedTasks >= $maxRestarts) { - print "Too many jobs have been restarted. Will not restart TASK_ID $taskid{$id}\n"; - } elsif (grep /^$taskid{$id}$/, @restartedTasks) { - print "TASK_ID $taskid{$id} has already been restarted once. Will not restart it again\n"; - } else { - print "Restarting TASK_ID $taskid{$id}\n"; - $ENV{'TASK_ID'} = $taskid{$id}; - my $newid = submit_job "$mainid.$taskid{$id}", "@commandLine"; - push @{$runids}, $newid; - $taskid{$newid} = $taskid{$id}; - push @restartedTasks, $taskid{$newid}; - sleep 1.5 + rand; # Sleep between 1.5 and 2.5 secs - } - } - } - } - } -} - - -## Make sure sub jobs do not inherit the main job TMPDIR or jobname: -delete $ENV{'TMPDIR'}; -delete $ENV{'SLURM_JOB_NAME'}; - -while (@jobArray) { - ## There is more to submit - print scalar localtime, ": Submitting jobs...\n" if DEBUG; - print scalar @jobArray, " more job(s) to submit\n" if DEBUG; - ## Submit as many as possible: - my $nToSubmit = min(scalar @jobArray, - $maxJobs - @{$runids} - @{$pendids}, - $maxIdleJobs - @{$pendids}, - $maxBurst); - print scalar(@{$runids}), " job(s) are running, and ", - scalar(@{$pendids}), " are waiting\n" if DEBUG; - print "Submitting $nToSubmit job(s):\n" if DEBUG; - for (my $i = 1; $i <= $nToSubmit; $i++) { - my $currJob = shift @jobArray; - print " TASK_ID $currJob:\n" if DEBUG; - ## Set $TASK_ID for the job: - $ENV{'TASK_ID'} = $currJob; - my $id = submit_job "$mainid.$currJob", "@commandLine"; - push @{$pendids}, $id; - $taskid{$id} = $currJob; - sleep 1.5 + rand; # Sleep between 1.5 and 2.5 secs - } - ## Wait a 
while: - print "Sleeping...\n" if DEBUG; - sleep $pollSeconds - 5 + int(rand(11)); - ## Find which are still running or waiting: - check_queue(); -} -print "All jobs have been submitted\n" if DEBUG; - -while (@{$runids} || @{$pendids}) { - ## Some jobs are still running or pending - print scalar(@{$runids}), " job(s) are still running, and ", - scalar(@{$pendids}), " are waiting\n" if DEBUG; - ## Wait a while - print "Sleeping...\n" if DEBUG; - sleep $pollSeconds - 5 + int(rand(11)); - ## Find which are still running or waiting: - check_queue(); -} - -print "Done.\n" if DEBUG; diff --git a/contribs/cray/Makefile.am b/contribs/cray/Makefile.am index 05c5c54117924e2e0cad9d6e435b28162332e598..cafd27219e280ec4c63f291643011caaf43fe593 100644 --- a/contribs/cray/Makefile.am +++ b/contribs/cray/Makefile.am @@ -5,32 +5,7 @@ AUTOMAKE_OPTIONS = foreign EXTRA_DIST = \ - etc_init_d_munge \ etc_sysconfig_slurm \ libalps_test_programs.tar.gz \ - munge_build_script.sh \ opt_modulefiles_slurm \ - pam_job.c \ - slurm-build-script.sh - -srun: -_perldir=$(exec_prefix)`perl -e 'use Config; $$T=$$Config{installsitearch}; $$P=$$Config{installprefix}; $$P1="$$P/local"; $$T =~ s/$$P1//; $$T =~ s/$$P//; print $$T;'` - -install-binSCRIPTS: $(bin_SCRIPTS) - @$(NORMAL_INSTALL) - test -z "$(DESTDIR)$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)" - @list='$(bin_SCRIPTS)'; for p in $$list; do \ - echo "sed 's%use lib .*%use lib qw(${_perldir});%' $(top_srcdir)/contribs/cray/$$p.pl | sed 's%BINDIR%@bindir@%' > $(DESTDIR)$(bindir)/$$p"; \ - sed "s%use lib .*%use lib qw(${_perldir});%" $(top_srcdir)/contribs/cray/$$p.pl | sed "s%BINDIR%@bindir@%" > $(DESTDIR)$(bindir)/$$p; \ - chmod 755 $(DESTDIR)$(bindir)/$$p;\ - done - -uninstall-binSCRIPTS: - @$(NORMAL_UNINSTALL) - @list='$(bin_SCRIPTS)'; for p in $$list; do \ - echo " rm -f '$(DESTDIR)$(bindir)/$$p'"; \ - rm -f "$(DESTDIR)$(bindir)/$$p"; \ - done - -clean: - + pam_job.c diff --git a/contribs/cray/Makefile.in b/contribs/cray/Makefile.in index 
ac22200d02b0886027c766c9d97754c94f2307b0..31b1e2d0ea2af212b24d65c459d8d15b4b0b234c 100644 --- a/contribs/cray/Makefile.in +++ b/contribs/cray/Makefile.in @@ -60,6 +60,7 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(srcdir)/opt_modulefiles_slurm.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -132,6 +136,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -152,6 +158,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -161,6 
+170,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -168,6 +179,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -202,6 +222,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -229,6 +252,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -319,15 +345,11 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign EXTRA_DIST = \ - etc_init_d_munge \ etc_sysconfig_slurm \ libalps_test_programs.tar.gz \ - munge_build_script.sh \ opt_modulefiles_slurm \ - pam_job.c \ - slurm-build-script.sh + pam_job.c -_perldir = $(exec_prefix)`perl -e 'use Config; $$T=$$Config{installsitearch}; $$P=$$Config{installprefix}; $$P1="$$P/local"; $$T =~ s/$$P1//; $$T =~ s/$$P//; print $$T;'` all: all-am .SUFFIXES: @@ -440,6 +462,8 @@ distclean-generic: maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am @@ -518,26 +542,6 @@ uninstall-am: mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am -srun: - -install-binSCRIPTS: $(bin_SCRIPTS) - @$(NORMAL_INSTALL) - test -z "$(DESTDIR)$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)" - @list='$(bin_SCRIPTS)'; for p in $$list; do \ - echo "sed 's%use lib .*%use lib qw(${_perldir});%' $(top_srcdir)/contribs/cray/$$p.pl | sed 's%BINDIR%@bindir@%' > $(DESTDIR)$(bindir)/$$p"; \ - sed "s%use lib .*%use lib qw(${_perldir});%" $(top_srcdir)/contribs/cray/$$p.pl | sed "s%BINDIR%@bindir@%" > $(DESTDIR)$(bindir)/$$p; \ - chmod 755 $(DESTDIR)$(bindir)/$$p;\ - done - -uninstall-binSCRIPTS: - @$(NORMAL_UNINSTALL) - @list='$(bin_SCRIPTS)'; for p in $$list; do \ - echo " rm -f '$(DESTDIR)$(bindir)/$$p'"; \ - rm -f "$(DESTDIR)$(bindir)/$$p"; \ - done - -clean: - # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/contribs/cray/etc_init_d_munge b/contribs/cray/etc_init_d_munge deleted file mode 100644 index 0bc5e3393edc0f13d0330ae9368491b77f8181a0..0000000000000000000000000000000000000000 --- a/contribs/cray/etc_init_d_munge +++ /dev/null @@ -1,559 +0,0 @@ -#!/bin/sh -# -# /etc/init.d/munge - Start/stop script configured for Cray XT/XE -# -############################################################################### -# Written by Chris Dunlap <cdunlap@llnl.gov>. -# Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. -# Copyright (C) 2002-2007 The Regents of the University of California. -# UCRL-CODE-155910. -############################################################################### -# chkconfig: - 40 60 -# Description: Start/Stop the MUNGE authentication service. 
-############################################################################### -### BEGIN INIT INFO -# Provides: munge -# Required-Start: $remote_fs -# Required-Stop: $remote_fs -# Default-Start: 2 3 5 -# Default-Stop: -# Short-Description: Start/Stop the MUNGE authentication service. -# Description: MUNGE (MUNGE Uid 'N' Gid Emporium) is a highly scalable -# authentication service for creating and validating -# credentials. -### END INIT INFO - -unset DESC DAEMON CONFIG DAEMON_ARGS PIDFILE NICE USER SIGHUP_RELOAD - -prefix="/opt/slurm/munge" -exec_prefix="${prefix}" -sbindir="${exec_prefix}/sbin" -sysconfdir="${prefix}/etc" -localstatedir="/var" - -DESC="MUNGE" -DAEMON="$sbindir/munged" -#CONFIG=#_NOT_SUPPORTED_# -DAEMON_ARGS="--key-file ${prefix}/etc/munge.key" -PIDFILE="$localstatedir/run/munge/munged.pid" -#NICE= -#USER="daemon" -#SIGHUP_RELOAD=#_NOT_SUPPORTED_# - -############################################################################### - -service_init () -{ -# Determine the system type and initialize the environment. -# -# Note that the shell positional parameters must be preserved when calling -# this function in order for SuSE to initialize its environment properly. -## - PATH=/sbin:/usr/sbin:/bin:/usr/bin - DAEMON_NAME="`basename \"$DAEMON\"`" - SCRIPT_NAME="`basename \"$0\" .init | sed 's/^[SK][0-9][0-9]*//'`" - SIGTERM_TIMEOUT="3" - STATUS=0 - - # Read configuration defaults to override variables: - # $CONFIG, $DAEMON_ARGS, $PIDFILE, $USER, $NICE, $SIGHUP_RELOAD - ## - for dir in "$sysconfdir/default" "$sysconfdir/sysconfig"; do - [ -r "$dir/$SCRIPT_NAME" ] && . 
"$dir/$SCRIPT_NAME" - done - [ -z "$DAEMON_ARGS" -a -n "$OPTIONS" ] && DAEMON_ARGS="$OPTIONS" - [ "`id | sed 's/^uid=\([0-9]*\).*/\1/'`" -ne 0 ] && unset USER - expr -- "$NICE" : '[0-9]*$' >/dev/null 2>&1 && NICE="+$NICE" - [ -n "$SIGHUP_RELOAD" -a "$SIGHUP_RELOAD" != 0 ] \ - && RELOAD=1 || unset RELOAD - - if [ -f /etc/debian_version -a -x /sbin/start-stop-daemon ]; then - SYSTEM="DEBIAN" - [ -x "$DAEMON" ] || exit 0 # pkg removed but not purged - [ -r /etc/default/rcS ] && . /etc/default/rcS - [ -r /lib/init/vars.sh ] && . /lib/init/vars.sh - [ -r /lib/lsb/init-functions ] && . /lib/lsb/init-functions - elif [ -f /etc/redhat-release -a -r /etc/init.d/functions ]; then - SYSTEM="REDHAT" - . /etc/init.d/functions - RH_SUBSYS="/var/lock/subsys/$DAEMON_NAME" - elif [ -f /etc/SuSE-release -a -r /etc/rc.status ]; then - SYSTEM="SUSE" - . /etc/rc.status - rc_reset - elif [ -r /lib/lsb/init-functions ]; then - SYSTEM="LSB" - . /lib/lsb/init-functions - else - SYSTEM="OTHER" - fi - - # Exit if the package has been removed. - ## - [ -x "$DAEMON" ] || exit 5 # LSB: program not installed - - # Exit if the configuration has been removed. - ## - [ -z "$CONFIG" -o -r "$CONFIG" ] || exit 6 # LSB: program not configured -} - -service_fini () -{ -# Return the exit status. -## - case $SYSTEM in - SUSE) - rc_exit - ;; - DEBIAN|REDHAT|LSB|*) - exit $STATUS - ;; - esac -} - -service_start () -{ -# Start the service. -# -# Required by LSB, where running "start" on a service already running should be -# considered successful. -## - log_init "Starting $DESC" "$DAEMON_NAME" - - VARRUNDIR="$localstatedir/run/munge" - if [ ! 
-d "$VARRUNDIR" ]; then - mkdir -m 755 -p "$VARRUNDIR" - [ -n "$USER" ] && chown "$USER" "$VARRUNDIR" - fi - - case $SYSTEM in - DEBIAN) - if $0 status >/dev/null 2>&1; then - STATUS=0 - else - ERRMSG=`start-stop-daemon --start --quiet \ - ${NICE:+"--nicelevel"} ${NICE:+"$NICE"} \ - ${USER:+"--chuid"} ${USER:+"$USER"} \ - ${PIDFILE:+"--pidfile"} ${PIDFILE:+"$PIDFILE"} \ - --exec "$DAEMON" -- $DAEMON_ARGS 2>&1` - STATUS=$? - fi - ;; - REDHAT) - if $0 status >/dev/null 2>&1; then - STATUS=0 - else - daemon ${NICE:+"$NICE"} ${USER:+"--user"} ${USER:+"$USER"} \ - "$DAEMON" $DAEMON_ARGS - STATUS=$? - fi - [ $STATUS -eq 0 ] && touch "$RH_SUBSYS" >/dev/null 2>&1 - ;; - SUSE) - ERRMSG=`startproc ${NICE:+"-n"} ${NICE:+"$NICE"} \ - ${USER:+"-u"} ${USER:+"$USER"} \ - ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} \ - "$DAEMON" $DAEMON_ARGS 2>&1` - rc_status -v - STATUS=$? - ;; - LSB) - if [ -n "$USER" ]; then - ERRMSG=`su "$USER" -c "/sbin/start_daemon \ - ${NICE:+\"-n\"} ${NICE:+\"$NICE\"} \ - ${PIDFILE:+\"-p\"} ${PIDFILE:+\"$PIDFILE\"} \ - \"$DAEMON\" $DAEMON_ARGS" 2>&1` - else - ERRMSG=`start_daemon ${NICE:+"-n"} ${NICE:+"$NICE"} \ - ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} "$DAEMON" $DAEMON_ARGS 2>&1` - fi - STATUS=$? - ;; - *) - if $0 status >/dev/null 2>&1; then - STATUS=0 - else - [ -n "$NICE" ] && nice="nice -n $NICE" - if [ -n "$USER" ]; then - ERRMSG=`su "$USER" -c "$nice \"$DAEMON\" $DAEMON_ARGS" 2>&1` - else - ERRMSG=`$nice "$DAEMON" $DAEMON_ARGS 2>&1` - fi - STATUS=$? - fi - ;; - esac - log_fini "$STATUS" "$ERRMSG" -} - -service_stop () -{ -# Stop the service. -# -# Required by LSB, where running "stop" on a service already stopped or not -# running should be considered successful. -## - log_init "Stopping $DESC" "$DAEMON_NAME" - case $SYSTEM in - DEBIAN) - if ! 
$0 status >/dev/null 2>&1; then - STATUS=0 - else - start-stop-daemon --stop --quiet \ - ${PIDFILE:+"--pidfile"} ${PIDFILE:+"$PIDFILE"} \ - --name "$DAEMON_NAME" ${SIGTERM_TIMEOUT:+"--retry"} \ - ${SIGTERM_TIMEOUT:+"$SIGTERM_TIMEOUT"} >/dev/null 2>&1 - STATUS=$? - fi - ;; - REDHAT) - if ! $0 status >/dev/null 2>&1; then - STATUS=0 - else - killproc "$DAEMON" - STATUS=$? - fi - [ $STATUS -eq 0 ] && rm -f "$RH_SUBSYS" >/dev/null 2>&1 - ;; - SUSE) - killproc ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} \ - ${SIGTERM_TIMEOUT:+"-t"} ${SIGTERM_TIMEOUT:+"$SIGTERM_TIMEOUT"} \ - "$DAEMON" - rc_status -v - ;; - LSB) - killproc ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} "$DAEMON" - STATUS=$? - ;; - *) - signal_process "$DAEMON" - rc=$? - [ $rc -eq 0 -o $rc -eq 2 ] && STATUS=0 || STATUS=1 - ;; - esac - log_fini "$STATUS" - [ -f "$PIDFILE" ] && rm -f "$PIDFILE" -} - -service_restart () -{ -# Stop and restart the service if it is already running; -# otherwise, start the service. -# -# Required by LSB, where running "restart" on a service already stopped or not -# running should be considered successful. -## - if $0 status >/dev/null 2>&1; then - $0 stop && $0 start - else - $0 start - fi - - case $SYSTEM in - SUSE) - rc_status - ;; - DEBIAN|REDHAT|LSB|*) - STATUS=$? - ;; - esac -} - -service_try_restart () -{ -# Restart the service if it is already running. -# -# Optional for LSB, where running "try-restart" on a service already stopped or -# not running should be considered successful. -# Also known as "condrestart" by RedHat. -## - case $SYSTEM in - REDHAT) - [ -f "$RH_SUBSYS" ] && $0 restart || : - STATUS=$? - ;; - SUSE) - $0 status >/dev/null 2>&1 && $0 restart || rc_reset - rc_status - ;; - DEBIAN|LSB|*) - $0 status >/dev/null 2>&1 && $0 restart || : - STATUS=$? - ;; - esac -} - -service_reload () -{ -# Reload the configuration without stopping and restarting the service. -# -# Optional for LSB. 
-## - [ -z "$RELOAD" ] && STATUS=3 # LSB: unimplemented feature - - log_init "Reloading $DESC" "$DAEMON_NAME" - case $SYSTEM in - DEBIAN) - if [ -n "$RELOAD" ]; then - start-stop-daemon --stop --quiet --signal HUP \ - ${PIDFILE:+"--pidfile"} ${PIDFILE:+"$PIDFILE"} \ - --name "$DAEMON_NAME" >/dev/null 2>&1 - STATUS=$? - fi - ;; - REDHAT) - if [ -n "$RELOAD" ]; then - killproc "$DAEMON" -HUP - STATUS=$? - else - echo_failure - fi - ;; - SUSE) - if [ -n "$RELOAD" ]; then - killproc -HUP ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} "$DAEMON" - else - rc_failed $STATUS - fi - rc_status -v - ;; - LSB) - if [ -n "$RELOAD" ]; then - killproc ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} "$DAEMON" -HUP - STATUS=$? - fi - ;; - *) - if [ -n "$RELOAD" ]; then - signal_process "$DAEMON" "HUP" - STATUS=$? - fi - ;; - esac - log_fini "$STATUS" -} - -service_force_reload () -{ -# Reload the configuration if the service supports this; -# otherwise, restart the service if it is already running. -# -# Required by LSB, where running "force-reload" on a service already stopped or -# not running should be considered successful. -## - if [ -n "$RELOAD" ]; then - $0 reload - else - $0 try-restart - fi - - case $SYSTEM in - SUSE) - rc_status - ;; - DEBIAN|REDHAT|LSB|*) - STATUS=$? - ;; - esac -} - -service_status () -{ -# Print the current status of the service. -# -# Required by LSB. -## - case $SYSTEM in - REDHAT) - status "$DAEMON" - STATUS=$? - ;; - SUSE) - printf "Checking for service $DESC: " - checkproc ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} "$DAEMON" - rc_status -v - ;; - LSB) - printf "Checking status of $DESC: " - pids=`pidofproc ${PIDFILE:+"-p"} ${PIDFILE:+"$PIDFILE"} \ - "$DAEMON" 2>/dev/null` - STATUS=$? - if [ $STATUS -eq 0 -a -n "$pids" ]; then - echo "running." - elif [ $STATUS -ne 0 -a -s "$PIDFILE" ]; then - echo "dead." - else - echo "stopped." - fi - ;; - DEBIAN|*) - printf "Checking status of $DESC: " - pids=`query_pids "$DAEMON" "$PIDFILE"` - rc=$? 
- if [ $rc -eq 0 -a -n "$pids" ]; then - echo "running." - STATUS=0 # LSB: program is running - elif [ $rc -ne 0 -a -s "$PIDFILE" ]; then - echo "dead." - STATUS=1 # LSB: program is dead & pidfile exists - elif [ $rc -ne 0 ]; then - echo "stopped." - STATUS=3 # LSB: program is not running - else - echo "unknown." - STATUS=4 # LSB: program status unknown - fi - ;; - esac -} - -query_pids () -{ -# Writes the matching PIDs to stdout. -# Returns 0 on success (ie, pids found). -## - PROCNAME="$1" - PIDFILE="$2" - - if type pgrep >/dev/null 2>&1; then - pids=`pgrep -d ' ' -x "\`basename \"$PROCNAME\"\`" 2>/dev/null` - rc=$? - elif type pidof >/dev/null 2>&1; then - pids=`pidof -o $$ -x "$PROCNAME" 2>/dev/null` - rc=$? - else - pids=`(ps awx -o pid -o command || ps -e -f -o pid -o args) 2>/dev/null \ - | tail +2 | egrep "( |/)$PROCNAME( |$)" | grep -v egrep \ - | sed 's/ *\([0-9]*\).*/\1/' | sort -n | tr '\012' ' '` - [ -n "$pids" ] && rc=0 || rc=1 - fi - - unset pids_running - if [ -n "$pids" -a -r "$PIDFILE" ]; then - read pid_line < "$PIDFILE" - for pid in $pid_line; do - expr -- "$pid" : '[0-9]*$' >/dev/null 2>&1 \ - && expr -- " $pids " : ".* $pid .*" >/dev/null 2>&1 \ - && pids_running="$pids_running $pid" - done - [ -n "$pids_running" ] && pids=$pids_running - fi - - echo $pids - return $rc -} - -signal_process () -{ -# Returns 0 on success, 1 if kill failed, 2 if PROCNAME is not running. -## - PROCNAME="$1" - SIGNUM="$2" - - pids=`query_pids "$DAEMON" "$PIDFILE"` - [ $? -ne 0 -o -z "$pids" ] && return 2 - - kill ${SIGNUM:+"-$SIGNUM"} $pids >/dev/null 2>&1 - [ $? -ne 0 ] && return 1 - [ -n "$SIGNUM" ] && return 0 - - pids=`query_pids "$DAEMON" "$PIDFILE"` - [ $? -ne 0 -o -z "$pids" ] && return 0 - [ -z "$SIGTERM_TIMEOUT" ] && return 1 - - sleep "$SIGTERM_TIMEOUT" - kill -KILL $pids >/dev/null 2>&1 - pids=`query_pids "$DAEMON" "$PIDFILE"` - [ $? -ne 0 -o -z "$pids" ] && return 0 - return 1 -} - -log_init () -{ -# Output informational message at beginning of action. 
-## - MESSAGE="$1" - PROCNAME="$2" - - case $SYSTEM in - DEBIAN) - if [ "$VERBOSE" != no ]; then - if type log_daemon_msg >/dev/null 2>&1; then - log_daemon_msg "$MESSAGE" "$PROCNAME" - else - printf "$MESSAGE: $PROCNAME" - fi - fi - ;; - REDHAT|SUSE|LSB|*) - printf "$MESSAGE: $PROCNAME" - ;; - esac -} - -log_fini () -{ -# Output informational/error message at end of action. -## - STATUS="$1" - ERRMSG="$2" - - case $SYSTEM in - DEBIAN) - if [ "$VERBOSE" != no ]; then - if ( type log_end_msg && type log_failure_msg ) >/dev/null 2>&1; then - log_end_msg "$STATUS" - [ $STATUS -eq 0 -o -z "$ERRMSG" ] || log_failure_msg "$ERRMSG" - else - [ $STATUS -eq 0 ] && echo "." || echo " (failed)." - [ $STATUS -eq 0 -o -z "$ERRMSG" ] || echo "$ERRMSG" >&2 - fi - fi - ;; - REDHAT) - echo - ;; - SUSE) - [ $STATUS -eq 0 -o -z "$ERRMSG" ] || echo "$ERRMSG" >&2 - ;; - LSB|*) - [ $STATUS -eq 0 ] && echo "." || echo " (failed)." - [ $STATUS -eq 0 -o -z "$ERRMSG" ] || echo "$ERRMSG" >&2 - ;; - esac -} - -############################################################################### - -service_init "$@" - -case "$1" in - start) - service_start - ;; - stop) - service_stop - ;; - restart) - service_restart - ;; - try-restart|condrestart) - service_try_restart - ;; - reload) - service_reload - ;; - force-reload) - service_force_reload - ;; - status) - service_status - ;; - *) - echo "Usage: `basename \"$0\"`" \ - "(start|stop|restart|try-restart|reload|force-reload|status)" >&2 - exit 2 # LSB: invalid or excess argument(s) - ;; -esac - -service_fini diff --git a/contribs/cray/munge_build_script.sh b/contribs/cray/munge_build_script.sh deleted file mode 100644 index df778941f1a075d24e50a6c776742e08c2af8a1a..0000000000000000000000000000000000000000 --- a/contribs/cray/munge_build_script.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -# -# Build munge from sources on Cray -# - -#---------------------------------------------------------------------------- -# CONFIGURATION 
-#---------------------------------------------------------------------------- -# source and build directories -LIBROOT="${LIBROOT:-/ufs/slurm/build}" -MUNGE_BUILD="${LIBROOT}/munge" - -# packaging installation directory -DESTDIR="/tmp/munge-build" - -# installation and runtime directories -MUNGE_DIR="/opt/slurm/munge" -MUNGE_LOG="/var" - -# input and output tarballs -ZIP="${MUNGE_BUILD}/zip" -MUNGE_TAR=${ZIP}/munge*bz2 -TARBALL="${LIBROOT}/munge_build-$(date +%F).tar.gz" -#---------------------------------------------------------------------------- -# SUBROUTINES -#---------------------------------------------------------------------------- -function die() { echo -e "$@" >&2; exit 1; } - -function extract_top_level_from_tarball() { - local tarball="${1:?}" dir - test -r "${tarball}" || die "can not read ${tarball}" - - case $(file "${tarball}") in - *gzip*) compression="-z";; - *bzip2*) compression="--bzip2";; - *compress*data) compression="--uncompress";; - *tar*) compression="";; - *) compression="--auto-compress";; - esac - dir="$(tar ${compression} -tf ${tarball} | \ - sed -n '/\// { s@^\([^/]\+\).*$@\1@p;q }')" - test -n "${dir}" || die "can not determine directory from $tarball" - echo $dir -} -#---------------------------------------------------------------------------- -# SCRIPT PROPER -#---------------------------------------------------------------------------- -test ${UID} -eq 0 || die "This script wants to be run by root" -test -d $ZIP || die "No tarball directory '$ZIP'" -test -f ${MUNGE_TAR} || die "No munge tarball in $ZIP?" 
-test -d ${LIBROOT} || die "Can not cd to LIBROOT=$LIBROOT " -test -d ${MUNGE_BUILD} || mkdir -vp ${MUNGE_BUILD} -test -n "${DESTDIR}" || die "DESTDIR not set" - -# generate a clean build directory -rm -rf ${DESTDIR} ${TARBALL} - -# DEPENDENT CONFIGURATION -shopt -s nullglob -MUNGE_SRC="${MUNGE_BUILD}/$(extract_top_level_from_tarball ${MUNGE_TAR})" || exit 1 -MUNGE_LIB="${DESTDIR}${MUNGE_DIR}/lib" - -# extract source -test -d "${LIBROOT}" || mkdir -vp "${LIBROOT}" -test -d "${MUNGE_SRC}" || tar jxvf ${MUNGE_TAR} -C ${MUNGE_BUILD} -test -d "${MUNGE_SRC}" || die "need to extract munge tarball" -cd ${MUNGE_SRC} - -# Build -set -e -./configure --prefix=${MUNGE_DIR} --localstatedir=${MUNGE_LOG} - -make -j - -mkdir -p ${DESTDIR} -make DESTDIR=${DESTDIR%/}/ install - -# final tarball -tar -C ${DESTDIR} -zcpPvf ${TARBALL} .${MUNGE_DIR%/} -# scp ${TARBALL} boot: -echo generated output tarball ${TARBALL} diff --git a/contribs/cray/opt_modulefiles_slurm b/contribs/cray/opt_modulefiles_slurm deleted file mode 100644 index 675cbb7afa8a6e6711e519b7cb8813ebdfb44161..0000000000000000000000000000000000000000 --- a/contribs/cray/opt_modulefiles_slurm +++ /dev/null @@ -1,48 +0,0 @@ -#%Module1.0##################################################################### -# slurm/munge support module -# Put into /opt/modulefiles/slurm or some other part of $MODULEPATH -################################################################################ - -# SUBROUTINES -proc ModulesHelp { } { - puts stderr "\tThis is slurm $::version.\n" - puts stderr "\tPlease consult http://www.schedmd.com/slurmdocs/cray.html" -} - -# CONFIGURATION -conflict xt-pbs pbs torque -set slurmdir "/opt/slurm/default" -set mungedir "/opt/slurm/munge" - -set version "UNKNOWN" -if {![catch {exec $slurmdir/bin/sbatch --version} out]} { - set version [lindex $out 1] -} -set helptext "Support for the SLURM $version resource allocation system" - -# SCRIPT PROPER -module-whatis $helptext - -prepend-path PATH "$slurmdir/bin" 
-prepend-path PATH "$mungedir/bin" - -prepend-path MANPATH "$slurmdir/share/man" -prepend-path MANPATH "$mungedir/share/man" - -prepend-path PERL5LIB "$slurmdir/lib/perl5/site_perl" - -# other useful environment variables -setenv SINFO_FORMAT {%9P %5a %8s %.10l %.6c %.6z %.7D %10T %N} -setenv SQUEUE_FORMAT {%.6i %.8u %.7a %.14j %.3t %9r %19S %.10M %.10L %.5D %.4C} -setenv SQUEUE_ALL {yes} ;# show hidden partitions, too -setenv SQUEUE_SORT {-t,e,S} - -# logfile aliases -set-alias sd_log {tail -f "/ufs/slurm/var/log/slurmd.log"} -set-alias sc_log {tail -f "/ufs/slurm/var/log/slurmctld.log"} - -if {[exec id -u] == 0} { - prepend-path PATH "$slurmdir/sbin" - prepend-path PATH "$mungedir/sbin" - set-alias sdown {scontrol shutdown} -} diff --git a/contribs/cray/opt_modulefiles_slurm.in b/contribs/cray/opt_modulefiles_slurm.in index a4ba766cfa00f505e39dedc84d723369ae19cbce..e35f1f37f00a7515c1e10ed1fd0a0d6c6afcdb23 100644 --- a/contribs/cray/opt_modulefiles_slurm.in +++ b/contribs/cray/opt_modulefiles_slurm.in @@ -6,7 +6,7 @@ # SUBROUTINES proc ModulesHelp { } { puts stderr "\tThis is slurm $::version.\n" - puts stderr "\tPlease consult http://www.schedmd.com/slurmdocs/cray.html" + puts stderr "\tPlease consult http://slurm.schedmd.com/cray.html" } # CONFIGURATION diff --git a/contribs/cray/slurm-build-script.sh b/contribs/cray/slurm-build-script.sh deleted file mode 100644 index e6e7b42083f8eaa53d508d0edc112f90eee77b01..0000000000000000000000000000000000000000 --- a/contribs/cray/slurm-build-script.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash -# -# Build script for slurm on Cray XT/XE -# -#------------------------------------------------------------------------------- -# CONFIGURATION -#------------------------------------------------------------------------------- -#REBUILD="true" # remuild (no distclean/configure) - -# source and build directories -LIBROOT="${LIBROOT:-/ufs/slurm/build}" -SLURM_SRC="${SLURM_SRC:-${LIBROOT}/slurm-2.3.0-0.pre4}" - -BUILD_ERR="make.err" 
# make: stderr only -BUILD_LOG="make.log" # make: stdout + stderr - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# installation -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# packaging installation directory -DESTDIR="/tmp/slurm-build" - -# installation directory -SLURM_ROOT="/opt/slurm" - -# symlink to current version -SLURM_DEFAULT="${SLURM_ROOT}/default" - -# separate system configuration directory -SLURM_CONF="${SLURM_DEFAULT}/etc" - -# space-separated list of things to be built in the contribs/ folder -SLURM_CONTRIBS="contribs/perlapi contribs/torque" -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# dependencies -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# path to 'mysql_config' (will be overridden if mysql_config is in $PATH) -MYSQLCONF="${MYSQLCONF:-${LIBROOT}/mysql}" - -# munge installation directory containing lib/ and include/ subdirectories -MUNGE_DIR="${SLURM_ROOT}/munge" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -#------------------------------------------------------------------------------- -# SUBROUTINES -#------------------------------------------------------------------------------- -function die() { echo -e "$@">&2; exit -1; } - -function get_slurm_version() { - local vers_file="META" - if ! 
test -f $vers_file; then - die "ERROR: no version file '$vers_file'"\ - "\nRun this script from within the slurm source directory" - fi - sed -n 's/^.*Version:[^0-9]*\([0-9\.]\+\).*$/\1/p' ${vers_file} -} - -#------------------------------------------------------------------------------- -# SCRIPT PROPER -#------------------------------------------------------------------------------- -shopt -u nullglob -test ${UID} -eq 0 || die "This script wants to be run by root" -test -d ${SLURM_SRC} || die "can not cd to SLURM_SRC=$SLURM_SRC" -test -d $MUNGE_DIR/lib || die "munge is not yet installed" -test -d ${LIBROOT} || die "can not cd to LIBROOT=$LIBROOT" -test -n "${DESTDIR}" || die "DESTDIR not set" - -#------------------------------------------------------------------- -# Dependent Configuration -#------------------------------------------------------------------- -cd ${SLURM_SRC} - -# get current slurm version -SLURM_VER=$(get_slurm_version) || die "check your PWD (current: $(pwd))" -SLURM_DIR="${SLURM_ROOT}/${SLURM_VER}" - -# name of the tarball to generate at the end of the build process -TARBALL="${LIBROOT}/slurm_build-${SLURM_VER}.tar.gz" -#------------------------------------------------------------------- -# Dependent Tests -#------------------------------------------------------------------- -MYSQL_CONFIG="$(which mysql_config 2>/dev/null)" -if test -z "$MYSQL_CONFIG" -a -z "$MYSQLCONF"; then - die 'no mysql_config in $PATH - set $MYSQLCONF manually' -elif test -n "$MYSQL_CONFIG"; then - MYSQLCONF="$(dirname ${MYSQL_CONFIG})" -fi - -# generate a clean build directory -rm -rf ${DESTDIR} ${TARBALL} -rm -f ${BUILD_ERR} ${BUILD_LOG} - -# (re)configure -if test -z "${REBUILD}"; then - set -x - # clean everything else - make -j distclean &>/dev/null - - ./configure \ - --prefix="${SLURM_DIR}" \ - --sysconfdir="${SLURM_CONF}" \ - --enable-debug \ - --enable-front-end\ - --enable-memory-leak-debug \ - --with-mysql_config=${MYSQLCONF}\ - --with-munge="${MUNGE_DIR}" \ - 
--with-hwloc="${HWLOC_DIR}" \ - || die "configure failed" -else - # avoid the slow reconfiguration process, don't build extras - unset SLURM_CONTRIBS - touch -r config.status configure config.* configure.ac Mak* -fi - -# Build -tail -F ${BUILD_LOG} & TAIL_PID=$! -set -ex - -# swap stderr, stdout, redirect errors in separate, and both into log file -(make -j 3>&1 1>&2 2>&3 | tee ${BUILD_ERR}) &>${BUILD_LOG} -kill ${TAIL_PID} 2>/dev/null -test -s ${BUILD_ERR} && cat ${BUILD_ERR} >&2 - -# Installation -mkdir -p ${DESTDIR} -make -j DESTDIR=${DESTDIR%/}/ install - -if false;then -# Perl-API and wrappers for qsub/qstat etc. -for CONTRIB in ${SLURM_CONTRIBS} -do - test -n "${REBUILD}" || make -C ${CONTRIB} clean - make -C ${CONTRIB} - make -C ${CONTRIB} DESTDIR=${DESTDIR%/} install -done -fi - -# create the default symlink -rm -vf ${DESTDIR}${SLURM_DEFAULT} -ln -s ${SLURM_VER} ${DESTDIR}${SLURM_DEFAULT} - -# Synchronize sources or generate tarball. -tar -C ${DESTDIR} -zcf ${TARBALL} .${SLURM_ROOT} && scp ${TARBALL} boot: diff --git a/contribs/env_cache_builder.c b/contribs/env_cache_builder.c index 9baf52f7062722b0e6bec4cf19e65337b5306941..d91bc51c0af8316b5d0ef1232604b08038c0aa4a 100644 --- a/contribs/env_cache_builder.c +++ b/contribs/env_cache_builder.c @@ -30,7 +30,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/lua/Makefile.in b/contribs/lua/Makefile.in index 86236ac064ba7669eb994c0fff64a8e7132341b9..6138dd2b77336c5a68b62938c47af518d2969146 100644 --- a/contribs/lua/Makefile.in +++ b/contribs/lua/Makefile.in @@ -55,6 +55,7 @@ subdir = contribs/lua DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -127,6 +131,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -147,6 +153,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = 
@FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -156,6 +165,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -163,6 +174,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -197,6 +217,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -224,6 +247,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/mic/Makefile.am b/contribs/mic/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..7c2509a62455ef7d94d85fc134da861098005a21 --- /dev/null +++ b/contribs/mic/Makefile.am @@ -0,0 +1,2 @@ +EXTRA_DIST = \ + mpirun-mic diff --git a/contribs/arrayrun/Makefile.in b/contribs/mic/Makefile.in similarity index 93% rename from contribs/arrayrun/Makefile.in rename to contribs/mic/Makefile.in index 3feadf5d639f6fb23874aa2fbcd092cfe2e0df65..1e31add2ae0b7dc41ea530bc06e2acaded368741 100644 --- 
a/contribs/arrayrun/Makefile.in +++ b/contribs/mic/Makefile.in @@ -51,10 +51,11 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -subdir = contribs/arrayrun -DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in +subdir = contribs/mic +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -127,6 +131,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -147,6 +153,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ 
GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -156,6 +165,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -163,6 +174,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -197,6 +217,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -224,6 +247,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -313,9 +339,7 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = \ - arrayrun \ - arrayrun_worker \ - README + mpirun-mic all: all-am @@ -329,9 +353,9 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi exit 1;; \ esac; \ done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu contribs/arrayrun/Makefile'; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu contribs/mic/Makefile'; \ $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu contribs/arrayrun/Makefile + $(AUTOMAKE) --gnu contribs/mic/Makefile .PRECIOUS: 
Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ diff --git a/contribs/mic/mpirun-mic b/contribs/mic/mpirun-mic new file mode 100644 index 0000000000000000000000000000000000000000..bd708c90e6590a8c6c9435211837626c882e4d59 --- /dev/null +++ b/contribs/mic/mpirun-mic @@ -0,0 +1,125 @@ +#!/bin/bash +# +# mpirun-mic +# Helper script for launching symmetric and MIC-only MPI tasks within SLURM +# +# (C) Olli-Pekka Lehto - CSC IT Center for Science Ltd. +# 4-2013 +# + +USAGE=" + mpirun-mic: MPI execution helper script for using Xeon Phi with SLURM + + Usage: + MIC binary to be run: -m [mic_binary] + Host binary to be run: -c [host_binary] + Run using TotalView: --tv + Run using TotalView cli: --tvcli + MIC task count set using MIC_PPN environment variable + Pass additional flags to mpiexec using MPIEXEC_FLAGS_HOST and MPIEXEC_FLAGS_MIC +" + + +# Maximum number of MIC threads +MIC_MAX_THREADS=240 + +#MPIEXEC=$I_MPI_ROOT/intel64/bin/mpiexec.hydra +#MIC_MPIEXEC=$I_MPI_ROOT/mic/bin/mpiexec.hydra +RUNCMD=mpiexec.hydra + +# If not under SLURM just run on the local system +if [[ -z "$SLURM_PROCID" ]] ; then + SLURM_PROCID=0 +fi +if [[ -z "$SLURM_NODELIST" ]] ; then + SLURM_NODELIST=`hostname` +fi +if [[ -z "$SLURM_TASKS_PER_NODE" ]] ; then + SLURM_TASKS_PER_NODE=1 +fi + + +# If task count is not specified, use 1 task per MIC +if [[ -z "$MIC_PPN" ]] ; then + MIC_PPN=1 +fi + +# If threads are not set, divide the thread number by PPN +if [[ -z "$MIC_OMP_NUM_THREADS" ]] ; then + MIC_OMP_NUM_THREADS=$((MIC_MAX_THREADS/$MIC_PPN)) +fi + +if [ $# -lt 1 ] ; then + echo "$USAGE" >&2 + exit 1 +fi + +while getopts "vhm:c:-:" OPTION +do + case $OPTION in + h) + echo $USAGE + exit 0 + ;; + m) + MIC_BINARY=$OPTARG + ;; + c) + CPU_BINARY=$OPTARG + ;; + v) + MPIRUN_MIC_VERBOSE=1 + ;; + -) + case $OPTARG in + tv) + USE_TOTALVIEW=1 + ;; + tvcli) + USE_TOTALVIEWCLI=1 + ;; + \?) echo $USAGE >&2 + exit 1 + ;; + esac + ;; + \?) 
echo $USAGE >&2 + exit 1 + ;; + esac +done +unset I_MPI_PMI_LIBRARY +if [ $SLURM_PROCID -eq 0 ] ; then + for i in `scontrol show hostname $SLURM_NODELIST` ; do + if [[ -n "$CPU_BINARY" ]] ; then + RUNCMD="$RUNCMD : $MPIEXEC_FLAGS_HOST -host $i -n $SLURM_TASKS_PER_NODE $CPU_BINARY " + fi + if [[ -n "$MIC_BINARY" ]] ; then + RUNCMD="$RUNCMD : -env OMP_NUM_THREADS $MIC_OMP_NUM_THREADS -env LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH:$LD_LIBRARY_PATH $MPIEXEC_FLAGS_MIC -host $i-mic0 -n $MIC_PPN $MIC_BINARY " + fi + done + + if [[ -n "$USE_TOTALVIEW" ]] ; then + RUNCMD="totalview -args $RUNCMD" + elif [[ -n "$USE_TOTALVIEWCLI" ]] ; then + RUNCMD="totalviewcli -args $RUNCMD" + fi + + if [[ -n "$MPIRUN_MIC_VERBOSE" ]] ; then + echo + echo "########################################################################" + echo "MPI Tasks per host: $SLURM_TASKS_PER_NODE" + echo "Threads per host MPI task: $OMP_NUM_THREADS" + echo "MPI Tasks per MIC: $MIC_PPN" + echo "Threads per MIC MPI task: $MIC_OMP_NUM_THREADS" + echo + echo "Run command: " + echo "$RUNCMD" + echo "########################################################################" + echo + fi + + $RUNCMD + +fi + diff --git a/contribs/pam/Makefile.in b/contribs/pam/Makefile.in index 0bab4f4d64496a385dd4bd8d167e0cd345e4dd19..656e18ae681fa4fdc9e052f3431d1e398d31eace 100644 --- a/contribs/pam/Makefile.in +++ b/contribs/pam/Makefile.in @@ -59,6 +59,7 @@ subdir = contribs/pam DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -187,6 +191,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -207,6 +213,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -216,6 +225,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -223,6 +234,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -257,6 +277,9 @@ NRT_CPPFLAGS = 
@NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -284,6 +307,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c index 9381d750e28bbbec35045aa6a0a2cd9dfa160170..901704d5ee17f0df1ae0b969ffe55260c751490b 100644 --- a/contribs/pam/pam_slurm.c +++ b/contribs/pam/pam_slurm.c @@ -321,8 +321,8 @@ _slurm_match_allocation(uid_t uid) DBG ("does uid %ld have \"%s\" allocated?", uid, nodename); - if (slurm_load_jobs((time_t) 0, &msg, SHOW_ALL) < 0) { - _log_msg(LOG_ERR, "slurm_load_jobs: %s", + if (slurm_load_job_user(&msg, uid, SHOW_ALL) < 0) { + _log_msg(LOG_ERR, "slurm_load_job_user: %s", slurm_strerror(errno)); return 0; } @@ -332,7 +332,7 @@ _slurm_match_allocation(uid_t uid) for (i = 0; i < msg->record_count; i++) { job_info_t *j = &msg->job_array[i]; - if ( (j->user_id == uid) && (j->job_state == JOB_RUNNING)) { + if (j->job_state == JOB_RUNNING) { DBG ("jobid %ld: nodes=\"%s\"", j->job_id, j->nodes); diff --git a/contribs/perlapi/Makefile.in b/contribs/perlapi/Makefile.in index f2fbaf9afd1f4df0570fbed9a93730da3cb9f3f6..6e973fc724b2fa0f5aa10e4c93131a33017bab53 100644 --- a/contribs/perlapi/Makefile.in +++ b/contribs/perlapi/Makefile.in @@ -55,6 +55,7 @@ subdir = contribs/perlapi DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ 
$(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ 
+HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/perlapi/libslurm/Makefile.in b/contribs/perlapi/libslurm/Makefile.in index 2920d71ac27ded7c35a1f40e13938e09ca9d6578..243f2b2d0b08ba7d5b80397aa25a8cba291a98d6 100644 --- a/contribs/perlapi/libslurm/Makefile.in +++ b/contribs/perlapi/libslurm/Makefile.in @@ -55,6 +55,7 @@ subdir = contribs/perlapi/libslurm DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + 
$(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -127,6 +131,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -147,6 +153,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -156,6 +165,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -163,6 +174,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -197,6 +217,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -224,6 +247,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = 
@REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/perlapi/libslurm/perl/alloc.c b/contribs/perlapi/libslurm/perl/alloc.c index b56a2596896000972898117129f75a3a9597e5e2..f1fa3a2c2fc6cacb1f7a26543576daadbc6c5260 100644 --- a/contribs/perlapi/libslurm/perl/alloc.c +++ b/contribs/perlapi/libslurm/perl/alloc.c @@ -32,7 +32,7 @@ hv_to_job_desc_msg(HV *hv, job_desc_msg_t *job_desc) slurm_init_job_desc_msg(job_desc); FETCH_FIELD(hv, job_desc, account, charp, FALSE); - FETCH_FIELD(hv, job_desc, acctg_freq, uint16_t, FALSE); + FETCH_FIELD(hv, job_desc, acctg_freq, charp, FALSE); FETCH_FIELD(hv, job_desc, alloc_node, charp, FALSE); FETCH_FIELD(hv, job_desc, alloc_resp_port, uint16_t, FALSE); FETCH_FIELD(hv, job_desc, alloc_sid, uint32_t, FALSE); @@ -57,6 +57,7 @@ hv_to_job_desc_msg(HV *hv, job_desc_msg_t *job_desc) Perl_warn(aTHX_ "`argv' of job descriptor is not an array reference, ignored"); } } + FETCH_FIELD(hv, job_desc, array_inx, charp, FALSE); FETCH_FIELD(hv, job_desc, begin_time, time_t, FALSE); FETCH_FIELD(hv, job_desc, ckpt_interval, uint16_t, FALSE); FETCH_FIELD(hv, job_desc, ckpt_dir, charp, FALSE); @@ -111,6 +112,7 @@ hv_to_job_desc_msg(HV *hv, job_desc_msg_t *job_desc) FETCH_FIELD(hv, job_desc, partition, charp, FALSE); FETCH_FIELD(hv, job_desc, plane_size, uint16_t, FALSE); FETCH_FIELD(hv, job_desc, priority, uint32_t, FALSE); + FETCH_FIELD(hv, job_desc, profile, uint32_t, FALSE); FETCH_FIELD(hv, job_desc, qos, charp, FALSE); FETCH_FIELD(hv, job_desc, resp_host, charp, FALSE); FETCH_FIELD(hv, job_desc, req_nodes, charp, FALSE); diff --git a/contribs/perlapi/libslurm/perl/conf.c b/contribs/perlapi/libslurm/perl/conf.c index d3676c6b5b6efbf752c27f7261510a90193b15e4..eb44a4e5591a808989a8f18326cecb524c6020b3 100644 --- 
a/contribs/perlapi/libslurm/perl/conf.c +++ b/contribs/perlapi/libslurm/perl/conf.c @@ -17,6 +17,12 @@ int slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) { STORE_FIELD(hv, conf, last_update, time_t); + if(conf->acct_gather_profile_type) + STORE_FIELD(hv, conf, acct_gather_profile_type, charp); + if(conf->acct_gather_infiniband_type) + STORE_FIELD(hv, conf, acct_gather_infiniband_type, charp); + if(conf->acct_gather_filesystem_type) + STORE_FIELD(hv, conf, acct_gather_filesystem_type, charp); STORE_FIELD(hv, conf, accounting_storage_enforce, uint16_t); if(conf->accounting_storage_backup_host) STORE_FIELD(hv, conf, accounting_storage_backup_host, charp); @@ -54,12 +60,16 @@ slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) STORE_FIELD(hv, conf, debug_flags, uint32_t); STORE_FIELD(hv, conf, def_mem_per_cpu, uint32_t); STORE_FIELD(hv, conf, disable_root_jobs, uint16_t); + STORE_FIELD(hv, conf, dynalloc_port, uint16_t); STORE_FIELD(hv, conf, enforce_part_limits, uint16_t); if(conf->epilog) STORE_FIELD(hv, conf, epilog, charp); STORE_FIELD(hv, conf, epilog_msg_time, uint32_t); if(conf->epilog_slurmctld) STORE_FIELD(hv, conf, epilog_slurmctld, charp); + if(conf->ext_sensors_type) + STORE_FIELD(hv, conf, ext_sensors_type, charp); + STORE_FIELD(hv, conf, ext_sensors_freq, uint16_t); STORE_FIELD(hv, conf, fast_schedule, uint16_t); STORE_FIELD(hv, conf, first_job_id, uint32_t); @@ -69,10 +79,12 @@ slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) STORE_FIELD(hv, conf, group_info, uint16_t); STORE_FIELD(hv, conf, hash_val, uint32_t); STORE_FIELD(hv, conf, health_check_interval, uint16_t); + STORE_FIELD(hv, conf, health_check_node_state, uint16_t); if(conf->health_check_program) STORE_FIELD(hv, conf, health_check_program, charp); STORE_FIELD(hv, conf, inactive_limit, uint16_t); - STORE_FIELD(hv, conf, job_acct_gather_freq, uint16_t); + if (conf->job_acct_gather_type) + STORE_FIELD(hv, conf, job_acct_gather_freq, charp); if(conf->job_acct_gather_type) STORE_FIELD(hv, 
conf, job_acct_gather_type, charp); @@ -97,12 +109,14 @@ slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) if(conf->job_submit_plugins) STORE_FIELD(hv, conf, job_submit_plugins, charp); + STORE_FIELD(hv, conf, keep_alive_time, uint16_t); STORE_FIELD(hv, conf, kill_on_bad_exit, uint16_t); STORE_FIELD(hv, conf, kill_wait, uint16_t); if(conf->licenses) STORE_FIELD(hv, conf, licenses, charp); if(conf->mail_prog) STORE_FIELD(hv, conf, mail_prog, charp); + STORE_FIELD(hv, conf, max_array_sz, uint16_t); STORE_FIELD(hv, conf, max_job_cnt, uint16_t); STORE_FIELD(hv, conf, max_mem_per_cpu, uint32_t); STORE_FIELD(hv, conf, max_tasks_per_node, uint16_t); @@ -154,7 +168,11 @@ slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) STORE_FIELD(hv, conf, resume_program, charp); STORE_FIELD(hv, conf, resume_rate, uint16_t); STORE_FIELD(hv, conf, resume_timeout, uint16_t); + if(conf->resv_epilog) + STORE_FIELD(hv, conf, resv_epilog, charp); STORE_FIELD(hv, conf, resv_over_run, uint16_t); + if(conf->resv_prolog) + STORE_FIELD(hv, conf, resv_prolog, charp); STORE_FIELD(hv, conf, ret2service, uint16_t); if(conf->salloc_default_command) STORE_FIELD(hv, conf, salloc_default_command, charp); @@ -187,6 +205,8 @@ slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) STORE_FIELD(hv, conf, slurmctld_logfile, charp); if(conf->slurmctld_pidfile) STORE_FIELD(hv, conf, slurmctld_pidfile, charp); + if(conf->slurmctld_plugstack) + STORE_FIELD(hv, conf, slurmctld_plugstack, charp); STORE_FIELD(hv, conf, slurmctld_port, uint32_t); STORE_FIELD(hv, conf, slurmctld_port_count, uint16_t); STORE_FIELD(hv, conf, slurmctld_timeout, uint16_t); @@ -251,6 +271,9 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) memset(conf, 0, sizeof(slurm_ctl_conf_t)); FETCH_FIELD(hv, conf, last_update, time_t, TRUE); + FETCH_FIELD(hv, conf, acct_gather_profile_type, charp, FALSE); + FETCH_FIELD(hv, conf, acct_gather_infiniband_type, charp, FALSE); + FETCH_FIELD(hv, conf, acct_gather_filesystem_type, charp, FALSE); 
FETCH_FIELD(hv, conf, accounting_storage_enforce, uint16_t, TRUE); FETCH_FIELD(hv, conf, accounting_storage_backup_host, charp, FALSE); FETCH_FIELD(hv, conf, accounting_storage_host, charp, FALSE); @@ -274,10 +297,12 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) FETCH_FIELD(hv, conf, debug_flags, uint32_t, TRUE); FETCH_FIELD(hv, conf, def_mem_per_cpu, uint32_t, TRUE); FETCH_FIELD(hv, conf, disable_root_jobs, uint16_t, TRUE); + FETCH_FIELD(hv, conf, dynalloc_port, uint16_t, TRUE); FETCH_FIELD(hv, conf, enforce_part_limits, uint16_t, TRUE); FETCH_FIELD(hv, conf, epilog, charp, FALSE); FETCH_FIELD(hv, conf, epilog_msg_time, uint32_t, TRUE); FETCH_FIELD(hv, conf, epilog_slurmctld, charp, FALSE); + FETCH_FIELD(hv, conf, ext_sensors_freq, uint16_t, TRUE); FETCH_FIELD(hv, conf, fast_schedule, uint16_t, TRUE); FETCH_FIELD(hv, conf, first_job_id, uint32_t, TRUE); @@ -286,9 +311,10 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) FETCH_FIELD(hv, conf, group_info, uint16_t, TRUE); FETCH_FIELD(hv, conf, hash_val, uint32_t, TRUE); FETCH_FIELD(hv, conf, health_check_interval, uint16_t, TRUE); + FETCH_FIELD(hv, conf, health_check_node_state, uint16_t, TRUE); FETCH_FIELD(hv, conf, health_check_program, charp, FALSE); FETCH_FIELD(hv, conf, inactive_limit, uint16_t, TRUE); - FETCH_FIELD(hv, conf, job_acct_gather_freq, uint16_t, TRUE); + FETCH_FIELD(hv, conf, job_acct_gather_freq, charp, TRUE); FETCH_FIELD(hv, conf, job_acct_gather_type, charp, FALSE); FETCH_FIELD(hv, conf, job_ckpt_dir, charp, FALSE); @@ -303,10 +329,12 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) FETCH_FIELD(hv, conf, job_requeue, uint16_t, TRUE); FETCH_FIELD(hv, conf, job_submit_plugins, charp, FALSE); + FETCH_FIELD(hv, conf, keep_alive_time, uint16_t, TRUE); FETCH_FIELD(hv, conf, kill_on_bad_exit, uint16_t, TRUE); FETCH_FIELD(hv, conf, kill_wait, uint16_t, TRUE); FETCH_FIELD(hv, conf, licenses, charp, FALSE); FETCH_FIELD(hv, conf, mail_prog, charp, FALSE); + FETCH_FIELD(hv, conf, 
max_array_sz, uint16_t, TRUE); FETCH_FIELD(hv, conf, max_job_cnt, uint16_t, TRUE); FETCH_FIELD(hv, conf, max_mem_per_cpu, uint32_t, TRUE); FETCH_FIELD(hv, conf, max_tasks_per_node, uint16_t, TRUE); @@ -345,7 +373,9 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) FETCH_FIELD(hv, conf, resume_program, charp, FALSE); FETCH_FIELD(hv, conf, resume_rate, uint16_t, TRUE); FETCH_FIELD(hv, conf, resume_timeout, uint16_t, TRUE); + FETCH_FIELD(hv, conf, resv_epilog, charp, FALSE); FETCH_FIELD(hv, conf, resv_over_run, uint16_t, TRUE); + FETCH_FIELD(hv, conf, resv_prolog, charp, FALSE); FETCH_FIELD(hv, conf, ret2service, uint16_t, TRUE); FETCH_FIELD(hv, conf, salloc_default_command, charp, FALSE); @@ -368,6 +398,7 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) FETCH_FIELD(hv, conf, slurmctld_debug, uint16_t, TRUE); FETCH_FIELD(hv, conf, slurmctld_logfile, charp, FALSE); FETCH_FIELD(hv, conf, slurmctld_pidfile, charp, FALSE); + FETCH_FIELD(hv, conf, slurmctld_plugstack, charp, FALSE); FETCH_FIELD(hv, conf, slurmctld_port, uint32_t, TRUE); FETCH_FIELD(hv, conf, slurmctld_port_count, uint16_t, TRUE); FETCH_FIELD(hv, conf, slurmctld_timeout, uint16_t, TRUE); @@ -468,7 +499,11 @@ hv_to_step_update_request_msg(HV *hv, step_update_request_msg_t *update_msg) { slurm_init_update_step_msg(update_msg); + FETCH_FIELD(hv, update_msg, end_time, time_t, TRUE); + FETCH_FIELD(hv, update_msg, exit_code, uint32_t, TRUE); FETCH_FIELD(hv, update_msg, job_id, uint32_t, TRUE); + FETCH_FIELD(hv, update_msg, name, charp, FALSE); + FETCH_FIELD(hv, update_msg, start_time, time_t, TRUE); FETCH_FIELD(hv, update_msg, step_id, uint32_t, TRUE); FETCH_FIELD(hv, update_msg, time_limit, uint32_t, TRUE); diff --git a/contribs/perlapi/libslurm/perl/job.c b/contribs/perlapi/libslurm/perl/job.c index 24fcd39ec9546867b5360fbc6bab1fb8740bdb95..d7c53568328af987f68fa5c04c5707cc855324c4 100644 --- a/contribs/perlapi/libslurm/perl/job.c +++ b/contribs/perlapi/libslurm/perl/job.c @@ -24,6 +24,8 @@ 
job_info_to_hv(job_info_t *job_info, HV *hv) if(job_info->alloc_node) STORE_FIELD(hv, job_info, alloc_node, charp); STORE_FIELD(hv, job_info, alloc_sid, uint32_t); + STORE_FIELD(hv, job_info, array_job_id, uint32_t); + STORE_FIELD(hv, job_info, array_task_id, uint32_t); STORE_FIELD(hv, job_info, assoc_id, uint32_t); STORE_FIELD(hv, job_info, batch_flag, uint16_t); if(job_info->command) @@ -60,6 +62,7 @@ job_info_to_hv(job_info_t *job_info, HV *hv) STORE_FIELD(hv, job_info, licenses, charp); STORE_FIELD(hv, job_info, max_cpus, uint32_t); STORE_FIELD(hv, job_info, max_nodes, uint32_t); + STORE_FIELD(hv, job_info, profile, uint32_t); STORE_FIELD(hv, job_info, sockets_per_node, uint16_t); STORE_FIELD(hv, job_info, cores_per_socket, uint16_t); STORE_FIELD(hv, job_info, threads_per_core, uint16_t); @@ -152,6 +155,8 @@ hv_to_job_info(HV *hv, job_info_t *job_info) FETCH_FIELD(hv, job_info, account, charp, FALSE); FETCH_FIELD(hv, job_info, alloc_node, charp, FALSE); FETCH_FIELD(hv, job_info, alloc_sid, uint32_t, TRUE); + FETCH_FIELD(hv, job_info, array_job_id, uint32_t, TRUE); + FETCH_FIELD(hv, job_info, array_task_id, uint32_t, TRUE); FETCH_FIELD(hv, job_info, batch_flag, uint16_t, TRUE); FETCH_FIELD(hv, job_info, command, charp, FALSE); FETCH_FIELD(hv, job_info, comment, charp, FALSE); @@ -184,6 +189,7 @@ hv_to_job_info(HV *hv, job_info_t *job_info) FETCH_FIELD(hv, job_info, licenses, charp, FALSE); FETCH_FIELD(hv, job_info, max_cpus, uint32_t, TRUE); FETCH_FIELD(hv, job_info, max_nodes, uint32_t, TRUE); + FETCH_FIELD(hv, job_info, profile, uint32_t, TRUE); FETCH_FIELD(hv, job_info, sockets_per_node, uint16_t, TRUE); FETCH_FIELD(hv, job_info, cores_per_socket, uint16_t, TRUE); FETCH_FIELD(hv, job_info, threads_per_core, uint16_t, TRUE); @@ -213,6 +219,8 @@ hv_to_job_info(HV *hv, job_info_t *job_info) FETCH_FIELD(hv, job_info, pn_min_cpus, uint16_t, TRUE); FETCH_FIELD(hv, job_info, pn_min_tmp_disk, uint32_t, TRUE); FETCH_FIELD(hv, job_info, partition, charp, FALSE); + 
FETCH_FIELD(hv, job_info, pre_sus_time, time_t, TRUE); + FETCH_FIELD(hv, job_info, priority, uint32_t, TRUE); FETCH_FIELD(hv, job_info, qos, charp, FALSE); FETCH_FIELD(hv, job_info, req_nodes, charp, FALSE); svp = hv_fetch(hv, "req_node_inx", 12, FALSE); @@ -244,6 +252,7 @@ hv_to_job_info(HV *hv, job_info_t *job_info) FETCH_FIELD(hv, job_info, suspend_time, time_t, TRUE); FETCH_FIELD(hv, job_info, time_limit, uint32_t, TRUE); FETCH_FIELD(hv, job_info, time_min, uint32_t, TRUE); + FETCH_FIELD(hv, job_info, user_id, uint32_t, TRUE); FETCH_FIELD(hv, job_info, wait4switch, uint32_t, FALSE); FETCH_FIELD(hv, job_info, wckey, charp, FALSE); FETCH_FIELD(hv, job_info, work_dir, charp, FALSE); diff --git a/contribs/perlapi/libslurm/perl/step.c b/contribs/perlapi/libslurm/perl/step.c index 535708e33063bcaaae84912424e5075993aae109..d3b3c2a194770ca43e5eac52b994341d59847220 100644 --- a/contribs/perlapi/libslurm/perl/step.c +++ b/contribs/perlapi/libslurm/perl/step.c @@ -20,6 +20,8 @@ job_step_info_to_hv(job_step_info_t *step_info, HV *hv) int j; AV *av; + STORE_FIELD(hv, step_info, array_job_id, uint32_t); + STORE_FIELD(hv, step_info, array_task_id, uint32_t); if(step_info->ckpt_dir) STORE_FIELD(hv, step_info, ckpt_dir, charp); STORE_FIELD(hv, step_info, ckpt_interval, uint16_t); @@ -44,6 +46,7 @@ job_step_info_to_hv(job_step_info_t *step_info, HV *hv) STORE_FIELD(hv, step_info, num_tasks, uint32_t); if(step_info->partition) STORE_FIELD(hv, step_info, partition, charp); + STORE_FIELD(hv, step_info, profile, uint32_t); if(step_info->resv_ports) STORE_FIELD(hv, step_info, resv_ports, charp); STORE_FIELD(hv, step_info, run_time, time_t); @@ -51,6 +54,7 @@ job_step_info_to_hv(job_step_info_t *step_info, HV *hv) STORE_FIELD(hv, step_info, step_id, uint32_t); STORE_FIELD(hv, step_info, time_limit, uint32_t); STORE_FIELD(hv, step_info, user_id, uint32_t); + STORE_FIELD(hv, step_info, state, uint16_t); return 0; } @@ -65,6 +69,8 @@ hv_to_job_step_info(HV *hv, job_step_info_t 
*step_info) AV *av; int i, n; + FETCH_FIELD(hv, step_info, array_job_id, uint32_t, TRUE); + FETCH_FIELD(hv, step_info, array_task_id, uint32_t, TRUE); FETCH_FIELD(hv, step_info, ckpt_dir, charp, FALSE); FETCH_FIELD(hv, step_info, ckpt_interval, uint16_t, TRUE); FETCH_FIELD(hv, step_info, gres, charp, FALSE); @@ -90,12 +96,14 @@ hv_to_job_step_info(HV *hv, job_step_info_t *step_info) FETCH_FIELD(hv, step_info, num_cpus, uint32_t, TRUE); FETCH_FIELD(hv, step_info, num_tasks, uint32_t, TRUE); FETCH_FIELD(hv, step_info, partition, charp, FALSE); + FETCH_FIELD(hv, step_info, profile, uint32_t, TRUE); FETCH_FIELD(hv, step_info, resv_ports, charp, FALSE); FETCH_FIELD(hv, step_info, run_time, time_t, TRUE); FETCH_FIELD(hv, step_info, start_time, time_t, TRUE); FETCH_FIELD(hv, step_info, step_id, uint32_t, TRUE); FETCH_FIELD(hv, step_info, time_limit, uint32_t, TRUE); FETCH_FIELD(hv, step_info, user_id, uint32_t, TRUE); + FETCH_FIELD(hv, step_info, state, uint16_t, TRUE); return 0; } diff --git a/contribs/perlapi/libslurm/perl/step_ctx.c b/contribs/perlapi/libslurm/perl/step_ctx.c index b5de001d0510fd128202f35e7e7e9d847bbbdf37..4d8909f07bbf43ee2bd1c7a95af4e7f97ee590b7 100644 --- a/contribs/perlapi/libslurm/perl/step_ctx.c +++ b/contribs/perlapi/libslurm/perl/step_ctx.c @@ -23,7 +23,7 @@ hv_to_slurm_step_ctx_params(HV *hv, slurm_step_ctx_params_t *params) FETCH_FIELD(hv, params, exclusive, uint16_t, FALSE); FETCH_FIELD(hv, params, immediate, uint16_t, FALSE); FETCH_FIELD(hv, params, job_id, uint32_t, FALSE); /* for slurm_step_ctx_create_no_alloc */ - FETCH_FIELD(hv, params, mem_per_cpu, uint16_t, FALSE); + FETCH_FIELD(hv, params, pn_min_memory, uint32_t, FALSE); FETCH_FIELD(hv, params, ckpt_dir, charp, FALSE); FETCH_FIELD(hv, params, gres, charp, FALSE); FETCH_FIELD(hv, params, name, charp, FALSE); @@ -34,6 +34,7 @@ hv_to_slurm_step_ctx_params(HV *hv, slurm_step_ctx_params_t *params) FETCH_FIELD(hv, params, node_list, charp, FALSE); FETCH_FIELD(hv, params, overcommit, bool, 
FALSE); FETCH_FIELD(hv, params, plane_size, uint16_t, FALSE); + FETCH_FIELD(hv, params, profile, uint32_t, FALSE); FETCH_FIELD(hv, params, relative, uint16_t, FALSE); FETCH_FIELD(hv, params, resv_port_cnt, uint32_t, FALSE); FETCH_FIELD(hv, params, task_count, uint32_t, FALSE); @@ -136,6 +137,7 @@ hv_to_slurm_step_launch_params(HV *hv, slurm_step_launch_params_t *params) FETCH_FIELD(hv, params, msg_timeout, uint32_t, FALSE); FETCH_FIELD(hv, params, buffered_stdio, bool, FALSE); FETCH_FIELD(hv, params, labelio, bool, FALSE); + FETCH_FIELD(hv, params, profile, uint32_t, FALSE); FETCH_FIELD(hv, params, remote_output_filename, charp, FALSE); FETCH_FIELD(hv, params, remote_error_filename, charp, FALSE); FETCH_FIELD(hv, params, remote_input_filename, charp, FALSE); @@ -199,7 +201,7 @@ hv_to_slurm_step_launch_params(HV *hv, slurm_step_launch_params_t *params) FETCH_FIELD(hv, params, mpi_plugin_name, charp, FALSE); FETCH_FIELD(hv, params, open_mode, uint8_t, FALSE); - FETCH_FIELD(hv, params, acctg_freq, uint16_t, FALSE); + FETCH_FIELD(hv, params, acctg_freq, charp, FALSE); FETCH_FIELD(hv, params, pty, bool, FALSE); FETCH_FIELD(hv, params, ckpt_dir, charp, FALSE); FETCH_FIELD(hv, params, restart_dir, charp, FALSE); diff --git a/contribs/perlapi/libslurmdb/Makefile.in b/contribs/perlapi/libslurmdb/Makefile.in index 00018272e3497e978f3d0020426ca3b69481ddcb..2ec146697eb9fb0fdbabe44f6501c6c5676ea2ad 100644 --- a/contribs/perlapi/libslurmdb/Makefile.in +++ b/contribs/perlapi/libslurmdb/Makefile.in @@ -55,6 +55,7 @@ subdir = contribs/perlapi/libslurmdb DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ 
$(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -127,6 +131,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -147,6 +153,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -156,6 +165,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -163,6 +174,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ 
HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -197,6 +217,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -224,6 +247,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/perlapi/libslurmdb/perl/Slurmdb.pm b/contribs/perlapi/libslurmdb/perl/Slurmdb.pm index 52975591f4699e5a9e810e31f491904a7c797aa9..5b12030dfc76a9e08ae71073877914894e006c5f 100644 --- a/contribs/perlapi/libslurmdb/perl/Slurmdb.pm +++ b/contribs/perlapi/libslurmdb/perl/Slurmdb.pm @@ -135,7 +135,7 @@ None by default. =head1 SEE ALSO -http://www.schedmd.com/slurmdocs/accounting.html +http://slurm.schedmd.com/accounting.html =head1 AUTHOR @@ -148,7 +148,7 @@ Don Lipari, <lt>lipari@llnl.gov<gt> CODE-OCEC-09-009. All rights reserved. This file is part of SLURM, a resource management program. For - details, see <http://www.schedmd.com/slurmdocs/>. Please also + details, see <http://slurm.schedmd.com/>. Please also read the included file: DISCLAIMER. 
SLURM is free software; you can redistribute it and/or modify it diff --git a/contribs/phpext/Makefile.in b/contribs/phpext/Makefile.in index 974b212bd5b258baf6ad38824535a12f4b86bc15..70661fc1f1d5faa4486c35a1a13ff2f6b9a91ab9 100644 --- a/contribs/phpext/Makefile.in +++ b/contribs/phpext/Makefile.in @@ -55,6 +55,7 @@ subdir = contribs/phpext DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -127,6 +131,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -147,6 +153,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = 
@FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -156,6 +165,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -163,6 +174,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -197,6 +217,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -224,6 +247,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/pmi2/COPYRIGHT b/contribs/pmi2/COPYRIGHT new file mode 100644 index 0000000000000000000000000000000000000000..a9216d4953d21be1cac8e3b4789ad54b4138f935 --- /dev/null +++ b/contribs/pmi2/COPYRIGHT @@ -0,0 +1,39 @@ + + COPYRIGHT + +The following is a notice of limited availability of the code, and disclaimer +which must be included in the prologue of the code and in all source listings +of the code. 
+ +Copyright Notice + + 2002 University of Chicago + +Permission is hereby granted to use, reproduce, prepare derivative works, and +to redistribute to others. This software was authored by: + +Mathematics and Computer Science Division +Argonne National Laboratory, Argonne IL 60439 + +(and) + +Department of Computer Science +University of Illinois at Urbana-Champaign + + + GOVERNMENT LICENSE + +Portions of this material resulted from work developed under a U.S. +Government Contract and are subject to the following license: the Government +is granted for itself and others acting on its behalf a paid-up, nonexclusive, +irrevocable worldwide license in this computer software to reproduce, prepare +derivative works, and perform publicly and display publicly. + + DISCLAIMER + +This computer code material was prepared, in part, as an account of work +sponsored by an agency of the United States Government. Neither the United +States, nor the University of Chicago, nor any of their employees, makes any +warranty express or implied, or assumes any legal liability or responsibility +for the accuracy, completeness, or usefulness of any information, apparatus, +product, or process disclosed, or represents that its use would not infringe +privately owned rights. diff --git a/contribs/pmi2/Makefile.am b/contribs/pmi2/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..8ebff5144e05285c315a2c3ff42972f8d4842c4d --- /dev/null +++ b/contribs/pmi2/Makefile.am @@ -0,0 +1,37 @@ +# Makefile for PMI2 client side library. 
+# The GNU ld version script is generated at build time (see BUILT_SOURCES) and must be removed again by clean/distclean. + +AUTOMAKE_OPTIONS = foreign + +pkginclude_HEADERS = slurm/pmi2.h + +if WITH_GNU_LD +PMI2_VERSION_SCRIPT = \ + pmi2_version.map +PMI2_OTHER_FLAGS = \ + -Wl,--version-script=$(PMI2_VERSION_SCRIPT) +endif + +libpmi2_current = 0 +libpmi2_age = 0 +libpmi2_rev = 0 + +BUILT_SOURCES = $(PMI2_VERSION_SCRIPT) +lib_LTLIBRARIES = libpmi2.la + +libpmi2_la_SOURCES = pmi2_api.c pmi2_util.c slurm/pmi2.h +libpmi2_la_LDFLAGS = $(LIB_LDFLAGS) -version-info $(libpmi2_current):$(libpmi2_rev):$(libpmi2_age) \ + $(PMI2_OTHER_FLAGS) + +$(PMI2_VERSION_SCRIPT) : + (echo "{ global:"; \ + echo " PMI2_*;"; \ + echo " local: *;"; \ + echo "};") > $(PMI2_VERSION_SCRIPT) + +CLEANFILES = \ + $(PMI2_VERSION_SCRIPT) + +DISTCLEANFILES = \ + $(PMI2_VERSION_SCRIPT) + diff --git a/contribs/pmi2/Makefile.in b/contribs/pmi2/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..2f5120a0bdbb17a5174450e066404ad08fdeeb25 --- /dev/null +++ b/contribs/pmi2/Makefile.in @@ -0,0 +1,769 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for PMI2 client side library. 
+# + + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = contribs/pmi2 +DIST_COMMON = $(pkginclude_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + 
$(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + 
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgincludedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libpmi2_la_LIBADD = +am_libpmi2_la_OBJECTS = pmi2_api.lo pmi2_util.lo +libpmi2_la_OBJECTS = $(am_libpmi2_la_OBJECTS) +libpmi2_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libpmi2_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libpmi2_la_SOURCES) +DIST_SOURCES = $(libpmi2_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(pkginclude_HEADERS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ 
+BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ 
+MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = 
@SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +pkginclude_HEADERS = slurm/pmi2.h +@WITH_GNU_LD_TRUE@PMI2_VERSION_SCRIPT = \ +@WITH_GNU_LD_TRUE@ pmi2_version.map 
+ +@WITH_GNU_LD_TRUE@PMI2_OTHER_FLAGS = \ +@WITH_GNU_LD_TRUE@ -Wl,--version-script=$(PMI2_VERSION_SCRIPT) + +libpmi2_current = 0 +libpmi2_age = 0 +libpmi2_rev = 0 +BUILT_SOURCES = $(PMI2_VERSION_SCRIPT) +lib_LTLIBRARIES = libpmi2.la +libpmi2_la_SOURCES = pmi2_api.c pmi2_util.c slurm/pmi2.h +libpmi2_la_LDFLAGS = $(LIB_LDFLAGS) -version-info $(libpmi2_current):$(libpmi2_rev):$(libpmi2_age) \ + $(PMI2_OTHER_FLAGS) + +CLEANFILES = \ + $(PMI2_VERSION_SCRIPT) + +DISTCLEANFILES = \ + $(PMI2_VERSION_SCRIPT) + +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign contribs/pmi2/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign contribs/pmi2/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f 
"$${dir}/so_locations"; \ + done +libpmi2.la: $(libpmi2_la_OBJECTS) $(libpmi2_la_DEPENDENCIES) $(EXTRA_libpmi2_la_DEPENDENCIES) + $(libpmi2_la_LINK) -rpath $(libdir) $(libpmi2_la_OBJECTS) $(libpmi2_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pmi2_api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pmi2_util.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkgincludeHEADERS: $(pkginclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgincludedir)" || exit 1; \ + fi; \ + for 
p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludedir)" || exit $$?; \ + done + +uninstall-pkgincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgincludedir)'; $(am__uninstall_files_from_dir) + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ 
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-pkgincludeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES uninstall-pkgincludeHEADERS + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-pkgincludeHEADERS install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags uninstall uninstall-am \ + uninstall-libLTLIBRARIES 
uninstall-pkgincludeHEADERS + + +$(PMI2_VERSION_SCRIPT) : + (echo "{ global:"; \ + echo " PMI2_*;"; \ + echo " local: *;"; \ + echo "};") > $(PMI2_VERSION_SCRIPT) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/contribs/pmi2/pmi2.h b/contribs/pmi2/pmi2.h new file mode 100644 index 0000000000000000000000000000000000000000..e72a985ff676963f85833608f293ad1b66e153f0 --- /dev/null +++ b/contribs/pmi2/pmi2.h @@ -0,0 +1,704 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef PMI2_H_INCLUDED +#define PMI2_H_INCLUDED + +#ifndef USE_PMI2_API +/*#error This header file defines the PMI2 API, but PMI2 was not selected*/ +#endif + +#define PMI2_MAX_KEYLEN 64 +#define PMI2_MAX_VALLEN 1024 +#define PMI2_MAX_ATTRVALUE 1024 +#define PMI2_ID_NULL -1 + +#define PMII_COMMANDLEN_SIZE 6 +#define PMII_MAX_COMMAND_LEN (64*1024) + +#if defined(__cplusplus) +extern "C" { +#endif + +static const char FULLINIT_CMD[] = "fullinit"; +static const char FULLINITRESP_CMD[] = "fullinit-response"; +static const char FINALIZE_CMD[] = "finalize"; +static const char FINALIZERESP_CMD[] = "finalize-response"; +static const char ABORT_CMD[] = "abort"; +static const char JOBGETID_CMD[] = "job-getid"; +static const char JOBGETIDRESP_CMD[] = "job-getid-response"; +static const char JOBCONNECT_CMD[] = "job-connect"; +static const char JOBCONNECTRESP_CMD[] = "job-connect-response"; +static const char JOBDISCONNECT_CMD[] = "job-disconnect"; +static const char JOBDISCONNECTRESP_CMD[] = "job-disconnect-response"; +static const char KVSPUT_CMD[] = "kvs-put"; +static const char KVSPUTRESP_CMD[] = "kvs-put-response"; +static const char KVSFENCE_CMD[] = "kvs-fence"; +static const char KVSFENCERESP_CMD[] = "kvs-fence-response"; +static const char KVSGET_CMD[] = "kvs-get"; +static const char KVSGETRESP_CMD[] = 
"kvs-get-response"; +static const char GETNODEATTR_CMD[] = "info-getnodeattr"; +static const char GETNODEATTRRESP_CMD[] = "info-getnodeattr-response"; +static const char PUTNODEATTR_CMD[] = "info-putnodeattr"; +static const char PUTNODEATTRRESP_CMD[] = "info-putnodeattr-response"; +static const char GETJOBATTR_CMD[] = "info-getjobattr"; +static const char GETJOBATTRRESP_CMD[] = "info-getjobattr-response"; +static const char NAMEPUBLISH_CMD[] = "name-publish"; +static const char NAMEPUBLISHRESP_CMD[] = "name-publish-response"; +static const char NAMEUNPUBLISH_CMD[] = "name-unpublish"; +static const char NAMEUNPUBLISHRESP_CMD[] = "name-unpublish-response"; +static const char NAMELOOKUP_CMD[] = "name-lookup"; +static const char NAMELOOKUPRESP_CMD[] = "name-lookup-response"; + +static const char PMIJOBID_KEY[] = "pmijobid"; +static const char PMIRANK_KEY[] = "pmirank"; +static const char SRCID_KEY[] = "srcid"; +static const char THREADED_KEY[] = "threaded"; +static const char RC_KEY[] = "rc"; +static const char ERRMSG_KEY[] = "errmsg"; +static const char PMIVERSION_KEY[] = "pmi-version"; +static const char PMISUBVER_KEY[] = "pmi-subversion"; +static const char RANK_KEY[] = "rank"; +static const char SIZE_KEY[] = "size"; +static const char APPNUM_KEY[] = "appnum"; +static const char SPAWNERJOBID_KEY[] = "spawner-jobid"; +static const char DEBUGGED_KEY[] = "debugged"; +static const char PMIVERBOSE_KEY[] = "pmiverbose"; +static const char ISWORLD_KEY[] = "isworld"; +static const char MSG_KEY[] = "msg"; +static const char JOBID_KEY[] = "jobid"; +static const char KVSCOPY_KEY[] = "kvscopy"; +static const char KEY_KEY[] = "key"; +static const char VALUE_KEY[] = "value"; +static const char FOUND_KEY[] = "found"; +static const char WAIT_KEY[] = "wait"; +static const char NAME_KEY[] = "name"; +static const char PORT_KEY[] = "port"; +static const char THRID_KEY[] = "thrid"; +static const char INFOKEYCOUNT_KEY[] = "infokeycount"; +static const char INFOKEY_KEY[] = "infokey%d"; 
+static const char INFOVAL_KEY[] = "infoval%d"; + +static const char TRUE_VAL[] = "TRUE"; +static const char FALSE_VAL[] = "FALSE"; + +/* Local types */ + +/* Parse commands are in this structure. Fields in this structure are + dynamically allocated as necessary */ +typedef struct PMI2_Keyvalpair { + const char *key; + const char *value; + int valueLen; /* Length of a value (values may contain nulls, so + we need this) */ + int isCopy; /* The value is a copy (and will need to be freed) + if this is true, otherwise, + it is a null-terminated string in the original + buffer */ +} PMI2_Keyvalpair; + +typedef struct PMI2_Command { + int nPairs; /* Number of key=value pairs */ + char *command; /* Overall command buffer */ + PMI2_Keyvalpair **pairs; /* Array of pointers to pairs */ + int complete; +} PMI2_Command; + + +/*D +PMI2_CONSTANTS - PMI2 definitions + +Error Codes: ++ PMI2_SUCCESS - operation completed successfully +. PMI2_FAIL - operation failed +. PMI2_ERR_NOMEM - input buffer not large enough +. PMI2_ERR_INIT - PMI not initialized +. PMI2_ERR_INVALID_ARG - invalid argument +. PMI2_ERR_INVALID_KEY - invalid key argument +. PMI2_ERR_INVALID_KEY_LENGTH - invalid key length argument +. PMI2_ERR_INVALID_VAL - invalid val argument +. PMI2_ERR_INVALID_VAL_LENGTH - invalid val length argument +. PMI2_ERR_INVALID_LENGTH - invalid length argument +. PMI2_ERR_INVALID_NUM_ARGS - invalid number of arguments +. PMI2_ERR_INVALID_ARGS - invalid args argument +. PMI2_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument +. PMI2_ERR_INVALID_KEYVALP - invalid keyvalp argument +. 
PMI2_ERR_INVALID_SIZE - invalid size argument +- PMI2_ERR_OTHER - other unspecified error + +D*/ +#define PMI2_SUCCESS 0 +#define PMI2_FAIL -1 +#define PMI2_ERR_INIT 1 +#define PMI2_ERR_NOMEM 2 +#define PMI2_ERR_INVALID_ARG 3 +#define PMI2_ERR_INVALID_KEY 4 +#define PMI2_ERR_INVALID_KEY_LENGTH 5 +#define PMI2_ERR_INVALID_VAL 6 +#define PMI2_ERR_INVALID_VAL_LENGTH 7 +#define PMI2_ERR_INVALID_LENGTH 8 +#define PMI2_ERR_INVALID_NUM_ARGS 9 +#define PMI2_ERR_INVALID_ARGS 10 +#define PMI2_ERR_INVALID_NUM_PARSED 11 +#define PMI2_ERR_INVALID_KEYVALP 12 +#define PMI2_ERR_INVALID_SIZE 13 +#define PMI2_ERR_OTHER 14 + +/* This is here to allow spawn multiple functions to compile. This + needs to be removed once those functions are fixed for pmi2 */ +/* +typedef struct PMI_keyval_t +{ + char * key; + char * val; +} PMI_keyval_t; +*/ + +/*@ + PMI2_Connect_comm_t - connection structure used when connecting to other jobs + + Fields: + + read - Read from a connection to the leader of the job to which + this process will be connecting. Returns 0 on success or an MPI + error code on failure. + . write - Write to a connection to the leader of the job to which + this process will be connecting. Returns 0 on success or an MPI + error code on failure. + . ctx - An anonymous pointer to data that may be used by the read + and write members. + - isMaster - Indicates which process is the "master"; may have the + values 1 (is the master), 0 (is not the master), or -1 (neither is + designated as the master). The two processes must agree on which + process is the master, or both must select -1 (neither is the + master). + + Notes: + A typical implementation of these functions will use the read and + write calls on a pre-established file descriptor (fd) between the + two leading processes. This will be needed only if the PMI server + cannot access the KVS spaces of another job (this may happen, for + example, if each mpiexec creates the KVS spaces for the processes + that it manages). 
+ +@*/ +typedef struct PMI2_Connect_comm { + int (*read)( void *buf, int maxlen, void *ctx ); + int (*write)( const void *buf, int len, void *ctx ); + void *ctx; + int isMaster; +} PMI2_Connect_comm_t; + + +/*S + MPID_Info - Structure of an MPID info + + Notes: + There is no reference count because 'MPI_Info' values, unlike other MPI + objects, may be changed after they are passed to a routine without + changing the routine''s behavior. In other words, any routine that uses + an 'MPI_Info' object must make a copy or otherwise act on any info value + that it needs. + + A linked list is used because the typical 'MPI_Info' list will be short + and a simple linked list is easy to implement and to maintain. Similarly, + a single structure rather than separate header and element structures is + defined for simplicity. No separate thread lock is provided because + info routines are not performance critical; they may use the single + critical section lock in the 'MPIR_Process' structure when they need a + thread lock. + + This particular form of linked list (in particular, with this particular + choice of the first two members) is used because it allows us to use + the same routines to manage this list as are used to manage the + list of free objects (in the file 'src/util/mem/handlemem.c'). In + particular, if lock-free routines for updating a linked list are + provided, they can be used for managing the 'MPID_Info' structure as well. + + The MPI standard requires that keys can be no less than 32 characters and + no more than 255 characters. There is no mandated limit on the size + of values. + + Module: + Info-DS + S*/ +typedef struct MPID_Info { + int handle; + int pobj_mutex; + int ref_count; + struct MPID_Info *next; + char *key; + char *value; +} MPID_Info; + +#define PMI2U_Info MPID_Info + +/*@ + PMI2_Init - initialize the Process Manager Interface + + Output Parameter: + + spawned - spawned flag + . size - number of processes in the job + . 
rank - rank of this process in the job + - appnum - which executable is this on the mpiexec commandline + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + Initialize PMI for this process group. The value of spawned indicates whether + this process was created by 'PMI2_Job_Spawn'. 'spawned' will be non-zero + iff this process group has a parent. + +@*/ +int PMI2_Init(int *spawned, int *size, int *rank, int *appnum); + +/*@ + PMI2_Finalize - finalize the Process Manager Interface + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + Finalize PMI for this job. + +@*/ +int PMI2_Finalize(void); + +/*@ + PMI2_Initialized - check if PMI has been initialized + + Return values: + Non-zero if PMI2_Init has been called successfully, zero otherwise. + +@*/ +int PMI2_Initialized(void); + +/*@ + PMI2_Abort - abort the process group associated with this process + + Input Parameters: + + flag - non-zero if all processes in this job should abort, zero otherwise + - msg - error message to be printed + + Return values: + If the abort succeeds this function will not return. Returns an MPI + error code otherwise. + +@*/ +int PMI2_Abort(int flag, const char msg[]); + +/*@ + PMI2_Job_Spawn - spawn a new set of processes + + Input Parameters: + + count - count of commands + . cmds - array of command strings + . argcs - size of argv arrays for each command string + . argvs - array of argv arrays for each command string + . maxprocs - array of maximum processes to spawn for each command string + . info_keyval_sizes - array giving the number of elements in each of the + 'info_keyval_vectors' + . info_keyval_vectors - array of keyval vector arrays + . preput_keyval_size - Number of elements in 'preput_keyval_vector' + . 
preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space + - jobIdSize - size of the buffer provided in jobId + + Output Parameter: + + jobId - job id of the spawned processes + - errors - array of errors for each command + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This function spawns a set of processes into a new job. The 'count' + field refers to the size of the array parameters - 'cmd', 'argvs', 'maxprocs', + 'info_keyval_sizes' and 'info_keyval_vectors'. The 'preput_keyval_size' refers + to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector' + contains keyval pairs that will be put in the keyval space of the newly + created job before the processes are started. The 'maxprocs' array + specifies the desired number of processes to create for each 'cmd' string. + The actual number of processes may be less than the numbers specified in + maxprocs. The acceptable number of processes spawned may be controlled by + ``soft'' keyvals in the info arrays. The ``soft'' option is specified by + mpiexec in the MPI-2 standard. Environment variables may be passed to the + spawned processes through PMI implementation specific 'info_keyval' parameters. +@*/ +int PMI2_Job_Spawn(int count, const char * cmds[], + int argcs[], const char ** argvs[], + const int maxprocs[], + const int info_keyval_sizes[], + const struct MPID_Info *info_keyval_vectors[], + int preput_keyval_size, + const struct MPID_Info *preput_keyval_vector[], + char jobId[], int jobIdSize, + int errors[]); + + +/*@ + PMI2_Job_GetId - get job id of this job + + Input parameters: + . jobid_size - size of buffer provided in jobid + + Output parameters: + . jobid - the job id of this job + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Job_GetId(char jobid[], int jobid_size); + +/*@ + PMI2_Job_GetRank - get rank of this job + + Output parameters: + . 
rank - the rank of this job + + Return values: + Returns 'PMI2_SUCCESS' on success and a PMI error code on failure. + +@*/ +int PMI2_Job_GetRank(int* rank); + +/*@ + PMI2_Info_GetSize - get the number of processes on the node + + Output parameters: + . size - the number of processes on the node + + Return values: + Returns 'PMI2_SUCCESS' on success and a PMI error code on failure. +@*/ +int PMI2_Info_GetSize(int* size); + +/*@ + PMI2_Job_Connect - connect to the parallel job with ID jobid + + Input parameters: + . jobid - job id of the job to connect to + + Output parameters: + . conn - connection structure used to establish communication with + the remote job + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This just "registers" the other parallel job as part of a parallel + program, and is used in the PMI2_KVS_xxx routines (see below). This + is not a collective call and establishes a connection between all + processes that are connected to the calling processes (on the one + side) and that are connected to the named jobId on the other + side. Processes that are already connected may call this routine. + +@*/ +int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn); + +/*@ + PMI2_Job_Disconnect - disconnects from the job with ID jobid + + Input parameters: + . jobid - job id of the job to disconnect from + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Job_Disconnect(const char jobid[]); + +/*@ + PMI2_KVS_Put - put a key/value pair in the keyval space for this job + + Input Parameters: + + key - key + - value - value + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + If multiple PMI2_KVS_Put calls are made with the same key between + calls to PMI2_KVS_Fence, the behavior is undefined. That is, the + value returned by PMI2_KVS_Get for that key after the PMI2_KVS_Fence + is not defined. 
+ +@*/ +int PMI2_KVS_Put(const char key[], const char value[]); +/*@ + PMI2_KVS_Fence - commit all PMI2_KVS_Put calls made before this fence + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This is a collective call across the job. It has semantics that are + similar to those for MPI_Win_fence and hence is most easily + implemented as a barrier across all of the processes in the job. + Specifically, all PMI2_KVS_Put operations performed by any process in + the same job must be visible to all processes (by using PMI2_KVS_Get) + after PMI2_KVS_Fence completes. However, a PMI implementation could + make this a lazy operation by not waiting for all processes to enter + their corresponding PMI2_KVS_Fence until some process issues a + PMI2_KVS_Get. This might be appropriate for some wide-area + implementations. + +@*/ +int PMI2_KVS_Fence(void); + +/*@ + PMI2_KVS_Get - returns the value associated with key in the key-value + space associated with the job ID jobid + + Input Parameters: + + jobid - the job id identifying the key-value space in which to look + for key. If jobid is NULL, look in the key-value space of this job. + . src_pmi_id - the pmi id of the process which put this keypair. This + is just a hint to the server. PMI2_ID_NULL should be passed if no + hint is provided. + . key - key + - maxvalue - size of the buffer provided in value + + Output Parameters: + + value - value associated with key + - vallen - length of the returned value, or, if the length is longer + than maxvalue, the negative of the required length is returned + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxvalue, int *vallen); + +/*@ + PMI2_Info_GetNodeAttr - returns the value of the attribute associated + with this node + + Input Parameters: + + name - name of the node attribute + . 
valuelen - size of the buffer provided in value + - waitfor - if non-zero, the function will not return until the + attribute is available + + Output Parameters: + + value - value of the attribute + - found - non-zero indicates that the attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This provides a way, when combined with PMI2_Info_PutNodeAttr, for + processes on the same node to share information without requiring a + more general barrier across the entire job. + + If waitfor is non-zero, the function will never return with found + set to zero. + + Predefined attributes: + + memPoolType - If the process manager allocated a shared memory + pool for the MPI processes in this job and on this node, return + the type of that pool. Types include sysv, anonmmap and ntshm. + . memSYSVid - Return the SYSV memory segment id if the memory pool + type is sysv. Returned as a string. + . memAnonMMAPfd - Return the FD of the anonymous mmap segment. The + FD is returned as a string. + - memNTName - Return the name of the Windows NT shared memory + segment, file mapping object backed by system paging + file. Returned as a string. + +@*/ +int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor); + +/*@ + PMI2_Info_GetNodeAttrIntArray - returns the value of the attribute associated + with this node. The value must be an array of integers. + + Input Parameters: + + name - name of the node attribute + - arraylen - number of elements in array + + Output Parameters: + + array - value of attribute + . outlen - number of elements returned + - found - non-zero if attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + Notice that, unlike PMI2_Info_GetNodeAttr, this function does not + have a waitfor parameter, and will return immediately with found=0 + if the attribute was not found. 
+ + Predefined array attribute names: + + localRanksCount - Return the number of local ranks that will be + returned by the key localRanks. + . localRanks - Return the ranks in MPI_COMM_WORLD of the processes + that are running on this node. + - cartCoords - Return the Cartesian coordinates of this process in + the underlying network topology. The coordinates are indexed from + zero. Valid only if the Job attribute for physTopology includes + cartesian. + +@*/ +int PMI2_Info_GetNodeAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); + +/*@ + PMI2_Info_PutNodeAttr - stores the value of the named attribute + associated with this node + + Input Parameters: + + name - name of the node attribute + - value - the value of the attribute + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + For example, it might be used to share segment ids with other + processes on the same SMP node. + +@*/ +int PMI2_Info_PutNodeAttr(const char name[], const char value[]); + +/*@ + PMI2_Info_GetJobAttr - returns the value of the attribute associated + with this job + + Input Parameters: + + name - name of the job attribute + - valuelen - size of the buffer provided in value + + Output Parameters: + + value - value of the attribute + - found - non-zero indicates that the attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found); + +/*@ + PMI2_Info_GetJobAttrIntArray - returns the value of the attribute associated + with this job. The value must be an array of integers. + + Input Parameters: + + name - name of the job attribute + - arraylen - number of elements in array + + Output Parameters: + + array - value of attribute + . 
outlen - number of elements returned + - found - non-zero if attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Predefined array attribute names: + + + universeSize - The size of the "universe" (defined for the MPI + attribute MPI_UNIVERSE_SIZE + + . hasNameServ - The value hasNameServ is true if the PMI2 environment + supports the name service operations (publish, lookup, and + unpublish). + + . physTopology - Return the topology of the underlying network. The + valid topology types include cartesian, hierarchical, complete, + kautz, hypercube; additional types may be added as necessary. If + the type is hierarchical, then additional attributes may be + queried to determine the details of the topology. For example, a + typical cluster has a hierarchical physical topology, consisting + of two levels of complete networks - the switched Ethernet or + Infiniband and the SMP nodes. Other systems, such as IBM BlueGene, + have one level that is cartesian (and in virtual node mode, have a + single-level physical topology). + + . physTopologyLevels - Return a string describing the topology type + for each level of the underlying network. Only valid if the + physTopology is hierarchical. The value is a comma-separated list + of physical topology types (except for hierarchical). The levels + are ordered starting at the top, with the network closest to the + processes last. The lower level networks may connect only a subset + of processes. For example, for a cartesian mesh of SMPs, the value + is cartesian,complete. All processes are connected by the + cartesian part of this, but for each complete network, only the + processes on the same node are connected. + + . cartDims - Return a string of comma-separated values describing + the dimensions of the Cartesian topology. This must be consistent + with the value of cartCoords that may be returned by + PMI2_Info_GetNodeAttrIntArray. 
+ + These job attributes are just a start, but they provide both an + example of the sort of external data that is available through the + PMI interface and how extensions can be added within the same API + and wire protocol. For example, adding more complex network + topologies requires only adding new keys, not new routines. + + . isHeterogeneous - The value isHeterogeneous is true if the + processes belonging to the job are running on nodes with different + underlying data models. + +@*/ +int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); + +/*@ + PMI2_Nameserv_publish - publish a name + + Input parameters: + + service_name - string representing the service being published + . info_ptr - + - port - string representing the port on which to contact the service + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Nameserv_publish(const char service_name[], const struct MPID_Info *info_ptr, const char port[]); + +/*@ + PMI2_Nameserv_lookup - look up a service by name + + Input parameters: + + service_name - string representing the service being looked up + . info_ptr - + - portLen - size of buffer provided in port + + Output parameters: + . port - string representing the port on which to contact the service + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Nameserv_lookup(const char service_name[], const struct MPID_Info *info_ptr, + char port[], int portLen); +/*@ + PMI2_Nameserv_unpublish - unpublish a name + + Input parameters: + + service_name - string representing the service being unpublished + - info_ptr - + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. 
+ +@*/ +int PMI2_Nameserv_unpublish(const char service_name[], + const struct MPID_Info *info_ptr); + + + +#if defined(__cplusplus) +} +#endif + +#endif /* PMI2_H_INCLUDED */ diff --git a/contribs/pmi2/pmi2_api.c b/contribs/pmi2/pmi2_api.c new file mode 100644 index 0000000000000000000000000000000000000000..1e1e9325feb68652edc191f8cfa99835e4df8dd5 --- /dev/null +++ b/contribs/pmi2/pmi2_api.c @@ -0,0 +1,1930 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include "pmi2_util.h" +#include "slurm/pmi2.h" + +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <sys/socket.h> +#include <unistd.h> +#include <errno.h> + +#ifndef MAXHOSTNAME +#define MAXHOSTNAME 256 +#endif + +#define PMII_EXIT_CODE -1 + +#define PMI_VERSION 2 +#define PMI_SUBVERSION 0 + +#define MAX_INT_STR_LEN 11 /* number of digits in MAX_UINT + 1 */ + +typedef enum { + PMI2_UNINITIALIZED = 0, + SINGLETON_INIT_BUT_NO_PM = 1, + NORMAL_INIT_WITH_PM, + SINGLETON_INIT_WITH_PM +} PMI2State; + +static PMI2State PMI2_initialized = PMI2_UNINITIALIZED; + +static int PMI2_debug = 0; +static int PMI2_fd = -1; +static int PMI2_size = 1; +static int PMI2_rank = 0; + + +/* XXX DJG the "const"s on both of these functions and the Keyvalpair + * struct are wrong in the isCopy==TRUE case! */ +/* init_kv_str -- fills in keyvalpair. val is required to be a + null-terminated string. isCopy is set to FALSE, so caller must + free key and val memory, if necessary. 
+*/ +static void init_kv_str(PMI2_Keyvalpair *kv, const char key[], const char val[]) +{ + kv->key = key; + kv->value = val; + kv->valueLen = strlen(val); + kv->isCopy = 0/*FALSE*/; +} + +/* same as init_kv_str, but strdup's the key and val first, and sets isCopy=TRUE */ +static void init_kv_strdup(PMI2_Keyvalpair *kv, const char key[], const char val[]) +{ + /* XXX DJG could be slightly more efficient */ + char *key_copy = strdup(key); + char *val_copy = strdup(val); + + /* strdup returns NULL on allocation failure; init_kv_str would then + * call strlen(NULL), so stop here instead of passing NULL along */ + PMI2U_Assert(key_copy && val_copy); + init_kv_str(kv, key_copy, val_copy); + kv->isCopy = 1/*TRUE*/; +} + +/* same as init_kv_strdup, but converts val into a string first */ +/* XXX DJG could be slightly more efficient */ +static void init_kv_strdup_int(PMI2_Keyvalpair *kv, const char key[], int val) +{ + char tmpbuf[32] = {0}; + int rc = PMI2_SUCCESS; + + rc = snprintf(tmpbuf, sizeof(tmpbuf), "%d", val); + PMI2U_Assert(rc >= 0); + init_kv_strdup(kv, key, tmpbuf); +} + +/* initializes the key with ("%s%d", key_prefix, suffix), uses a string value */ +/* XXX DJG could be slightly more efficient */ +static void init_kv_strdup_intsuffix(PMI2_Keyvalpair *kv, const char key_prefix[], int suffix, const char val[]) +{ + char tmpbuf[256/*XXX HACK*/] = {0}; + int rc = PMI2_SUCCESS; + + rc = snprintf(tmpbuf, sizeof(tmpbuf), "%s%d", key_prefix, suffix); + PMI2U_Assert(rc >= 0); + init_kv_strdup(kv, tmpbuf, val); +} + + +static int getPMIFD(void); +static int PMIi_ReadCommandExp( int fd, PMI2_Command *cmd, const char *exp, int* rc, const char **errmsg ); +static int PMIi_ReadCommand( int fd, PMI2_Command *cmd ); + +static int PMIi_WriteSimpleCommand( int fd, PMI2_Command *resp, const char cmd[], PMI2_Keyvalpair *pairs[], int npairs); +static int PMIi_WriteSimpleCommandStr( int fd, PMI2_Command *resp, const char cmd[], ...); +static int PMIi_InitIfSingleton(void); +static int PMII_singinit(void); + +static void freepairs(PMI2_Keyvalpair** pairs, int npairs); +static int getval(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, const char **value, int *vallen); +static int 
getvalint(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, int *val); +static int getvalptr(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, void *val); +static int getvalbool(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, int *val); + +static int accept_one_connection(int list_sock); +static int GetResponse(const char request[], const char expectedCmd[], int checkRc); + +static void dump_PMI2_Command(PMI2_Command *cmd); +static void dump_PMI2_Keyvalpair(PMI2_Keyvalpair *kv); +static void phony(void); + +typedef struct pending_item +{ + struct pending_item *next; + PMI2_Command *cmd; +} pending_item_t; + +pending_item_t *pendingq_head = NULL; +pending_item_t *pendingq_tail = NULL; + +/* phony() + * References otherwise-unused static functions so that + * gcc does not complain 'defined but not used'. The calls + * sit under if (0), so none of them is ever executed. + */ +static void +phony(void) +{ + if (0) { + accept_one_connection(0); + GetResponse(NULL, NULL, 0); + dump_PMI2_Command(NULL); + PMII_singinit(); + } +} + +/* ENQUEUE - append cmd to the tail of the pending-command queue. */ +static inline void ENQUEUE(PMI2_Command *cmd) +{ + pending_item_t *pi = malloc(sizeof(*pi)); + + /* fail loudly instead of dereferencing NULL on allocation failure */ + PMI2U_Assert(pi); + + pi->next = NULL; + pi->cmd = cmd; + + if (pendingq_head == NULL) { + pendingq_head = pendingq_tail = pi; + } else { + pendingq_tail->next = pi; + pendingq_tail = pi; + } +} + +/* SEARCH_REMOVE - unlink and free the first queue entry whose cmd pointer + * equals cmd. Returns 1 if an entry was removed, 0 if none matched + * (including when the queue is empty). + */ +static inline int SEARCH_REMOVE(PMI2_Command *cmd) +{ + pending_item_t *pi, *prev = NULL; + + /* prev trails pi by one node so the unlink always patches the real predecessor */ + for (pi = pendingq_head; pi; prev = pi, pi = pi->next) { + if (pi->cmd != cmd) + continue; + + if (prev) + prev->next = pi->next; + else + pendingq_head = pi->next; + /* removing the last node: the tail moves back (NULL when the queue empties) */ + if (pi == pendingq_tail) + pendingq_tail = prev; + free(pi); + return 1; + } + + return 0; +} + +/* ------------------------------------------------------------------------- */ +/* PMI-2 API Routines */ +/* ------------------------------------------------------------------------- */ +int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) +{ + int pmi2_errno = 
PMI2_SUCCESS; + char *p; + char buf[PMI2_MAXLINE], cmdline[PMI2_MAXLINE]; + char *jobid; + char *pmiid; + int ret; + + PMI2U_printf("[BEGIN]"); + + /* Get the value of PMI2_DEBUG from the environment if possible, since + we may have set it to help debug the setup process */ + p = getenv("PMI2_DEBUG"); + if (p) PMI2_debug = atoi(p); + + /* Get the fd for PMI commands; if none, we're a singleton */ + pmi2_errno = getPMIFD(); + if (pmi2_errno) PMI2U_ERR_POP(pmi2_errno); + + if (PMI2_fd == -1) { + /* Singleton init: Process not started with mpiexec, + so set size to 1, rank to 0 */ + PMI2_size = 1; + PMI2_rank = 0; + *spawned = 0; + + PMI2_initialized = SINGLETON_INIT_BUT_NO_PM; + goto fn_exit; + } + + /* do initial PMI1 init */ + ret = snprintf(buf, PMI2_MAXLINE, "cmd=init pmi_version=%d pmi_subversion=%d\n", PMI_VERSION, PMI_SUBVERSION); + PMI2U_ERR_CHKANDJUMP(ret < 0, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "failed to generate init line"); + + ret = PMI2U_writeline(PMI2_fd, buf); + PMI2U_ERR_CHKANDJUMP(ret < 0, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_init_send"); + + ret = PMI2U_readline(PMI2_fd, buf, PMI2_MAXLINE); + PMI2U_ERR_CHKANDJUMP(ret < 0, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_initack %s", strerror(pmi2_errno)); + + PMI2U_parse_keyvals(buf); + cmdline[0] = 0; + PMI2U_getval("cmd", cmdline, PMI2_MAXLINE); + PMI2U_ERR_CHKANDJUMP(strncmp(cmdline, "response_to_init", PMI2_MAXLINE) != 0, pmi2_errno, PMI2_ERR_OTHER, "**bad_cmd"); + + PMI2U_getval("rc", buf, PMI2_MAXLINE); + if (strncmp(buf, "0", PMI2_MAXLINE) != 0) { + char buf1[PMI2_MAXLINE]; + PMI2U_getval("pmi_version", buf, PMI2_MAXLINE); + PMI2U_getval("pmi_subversion", buf1, PMI2_MAXLINE); + PMI2U_ERR_SETANDJUMP(pmi2_errno, PMI2_ERR_OTHER, "**pmi2_version %s %s %d %d", buf, buf1, PMI_VERSION, PMI_SUBVERSION); + } + + PMI2U_printf("do full PMI2 init ..."); + /* do full PMI2 init */ + { + PMI2_Keyvalpair pairs[3]; + PMI2_Keyvalpair *pairs_p[] = { pairs, pairs+1, pairs+2 }; + int npairs = 0; + int isThreaded = 0; 
+ const char *errmsg; + int rc; + int found; + int version, subver; + const char *spawner_jobid; + int spawner_jobid_len; + PMI2_Command cmd = {0}; + int debugged; + int PMI2_pmiverbose; + + + jobid = getenv("PMI_JOBID"); + if (jobid) { + init_kv_str(&pairs[npairs], PMIJOBID_KEY, jobid); + ++npairs; + } + + pmiid = getenv("PMI_ID"); + if (pmiid) { + init_kv_str(&pairs[npairs], SRCID_KEY, pmiid); + ++npairs; + } + else { + pmiid = getenv("PMI_RANK"); + if (pmiid) { + init_kv_str(&pairs[npairs], PMIRANK_KEY, pmiid); + PMI2_rank = strtol(pmiid, NULL, 10); + ++npairs; + } + } + + init_kv_str(&pairs[npairs], THREADED_KEY, isThreaded ? "TRUE" : "FALSE"); + ++npairs; + + pmi2_errno = PMIi_WriteSimpleCommand(PMI2_fd, 0, FULLINIT_CMD, pairs_p, npairs); /* don't pass in thread id for init */ + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommand"); + + /* Read auth-response */ + /* Send auth-response-complete */ + + /* Read fullinit-response */ + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, FULLINITRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_fullinit %s", errmsg ? 
errmsg : "unknown"); + + found = getvalint(cmd.pairs, cmd.nPairs, PMIVERSION_KEY, &version); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + found = getvalint(cmd.pairs, cmd.nPairs, PMISUBVER_KEY, &subver); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + found = getvalint(cmd.pairs, cmd.nPairs, RANK_KEY, rank); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + found = getvalint(cmd.pairs, cmd.nPairs, SIZE_KEY, size); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + PMI2_size = *size; + + found = getvalint(cmd.pairs, cmd.nPairs, APPNUM_KEY, appnum); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + found = getval(cmd.pairs, cmd.nPairs, SPAWNERJOBID_KEY, &spawner_jobid, &spawner_jobid_len); + PMI2U_ERR_CHKANDJUMP(found == -1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + if (found) + *spawned = TRUE; + else + *spawned = FALSE; + + debugged = 0; + found = getvalbool(cmd.pairs, cmd.nPairs, DEBUGGED_KEY, &debugged); + PMI2U_ERR_CHKANDJUMP(found == -1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + PMI2_debug |= debugged; + + PMI2_pmiverbose = 0; + found = getvalbool(cmd.pairs, cmd.nPairs, PMIVERBOSE_KEY, &PMI2_pmiverbose); + PMI2U_ERR_CHKANDJUMP(found == -1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + } + + if (! 
PMI2_initialized) { + PMI2_initialized = NORMAL_INIT_WITH_PM; + pmi2_errno = PMI2_SUCCESS; + } + + + phony(); + +fn_exit: + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Finalize(void) +{ + int pmi2_errno = PMI2_SUCCESS; + int rc; + const char *errmsg; + PMI2_Command cmd = {0}; + + PMI2U_printf("[BEGIN]"); + + if (PMI2_initialized > SINGLETON_INIT_BUT_NO_PM) { + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, FINALIZE_CMD, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, FINALIZERESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_finalize %s", errmsg ? errmsg : "unknown"); + + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + + shutdown(PMI2_fd, SHUT_RDWR); + close(PMI2_fd); + } + +fn_exit: + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Initialized(void) +{ + /* Turn this into a logical value (1 or 0). This allows us + to use PMI2_initialized to distinguish between initialized with + an PMI service (e.g., via mpiexec) and the singleton init, + which has no PMI service */ + return (PMI2_initialized != 0); +} + +int PMI2_Abort(int flag, const char msg[]) +{ + PMI2U_printf("aborting job:\n%s", msg); + + /* ignoring return code, because we're exiting anyway */ + PMIi_WriteSimpleCommandStr(PMI2_fd, NULL, ABORT_CMD, ISWORLD_KEY, flag ? 
TRUE_VAL : FALSE_VAL, MSG_KEY, msg, NULL); + + exit(PMII_EXIT_CODE); + return PMI2_SUCCESS; +} + +int PMI2_Job_Spawn(int count, const char * cmds[], + int argcs[], const char ** argvs[], + const int maxprocs[], + const int info_keyval_sizes[], + const struct MPID_Info *info_keyval_vectors[], + int preput_keyval_size, + const struct MPID_Info *preput_keyval_vector[], + char jobId[], int jobIdSize, + int errors[]) +{ + int i,rc,spawncnt,total_num_processes,num_errcodes_found; + int found; + const char *jid; + int jidlen; + char tempbuf[PMI2_MAXLINE]; + char *lead, *lag; + int spawn_rc; + const char *errmsg = NULL; + PMI2_Command resp_cmd = {0}; + int pmi2_errno = 0; + PMI2_Keyvalpair **pairs_p = NULL; + int npairs = 0; + int total_pairs = 0; + + PMI2U_printf("[BEGIN]"); + + /* Connect to the PM if we haven't already */ + if (PMIi_InitIfSingleton() != 0) return -1; + + total_num_processes = 0; + +/* XXX DJG from Pavan's email: +cmd=spawn;thrid=string;ncmds=count;preputcount=n;ppkey0=name;ppval0=string;...;\ + subcmd=spawn-exe1;maxprocs=n;argc=narg;argv0=name;\ + argv1=name;...;infokeycount=n;infokey0=key;\ + infoval0=string;...;\ +(... one subcmd for each executable ...) +*/ + + /* FIXME overall need a better interface for building commands! + * Need to be able to append commands, and to easily accept integer + * valued arguments. Memory management should stay completely out + * of mind when writing a new PMI command impl like this! */ + + /* Calculate the total number of keyval pairs that we need. + * + * The command writing utility adds "cmd" and "thrid" fields for us, + * don't include them in our count. 
*/ + total_pairs = 2; /* ncmds,preputcount */ + total_pairs += (3 * count); /* subcmd,maxprocs,argc */ + total_pairs += (2 * preput_keyval_size); /* ppkeyN,ppvalN */ + for (spawncnt = 0; spawncnt < count; ++spawncnt) { + total_pairs += argcs[spawncnt]; /* argvN */ + if (info_keyval_sizes) { + total_pairs += 1; /* infokeycount */ + total_pairs += 2 * info_keyval_sizes[spawncnt]; /* infokeyN,infovalN */ + } + } + + pairs_p = malloc(total_pairs * sizeof(PMI2_Keyvalpair*)); + /* individiually allocating instead of batch alloc b/c freepairs assumes it */ + for (i = 0; i < total_pairs; ++i) { + /* FIXME we are somehow still leaking some of this memory */ + pairs_p[i] = malloc(sizeof(PMI2_Keyvalpair)); + PMI2U_Assert(pairs_p[i]); + } + + init_kv_strdup_int(pairs_p[npairs++], "ncmds", count); + + init_kv_strdup_int(pairs_p[npairs++], "preputcount", preput_keyval_size); + for (i = 0; i < preput_keyval_size; ++i) { + init_kv_strdup_intsuffix(pairs_p[npairs++], "ppkey", i, preput_keyval_vector[i]->key); + init_kv_strdup_intsuffix(pairs_p[npairs++], "ppval", i, preput_keyval_vector[i]->value); + } + + for (spawncnt = 0; spawncnt < count; ++spawncnt) + { + total_num_processes += maxprocs[spawncnt]; + + init_kv_strdup(pairs_p[npairs++], "subcmd", cmds[spawncnt]); + init_kv_strdup_int(pairs_p[npairs++], "maxprocs", maxprocs[spawncnt]); + + init_kv_strdup_int(pairs_p[npairs++], "argc", argcs[spawncnt]); + for (i = 0; i < argcs[spawncnt]; ++i) { + init_kv_strdup_intsuffix(pairs_p[npairs++], "argv", i, argvs[spawncnt][i]); + } + + if (info_keyval_sizes) { + init_kv_strdup_int(pairs_p[npairs++], "infokeycount", info_keyval_sizes[spawncnt]); + for (i = 0; i < info_keyval_sizes[spawncnt]; ++i) { + init_kv_strdup_intsuffix(pairs_p[npairs++], "infokey", i, info_keyval_vectors[spawncnt][i].key); + init_kv_strdup_intsuffix(pairs_p[npairs++], "infoval", i, info_keyval_vectors[spawncnt][i].value); + } + } + } + + if (npairs < total_pairs) { PMI2U_printf("about to fail assertion, npairs=%d 
total_pairs=%d", npairs, total_pairs); } + PMI2U_Assert(npairs == total_pairs); + + pmi2_errno = PMIi_WriteSimpleCommand(PMI2_fd, &resp_cmd, "spawn", pairs_p, npairs); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommand"); + + freepairs(pairs_p, npairs); + pairs_p = NULL; + + /* XXX DJG TODO release any upper level MPICH2 critical sections */ + rc = PMIi_ReadCommandExp(PMI2_fd, &resp_cmd, "spawn-response", &spawn_rc, &errmsg); + if (rc != 0) { return PMI2_FAIL; } + + /* XXX DJG TODO deal with the response */ + PMI2U_Assert(errors != NULL); + + if (jobId && jobIdSize) { + found = getval(resp_cmd.pairs, resp_cmd.nPairs, JOBID_KEY, &jid, &jidlen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + MPIU_Strncpy(jobId, jid, jobIdSize); + } + + if (PMI2U_getval("errcodes", tempbuf, PMI2_MAXLINE)) { + num_errcodes_found = 0; + lag = &tempbuf[0]; + do { + lead = strchr(lag, ','); + if (lead) *lead = '\0'; + errors[num_errcodes_found++] = atoi(lag); + lag = lead + 1; /* move past the null char */ + PMI2U_Assert(num_errcodes_found <= total_num_processes); + } while (lead != NULL); + PMI2U_Assert(num_errcodes_found == total_num_processes); + } + else { + /* gforker doesn't return errcodes, so we'll just pretend that means + that it was going to send all `0's. 
*/ + for (i = 0; i < total_num_processes; ++i) { + errors[i] = 0; + } + } + +fn_fail: + free(resp_cmd.command); + freepairs(resp_cmd.pairs, resp_cmd.nPairs); + if (pairs_p) freepairs(pairs_p, npairs); + + PMI2U_printf("[END]"); + return pmi2_errno; +} + +int PMI2_Job_GetId(char jobid[], int jobid_size) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + const char *jid; + int jidlen; + int rc; + const char *errmsg; + PMI2_Command cmd = {0}; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, JOBGETID_CMD, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, JOBGETIDRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_jobgetid %s", errmsg ? errmsg : "unknown"); + + found = getval(cmd.pairs, cmd.nPairs, JOBID_KEY, &jid, &jidlen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + MPIU_Strncpy(jobid, jid, jobid_size); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Job_GetRank(int* rank) +{ + *rank = PMI2_rank; + return PMI2_SUCCESS; +} + +int PMI2_Info_GetSize(int* size) +{ + *size = PMI2_size; + return PMI2_SUCCESS; +} + +#undef FUNCNAME +#define FUNCNAME PMI2_Job_Connect +#undef FCNAME +#define FCNAME PMI2DI_QUOTE(FUNCNAME) + +int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) +{ + int pmi2_errno = PMI2_SUCCESS; + PMI2_Command cmd = {0}; + int found; + int kvscopy; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, JOBCONNECT_CMD, JOBID_KEY, jobid, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, JOBCONNECTRESP_CMD, &rc, 
&errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_jobconnect %s", errmsg ? errmsg : "unknown"); + + found = getvalbool(cmd.pairs, cmd.nPairs, KVSCOPY_KEY, &kvscopy); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + PMI2U_ERR_CHKANDJUMP(kvscopy, pmi2_errno, PMI2_ERR_OTHER, "**notimpl"); + + fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; + fn_fail: + goto fn_exit; +} + +int PMI2_Job_Disconnect(const char jobid[]) +{ + int pmi2_errno = PMI2_SUCCESS; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, JOBDISCONNECT_CMD, JOBID_KEY, jobid, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, JOBDISCONNECTRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_jobdisconnect %s", errmsg ? errmsg : "unknown"); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_KVS_Put(const char key[], const char value[]) +{ + int pmi2_errno = PMI2_SUCCESS; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, KVSPUT_CMD, KEY_KEY, key, VALUE_KEY, value, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, KVSPUTRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_kvsput %s", errmsg ? 
errmsg : "unknown"); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_KVS_Fence(void) +{ + int pmi2_errno = PMI2_SUCCESS; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, KVSFENCE_CMD, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, KVSFENCERESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_kvsfence %s", errmsg ? errmsg : "unknown"); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxValue, int *valLen) +{ + int pmi2_errno = PMI2_SUCCESS; + int found, keyfound; + const char *kvsvalue; + int kvsvallen; + PMI2_Command cmd = {0}; + int rc; + int ret; + char src_pmi_id_str[256]; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + snprintf(src_pmi_id_str, sizeof(src_pmi_id_str), "%d", src_pmi_id); + + pmi2_errno = PMIi_InitIfSingleton(); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_InitIfSingleton"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, KVSGET_CMD, JOBID_KEY, jobid, SRCID_KEY, src_pmi_id_str, KEY_KEY, key, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, KVSGETRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_kvsget %s", errmsg ? 
errmsg : "unknown"); + + found = getvalbool(cmd.pairs, cmd.nPairs, FOUND_KEY, &keyfound); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + PMI2U_ERR_CHKANDJUMP(!keyfound, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_kvsget_notfound"); + + found = getval(cmd.pairs, cmd.nPairs, VALUE_KEY, &kvsvalue, &kvsvallen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + ret = MPIU_Strncpy(value, kvsvalue, maxValue); + *valLen = ret ? -kvsvallen : kvsvallen; + + fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; + fn_fail: + goto fn_exit; +} + +int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *flag, int waitfor) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + const char *kvsvalue; + int kvsvallen; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_InitIfSingleton(); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_InitIfSingleton"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, GETNODEATTR_CMD, KEY_KEY, name, WAIT_KEY, waitfor ? "TRUE" : "FALSE", NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, GETNODEATTRRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_getnodeattr %s", errmsg ? 
errmsg : "unknown"); + + found = getvalbool(cmd.pairs, cmd.nPairs, FOUND_KEY, flag); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + if (*flag) { + found = getval(cmd.pairs, cmd.nPairs, VALUE_KEY, &kvsvalue, &kvsvallen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + MPIU_Strncpy(value, kvsvalue, valuelen); + } + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Info_GetNodeAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *flag) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + const char *kvsvalue; + int kvsvallen; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + int i; + const char *valptr; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_InitIfSingleton(); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_InitIfSingleton"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, GETNODEATTR_CMD, KEY_KEY, name, WAIT_KEY, "FALSE", NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, GETNODEATTRRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_getnodeattr %s", errmsg ? 
errmsg : "unknown"); + + found = getvalbool(cmd.pairs, cmd.nPairs, FOUND_KEY, flag); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + if (*flag) { + found = getval(cmd.pairs, cmd.nPairs, VALUE_KEY, &kvsvalue, &kvsvallen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + valptr = kvsvalue; + i = 0; + rc = sscanf(valptr, "%d", &array[i]); + PMI2U_ERR_CHKANDJUMP(rc != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "unable to parse intarray"); + ++i; + while ((valptr = strchr(valptr, ',')) && i < arraylen) { + ++valptr; /* skip over the ',' */ + rc = sscanf(valptr, "%d", &array[i]); + PMI2U_ERR_CHKANDJUMP(rc != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "unable to parse intarray"); + ++i; + } + + *outlen = i; + } + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Info_PutNodeAttr(const char name[], const char value[]) +{ + int pmi2_errno = PMI2_SUCCESS; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, PUTNODEATTR_CMD, KEY_KEY, name, VALUE_KEY, value, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, PUTNODEATTRRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_putnodeattr %s", errmsg ? 
errmsg : "unknown"); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *flag) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + const char *kvsvalue; + int kvsvallen; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_InitIfSingleton(); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_InitIfSingleton"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, GETJOBATTR_CMD, KEY_KEY, name, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, GETJOBATTRRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_getjobattr %s", errmsg ? errmsg : "unknown"); + + found = getvalbool(cmd.pairs, cmd.nPairs, FOUND_KEY, flag); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + if (*flag) { + found = getval(cmd.pairs, cmd.nPairs, VALUE_KEY, &kvsvalue, &kvsvallen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + MPIU_Strncpy(value, kvsvalue, valuelen); + } + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *flag) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + const char *kvsvalue; + int kvsvallen; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + int i; + const char *valptr; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_InitIfSingleton(); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_InitIfSingleton"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, 
GETJOBATTR_CMD, KEY_KEY, name, NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, GETJOBATTRRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_getjobattr %s", errmsg ? errmsg : "unknown"); + + found = getvalbool(cmd.pairs, cmd.nPairs, FOUND_KEY, flag); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + if (*flag) { + found = getval(cmd.pairs, cmd.nPairs, VALUE_KEY, &kvsvalue, &kvsvallen); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + valptr = kvsvalue; + i = 0; + rc = sscanf(valptr, "%d", &array[i]); + PMI2U_ERR_CHKANDJUMP(rc != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "unable to parse intarray"); + ++i; + while ((valptr = strchr(valptr, ',')) && i < arraylen) { + ++valptr; /* skip over the ',' */ + rc = sscanf(valptr, "%d", &array[i]); + PMI2U_ERR_CHKANDJUMP(rc != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "unable to parse intarray"); + ++i; + } + + *outlen = i; + } + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Nameserv_publish(const char service_name[], const PMI2U_Info *info_ptr, const char port[]) +{ + int pmi2_errno = PMI2_SUCCESS; + PMI2_Command cmd = {0}; + int rc; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + /* ignoring infokey functionality for now */ + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, NAMEPUBLISH_CMD, + NAME_KEY, service_name, PORT_KEY, port, + INFOKEYCOUNT_KEY, "0", NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, NAMEPUBLISHRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, 
pmi2_errno, PMI2_ERR_OTHER, "**pmi2_nameservpublish %s", errmsg ? errmsg : "unknown"); + + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + + +int PMI2_Nameserv_lookup(const char service_name[], const PMI2U_Info *info_ptr, + char port[], int portLen) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + int rc; + PMI2_Command cmd = {0}; + int plen; + const char *errmsg; + const char *found_port; + + PMI2U_printf("[BEGIN]"); + + /* ignoring infos for now */ + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, NAMELOOKUP_CMD, + NAME_KEY, service_name, INFOKEYCOUNT_KEY, "0", NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, NAMELOOKUPRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_nameservlookup %s", errmsg ? 
errmsg : "unknown"); + + found = getval(cmd.pairs, cmd.nPairs, VALUE_KEY, &found_port, &plen); + PMI2U_ERR_CHKANDJUMP(!found, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_nameservlookup %s", "not found"); + MPIU_Strncpy(port, found_port, portLen); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +int PMI2_Nameserv_unpublish(const char service_name[], + const PMI2U_Info *info_ptr) +{ + int pmi2_errno = PMI2_SUCCESS; + int rc; + PMI2_Command cmd = {0}; + const char *errmsg; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_WriteSimpleCommandStr(PMI2_fd, &cmd, NAMEUNPUBLISH_CMD, + NAME_KEY, service_name, INFOKEYCOUNT_KEY, "0", NULL); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_WriteSimpleCommandStr"); + pmi2_errno = PMIi_ReadCommandExp(PMI2_fd, &cmd, NAMEUNPUBLISHRESP_CMD, &rc, &errmsg); + if (pmi2_errno) PMI2U_ERR_SETANDJUMP(1, pmi2_errno, "PMIi_ReadCommandExp"); + PMI2U_ERR_CHKANDJUMP(rc, pmi2_errno, PMI2_ERR_OTHER, "**pmi2_nameservunpublish %s", errmsg ? errmsg : "unknown"); + +fn_exit: + free(cmd.command); + freepairs(cmd.pairs, cmd.nPairs); + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + +/* ------------------------------------------------------------------------- */ +/* Service Routines */ +/* ------------------------------------------------------------------------- */ + +/* ------------------------------------------------------------------------- */ +/* + * PMIi_ReadCommand - Reads an entire command from the PMI socket. This + * routine blocks the thread until the command is read. + * + * PMIi_WriteSimpleCommand - Write a simple command to the PMI socket; this + * allows printf - style arguments. This blocks the thread until the buffer + * has been written (for fault-tolerance, we may want to keep it around + * in case of PMI failure). + * + * PMIi_WaitFor - Wait for a particular PMI command request to complete. 
+ */ +/* ------------------------------------------------------------------------- */ + +/* frees all of the keyvals pointed to by a keyvalpair* array and the array iteself*/ +static void freepairs(PMI2_Keyvalpair** pairs, int npairs) +{ + int i; + + if (!pairs) + return; + + for (i = 0; i < npairs; ++i) + if (pairs[i]->isCopy) { + /* FIXME casts are here to suppress legitimate constness warnings */ + free((void *)pairs[i]->key); + free((void *)pairs[i]->value); + free(pairs[i]); + } + free(pairs); +} + +/* getval & friends -- these functions search the pairs list for a + * matching key, set val appropriately and return 1. If no matching + * key is found, 0 is returned. If the value is invalid, -1 is returned */ + +static int getval(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, const char **value, int *vallen) +{ + int i; + + for (i = 0; i < npairs; ++i) + if (strncmp(key, pairs[i]->key, PMI2_MAX_KEYLEN) == 0) { + *value = pairs[i]->value; + *vallen = pairs[i]->valueLen; + return 1; + } + return 0; +} + +static int getvalint(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, int *val) +{ + int found; + const char *value; + int vallen; + int ret; + /* char *endptr; */ + + found = getval(pairs, npairs, key, &value, &vallen); + if (found != 1) + return found; + + if (vallen == 0) + return -1; + + ret = sscanf(value, "%d", val); + if (ret != 1) + return -1; + + /* *val = strtoll(value, &endptr, 0); */ + /* if (endptr - value != vallen) */ + /* return -1; */ + + return 1; +} + +static int getvalptr(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, void *val) +{ + int found; + const char *value; + int vallen; + int ret; + void **val_ = val; + /* char *endptr; */ + + found = getval(pairs, npairs, key, &value, &vallen); + if (found != 1) + return found; + + if (vallen == 0) + return -1; + + ret = sscanf(value, "%p", val_); + if (ret != 1) + return -1; + + /* *val_ = (void *)(PMI2R_Upint)strtoll(value, &endptr, 0); */ + /* if (endptr - 
value != vallen) */ + /* return -1; */ + + return 1; +} + + +static int getvalbool(PMI2_Keyvalpair *const pairs[], int npairs, const char *key, int *val) +{ + int found; + const char *value; + int vallen; + + + found = getval(pairs, npairs, key, &value, &vallen); + if (found != 1) + return found; + + if (strlen("TRUE") == vallen && !strncmp(value, "TRUE", vallen)) + *val = 1/*TRUE*/; + else if (strlen("FALSE") == vallen && !strncmp(value, "FALSE", vallen)) + *val = 0/*FALSE*/; + else + return -1; + + return 1; +} + + + +/* parse_keyval(cmdptr, len, key, val, vallen) + Scans through buffer specified by cmdptr looking for the first key and value. + IN/OUT cmdptr - IN: pointer to buffer; OUT: pointer to byte after the ';' terminating the value + IN/OUT len - IN: length of buffer; OUT: length of buffer not read + OUT key - pointer to null-terminated string containing the key + OUT val - pointer to string containing the value + OUT vallen - length of the value string + + This function will modify the buffer passed through cmdptr to + insert '\0' following the key, and to replace escaped ';;' with + ';'. 
+ */ +static int parse_keyval(char **cmdptr, int *len, char **key, char **val, int *vallen) +{ + int pmi2_errno = PMI2_SUCCESS; + char *c = *cmdptr; + char *d; + + /*PMI2U_printf("[BEGIN]");*/ + + /* find key */ + *key = c; /* key is at the start of the buffer */ + while (*len && *c != '=') { + --*len; + ++c; + } + PMI2U_ERR_CHKANDJUMP(*len == 0, pmi2_errno, PMI2_ERR_OTHER, "**bad_keyval"); + PMI2U_ERR_CHKANDJUMP((c - *key) > PMI2_MAX_KEYLEN, pmi2_errno, PMI2_ERR_OTHER, "**bad_keyval"); + *c = '\0'; /* terminate the key string */ + + /* skip over the '=' */ + --*len; + ++c; + + /* find val */ + *val = d = c; /* val is next */ + while (*len) { + if (*c == ';') { /* handle escaped ';' */ + if (*(c+1) != ';') + break; + else + { + --*len; + ++c; + } + } + --*len; + *(d++) = *(c++); + } + PMI2U_ERR_CHKANDJUMP(*len == 0, pmi2_errno, PMI2_ERR_OTHER, "**bad_keyval"); + PMI2U_ERR_CHKANDJUMP((d - *val) > PMI2_MAX_VALLEN, pmi2_errno, PMI2_ERR_OTHER, "**bad_keyval"); + *c = '\0'; /* terminate the val string */ + *vallen = d - *val; + + *cmdptr = c+1; /* skip over the ';' */ + --*len; + + fn_exit: + /*PMI2U_printf("[END]");*/ + return pmi2_errno; + fn_fail: + goto fn_exit; +} + +static int create_keyval(PMI2_Keyvalpair **kv, const char *key, const char *val, int vallen) +{ + int pmi2_errno = PMI2_SUCCESS; + int key_len = strlen(key); + char *key_p; + char *value_p; + PMI2U_CHKMEM_DECL(3); + + /*PMI2U_printf("[BEGIN]");*/ + /*PMI2U_printf("[BEGIN] create_keyval(%p, %s, %s, %d)", kv, key, val, vallen);*/ + + PMI2U_CHKMEM_MALLOC(*kv, PMI2_Keyvalpair *, sizeof(PMI2_Keyvalpair), pmi2_errno, "pair"); + + PMI2U_CHKMEM_MALLOC(key_p, char *, key_len+1, pmi2_errno, "key"); + MPIU_Strncpy(key_p, key, key_len+1); + key_p[key_len] = '\0'; + + PMI2U_CHKMEM_MALLOC(value_p, char *, vallen+1, pmi2_errno, "value"); + memcpy(value_p, val, vallen); + value_p[vallen] = '\0'; + + (*kv)->key = key_p; + (*kv)->value = value_p; + (*kv)->valueLen = vallen; + (*kv)->isCopy = 1/*TRUE*/; + +fn_exit: + 
PMI2U_CHKMEM_COMMIT(); + /*PMI2U_printf("[END]");*/ + return pmi2_errno; +fn_fail: + PMI2U_CHKMEM_REAP(); + goto fn_exit; +} + + +/* Note that we fill in the fields in a command that is provided. + We may want to share these routines with the PMI version 2 server */ +int PMIi_ReadCommand( int fd, PMI2_Command *cmd ) +{ + int pmi2_errno = PMI2_SUCCESS; + char cmd_len_str[PMII_COMMANDLEN_SIZE+1]; + int cmd_len, remaining_len, vallen = 0; + char *c, *cmd_buf = NULL; + char *key, *val = NULL; + ssize_t nbytes; + ssize_t offset; + int num_pairs; + int pair_index; + char *command = NULL; + int nPairs; + int found; + PMI2_Keyvalpair **pairs = NULL; + PMI2_Command *target_cmd; + + PMI2U_printf("[BEGIN]"); + + memset(cmd_len_str, 0, sizeof(cmd_len_str)); + +#ifdef MPICH_IS_THREADED + MPIU_THREAD_CHECK_BEGIN; + { + MPID_Thread_mutex_lock(&mutex); + + while (blocked && !cmd->complete) + MPID_Thread_cond_wait(&cond, &mutex); + + if (cmd->complete) { + MPID_Thread_mutex_unlock(&mutex); + goto fn_exit; + } + + blocked = 1/*TRUE*/; + MPID_Thread_mutex_unlock(&mutex); + } + MPIU_THREAD_CHECK_END; +#endif + + do { + + /* get length of cmd */ + offset = 0; + do + { + do { + nbytes = read(fd, &cmd_len_str[offset], PMII_COMMANDLEN_SIZE - offset); + } while (nbytes == -1 && errno == EINTR); + + PMI2U_ERR_CHKANDJUMP(nbytes <= 0, pmi2_errno, PMI2_ERR_OTHER, "**read %s", strerror(errno)); + + offset += nbytes; + } + while (offset < PMII_COMMANDLEN_SIZE); + + cmd_len = atoi(cmd_len_str); + + cmd_buf = malloc(cmd_len+1); + if (!cmd_buf) PMI2U_CHKMEM_SETERR(pmi2_errno, cmd_len+1, "cmd_buf"); + + memset(cmd_buf, 0, cmd_len+1); + + /* get command */ + offset = 0; + do + { + do { + nbytes = read(fd, &cmd_buf[offset], cmd_len - offset); + } while (nbytes == -1 && errno == EINTR); + + PMI2U_ERR_CHKANDJUMP(nbytes <= 0, pmi2_errno, PMI2_ERR_OTHER, "**read %s", strerror(errno)); + + offset += nbytes; + } + while (offset < cmd_len); + + PMI2U_printf("PMI received (cmdlen %d): %s", cmd_len, cmd_buf); 
+ + /* count number of "key=val;" */ + c = cmd_buf; + remaining_len = cmd_len; + num_pairs = 0; + + while (remaining_len > 0) { + while (remaining_len && *c != ';') { + --remaining_len; + ++c; + } + if (*c == ';' && *(c+1) == ';') { + remaining_len -= 2; + c += 2; + } else { + ++num_pairs; + --remaining_len; + ++c; + } + } + + c = cmd_buf; + remaining_len = cmd_len; + pmi2_errno = parse_keyval(&c, &remaining_len, &key, &val, &vallen); + if (pmi2_errno) PMI2U_ERR_POP(pmi2_errno); + + PMI2U_ERR_CHKANDJUMP(strncmp(key, "cmd", PMI2_MAX_KEYLEN) != 0, pmi2_errno, PMI2_ERR_OTHER, "**bad_cmd"); + + command = malloc(vallen+1); + if (!command) PMI2U_CHKMEM_SETERR(pmi2_errno, vallen+1, "command"); + memcpy(command, val, vallen); + val[vallen] = '\0'; + + nPairs = num_pairs-1; /* num_pairs-1 because the first pair is the command */ + + pairs = malloc(sizeof(PMI2_Keyvalpair *) * nPairs); + if (!pairs) PMI2U_CHKMEM_SETERR(pmi2_errno, sizeof(PMI2_Keyvalpair *) * nPairs, "pairs"); + + pair_index = 0; + while (remaining_len > 0) + { + PMI2_Keyvalpair *pair; + + pmi2_errno = parse_keyval(&c, &remaining_len, &key, &val, &vallen); + if (pmi2_errno) PMI2U_ERR_POP(pmi2_errno); + + pmi2_errno = create_keyval(&pair, key, val, vallen); + if (pmi2_errno) PMI2U_ERR_POP(pmi2_errno); + + pairs[pair_index] = pair; + ++pair_index; + } + + found = getvalptr(pairs, nPairs, THRID_KEY, &target_cmd); + if (!found) /* if there's no thrid specified, assume it's for you */ + target_cmd = cmd; + else + if (PMI2_debug && SEARCH_REMOVE(target_cmd) == 0) { + int i; + + PMI2U_printf("command=%s", command); + for (i = 0; i < nPairs; ++i) + dump_PMI2_Keyvalpair(pairs[i]); + } + + target_cmd->command = command; + target_cmd->nPairs = nPairs; + target_cmd->pairs = pairs; + target_cmd->complete = 1/*TRUE*/; + +#ifdef MPICH_IS_THREADED + target_cmd->complete = 1/*TRUE*/; +#endif + + if (cmd_buf) free(cmd_buf); + cmd_buf = NULL; + } while (!cmd->complete); + +#ifdef MPICH_IS_THREADED + MPIU_THREAD_CHECK_BEGIN; + { 
+ MPID_Thread_mutex_lock(&mutex); + blocked = 0/*FALSE*/; + MPID_Thread_cond_broadcast(&cond); + MPID_Thread_mutex_unlock(&mutex); + } + MPIU_THREAD_CHECK_END; +#endif + +fn_exit: + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + if (cmd_buf) free(cmd_buf); + goto fn_exit; +} + +/* PMIi_ReadCommandExp -- reads a command checks that it matches the + * expected command string exp, and parses the return code */ +int PMIi_ReadCommandExp( int fd, PMI2_Command *cmd, const char *exp, int* rc, const char **errmsg ) +{ + int pmi2_errno = PMI2_SUCCESS; + int found; + int msglen; + + PMI2U_printf("[BEGIN]"); + + pmi2_errno = PMIi_ReadCommand(fd, cmd); + if (pmi2_errno) PMI2U_ERR_POP(pmi2_errno); + + PMI2U_ERR_CHKANDJUMP(strncmp(cmd->command, exp, strlen(exp)) != 0, pmi2_errno, PMI2_ERR_OTHER, "**bad_cmd"); + + found = getvalint(cmd->pairs, cmd->nPairs, RC_KEY, rc); + PMI2U_ERR_CHKANDJUMP(found != 1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + found = getval(cmd->pairs, cmd->nPairs, ERRMSG_KEY, errmsg, &msglen); + PMI2U_ERR_CHKANDJUMP(found == -1, pmi2_errno, PMI2_ERR_OTHER, "**intern"); + + if (!found) *errmsg = NULL; + +fn_exit: + PMI2U_printf("[END]"); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + + +int PMIi_WriteSimpleCommand( int fd, PMI2_Command *resp, const char cmd[], PMI2_Keyvalpair *pairs[], int npairs) +{ + int pmi2_errno = PMI2_SUCCESS; + char cmdbuf[PMII_MAX_COMMAND_LEN]; + char cmdlenbuf[PMII_COMMANDLEN_SIZE+1]; + char *c = cmdbuf; + int ret; + int remaining_len = PMII_MAX_COMMAND_LEN; + int cmdlen; + int i; + ssize_t nbytes; + ssize_t offset; + int pair_index; + + PMI2U_printf("[BEGIN]"); + + /* leave space for length field */ + memset(c, ' ', PMII_COMMANDLEN_SIZE); + c += PMII_COMMANDLEN_SIZE; + + PMI2U_ERR_CHKANDJUMP(strlen(cmd) > PMI2_MAX_VALLEN, pmi2_errno, PMI2_ERR_OTHER, "**cmd_too_long"); + + /* Subtract the PMII_COMMANDLEN_SIZE to prevent + * certain implementation of snprintf() to + * segfault when zero out the buffer. 
+ * PMII_COMMANDLEN_SIZE must be added later on + * back again to send out the right protocol + * message size. + */ + remaining_len -= PMII_COMMANDLEN_SIZE; + + ret = snprintf(c, remaining_len, "cmd=%s;", cmd); + PMI2U_ERR_CHKANDJUMP(ret >= remaining_len, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "Ran out of room for command"); + c += ret; + remaining_len -= ret; + +#ifdef MPICH_IS_THREADED + MPIU_THREAD_CHECK_BEGIN; + if (resp) { + ret = snprintf(c, remaining_len, "thrid=%p;", resp); + PMI2U_ERR_CHKANDJUMP(ret >= remaining_len, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "Ran out of room for command"); + c += ret; + remaining_len -= ret; + } + MPIU_THREAD_CHECK_END; +#endif + + for (pair_index = 0; pair_index < npairs; ++pair_index) { + /* write key= */ + PMI2U_ERR_CHKANDJUMP(strlen(pairs[pair_index]->key) > PMI2_MAX_KEYLEN, pmi2_errno, PMI2_ERR_OTHER, "**key_too_long"); + ret = snprintf(c, remaining_len, "%s=", pairs[pair_index]->key); + PMI2U_ERR_CHKANDJUMP(ret >= remaining_len, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "Ran out of room for command"); + c += ret; + remaining_len -= ret; + + /* write value and escape ;'s as ;; */ + PMI2U_ERR_CHKANDJUMP(pairs[pair_index]->valueLen > PMI2_MAX_VALLEN, pmi2_errno, PMI2_ERR_OTHER, "**val_too_long"); + for (i = 0; i < pairs[pair_index]->valueLen; ++i) { + if (pairs[pair_index]->value[i] == ';') { + *c = ';'; + ++c; + --remaining_len; + } + *c = pairs[pair_index]->value[i]; + ++c; + --remaining_len; + } + + /* append ; */ + *c = ';'; + ++c; + --remaining_len; + } + + /* prepend the buffer length stripping off the trailing '\0' + * Add back the PMII_COMMANDLEN_SIZE to get the correct + * protocol size. 
+ */ + cmdlen = PMII_MAX_COMMAND_LEN - (remaining_len + PMII_COMMANDLEN_SIZE); + ret = snprintf(cmdlenbuf, sizeof(cmdlenbuf), "%d", cmdlen); + PMI2U_ERR_CHKANDJUMP(ret >= PMII_COMMANDLEN_SIZE, pmi2_errno, PMI2_ERR_OTHER, "**intern %s", "Command length won't fit in length buffer"); + + memcpy(cmdbuf, cmdlenbuf, ret); + + cmdbuf[cmdlen+PMII_COMMANDLEN_SIZE] = '\0'; /* silence valgrind warnings in PMI2U_printf */ + PMI2U_printf("PMI sending: %s", cmdbuf); + + + #ifdef MPICH_IS_THREADED + MPIU_THREAD_CHECK_BEGIN; + { + MPID_Thread_mutex_lock(&mutex); + + while (blocked) + MPID_Thread_cond_wait(&cond, &mutex); + + blocked = 1/*TRUE*/; + MPID_Thread_mutex_unlock(&mutex); + } + MPIU_THREAD_CHECK_END; +#endif + + if (PMI2_debug) + ENQUEUE(resp); + + offset = 0; + do { + do { + nbytes = write(fd, &cmdbuf[offset], cmdlen + PMII_COMMANDLEN_SIZE - offset); + } while (nbytes == -1 && errno == EINTR); + + PMI2U_ERR_CHKANDJUMP(nbytes <= 0, pmi2_errno, PMI2_ERR_OTHER, "**write %s", strerror(errno)); + + offset += nbytes; + } while (offset < cmdlen + PMII_COMMANDLEN_SIZE); +#ifdef MPICH_IS_THREADED + MPIU_THREAD_CHECK_BEGIN; + { + MPID_Thread_mutex_lock(&mutex); + blocked = 0/*FALSE*/; + MPID_Thread_cond_broadcast(&cond); + MPID_Thread_mutex_unlock(&mutex); + } + MPIU_THREAD_CHECK_END; +#endif + +fn_fail: + goto fn_exit; +fn_exit: + PMI2U_printf("[END]"); + return pmi2_errno; +} + +int PMIi_WriteSimpleCommandStr(int fd, PMI2_Command *resp, const char cmd[], ...) 
+{ + int pmi2_errno = PMI2_SUCCESS; + va_list ap; + PMI2_Keyvalpair *pairs; + PMI2_Keyvalpair **pairs_p; + int npairs; + int i; + const char *key; + const char *val; + PMI2U_CHKMEM_DECL(2); + + PMI2U_printf("[BEGIN]"); + + npairs = 0; + va_start(ap, cmd); + while ((key = va_arg(ap, const char*))) { + val = va_arg(ap, const char*); + ++npairs; + } + va_end(ap); + + /* allocates n+1 pairs in case npairs is 0, avoiding unnecessary warning logs */ + PMI2U_CHKMEM_MALLOC(pairs, PMI2_Keyvalpair*, (sizeof(PMI2_Keyvalpair) * (npairs+1)), pmi2_errno, "pairs"); + PMI2U_CHKMEM_MALLOC(pairs_p, PMI2_Keyvalpair**, (sizeof(PMI2_Keyvalpair*) * (npairs+1)), pmi2_errno, "pairs_p"); + + i = 0; + va_start(ap, cmd); + while ((key = va_arg(ap, const char *))) { + val = va_arg(ap, const char *); + pairs_p[i] = &pairs[i]; + pairs[i].key = key; + pairs[i].value = val; + pairs[i].valueLen = strlen(val); + pairs[i].isCopy = 0/*FALSE*/; + ++i; + } + va_end(ap); + + pmi2_errno = PMIi_WriteSimpleCommand(fd, resp, cmd, pairs_p, npairs); + if (pmi2_errno) PMI2U_ERR_POP(pmi2_errno); + +fn_exit: + PMI2U_printf("[END]"); + PMI2U_CHKMEM_FREEALL(); + return pmi2_errno; +fn_fail: + goto fn_exit; +} + + +/* + * This code allows a program to contact a host/port for the PMI socket. + */ +#include <sys/types.h> +#include <sys/param.h> + +/* sockaddr_in (Internet) */ +#include <netinet/in.h> +/* TCP_NODELAY */ +#include <netinet/tcp.h> + +/* sockaddr_un (Unix) */ +#include <sys/un.h> + +/* defs of gethostbyname */ +#include <netdb.h> + +/* fcntl, F_GET/SETFL */ +#include <fcntl.h> + +/* This is really IP!? 
*/
+#ifndef TCP
+#define TCP 0
+#endif
+
+/* Connect to the process manager listening on hostname:portnum, instead of
+   using a specified fd inherited from a parent process.
+
+   Returns the connected socket fd, or -1 when the host cannot be resolved,
+   the resolved address does not fit an IPv4 sockaddr_in, the socket cannot
+   be created, or connect() fails.  The connect() errors EINPROGRESS and
+   EISCONN are not treated as failures.  The socket is closed on every
+   failure path so that no descriptor is leaked. */
+static int PMII_Connect_to_pm( char *hostname, int portnum )
+{
+    struct hostent *hp;
+    struct sockaddr_in sa;
+    int fd;
+    int optval = 1;
+
+    hp = gethostbyname( hostname );
+    if (!hp) {
+        PMI2U_printf("Unable to get host entry for %s", hostname );
+        return -1;
+    }
+
+    /* The address is copied into sa.sin_addr below; refuse anything that
+       would not fit rather than overrun the structure */
+    if (hp->h_length < 0 || (size_t) hp->h_length > sizeof(sa.sin_addr)) {
+        PMI2U_printf("Unsupported address length %d for %s", hp->h_length, hostname );
+        return -1;
+    }
+
+    memset( (void *)&sa, 0, sizeof(sa) );
+    /* POSIX might define h_addr_list only and not define h_addr */
+#ifdef HAVE_H_ADDR_LIST
+    memcpy( (void *)&sa.sin_addr, (void *)hp->h_addr_list[0], hp->h_length);
+#else
+    memcpy( (void *)&sa.sin_addr, (void *)hp->h_addr, hp->h_length);
+#endif
+    sa.sin_family = hp->h_addrtype;
+    sa.sin_port = htons( (unsigned short) portnum );
+
+    fd = socket( AF_INET, SOCK_STREAM, TCP );
+    if (fd < 0) {
+        PMI2U_printf("Unable to get AF_INET socket" );
+        return -1;
+    }
+
+    /* Failure to set TCP_NODELAY is reported but is not fatal */
+    if (setsockopt( fd, IPPROTO_TCP, TCP_NODELAY,
+                    (char *)&optval, sizeof(optval) )) {
+        perror( "Error calling setsockopt:" );
+    }
+
+    /* We wait here for the connection to succeed */
+    if (connect( fd, (struct sockaddr *)&sa, sizeof(sa) ) < 0) {
+        switch (errno) {
+        case EINPROGRESS: /* (nonblocking) - select for writing. */
+        case EISCONN: /* (already connected) */
+            break;
+
+        case ECONNREFUSED:
+            PMI2U_printf("connect failed with connection refused" );
+            close(fd);
+            return -1;
+
+        case ETIMEDOUT: /* timed out */
+            PMI2U_printf("connect failed with timeout" );
+            close(fd);
+            return -1;
+
+        default:
+            /* report errno before close() has a chance to change it */
+            PMI2U_printf("connect failed with errno %d", errno );
+            close(fd);
+            return -1;
+        }
+    }
+
+    return fd;
+}
+
+/* ------------------------------------------------------------------------- */
+/*
+ * Singleton Init.
+ * + * MPI-2 allows processes to become MPI processes and then make MPI calls, + * such as MPI_Comm_spawn, that require a process manager (this is different + * than the much simpler case of allowing MPI programs to run with an + * MPI_COMM_WORLD of size 1 without an mpiexec or process manager). + * + * The process starts when either the client or the process manager contacts + * the other. If the client starts, it sends a singinit command and + * waits for the server to respond with its own singinit command. + * If the server starts, it sends a singinit command and waits for the + * client to respond with its own singinit command. + * + * client sends singinit with these required values + * pmi_version=<value of PMI_VERSION> + * pmi_subversion=<value of PMI_SUBVERSION> + * + * and these optional values + * stdio=[yes|no] + * authtype=[none|shared|<other-to-be-defined>] + * authstring=<string> + * + * server sends singinit with the same required and optional values as + * above. + * + * At this point, the protocol is now the same in both cases, and has the + * following components: + * + * server sends singinit_info with these required fields + * versionok=[yes|no] + * stdio=[yes|no] + * kvsname=<string> + * + * The client then issues the init command (see PMII_getmaxes) + * + * cmd=init pmi_version=<val> pmi_subversion=<val> + * + * and expects to receive a + * + * cmd=response_to_init rc=0 pmi_version=<val> pmi_subversion=<val> + * + * (This is the usual init sequence). + * + */ +/* ------------------------------------------------------------------------- */ +/* This is a special routine used to re-initialize PMI when it is in + the singleton init case. That is, the executable was started without + mpiexec, and PMI2_Init returned as if there was only one process. + + Note that PMI routines should not call PMII_singinit; they should + call PMIi_InitIfSingleton(), which both connects to the process manager + and sets up the initial KVS connection entry. 
+*/ + +static int PMII_singinit(void) +{ + return 0; +} + +/* Promote PMI to a fully initialized version if it was started as + a singleton init */ +static int PMIi_InitIfSingleton(void) +{ + return 0; +} + +static int accept_one_connection(int list_sock) +{ + int gotit, new_sock; + struct sockaddr_in from; + socklen_t len; + + len = sizeof(from); + gotit = 0; + while ( ! gotit ) + { + new_sock = accept(list_sock, (struct sockaddr *)&from, &len); + if (new_sock == -1) + { + if (errno == EINTR) + continue; /* interrupted? If so, try again */ + else + { + PMI2U_printf("accept failed in accept_one_connection"); + exit (-1); + } + } + else gotit = 1; + } + + return new_sock; +} + + +/* Get the FD to use for PMI operations. If a port is used, rather than + a pre-established FD (i.e., via pipe), this routine will handle the + initial handshake. +*/ +static int getPMIFD(void) +{ + int pmi2_errno = PMI2_SUCCESS; + char *p; + + /* Set the default */ + PMI2_fd = -1; + + p = getenv("PMI_FD"); + if (p) { + PMI2_fd = atoi(p); + goto fn_exit; + } + + p = getenv( "PMI_PORT" ); + if (p) { + int portnum; + char hostname[MAXHOSTNAME+1]; + char *pn, *ph; + + /* Connect to the indicated port (in format hostname:portnumber) + and get the fd for the socket */ + + /* Split p into host and port */ + pn = p; + ph = hostname; + while (*pn && *pn != ':' && (ph - hostname) < MAXHOSTNAME) { + *ph++ = *pn++; + } + *ph = 0; + + PMI2U_ERR_CHKANDJUMP(*pn != ':', pmi2_errno, PMI2_ERR_OTHER, "**pmi2_port %s", p); + + portnum = atoi( pn+1 ); + /* FIXME: Check for valid integer after : */ + /* This routine only gets the fd to use to talk to + the process manager. 
The handshake below is used + to setup the initial values */ + PMI2_fd = PMII_Connect_to_pm( hostname, portnum ); + PMI2U_ERR_CHKANDJUMP(PMI2_fd < 0, pmi2_errno, PMI2_ERR_OTHER, "**connect_to_pm %s %d", hostname, portnum); + } + + /* OK to return success for singleton init */ + + fn_exit: + return pmi2_errno; + fn_fail: + goto fn_exit; +} + +/* ----------------------------------------------------------------------- */ +/* + * This function is used to request information from the server and check + * that the response uses the expected command name. On a successful + * return from this routine, additional PMI2U_getval calls may be used + * to access information about the returned value. + * + * If checkRc is true, this routine also checks that the rc value returned + * was 0. If not, it uses the "msg" value to report on the reason for + * the failure. + */ +static int GetResponse( const char request[], const char expectedCmd[], + int checkRc ) +{ + int err = 0; + + return err; +} + +static void dump_PMI2_Keyvalpair(PMI2_Keyvalpair *kv) +{ + PMI2U_printf("key = %s", kv->key); + PMI2U_printf("value = %s", kv->value); + PMI2U_printf("valueLen = %d", kv->valueLen); + PMI2U_printf("isCopy = %s", kv->isCopy ? "TRUE" : "FALSE"); +} + +static void dump_PMI2_Command(PMI2_Command *cmd) +{ + int i; + + PMI2U_printf("cmd = %s", cmd->command); + PMI2U_printf("nPairs = %d", cmd->nPairs); + + for (i = 0; i < cmd->nPairs; ++i) + dump_PMI2_Keyvalpair(cmd->pairs[i]); +} diff --git a/contribs/pmi2/pmi2_util.c b/contribs/pmi2/pmi2_util.c new file mode 100644 index 0000000000000000000000000000000000000000..8c9b4ee94f3cc65d2cb5fd3840eaa8bb5db0f8d5 --- /dev/null +++ b/contribs/pmi2/pmi2_util.c @@ -0,0 +1,275 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2001 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */
+
+/* Allow fprintf to logfile */
+/* style: allow:fprintf:1 sig:0 */
+
+/* Utility functions associated with PMI implementation, but not part of
+   the PMI interface itself.  Reading and writing on pipes, signals, and parsing
+   key=value messages
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+
+#include "pmi2_util.h"
+
+#define MAXVALLEN 1024
+#define MAXKEYLEN 32
+
+/* These are not the keyvals in the keyval space that is part of the
+   PMI specification.
+   They are just part of this implementation's internal utilities.
+ */
+struct PMI2U_keyval_pairs {
+    char key[MAXKEYLEN];
+    char value[MAXVALLEN];
+};
+static struct PMI2U_keyval_pairs PMI2U_keyval_tab[64] = { { { 0 }, { 0 } } };
+static int PMI2U_keyval_tab_idx = 0;
+
+/* This is used to prepend printed output.  Set the initial value to
+   "unset" */
+static char PMI2U_print_id[PMI2_IDSIZE] = "unset";
+
+/* Set the print id to "cli_<PMI_rank>" */
+void PMI2U_Set_rank(int PMI_rank) {
+    snprintf(PMI2U_print_id, PMI2_IDSIZE, "cli_%d", PMI_rank);
+}
+/* Set the print id to "server" */
+void PMI2U_SetServer(void) {
+    strncpy(PMI2U_print_id, "server", PMI2_IDSIZE);
+}
+
+#define MAX_READLINE 1024
+/*
+ * Return the next newline-terminated string of maximum length maxlen.
+ * This is a buffered version, and reads from fd as necessary.
+ *
+ * buf is always null-terminated.  The return value is the number of
+ * characters stored in buf, not counting the null; it is 0 when EOF is
+ * hit before any character was stored and -1 when read() fails before
+ * any character was stored.  Characters read from fd beyond the returned
+ * line stay in a static buffer and are handed out by the next call.
+ */
+int PMI2U_readline(int fd, char *buf, int maxlen) {
+    static char readbuf[MAX_READLINE];
+    static char *nextChar = 0, *lastChar = 0; /* lastChar is really one past
+                                                 last char */
+    int curlen, n;
+    char *p, ch;
+
+    /* Note: On the client side, only one thread at a time should
+       be calling this, and there should only be a single fd.
+       Server side code should not use this routine (see the
+       replacement version in src/pm/util/pmiserv.c) */
+    /*PMI2U_Assert(nextChar == lastChar || fd == lastfd);*/
+
+    p = buf;
+    curlen = 1; /* Make room for the null */
+    while (curlen < maxlen) {
+        if (nextChar == lastChar) {
+            /* buffer drained: refill it, retrying interrupted reads */
+            do {
+                n = read(fd, readbuf, sizeof(readbuf) - 1);
+            } while (n == -1 && errno == EINTR);
+            if (n == 0) {
+                /* EOF */
+                break;
+            } else if (n < 0) {
+                /* curlen 0 makes the function return -1, but only when
+                   nothing has been stored in buf yet */
+                if (curlen == 1) {
+                    curlen = 0;
+                }
+                break;
+            }
+            nextChar = readbuf;
+            lastChar = readbuf + n;
+            /* Add a null at the end just to make it easier to print
+               the read buffer */
+            readbuf[n] = 0;
+            /* FIXME: Make this an optional output */
+            /* printf( "Readline %s\n", readbuf ); */
+        }
+
+        ch = *nextChar++;
+        *p++ = ch;
+        curlen++;
+        if (ch == '\n')
+            break;
+    }
+
+    /* We null terminate the string for convenience in printing */
+    *p = 0;
+
+    PMI2U_printf("PMI received: %s", buf);
+
+    /* Return the number of characters, not counting the null */
+    return curlen - 1;
+}
+
+/*
+ * Write the newline-terminated string buf to fd.
+ *
+ * A string that is empty or does not end in a newline is not written;
+ * this is only logged and 0 is returned.  Otherwise the whole string is
+ * written, retrying interrupted and short writes.  Returns 0 on success
+ * and -1 if write() fails or makes no progress.
+ */
+int PMI2U_writeline(int fd, char *buf) {
+    int size = strlen(buf), n, sent = 0;
+
+    /* size == 0 is tested first: buf[size - 1] would otherwise read the
+       byte in front of an empty string */
+    if (size == 0 || buf[size - 1] != '\n') /* error: no newline at end */
+        PMI2U_printf("write_line: message string doesn't end in newline: :%s:", buf);
+    else {
+        PMI2U_printf("PMI sending: %s", buf);
+
+        /* write() may accept fewer bytes than requested, so keep going
+           until the complete line has been handed over */
+        while (sent < size) {
+            do {
+                n = write(fd, buf + sent, size - sent);
+            } while (n == -1 && errno == EINTR);
+
+            if (n <= 0) {
+                PMI2U_printf("write_line error; fd=%d buf=:%s:", fd, buf);
+                return (-1);
+            }
+            sent += n;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Given an input string st, parse it into internal storage that can be
+ * queried by routines such as PMI2U_getval.
+ */
+/* The string is a sequence of blank-separated key=value pairs ended by a
+   newline or the terminating null.  st is modified in place: the '=' after
+   each key is overwritten with a null.  Keys longer than MAXKEYLEN-1 and
+   values longer than MAXVALLEN-1 characters are truncated.
+   Returns 0 on success and -1 if st is NULL, is malformed, or holds more
+   pairs than the internal table can store. */
+int PMI2U_parse_keyvals(char *st) {
+    char *p, *keystart, *valstart;
+    int offset;
+    const int tab_size =
+        (int) (sizeof(PMI2U_keyval_tab) / sizeof(PMI2U_keyval_tab[0]));
+
+    if (!st)
+        return (-1);
+
+    PMI2U_keyval_tab_idx = 0;
+    p = st;
+    while (1) {
+        while (*p == ' ')
+            p++;
+        /* got non-blank */
+        if (*p == '=') {
+            PMI2U_printf("PMI2U_parse_keyvals: unexpected = at character %ld in %s",
+                         (long int) (p - st), st);
+            return (-1);
+        }
+        if (*p == '\n' || *p == '\0')
+            return (0); /* normal exit */
+        /* got normal character */
+        if (PMI2U_keyval_tab_idx >= tab_size) {
+            /* another pair follows but the table is full; storing it
+               would write past the end of PMI2U_keyval_tab */
+            PMI2U_printf("PMI2U_parse_keyvals: too many key=value pairs in %s", st);
+            return (-1);
+        }
+        keystart = p; /* remember where key started */
+        while (*p != ' ' && *p != '=' && *p != '\n' && *p != '\0')
+            p++;
+        if (*p == ' ' || *p == '\n' || *p == '\0') {
+            PMI2U_printf("PMI2U_parse_keyvals: unexpected key delimiter at character %ld in %s",
+                         (long int) (p - st), st);
+            return (-1);
+        }
+        /* Null terminate the key */
+        *p = 0;
+        /* store key */
+        strncpy(PMI2U_keyval_tab[PMI2U_keyval_tab_idx].key, keystart,
+                MAXKEYLEN);
+        PMI2U_keyval_tab[PMI2U_keyval_tab_idx].key[MAXKEYLEN-1] = '\0';
+        valstart = ++p; /* start of value */
+        while (*p != ' ' && *p != '\n' && *p != '\0')
+            p++;
+        /* When compiled with -fPIC, the pgcc compiler generates incorrect
+           code if "p - valstart" is used instead of using the
+           intermediate offset */
+        offset = p - valstart;
+        /* clamp so that neither the copy nor the terminator can land
+           outside value[] */
+        if (offset > MAXVALLEN - 1)
+            offset = MAXVALLEN - 1;
+        /* store value */
+        memcpy(PMI2U_keyval_tab[PMI2U_keyval_tab_idx].value, valstart,
+               offset);
+        PMI2U_keyval_tab[PMI2U_keyval_tab_idx].value[offset] = '\0';
+        PMI2U_keyval_tab_idx++;
+        if (*p == ' ')
+            continue;
+        if (*p == '\n' || *p == '\0')
+            return (0); /* value has been set to empty */
+    }
+}
+
+/* Print every key=value pair stored by the last PMI2U_parse_keyvals call */
+void PMI2U_dump_keyvals(void) {
+    int i;
+    for (i = 0; i < PMI2U_keyval_tab_idx; i++)
+        PMI2U_printf(" %s=%s", PMI2U_keyval_tab[i].key, PMI2U_keyval_tab[i].value);
+}
+
+/* Look up keystr among the parsed pairs and copy its value into valstr,
+   a buffer of vallen bytes (the copy is truncated to fit and always
+   null-terminated).  Returns valstr when the key exists; otherwise sets
+   valstr to the empty string and returns NULL.  The stored value itself
+   is never modified. */
+char *PMI2U_getval(const char *keystr, char *valstr, int vallen) {
+    int i;
+
+    for (i = 0; i < PMI2U_keyval_tab_idx; i++) {
+        if (strcmp(keystr, PMI2U_keyval_tab[i].key) == 0) {
+            /* MPIU_Strncpy terminates valstr itself, so no terminator is
+               written into the table entry */
+            if (vallen > 0)
+                MPIU_Strncpy(valstr, PMI2U_keyval_tab[i].value, vallen);
+            return valstr;
+        }
+    }
+    valstr[0] = '\0';
+    return NULL ;
+}
+
+/* Replace the stored value of every pair whose key equals keystr with
+   valstr, truncated to MAXVALLEN-1 characters */
+void PMI2U_chgval(const char *keystr, char *valstr) {
+    int i;
+
+    for (i = 0; i < PMI2U_keyval_tab_idx; i++) {
+        if (strcmp(keystr, PMI2U_keyval_tab[i].key) == 0) {
+            strncpy(PMI2U_keyval_tab[i].value, valstr, MAXVALLEN);
+            PMI2U_keyval_tab[i].value[MAXVALLEN - 1] = '\0';
+        }
+    }
+}
+
+/* This code is borrowed from mpich2-1.5/src/pm/util/safestr2.c.
+   The reason is to keep the same code logic around strncpy() as
+   in the original PMI2 implementation.
+
+   @ MPIU_Strncpy - Copy a string with a maximum length
+    Input Parameters:
++   instr - String to copy
+-   maxlen - Maximum total length of 'outstr'
+
+    Output Parameter:
+.   outstr - String to copy into
+
+    Notes:
+    This routine is the routine that you wish 'strncpy' was.  In copying
+    'instr' to 'outstr', it stops when either the end of 'instr' (the
+    null character) is seen or the maximum length 'maxlen' is reached.
+    Unlike 'strncpy', it does not add enough nulls to 'outstr' after
+    copying 'instr' in order to move precisely 'maxlen' characters.
+    Thus, this routine may be used anywhere 'strcpy' is used, without any
+    performance cost related to large values of 'maxlen'.
+
+    If there is insufficient space in the destination, the destination is
+    still null-terminated, to avoid potential failures in routines that neglect
+    to check the error code return from this routine.
+ + Module: + Utility + @*/ +int +MPIU_Strncpy(char *dest, const char *src, size_t n) +{ + char *d_ptr = dest; + const char *s_ptr = src; + register int i; + + if (n == 0) return 0; + + i = (int)n; + while (*s_ptr && i-- > 0) { + *d_ptr++ = *s_ptr++; + } + + if (i > 0) { + *d_ptr = 0; + return 0; + } + else { + /* Force a null at the end of the string (gives better safety + in case the user fails to check the error code) + */ + dest[n-1] = 0; + /* We may want to force an error message here, at least in the + debugging version + */ + /* printf( "failure in copying %s with length %d\n", src, n ); */ + return 1; + } +} diff --git a/contribs/pmi2/pmi2_util.h b/contribs/pmi2/pmi2_util.h new file mode 100644 index 0000000000000000000000000000000000000000..5b53dc8163c5ea1a872aa64c903d7a8ef5039e40 --- /dev/null +++ b/contribs/pmi2/pmi2_util.h @@ -0,0 +1,111 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef PMI2UTIL_H_INCLUDED +#define PMI2UTIL_H_INCLUDED + +#include <stdio.h> +#include <stdlib.h> + +/* maximum sizes for arrays */ +#define PMI2_MAXLINE 1024 +#define PMI2_IDSIZE 32 + +#define TRUE 1 +#define FALSE 0 + +#ifdef HAVE__FUNCTION__ +#define PMI2U_FUNC __FUNCTION__ +#elif defined(HAVE_CAP__FUNC__) +#define PMI2U_FUNC __FUNC__ +#elif defined(HAVE__FUNC__) +#define PMI2U_FUNC __func__ +#else +#define PMI2U_FUNC __FILE__ +#endif + +#ifdef DEBUG + #define PMI2U_printf(x...) do { \ + char logstr[1024]; \ + snprintf(logstr, 1024, x); \ + fprintf(stderr, "[%s (%d): %s] %s\n", \ + __FILE__, __LINE__, __FUNCTION__, logstr); \ + } while (0) +#else + #define PMI2U_printf(x...) +#endif + +#define PMI2U_Assert(a_) do { \ + if (!(a_)) { \ + PMI2U_printf("ASSERT( %s )", #a_); \ + } \ +} while (0) + +#define PMI2U_ERR_POP(err) do { \ + pmi2_errno = err; \ + PMI2U_printf("err. %d", pmi2_errno); \ + goto fn_fail; \ +} while (0) + +#define PMI2U_ERR_SETANDJUMP(err, class, x...) 
do { \ + char errstr[1024]; \ + snprintf(errstr, 1024, x); \ + PMI2U_printf("err. %s", errstr);\ + pmi2_errno = class; \ + goto fn_fail; \ +} while (0) + +#define PMI2U_ERR_CHKANDJUMP(cond, err, class, x...) do { \ + if (cond) PMI2U_ERR_SETANDJUMP(err, class, x); \ +} while (0) + +#define PMI2U_CHKMEM_SETERR(rc_, nbytes_, name_) do { \ + PMI2U_printf("ERROR: memory allocation of %lu bytes failed for %s", \ + (long unsigned int) nbytes_, name_); \ + rc_ = PMI2_ERR_NOMEM; \ + goto fn_exit; \ +} while(0) + +/* Persistent memory that we may want to recover if something goes wrong */ +#define PMI2U_CHKMEM_DECL(n_) \ + void* pmi2u_chkmem_stk_[n_] = {0}; \ + int pmi2u_chkmem_stk_sp_= 0; \ + const int pmi2u_chkmem_stk_sz_ = n_ + +#define PMI2U_CHKMEM_REAP() \ + while (pmi2u_chkmem_stk_sp_ > 0) { \ + free ((void*)( pmi2u_chkmem_stk_[--pmi2u_chkmem_stk_sp_] )); \ + } + +#define PMI2U_CHKMEM_COMMIT() pmi2u_chkmem_stk_sp_ = 0 + +#define PMI2U_CHKMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_) do { \ + pointer_ = (type_)malloc(nbytes_); \ + if (pointer_ && (pmi2u_chkmem_stk_sp_< pmi2u_chkmem_stk_sz_)) { \ + pmi2u_chkmem_stk_[pmi2u_chkmem_stk_sp_++] = pointer_; \ + } else { \ + PMI2U_CHKMEM_SETERR(rc_,nbytes_,name_); \ + goto fn_fail; \ + } \ +} while(0) + +#define PMI2U_CHKMEM_FREEALL() \ + while (pmi2u_chkmem_stk_sp_ > 0) { \ + free ((void*)( pmi2u_chkmem_stk_[--pmi2u_chkmem_stk_sp_] )); \ +} + +/* prototypes for PMIU routines */ +void PMI2U_Set_rank( int PMI_rank ); +void PMI2U_SetServer( void ); +int PMI2U_readline( int fd, char *buf, int max ); +int PMI2U_writeline( int fd, char *buf ); +int PMI2U_parse_keyvals( char *st ); +void PMI2U_dump_keyvals( void ); +char *PMI2U_getval( const char *keystr, char *valstr, int vallen ); +void PMI2U_chgval( const char *keystr, char *valstr ); +int MPIU_Strncpy(char *, const char *, size_t); + +#endif /* PMI2UTIL_H_INCLUDED */ diff --git a/contribs/pmi2/slurm/pmi2.h b/contribs/pmi2/slurm/pmi2.h new file mode 100644 index 
0000000000000000000000000000000000000000..e72a985ff676963f85833608f293ad1b66e153f0 --- /dev/null +++ b/contribs/pmi2/slurm/pmi2.h @@ -0,0 +1,704 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef PMI2_H_INCLUDED +#define PMI2_H_INCLUDED + +#ifndef USE_PMI2_API +/*#error This header file defines the PMI2 API, but PMI2 was not selected*/ +#endif + +#define PMI2_MAX_KEYLEN 64 +#define PMI2_MAX_VALLEN 1024 +#define PMI2_MAX_ATTRVALUE 1024 +#define PMI2_ID_NULL -1 + +#define PMII_COMMANDLEN_SIZE 6 +#define PMII_MAX_COMMAND_LEN (64*1024) + +#if defined(__cplusplus) +extern "C" { +#endif + +static const char FULLINIT_CMD[] = "fullinit"; +static const char FULLINITRESP_CMD[] = "fullinit-response"; +static const char FINALIZE_CMD[] = "finalize"; +static const char FINALIZERESP_CMD[] = "finalize-response"; +static const char ABORT_CMD[] = "abort"; +static const char JOBGETID_CMD[] = "job-getid"; +static const char JOBGETIDRESP_CMD[] = "job-getid-response"; +static const char JOBCONNECT_CMD[] = "job-connect"; +static const char JOBCONNECTRESP_CMD[] = "job-connect-response"; +static const char JOBDISCONNECT_CMD[] = "job-disconnect"; +static const char JOBDISCONNECTRESP_CMD[] = "job-disconnect-response"; +static const char KVSPUT_CMD[] = "kvs-put"; +static const char KVSPUTRESP_CMD[] = "kvs-put-response"; +static const char KVSFENCE_CMD[] = "kvs-fence"; +static const char KVSFENCERESP_CMD[] = "kvs-fence-response"; +static const char KVSGET_CMD[] = "kvs-get"; +static const char KVSGETRESP_CMD[] = "kvs-get-response"; +static const char GETNODEATTR_CMD[] = "info-getnodeattr"; +static const char GETNODEATTRRESP_CMD[] = "info-getnodeattr-response"; +static const char PUTNODEATTR_CMD[] = "info-putnodeattr"; +static const char PUTNODEATTRRESP_CMD[] = "info-putnodeattr-response"; +static const char GETJOBATTR_CMD[] = "info-getjobattr"; +static const char GETJOBATTRRESP_CMD[] = 
"info-getjobattr-response"; +static const char NAMEPUBLISH_CMD[] = "name-publish"; +static const char NAMEPUBLISHRESP_CMD[] = "name-publish-response"; +static const char NAMEUNPUBLISH_CMD[] = "name-unpublish"; +static const char NAMEUNPUBLISHRESP_CMD[] = "name-unpublish-response"; +static const char NAMELOOKUP_CMD[] = "name-lookup"; +static const char NAMELOOKUPRESP_CMD[] = "name-lookup-response"; + +static const char PMIJOBID_KEY[] = "pmijobid"; +static const char PMIRANK_KEY[] = "pmirank"; +static const char SRCID_KEY[] = "srcid"; +static const char THREADED_KEY[] = "threaded"; +static const char RC_KEY[] = "rc"; +static const char ERRMSG_KEY[] = "errmsg"; +static const char PMIVERSION_KEY[] = "pmi-version"; +static const char PMISUBVER_KEY[] = "pmi-subversion"; +static const char RANK_KEY[] = "rank"; +static const char SIZE_KEY[] = "size"; +static const char APPNUM_KEY[] = "appnum"; +static const char SPAWNERJOBID_KEY[] = "spawner-jobid"; +static const char DEBUGGED_KEY[] = "debugged"; +static const char PMIVERBOSE_KEY[] = "pmiverbose"; +static const char ISWORLD_KEY[] = "isworld"; +static const char MSG_KEY[] = "msg"; +static const char JOBID_KEY[] = "jobid"; +static const char KVSCOPY_KEY[] = "kvscopy"; +static const char KEY_KEY[] = "key"; +static const char VALUE_KEY[] = "value"; +static const char FOUND_KEY[] = "found"; +static const char WAIT_KEY[] = "wait"; +static const char NAME_KEY[] = "name"; +static const char PORT_KEY[] = "port"; +static const char THRID_KEY[] = "thrid"; +static const char INFOKEYCOUNT_KEY[] = "infokeycount"; +static const char INFOKEY_KEY[] = "infokey%d"; +static const char INFOVAL_KEY[] = "infoval%d"; + +static const char TRUE_VAL[] = "TRUE"; +static const char FALSE_VAL[] = "FALSE"; + +/* Local types */ + +/* Parse commands are in this structure. 
Fields in this structure are + dynamically allocated as necessary */ +typedef struct PMI2_Keyvalpair { + const char *key; + const char *value; + int valueLen; /* Length of a value (values may contain nulls, so + we need this) */ + int isCopy; /* The value is a copy (and will need to be freed) + if this is true, otherwise, + it is a null-terminated string in the original + buffer */ +} PMI2_Keyvalpair; + +typedef struct PMI2_Command { + int nPairs; /* Number of key=value pairs */ + char *command; /* Overall command buffer */ + PMI2_Keyvalpair **pairs; /* Array of pointers to pairs */ + int complete; +} PMI2_Command; + + +/*D +PMI2_CONSTANTS - PMI2 definitions + +Error Codes: ++ PMI2_SUCCESS - operation completed successfully +. PMI2_FAIL - operation failed +. PMI2_ERR_NOMEM - input buffer not large enough +. PMI2_ERR_INIT - PMI not initialized +. PMI2_ERR_INVALID_ARG - invalid argument +. PMI2_ERR_INVALID_KEY - invalid key argument +. PMI2_ERR_INVALID_KEY_LENGTH - invalid key length argument +. PMI2_ERR_INVALID_VAL - invalid val argument +. PMI2_ERR_INVALID_VAL_LENGTH - invalid val length argument +. PMI2_ERR_INVALID_LENGTH - invalid length argument +. PMI2_ERR_INVALID_NUM_ARGS - invalid number of arguments +. PMI2_ERR_INVALID_ARGS - invalid args argument +. PMI2_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument +. PMI2_ERR_INVALID_KEYVALP - invalid keyvalp argument +. 
PMI2_ERR_INVALID_SIZE - invalid size argument +- PMI2_ERR_OTHER - other unspecified error + +D*/ +#define PMI2_SUCCESS 0 +#define PMI2_FAIL -1 +#define PMI2_ERR_INIT 1 +#define PMI2_ERR_NOMEM 2 +#define PMI2_ERR_INVALID_ARG 3 +#define PMI2_ERR_INVALID_KEY 4 +#define PMI2_ERR_INVALID_KEY_LENGTH 5 +#define PMI2_ERR_INVALID_VAL 6 +#define PMI2_ERR_INVALID_VAL_LENGTH 7 +#define PMI2_ERR_INVALID_LENGTH 8 +#define PMI2_ERR_INVALID_NUM_ARGS 9 +#define PMI2_ERR_INVALID_ARGS 10 +#define PMI2_ERR_INVALID_NUM_PARSED 11 +#define PMI2_ERR_INVALID_KEYVALP 12 +#define PMI2_ERR_INVALID_SIZE 13 +#define PMI2_ERR_OTHER 14 + +/* This is here to allow spawn multiple functions to compile. This + needs to be removed once those functions are fixed for pmi2 */ +/* +typedef struct PMI_keyval_t +{ + char * key; + char * val; +} PMI_keyval_t; +*/ + +/*@ + PMI2_Connect_comm_t - connection structure used when connecting to other jobs + + Fields: + + read - Read from a connection to the leader of the job to which + this process will be connecting. Returns 0 on success or an MPI + error code on failure. + . write - Write to a connection to the leader of the job to which + this process will be connecting. Returns 0 on success or an MPI + error code on failure. + . ctx - An anonymous pointer to data that may be used by the read + and write members. + - isMaster - Indicates which process is the "master"; may have the + values 1 (is the master), 0 (is not the master), or -1 (neither is + designated as the master). The two processes must agree on which + process is the master, or both must select -1 (neither is the + master). + + Notes: + A typical implementation of these functions will use the read and + write calls on a pre-established file descriptor (fd) between the + two leading processes. This will be needed only if the PMI server + cannot access the KVS spaces of another job (this may happen, for + example, if each mpiexec creates the KVS spaces for the processes + that it manages). 
+ +@*/ +typedef struct PMI2_Connect_comm { + int (*read)( void *buf, int maxlen, void *ctx ); + int (*write)( const void *buf, int len, void *ctx ); + void *ctx; + int isMaster; +} PMI2_Connect_comm_t; + + +/*S + MPID_Info - Structure of an MPID info + + Notes: + There is no reference count because 'MPI_Info' values, unlike other MPI + objects, may be changed after they are passed to a routine without + changing the routine''s behavior. In other words, any routine that uses + an 'MPI_Info' object must make a copy or otherwise act on any info value + that it needs. + + A linked list is used because the typical 'MPI_Info' list will be short + and a simple linked list is easy to implement and to maintain. Similarly, + a single structure rather than separate header and element structures are + defined for simplicity. No separate thread lock is provided because + info routines are not performance critical; they may use the single + critical section lock in the 'MPIR_Process' structure when they need a + thread lock. + + This particular form of linked list (in particular, with this particular + choice of the first two members) is used because it allows us to use + the same routines to manage this list as are used to manage the + list of free objects (in the file 'src/util/mem/handlemem.c'). In + particular, if lock-free routines for updating a linked list are + provided, they can be used for managing the 'MPID_Info' structure as well. + + The MPI standard requires that keys can be no less that 32 characters and + no more than 255 characters. There is no mandated limit on the size + of values. + + Module: + Info-DS + S*/ +typedef struct MPID_Info { + int handle; + int pobj_mutex; + int ref_count; + struct MPID_Info *next; + char *key; + char *value; +} MPID_Info; + +#define PMI2U_Info MPID_Info + +/*@ + PMI2_Init - initialize the Process Manager Interface + + Output Parameter: + + spawned - spawned flag + . size - number of processes in the job + . 
rank - rank of this process in the job + - appnum - which executable is this on the mpiexec commandline + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + Initialize PMI for this process group. The value of spawned indicates whether + this process was created by 'PMI2_Spawn_multiple'. 'spawned' will be non-zero + iff this process group has a parent. + +@*/ +int PMI2_Init(int *spawned, int *size, int *rank, int *appnum); + +/*@ + PMI2_Finalize - finalize the Process Manager Interface + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + Finalize PMI for this job. + +@*/ +int PMI2_Finalize(void); + +/*@ + PMI2_Initialized - check if PMI has been initialized + + Return values: + Non-zero if PMI2_Init has been called successfully, zero otherwise. + +@*/ +int PMI2_Initialized(void); + +/*@ + PMI2_Abort - abort the process group associated with this process + + Input Parameters: + + flag - non-zero if all processes in this job should abort, zero otherwise + - msg - error message to be printed + + Return values: + If the abort succeeds this function will not return. Returns an MPI + error code otherwise. + +@*/ +int PMI2_Abort(int flag, const char msg[]); + +/*@ + PMI2_Job_Spawn - spawn a new set of processes + + Input Parameters: + + count - count of commands + . cmds - array of command strings + . argcs - size of argv arrays for each command string + . argvs - array of argv arrays for each command string + . maxprocs - array of maximum processes to spawn for each command string + . info_keyval_sizes - array giving the number of elements in each of the + 'info_keyval_vectors' + . info_keyval_vectors - array of keyval vector arrays + . preput_keyval_size - Number of elements in 'preput_keyval_vector' + . 
preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space + - jobIdSize - size of the buffer provided in jobId + + Output Parameters: + + jobId - job id of the spawned processes + - errors - array of errors for each command + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This function spawns a set of processes into a new job. The 'count' + field refers to the size of the array parameters - 'cmds', 'argvs', 'maxprocs', + 'info_keyval_sizes' and 'info_keyval_vectors'. The 'preput_keyval_size' refers + to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector' + contains keyval pairs that will be put in the keyval space of the newly + created job before the processes are started. The 'maxprocs' array + specifies the desired number of processes to create for each 'cmds' string. + The actual number of processes may be less than the numbers specified in + maxprocs. The acceptable number of processes spawned may be controlled by + ``soft'' keyvals in the info arrays. The ``soft'' option is specified by + mpiexec in the MPI-2 standard. Environment variables may be passed to the + spawned processes through PMI implementation specific 'info_keyval' parameters. +@*/ +int PMI2_Job_Spawn(int count, const char * cmds[], + int argcs[], const char ** argvs[], + const int maxprocs[], + const int info_keyval_sizes[], + const struct MPID_Info *info_keyval_vectors[], + int preput_keyval_size, + const struct MPID_Info *preput_keyval_vector[], + char jobId[], int jobIdSize, + int errors[]); + + +/*@ + PMI2_Job_GetId - get job id of this job + + Input parameters: + . jobid_size - size of buffer provided in jobid + + Output parameters: + . jobid - the job id of this job + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Job_GetId(char jobid[], int jobid_size); + +/*@ + PMI2_Job_GetRank - get rank of this job + + Output parameters: + .
rank - the rank of this job + + Return values: + Returns 'PMI2_SUCCESS' on success and a PMI error code on failure. + +@*/ +int PMI2_Job_GetRank(int* rank); + +/*@ + PMI2_Info_GetSize - get the number of processes on the node + + Output parameters: + . size - the number of processes on the node + + Return values: + Returns 'PMI2_SUCCESS' on success and a PMI error code on failure. +@*/ +int PMI2_Info_GetSize(int* size); + +/*@ + PMI2_Job_Connect - connect to the parallel job with ID jobid + + Input parameters: + . jobid - job id of the job to connect to + + Output parameters: + . conn - connection structure used to establish communication with + the remote job + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This just "registers" the other parallel job as part of a parallel + program, and is used in the PMI2_KVS_xxx routines (see below). This + is not a collective call and establishes a connection between all + processes that are connected to the calling processes (on the one + side) and that are connected to the named jobId on the other + side. Processes that are already connected may call this routine. + +@*/ +int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn); + +/*@ + PMI2_Job_Disconnect - disconnects from the job with ID jobid + + Input parameters: + . jobid - job id of the job to disconnect from + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Job_Disconnect(const char jobid[]); + +/*@ + PMI2_KVS_Put - put a key/value pair in the keyval space for this job + + Input Parameters: + + key - key + - value - value + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + If multiple PMI2_KVS_Put calls are made with the same key between + calls to PMI2_KVS_Fence, the behavior is undefined. That is, the + value returned by PMI2_KVS_Get for that key after the PMI2_KVS_Fence + is not defined.
+ +@*/ +int PMI2_KVS_Put(const char key[], const char value[]); +/*@ + PMI2_KVS_Fence - commit all PMI2_KVS_Put calls made before this fence + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This is a collective call across the job. It has semantics that are + similar to those for MPI_Win_fence and hence is most easily + implemented as a barrier across all of the processes in the job. + Specifically, all PMI2_KVS_Put operations performed by any process in + the same job must be visible to all processes (by using PMI2_KVS_Get) + after PMI2_KVS_Fence completes. However, a PMI implementation could + make this a lazy operation by not waiting for all processes to enter + their corresponding PMI2_KVS_Fence until some process issues a + PMI2_KVS_Get. This might be appropriate for some wide-area + implementations. + +@*/ +int PMI2_KVS_Fence(void); + +/*@ + PMI2_KVS_Get - returns the value associated with key in the key-value + space associated with the job ID jobid + + Input Parameters: + + jobid - the job id identifying the key-value space in which to look + for key. If jobid is NULL, look in the key-value space of this job. + . src_pmi_id - the pmi id of the process which put this keypair. This + is just a hint to the server. PMI2_ID_NULL should be passed if no + hint is provided. + . key - key + - maxvalue - size of the buffer provided in value + + Output Parameters: + + value - value associated with key + - vallen - length of the returned value, or, if the length is longer + than maxvalue, the negative of the required length is returned + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxvalue, int *vallen); + +/*@ + PMI2_Info_GetNodeAttr - returns the value of the attribute associated + with this node + + Input Parameters: + + name - name of the node attribute + . 
valuelen - size of the buffer provided in value + - waitfor - if non-zero, the function will not return until the + attribute is available + + Output Parameters: + + value - value of the attribute + - found - non-zero indicates that the attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + This provides a way, when combined with PMI2_Info_PutNodeAttr, for + processes on the same node to share information without requiring a + more general barrier across the entire job. + + If waitfor is non-zero, the function will never return with found + set to zero. + + Predefined attributes: + + memPoolType - If the process manager allocated a shared memory + pool for the MPI processes in this job and on this node, return + the type of that pool. Types include sysv, anonmmap and ntshm. + . memSYSVid - Return the SYSV memory segment id if the memory pool + type is sysv. Returned as a string. + . memAnonMMAPfd - Return the FD of the anonymous mmap segment. The + FD is returned as a string. + - memNTName - Return the name of the Windows NT shared memory + segment, file mapping object backed by system paging + file. Returned as a string. + +@*/ +int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor); + +/*@ + PMI2_Info_GetNodeAttrIntArray - returns the value of the attribute associated + with this node. The value must be an array of integers. + + Input Parameters: + + name - name of the node attribute + - arraylen - number of elements in array + + Output Parameters: + + array - value of attribute + . outlen - number of elements returned + - found - non-zero if attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + Notice that, unlike PMI2_Info_GetNodeAttr, this function does not + have a waitfor parameter, and will return immediately with found=0 + if the attribute was not found. 
+ + Predefined array attribute names: + + localRanksCount - Return the number of local ranks that will be + returned by the key localRanks. + . localRanks - Return the ranks in MPI_COMM_WORLD of the processes + that are running on this node. + - cartCoords - Return the Cartesian coordinates of this process in + the underlying network topology. The coordinates are indexed from + zero. Valid only if the Job attribute for physTopology includes + cartesian. + +@*/ +int PMI2_Info_GetNodeAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); + +/*@ + PMI2_Info_PutNodeAttr - stores the value of the named attribute + associated with this node + + Input Parameters: + + name - name of the node attribute + - value - the value of the attribute + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Notes: + For example, it might be used to share segment ids with other + processes on the same SMP node. + +@*/ +int PMI2_Info_PutNodeAttr(const char name[], const char value[]); + +/*@ + PMI2_Info_GetJobAttr - returns the value of the attribute associated + with this job + + Input Parameters: + + name - name of the job attribute + - valuelen - size of the buffer provided in value + + Output Parameters: + + value - value of the attribute + - found - non-zero indicates that the attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found); + +/*@ + PMI2_Info_GetJobAttrIntArray - returns the value of the attribute associated + with this job. The value must be an array of integers. + + Input Parameters: + + name - name of the job attribute + - arraylen - number of elements in array + + Output Parameters: + + array - value of attribute + .
outlen - number of elements returned + - found - non-zero if attribute was found + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + + Predefined array attribute names: + + + universeSize - The size of the "universe" (defined for the MPI + attribute MPI_UNIVERSE_SIZE + + . hasNameServ - The value hasNameServ is true if the PMI2 environment + supports the name service operations (publish, lookup, and + unpublish). + + . physTopology - Return the topology of the underlying network. The + valid topology types include cartesian, hierarchical, complete, + kautz, hypercube; additional types may be added as necessary. If + the type is hierarchical, then additional attributes may be + queried to determine the details of the topology. For example, a + typical cluster has a hierarchical physical topology, consisting + of two levels of complete networks - the switched Ethernet or + Infiniband and the SMP nodes. Other systems, such as IBM BlueGene, + have one level that is cartesian (and in virtual node mode, have a + single-level physical topology). + + . physTopologyLevels - Return a string describing the topology type + for each level of the underlying network. Only valid if the + physTopology is hierarchical. The value is a comma-separated list + of physical topology types (except for hierarchical). The levels + are ordered starting at the top, with the network closest to the + processes last. The lower level networks may connect only a subset + of processes. For example, for a cartesian mesh of SMPs, the value + is cartesian,complete. All processes are connected by the + cartesian part of this, but for each complete network, only the + processes on the same node are connected. + + . cartDims - Return a string of comma-separated values describing + the dimensions of the Cartesian topology. This must be consistent + with the value of cartCoords that may be returned by + PMI2_Info_GetNodeAttrIntArray. 
+ + These job attributes are just a start, but they provide both an + example of the sort of external data that is available through the + PMI interface and how extensions can be added within the same API + and wire protocol. For example, adding more complex network + topologies requires only adding new keys, not new routines. + + . isHeterogeneous - The value isHeterogeneous is true if the + processes belonging to the job are running on nodes with different + underlying data models. + +@*/ +int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); + +/*@ + PMI2_Nameserv_publish - publish a name + + Input parameters: + + service_name - string representing the service being published + . info_ptr - + - port - string representing the port on which to contact the service + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Nameserv_publish(const char service_name[], const struct MPID_Info *info_ptr, const char port[]); + +/*@ + PMI2_Nameserv_lookup - look up a service by name + + Input parameters: + + service_name - string representing the service being looked up + . info_ptr - + - portLen - size of buffer provided in port + + Output parameters: + . port - string representing the port on which to contact the service + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + +@*/ +int PMI2_Nameserv_lookup(const char service_name[], const struct MPID_Info *info_ptr, + char port[], int portLen); +/*@ + PMI2_Nameserv_unpublish - unpublish a name + + Input parameters: + + service_name - string representing the service being unpublished + - info_ptr - + + Return values: + Returns 'MPI_SUCCESS' on success and an MPI error code on failure.
+ +@*/ +int PMI2_Nameserv_unpublish(const char service_name[], + const struct MPID_Info *info_ptr); + + + +#if defined(__cplusplus) +} +#endif + +#endif /* PMI2_H_INCLUDED */ diff --git a/contribs/pmi2/testpmi2.c b/contribs/pmi2/testpmi2.c new file mode 100644 index 0000000000000000000000000000000000000000..e19fa8d0717b5ca9b05825d14838b498d7d5c530 --- /dev/null +++ b/contribs/pmi2/testpmi2.c @@ -0,0 +1,15 @@ +#include <stdio.h> +#include <slurm/pmi2.h> + + +int main(int argc, char **argv) +{ + int spawned, size, rank, appnum; + int ret; + + ret = PMI2_Init(&spawned, &size, &rank, &appnum); + printf("spawned=%d, size=%d, rank=%d, appnum=%d\n", spawned, size, rank, appnum); + PMI2_Finalize(); + + return 0; +} diff --git a/contribs/pmi2/testpmi2_put.c b/contribs/pmi2/testpmi2_put.c new file mode 100644 index 0000000000000000000000000000000000000000..937ff2c914a646962966a58151e72986670a72e0 --- /dev/null +++ b/contribs/pmi2/testpmi2_put.c @@ -0,0 +1,45 @@ +#include <stdio.h> +#include <stdlib.h> +#include <slurm/pmi2.h> + + +int main(int argc, char **argv) +{ + int spawned, size, rank, appnum; + int ret; + char jobid[50]; + + ret = PMI2_Init(&spawned, &size, &rank, &appnum); + if (ret != PMI2_SUCCESS) { + perror("PMI2_Init failed"); + return 1; + } + + PMI2_Job_GetId(jobid, sizeof(jobid)); + printf("spawned=%d, size=%d, rank=%d, appnum=%d, jobid=%s\n", + spawned, size, rank, appnum, jobid); + fflush(stdout); + + PMI2_KVS_Fence(); + + /* broadcast msg=42 from proc 0 */ + int msg = 0; + char val[20] = "0\n"; + if (rank == 0) { + msg = 42; + snprintf(val, sizeof(val), "%d\n", msg); + PMI2_KVS_Put("msg", val); + printf("%d> send %d\n", rank, msg); + fflush(stdout); + } + + PMI2_KVS_Fence(); + int len = 0; + PMI2_KVS_Get(jobid, PMI2_ID_NULL, "msg", val, sizeof(val), &len); + msg = atoi(val); + printf("%d> got %d\n", rank, msg); + fflush(stdout); + + PMI2_Finalize(); + return 0; +} diff --git a/contribs/sjobexit/Makefile.in b/contribs/sjobexit/Makefile.in index 
71516ca3c1ce8733de2ceff5df07fa9a192de06a..5522480964ff02f241758f928d3124d0ca23ab0a 100644 --- a/contribs/sjobexit/Makefile.in +++ b/contribs/sjobexit/Makefile.in @@ -59,6 +59,7 @@ subdir = contribs/sjobexit DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 
+198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/sjstat b/contribs/sjstat index 6fee05efb65a4e9d6b4efaea746fdfed0101782b..5375f243f9a35789f3add61bee46f93e78f87039 100755 --- a/contribs/sjstat +++ b/contribs/sjstat @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/slurm_completion_help/README.md b/contribs/slurm_completion_help/README.md index f7d36c0831249f2824be5d8aae34d97b78d3e4be..eb8d5ff9a4ca9b53433a1d31e8c870d347c6ec21 100644 --- a/contribs/slurm_completion_help/README.md +++ b/contribs/slurm_completion_help/README.md @@ -30,6 +30,10 @@ __Known issues__ * Some regex needed to validate options or parameter values are not exactly correct, but should work in most cases. * Any new option unknown to the syntax file will be spotted as an error. +* On a Debian system (Ubuntu) you may see messages like... + _get_comp_words_by_ref: command not found + after a tab. + Based on http://askubuntu.com/questions/33440/tab-completion-doesnt-work-for-commands you need to alter your /etc/bash.bashrc to make this work correctly. Bash completion --------------- @@ -44,6 +48,44 @@ __Instalation__ Simply source the script in your .bashrc or .profile -__Knwon issues__ - -Keyword arguments are not auto-compelted beyond the first one. 
+__Examples__ + + root@frontend:~ # squeue --<tab><tab> + --account<account_list> --iterate<seconds> --qos<qos_list> --usage + --clusters<string> --jobs<job_id_list> --sort<sort_list> --user<user_list> + --format<fmtstring> --nodes<hostlist> --start --verbose + --help --noheader --state<state_list> --version + --hide --partition<part_list> --steps + root@frontend:~ # squeue --us<tab><tab> + --usage --user + root@frontend:~ # squeue --user <tab><tab> + user1 user2 user3 user4 + + root@frontend:~ # scontrol <tab><tab> + abort delete pidinfo requeue shutdown update + checkpoint hold ping resume suspend version + completing listpids reconfigure setdebug takeover + create notify release show uhold + root@frontend:~ # scontrol update <tab><tab> + jobid= nodename= partitionname= reservationname= step= + root@frontend:~ # scontrol update nodename=<tab><tab> + root@frontend:~ # scontrol update nodename=node<tab><tab> + node01 node03 node05 node07 node09 node11 node13 node15 node17 node19 + node02 node04 node06 node08 node10 node12 node14 node16 node18 node20 + root@frontend:~ # scontrol update nodename=node12 + features=<features> reason=<reason> weight=<weight> + gres=<gres> state=<state> + root@frontend:~ # scontrol update nodename=node12 state=<tab><tab> + alloc down fail idle mixed power_up + allocated drain failing maint power_down resume + root@frontend:~ # scontrol update nodename=node12 state=resume + + root@frontend:~ # squeue --format "%<TAB><TAB> + %a(Account) %E(dependency) %i(id) %M(time) %s(selecplugin) + %A(NTasks) %e(end) %I(Ncores/socket) %N(alloc_nodes) %t(state) + %b(gres) %f(features) %j(name) %n(reqnodes) %T(state) + %c(mincpu) %G(gID) %k(comment) %O(contiguous) %U(uID) + %C(Ncpus) %g(group) %l(limit) %p(priority) %u(user) + %d(minTmp) %H(Nsockets) %L(timeleft) %r(reason) %v(reservation) + %D(NNodes) %h(shared) %m(mem) %R(reason) %x(excnodes) + diff --git a/contribs/slurm_completion_help/slurm.vim b/contribs/slurm_completion_help/slurm.vim index 
ca0b111a44dd844cb04ea125300baac16d223cb3..828335b8fa1e27852daeef3bb33a61716027ac89 100644 --- a/contribs/slurm_completion_help/slurm.vim +++ b/contribs/slurm_completion_help/slurm.vim @@ -7,7 +7,7 @@ " Written by Damien François. <damien.francois@uclouvain.Be>. " " This file is part of SLURM, a resource management program. -" For details, see <http://www.schedmd.com/slurmdocs/>. +" For details, see <http://slurm.schedmd.com/>. " Please also read the included file: DISCLAIMER. " " SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/slurm_completion_help/slurm_completion.sh b/contribs/slurm_completion_help/slurm_completion.sh index d51e1cb6943eb9c266a5a3003fb720a2ee3306a1..476e8690adebcbbada15e3e011b70db688b06a6c 100644 --- a/contribs/slurm_completion_help/slurm_completion.sh +++ b/contribs/slurm_completion_help/slurm_completion.sh @@ -7,7 +7,7 @@ # Written by Damien François. <damien.francois@uclouvain.Be>. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -37,149 +37,1295 @@ # ############################################################################### -_scontrol() +function compute_set_diff(){ + res="" + for i in $1; do + [[ "$2" =~ ${i%%=*} ]] && continue + res="$i $res" + done + echo $res +} + +_split_long_opt() { + [[ $cur == = || $cur == : ]] && cur="" + [[ $prev == = ]] && prev=${COMP_WORDS[$cword-2]} +} + +function find_first_partial_occurence(){ + res="" + for item1 in $1; do + for item2 in $2; do + if [[ $item2 == "$item1=" ]]; then + res="$item1" + break + fi + done + if [[ $res != "" ]]; then + break + fi + done + echo $res +} + +function find_first_occurence(){ + res="" + for item1 in $1; do + for item2 in $2; do + if [[ $item1 = $item2 ]]; then + res="$item1" + break + fi + done + if [[ $res != "" ]]; then + break + fi + done + echo $res +} + +function offer (){ + remainings=$(compute_set_diff "$1" "${COMP_WORDS[*]}") + COMPREPLY=( $( compgen -W "$remainings" -- $cur ) ) + if [[ "$1" == *=* || "$1" == *%* || "$1" == *:* ]]; + then + #echo "NO SPACE $1" >> loglog + compopt -o nospace + fi +} + +function offer_list () { + curlist=${cur%,*} + curitem=${cur##*,} + + if [[ $curlist == $curitem ]] + then + COMPREPLY=( $( compgen -W "${1}" -- $cur ) ) ; return + elif [[ $cur == *, ]] ; + then + compvalues="" + for i in $1;do + [[ $cur =~ $i ]] && continue + compvalues="$i $compvalues " + done + uniqueprefix=1 + prefix=${compvalues:0:1} + for i in $compvalues;do + [[ ${i:0:1} == $prefix ]] || uniqueprefix=0 + done + if [[ $uniqueprefix == 1 ]] + then + compvalues="" + for i in $1;do + [[ $cur =~ $i ]] && continue + compvalues="$compvalues $curlist,$i" + done + fi + COMPREPLY=( $( compgen -W "${compvalues}" -- "" ) ) ; return + else + compvalues="" + for i in $1;do + [[ $cur =~ $i ]] && continue + compvalues="$compvalues $curlist,$i" + done + COMPREPLY=( $( compgen -W "${compvalues}" -- $cur ) ) ; + fi +} + +function offer_many () { 
+ availablevalues="" + for i in $1;do + [[ $cur =~ $i ]] && continue + availablevalues="$i $availablevalues" + done + + # Check that there is no unique prefix for all remaining options (God knows why I have to do this. Must be missing something) + # TODO when all suboptions start with the same prefix, it is not working great + uniqueprefix=1 + prefix=${availablevalues:0:1} + for i in $availablevalues;do + [[ ${i:0:1} == $prefix ]] || uniqueprefix=0 + done + + + #if [[ "$1" == *'\"'% ]]; + #then + # compopt -o nospace + #fi #added for --format in squeue + + if [[ ${COMP_WORDS[COMP_CWORD-1]} == "$argname" ]]; then + # echo "The first value is about to be entered" >> loglog + cur="" + COMPREPLY=( $( compgen -W "${1}" -- $cur ) ) ; return + fi + if [[ ${COMP_WORDS[COMP_CWORD-1]} == '=' && "$cur" != *,* ]]; then + # echo "A supplementary value is being entered" >> loglog + COMPREPLY=( $( compgen -W "${1}" -- $cur ) ) ; return + fi + if [[ ${cur:${#cur}-1:1} == "," && $uniqueprefix == 0 ]]; then + echo "A supplementary value is about to be entered and there is a no unique suffix" >> loglog + compvalues="" + for i in $1;do + [[ $cur =~ $i ]] && continue + compvalues="$i $compvalues" + done + cur="" + COMPREPLY=( $( compgen -W "${compvalues}" -- $cur ) ) ; + return + fi + if [[ "$cur" =~ "," ]] ; then + echo "A supplementary value is about to be entered and there is a unique prefix or we are in the middle of one" >> loglog + compvalues="" + for i in $1;do + [[ $cur =~ $i ]] && continue + compvalues="$compvalues ${cur%,*},$i" + #compvalues="$compvalues $i" + done + COMPREPLY=( $( compgen -W "${compvalues}" -- $cur ) ) ; + + # This is lame, we show complete list rather than last element + return + fi + return 255 +} + +function param () { + argname="$1" + [[ ${COMP_WORDS[COMP_CWORD]} == "=" && ${COMP_WORDS[COMP_CWORD-1]} == $1 ]] && return 0 + [[ ${COMP_WORDS[COMP_CWORD-1]} == "=" && ${COMP_WORDS[COMP_CWORD-2]} == $1 ]] && return 0 + [[ ${COMP_WORDS[COMP_CWORD-1]} == $1 ]] 
&& return 0 + return 255 +} + +function _jobs() { +echo $( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 ) ; +} +function _wckeys() { +echo $(sacctmgr -p -n list wckeys | cut -d'|' -f1) ; +} +function _qos() { +echo $(sacctmgr -p -n list qos | cut -d'|' -f1) ; +} +function _clusters() { +echo $(sacctmgr -p -n list clusters | cut -d'|' -f1) ; +} +function _jobnames() { +echo $( scontrol -o show jobs | cut -d' ' -f 2 | cut -d'=' -f 2 ) ; +} +function _partitions() { +echo $(scontrol show partitions|grep PartitionName|cut -c 15- |cut -f 1 -d' '|paste -s -d ' ') ; +} +function _nodes() { +echo $(scontrol show nodes | grep NodeName | cut -c 10- | cut -f 1 -d' ' | paste -s -d ' ') ; +} +function _accounts() { +echo $(sacctmgr -pn list accounts | cut -d'|' -f1 | paste -s -d' ') ; +} +function _licenses() { +echo $(scontrol show config| grep Licenses | sed 's/Licenses *=//'| paste -s -d' ') ; +} +function _nodes() { +echo $(scontrol show nodes | grep NodeName | cut -c 10- | cut -f 1 -d' ' | paste -s -d ' ') ; +} +function _features() { +echo $(scontrol -o show nodes|cut -d' ' -f7|sed 's/Features=//'|sort -u|tr -d '()'|paste -d, -s) ; +} +function _users() { +echo $(sacctmgr -pn list users | cut -d'|' -f1) ; +} +function _reservations() { +echo $(scontrol -o show reservations | cut -d' ' -f1 | cut -d= -f2) ; +} +function _gres() { +echo $(scontrol show config | grep GresTypes | cut -d= -f2) +} +function _jobname() { +echo $(scontrol show -o jobs | cut -d' ' -f 2 | sed 's/Name=//') +} + +_sacctmgr() { - local cur=${COMP_WORDS[COMP_CWORD]} - local prev=${COMP_WORDS[COMP_CWORD-1]} - - local subopts="" - - case "$prev" in - node) - local pprev=${COMP_WORDS[COMP_CWORD-2]} - if [[ "$pprev" == "show" ]]; then - subopts=$( scontrol show nodes | grep NodeName | cut -c 10- | cut -f 1 -d' ') - elif [[ "$pprev" == "update" ]]; then - subopts="NodeName= Features= Gres= Reason= State= Weight=" - else - subopts="" - fi - ;; - - job) - local pprev=${COMP_WORDS[COMP_CWORD-2]} - if 
[[ "$pprev" == "show" ]]; then - subopts=$( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 ) - elif [[ "$pprev" == "update" ]]; then - subopts="Account= Conn-Type= Contiguous= Dependency= EligibleTime=" - subopts="$subopts ExcNodeList= Features= Geometry= Gres= JobId=" - subopts="$subopts JobId= MinCpusNode= MinMemoryNode= MinTmpDiskNodea" - subopts="$subopts Name= Nice= NodeList= NumCPUs= NumNodes= NumTasks=" - subopts="$subopts Partition= Priority= QOS= ReqCores= ReqNodelist=" - subopts="$subopts ReqSockets= ReqThreads= Requeue= ReservationName=" - subopts="$subopts Rotate= Shared= StartTime= TimeLimit= WCKey=" - else - subopts="" - fi - ;; - show) - subopts="config daemons job node partition reservation slurmd step topology" - subopts="$subopts hostlist hostnames" - ;; - shutdown) - subopts="slurmctld controller" - ;; - setdebug) - subopts="quiet fatal error info verbose debug debug2 debug3 debug4 debug5" - ;; - notify | uhold | suspend | release | requeue | resume | hold ) - subopts=$( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 ) - ;; - step) - subopts="StepId= TimeLimit=" - ;; - delete) - subopts="PartitionName= Reservation=" - ;; - reservation) - subopts="Reservation= Accounts= Licences= NodeCnt= Nodes= StartTime=" - subopts="$subopts EndTime= Duration= PartitionName=Flags= Features= Users=" - ;; - partition) - subopts="AllowGroups= AllocNodes= Alternate= Default= DefaultTime=" - subopts="$subopts DisableRootJobs= Hidden= MaxNodes= MatxTime= MinNodes= Nodes=" - subopts="$subopts PartitionName= PreemtpMode= Piority= RootOnly= Shared= State=" - ;; - create) - subopts="partition reservation" - ;; - update) - subopts="job step node partition reservation" - ;; - disable | enable | able | create | vacate | error | restart) - subopts=$( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 ) - ;; - checkpoint) - subopts="disable enable able create vacate error restart" - ;; - scontrol) - if [[ "$cur" == - ]]; then - subopts="-a -d -h -M -o -Q 
-v -V" - elif [[ "$cur" == -- ]]; then - subopts="--all --details --help --hide --cluster" - subopts="$subopts --oneliner --quiet --verbose --version" - else - subopts="abort checkpoint create completing delete hold notify" - subopts="$subopts pidinfo listpids ping reconfigure release requeue" - subopts="$subopts resume setdebug show shutdown suspend takeover" - subopts="$subopts uhold update version" - fi - ;; - *) - return 0 - ;; - esac - COMPREPLY=( $(compgen -W "${subopts}" -- $cur) ) + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local subopts="" + local commands="add create delete dump list load modify show " + local entities="account association cluster configuration coordinator\ + event job qos transaction user wckeys" + + local shortoptions="-h -i -n -p -P -Q -r -s -s -v -V" + local longoptions="--help --immediate --noheader --parsable \ + --parsable2 --quiet --readonly --associations --verbose --version" + + + local assocparams="clusters= accounts= users= partition= " + local assocbasedparams="defaultqos= fairshare= grpcpumins= grpcpus= \ + grpjobs= grpnodes= grpsubmitjobs= grpwall= maxcpumins= maxcpus= maxjobs= \ + maxnodes= maxsubmitjobs= maxwall= qoslevel=" + + # Check whether we are in the middle of an option. If so serve them. + remainings=$(compute_set_diff "$longoptions" "${COMP_WORDS[*]}") + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == --* ]] && { offer "$remainings" ; return ; } + + # Search for a command in the argument list (first occurence) + # the command might be in any position because of the options + command=$(find_first_occurence "${COMP_WORDS[*]}" "$commands") + + # If no command has been entered, serve the list of valid commands + [[ $command == "" ]] && { offer "$commands" ; return ; } + + # Load command has a specific syntax. 
Treat it first + [[ $command == "load" ]] && { _filedir ; return ; } + + entity=$(find_first_occurence "${COMP_WORDS[*]}" "$entities") + + [[ $entity == "" ]] && { offer "$entities" ; return ; } + + case $command in + add|create) + objects="account cluster coordinator qos user " + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + account) + params="cluster= description= name= organization= parent= " + if param "cluster" ; then offer_list "$(_clusters)" ; + elif param "parent" ; then offer_list "$(_accounts)" ; + else offer "$params" + fi + ;; + cluster) + params="classification= flags= name= rpc=" + if param "flags" ; then offer_list "aix bgl bgq bluegene crayxt frontend \ + multipleslumd sunconstellation xcpu" ; + else offer "$params" + fi + ;; + coordinator) + params="accounts= names=" + if param "names" ; then offer_list "$(_users)" ; + elif param "accounts" ; then offer_list "$(_accounts)" ; + else offer "$params" + fi + ;; + qos) + params="flags= grpcpumins= grpcpus= grpjobs= grpnodes= grpsubmitjobs= grpwall= \ + maxcpumins= maxcpus= maxjobs= maxsubmitjobs= maxwall= name= preempt= \ + preemptmode= priority= usagefactor= usagethreshold= " + if param "flags" ; then offer_list "EnforceUsageThreshold NoReserve \ + PartitionMaxNodes PartitionMinNodes PartitionTimeLimit" ; + elif param "preemptmode" ; then offer_list "cluster cancel checkpoint requeue suspend" ; + elif param "flags" ; then offer_list "enforceusagethreshold noreserve \ + parittionmaxnodes partitionminnodes partitiontimelimit" ; + elif param "preempt" ; then offer_list "$(_qos)" ; + else offer "$params" + fi + ;; + user) + params="account= adminlevel= cluster= defaultaccount= defaultwckey= name= " + if param "defaultaccount" ; then offer_list "$(_accounts)" ; + elif param "account" ; then offer_list "$(_accounts)"; + elif param "adminlevel" ; then offer_list "none operator admin" ; + elif param "cluster" ; then offer_list "$(_cluster)" ; + elif param "defaultwckey" ; 
then offer_list "$(_wckey)" ; + else offer "$params" + fi + ;; + *) offer "$objects" ;; + esac + ;; + delete) + objects="account cluster coordinator qos user" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + account) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="cluster= description= name= organization= parent=" + if param "cluster" ; then offer_list "$(_clusters)" ; + elif param "parent" ; then offer_list "$(_accounts)" ; + elif param "name" ; then offer_list "$(_accounts)" ; + else offer "$params" + fi + ;; + cluster) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="classification= flags= name= rpc= $assocbasedparams" + if param "flags" ; then offer_list "aix bgl bgq bluegene crayxt frontend \ + multipleslumd sunconstellation xcpu" ; + elif param "defaultqos" ; then offer_list "$(_qos)" ; + else offer "$params" + fi + ;; + coordinator) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="accounts= names=" + if param "names" ; then offer_list "$(_users)" ; + elif param "accounts" ; then offer_list "$(_accounts)" ; + else offer "$params" + fi + ;; + user) + params="account= adminlevel= cluster= defaultaccount= defaultwckey= name= wckeys= withassoc" + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + if param "defaultaccount" ; then offer_list "$(_accounts)" ; + elif param "account" ; then offer_list "$(_accounts)"; + elif param "adminlevel" ; then offer_list "none operator admin" ; + elif param "cluster" ; then offer_list "$(_cluster)" ; + elif param "wckeys" ; then offer_list "$(_wckeys)" ; + elif param "defaultwckey" ; then offer_list "$(_wckey)" ; + else offer "$params" ; + fi + ;; + *) offer "$objects" + ;; + esac + ;; + list|show) + objects="account association cluster configuration \ + event problem qos transaction user wckey" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case 
$object in + account) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="cluster= description= name= organization= parent= withassoc \ + withcoord withdeleted $assocparams $assocbasedparams" + if param "cluster" ; then offer_list "$(_clusters)" ; + elif param "parent" ; then offer_list "$(_accounts)" ; + elif param "users" ; then offer_list "$(_users)" ; + elif param "partition" ; then offer_list "$(_partition)" ; + elif param "defaultqos" ; then offer_list "$(_qos)" ; + elif param "name" ; then offer_list "$(_accounts)" ; + else offer "$params" + fi + ;; + association) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="$assocparams onlydefaults tree withdeleted withsubaccounts \ + wolimits wopinfo woplimits" + if param "clusters" ; then offer_list "$(_clusters)" ; + elif param "accounts" ; then offer_list "$(_accounts)" ; + elif param "users" ; then offer_list "$(_users)" ; + elif param "partition" ; then offer_list "$(_partitions)" ; + else offer "$params" + fi + ;; + cluster) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="classification= flags= name= rpc= $assocbasedparams" + if param "flags" ; then offer_list "aix bgl bgq bluegene crayxt frontend \ + multipleslumd sunconstellation xcpu" ; + elif param "defaultqos" ; then offer_list "$(_qos)" ; + else offer "$params" + fi + ;; + event) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="all_clusters all_time clusters= end= event= maxcpu= mincpus= \ + nodes= reason= start= states= user= " + if param "clusters" ; then offer_list "$(_clusters)" ; + elif param "nodes" ; then offer_list "$(_nodes)" ; + elif param "event" ; then offer_list "cluster node" ; + elif param "states" ; then offer_list "alloc allocated down drain \ + fail failing idle mixed maint power_down power_up resume" ; + elif param "users" ; then offer_list "$(_users)" ; + else offer "$params" + fi + ;; + qos) + if 
[[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="flags= grpcpumins= grpcpus= grpjobs= grpnodes= grpsubmitjobs= grpwall= \ + maxcpumins= maxcpus= maxjobs= maxsubmitjobs= maxwall= name= preempt= \ + preemptmode= priority= usagefactor= usagethreshold= withdeleted" + if param "flags" ; then offer_list "EnforceUsageThreshold NoReserve \ + PartitionMaxNodes PartitionMinNodes PartitionTimeLimit" ; + elif param "preemptmode" ; then offer_list "cluster cancel checkpoint requeue suspend" ; + elif param "flags" ; then offer_list "enforceusagethreshold noreserve \ + parittionmaxnodes partitionminnodes partitiontimelimit" ; + elif param "preempt" ; then offer_list "$(_qos)" ; + else offer "$params" + fi + ;; + transaction) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="accounts= action= actor= clusters= endtime= startime= users= withassoc" + if param "accounts" ; then offer_list "$(_accounts)" ; + elif param "actor" ; then offer_list "$(_users)" ; + elif param "clusters" ; then offer_list "$(_clusters)" ; + else offer "$params" + fi + ;; + user) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="account= adminlevel= cluster= defaultaccount= defaultwckey= name= wckeys= withassoc" + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + if param "defaultaccount" ; then offer_list "$(_accounts)" ; + elif param "account" ; then offer_list "$(_accounts)"; + elif param "adminlevel" ; then offer_list "none operator admin" ; + elif param "cluster" ; then offer_list "$(_cluster)" ; + elif param "wckeys" ; then offer_list "$(_wckeys)" ; + elif param "defaultwckey" ; then offer_list "$(_wckey)" ; + else offer "$params" ; + fi + ;; + *) offer "$objects" ;; + esac + ;; + modify) + objects="account cluster job qos user" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + account) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" 
; return ;fi + params="cluster= description= name= organization= parent=" + if param "cluster" ; then offer_list "$(_clusters)" ; + elif param "parent" ; then offer_list "$(_accounts)" ; + elif param "name" ; then offer_list "$(_accounts)" ; + else offer "$params set" + fi + ;; + cluster) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="classification= flags= name= rpc= $assocbasedparams" + if param "flags" ; then offer_list "aix bgl bgq bluegene crayxt frontend \ + multipleslumd sunconstellation xcpu" ; + elif param "defaultqos" ; then offer_list "$(_qos)" ; + else offer "$params set" + fi + ;; + qos) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="flags= grpcpumins= grpcpus= grpjobs= grpnodes= grpsubmitjobs= grpwall= \ + maxcpumins= maxcpus= maxjobs= maxsubmitjobs= maxwall= name= preempt= \ + preemptmode= priority= usagefactor= usagethreshold= withdeleted" + if param "flags" ; then offer_list "EnforceUsageThreshold NoReserve \ + PartitionMaxNodes PartitionMinNodes PartitionTimeLimit" ; + elif param "preemptmode" ; then offer_list "cluster cancel checkpoint requeue suspend" ; + elif param "flags" ; then offer_list "enforceusagethreshold noreserve \ + parittionmaxnodes partitionminnodes partitiontimelimit" ; + elif param "preempt" ; then offer_list "$(_qos)" ; + else offer "$params set" + fi + ;; + user) + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + params="account= adminlevel= cluster= defaultaccount= defaultwckey= name= wckeys= withassoc" + if [[ "${COMP_WORDS[*]}" != *where* ]] ; then offer "where" ; return ;fi + if param "defaultaccount" ; then offer_list "$(_accounts)" ; + elif param "account" ; then offer_list "$(_accounts)"; + elif param "adminlevel" ; then offer_list "none operator admin" ; + elif param "cluster" ; then offer_list "$(_cluster)" ; + elif param "wckeys" ; then offer_list "$(_wckeys)" ; + elif param "defaultwckey" ; then offer_list 
"$(_wckey)" ; + else offer "$params" ; + fi + ;; + *) offer "$objects" + ;; + esac + ;; + + esac } -complete -F _scontrol scontrol +complete -F _sacctmgr sacctmgr _sreport() { - local cur=${COMP_WORDS[COMP_CWORD]} - local prev=${COMP_WORDS[COMP_CWORD-1]} - - local subopts="" - local opts4all="All_Clusters Clusters= End= Format= Start=" - - case "$prev" in - user) - subopts="TopUsage" - ;; - TopUsage) - subopts="$opts4all Accounts= Group TopCount= Users=" - ;; - reservation) - subopts="Utilization" - ;; - Utilization) - subopts="$opts4all Names= Nodes=" - ;; - job) - subopts="SizesByAccount SizesByAccountAndWckey SizesByWckey" - ;; - SizesByAccount|SizesByAccountAndWckey|SizesByWckey) - subopts="$opts4all Accounts= FlatView GID= Grouping= Jobs= Nodes= OPartitions= PrintJobCount Users= Wckeys=" - ;; - cluster) - subopts="AccountUtilizationByUser UserUtilizationByAccount UserUtilizationByWCKey Utilization WCKeyUtilizationByUser" - ;; - AccountUtilizationByUser|UserUtilizationByAccount|UserUtilizationByWCKey|Utilization|WCKeyUtilizationByUser) - subopts="$opts4all Accounts= Tree Users= Wckeys=" - ;; - sreport) - if [[ "$cur" == - ]]; then - subopts="-a -n -h -p -P -t -v -V" - elif [[ "$cur" == -- ]]; then - subopts="--all_clusters --help --noheader --parsable" - subopts="$subopts --parsable2--quiet --verbose --version" - else - subopts="cluster job user reservation" - fi - ;; - *) - return 0 - ;; - esac - COMPREPLY=( $(compgen -W "${subopts}" -- $cur) ) + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local subopts="" + local commands="cluster job user reservation" + + local shortoptions="-a -n -h -p -P -t -v -V" + local longoptions="--all_clusters --help --noheader --parsable\ + --parsable2--quiet --verbose --version" + + # Check whether we are in the middle of an option. If so serve them. 
+ remainings=$(compute_set_diff "$longoptions" "${COMP_WORDS[*]}") + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == --* ]] && { offer "$remainings" ; return ; } + + # Search for a command in the argument list (first occurence) + # the command might be in any position because of the options + command=$(find_first_occurence "${COMP_WORDS[*]}" "$commands") + + # If no command has been entered, serve the list of valid commands + [[ $command == "" ]] && { offer "$commands" ; return ; } + + opts_all="All_Clusters Clusters= End= Format= Start=" + + case $command in + user) + objects="TopUsage" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + TopUsage) + params="$opts_all Accounts= Group TopCount= Users=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Account Cluster Login Proper User" ; + elif param "Accounts" ; then offer_list "$(_accounts)" ; + elif param "Users" ; then offer_list "$(_users)" ; + else offer "$params" + fi + ;; + *) offer "$objects" ;; + esac + ;; + reservation) + objects="Utilization" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + Utilization) + params="$opts_all Names= Nodes=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Allocated Associations \ + Clusters CPUCount CPUTime End Idle Name Nodes Start TotalTime" ; + elif param "Nodes" ; then offer_list "$(_nodes)" ; + else offer "$params" + fi + ;; + *) offer "$objects" ;; + esac + ;; + job) + objects="SizesByAccount SizesByAccountAndWckey SizesByWckey" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + SizesByAccount|SizesByAccountAndWckey) + params="$opts_all Accounts= FlatView GID= Grouping= \ + Jobs= Nodes= OPartitions= PrintJobCount Users= Wckeys=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Account Cluster" ; + elif param 
"Accounts" ; then offer_list "$(_accounts)" ; + elif param "GID" ; then _gids ; + elif param "Users" ; then offer_list "$(_users)" ; + elif param "Wckeys" ; then offer_list "$(_wckeys)" ; + else offer "$params" + fi + ;; + SizesByWckey) + params="$opts_all Accounts= FlatView GID= Grouping= \ + Jobs= Nodes= OPartitions= PrintJobCount Users= Wckeys=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Wckey Cluster" ; + elif param "Accounts" ; then offer_list "$(_accounts)" ; + elif param "GID" ; then _gids ; + elif param "Users" ; then offer_list "$(_users)" ; + elif param "Wckeys" ; then offer_list "$(_wckeys)" ; + else offer "$params" + fi + ;; + *) offer "$objects" ;; + esac + ;; + cluster) + objects="AccountUtilizationByUser UserUtilizationByAccount \ + UserUtilizationByWCKey Utilization WCKeyUtilizationByUser" + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + case $object in + Utilization) + params="$opts_all Names= Nodes=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Allocated Cluster \ + CPUCount Down Idle Overcommited PlannedDown Reported Reserved" ; + elif param "Nodes" ; then offer_list "$(_nodes)" ; + else offer "$params" + fi + ;; + AccountUtilizationByUser|UserUtilizationByAccount) + params="$opts_all Accounts= Tree Users= Wckeys=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Accounts Cluster CPUCount \ + Login Proper Used" ; + elif param "Accounts" ; then offer_list "$(_accounts)" ; + elif param "Users" ; then offer_list "$(_users)" ; + elif param "Wckeys" ; then offer_list "$(_wckeys)" ; + else offer "$params" + fi + ;; + UserUtilizationByWCKey|WCKeyUtilizationByUser) + params="$opts_all Accounts= Tree Users= Wckeys=" + if param "Clusters" ; then offer_list "$(_clusters)" ; + elif param "Format" ; then offer_list "Cluster CPUCount Login \ + Proper Used Wckey" ; + elif param "Accounts" ; 
then offer_list "$(_accounts)" ; + elif param "Users" ; then offer_list "$(_users)" ; + elif param "Wckeys" ; then offer_list "$(_wckeys)" ; + else offer "$params" + fi + ;; + *) offer "$objects" ;; + esac + ;; + + esac } complete -F _sreport sreport + +_scontrol() +{ + local cur=${COMP_WORDS[COMP_CWORD]} + local prev=${COMP_WORDS[COMP_CWORD-1]} + + local commands="abort checkpoint create completing delete hold notify \ + pidinfo listpids ping reconfigure release requeue resume\ + setdebug show shutdown suspend takeover uhold update version" + + local shortoptions="-a -d -h -M -o -Q -v -V " + local longoptions="--all --details --help --hide --cluster --oneliner \ + --quiet --verbose --version" + + # Check whether we are in the middle of an option. If so serve them. + remainings=$(compute_set_diff "$longoptions" "${COMP_WORDS[*]}") + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$remainings" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $remainings)"; return ; } + + # Search for a command in the argument list (first occurence) + # the command might be in any position because of the options + command=$(find_first_occurence "${COMP_WORDS[*]}" "$commands") + + # If no command has been entered, serve the list of valid commands + [[ $command == "" ]] && { offer "$commands" ; return ; } + + # Otherwise process command + case $command in + shutdown) # scontrol shutdown object + offer "slurmctld controller" + ;; + setdebug) # scontrol setdebug value + offer "quiet info warning error debug debug2 debug3 debug4 debug5 " # FIXME + ;; + uhold | suspend | release | requeue | resume | hold ) + offer "$(_jobs)" + ;; #TODO notify + checkpoint) # scontrol checkpoint create jobid [parameter1=value1,...] + # This one has unsusual ordering: object is before command. 
+ # command subcommand argument #TODO add support for additional options cfr manpage + objects="disable enable able create vacate error restart" + + if [[ $prev == checkpoint ]]; then + offer "$objects"; + elif [[ $objects == *$prev* ]]; then + offer "$(_jobs)"; + else + echo todo + #TODO + fi + ;; + show) # scontrol show object [id] + objects="config daemons job nodes partitions reservations \ + slurmd steps topology hostlist hostnames" + + # Search for the current object in the argument list + object=$(find_first_occurence "${COMP_WORDS[*]}" "$objects") + + # If no object has yet been (fully) typed in, serve the list of objects + [[ $object == "" ]] && { offer "$objects" ; return ; } + + # Otherwise, offer the ids depending on the object + if param "job" ; then offer "$(_jobs)" ; fi + if param "nodes" ; then offer_list "$(_nodes)" ; fi + if param "partitions" ; then offer "$(_partitions)" ; fi + if param "reservations" ; then offer "$(_reservations)" ; fi + #TODO if object "steps" + ;; + delete) # scontrol delete objectname=id + parameters="partitionname= reservationname=" + + # If a parameter has been fully typed in, serve the corresponding + # values, otherwise, serve the list of parameters. + if param "partitionname" ; then offer_many "$(_partitions)" + elif param "reservationname" ; then offer_many "$(_reservations)" + else offer "$parameters" ; fi + ;; + update) + parameters="jobid= step= nodename= partitionname= reservationname=" + + param=$(find_first_partial_occurence "${COMP_WORDS[*]}" "$parameters") + [[ $param == "" ]] && { offer "$parameters" ; return ; } + + # If a parameter has been fully typed in, serve the corresponding + # values, if it is the first one. 
+ if param "jobid" ; then offer_many "$(_jobs)" ; return + elif param "nodename" ; then offer_many "$(_nodes)" ; return + elif param "partitionname" ; then offer_many "$(_partitions)" ; return + elif param "reservationname" ; then offer_many "$(_reservations)" ; return + fi + + # Otherwise, process the others based on the first one + case $param in + jobid) + local parameters="account=<account> conn-type=<type> \ + contiguous=<yes|no> dependency=<dependency_list> \ + eligibletime=yyyy-mm-dd excnodelist=<nodes>\ + features=<features> geometry=<geo> gres=<list> \ + licenses=<name> mincpusnode=<count> minmemorycpu=<megabytes> \ + mintmpdisknode=<megabytes> name=<name> nice[=delta] \ + nodelist=<nodes> numcpus=<min_count[-max_count] \ + numnodes=<min_count[-max_count]> numtasks=<count> \ + partition=<name> priority=<number> qos=<name> \ + reqcores=<count> reqthreads=<count> requeue=<0|1> \ + reservationname=<name> rotate=<yes|no> shared=<yes|no> \ + starttime=yyyy-mm-dd timelimit=[d-]h:m:s wckey=<key>" + + remainings=$(compute_set_diff "$parameters" "${COMP_WORDS[*]}") + + # If a new named argument is about to be entered, serve the list of options + [[ $cur == "" && $prev != "=" ]] && { offer "$remainings" ; return ; } + + # Test all potential arguments and server corresponding values + if param "account" ; then offer_many "$(_accounts)" + elif param "excnodelist" ; then offer_many "$(_nodes)" + elif param "nodelist" ; then offer_many "$(_nodes)" + elif param "features" ; then offer_many "$(_features)" + elif param "gres" ; then offer_many "$(_gres)" + elif param "licences" ; then offer_many "$(_licenses)" + elif param "partition" ; then offer_many "$(_partitions)" + elif param "reservationname" ; then offer_many "$(_reservations)" + elif param "qos" ; then offer_many "$(_qos)" + elif param "wckey" ; then offer_many "$(wckeys)" + elif param "conn-type" ; then offer_many "MESH TORUS NAV" + elif param "rotate" ; then offer_many "yes no" + elif param "shared" ; then 
offer_many "yes no" + else offer "$(sed 's/\=[^ ]*/\=/g' <<< $remainings)" + fi + ;; + nodename) + local parameters="features=<features> gres=<gres> \ + reason=<reason> state=<state> weight=<weight>" + + remainings=$(compute_set_diff "$parameters" "${COMP_WORDS[*]}") + + # If a new named argument is about to be entered, serve the list of options + [[ $cur == "" && $prev != "=" ]] && { offer "$remainings" ; return ; } + + # Test all potential arguments and server corresponding values + if param "features" ; then offer_many "$(_features)" + elif param "gres" ; then offer_many "$(_gres)" + elif param "state" ; then offer_many "alloc allocated down drain \ + fail failing idle mixed maint power_down power_up resume" + else offer "$(sed 's/\=[^ ]*/\=/g' <<< $remainings)" + fi + ;; + partitionname) + local parameters="nodes=<node_list> alternate=<partition_name> default=yes|no + defaulttime=d-h:m:s|unlimited disablerootjobs=yes|no hidden=yes|no \ + maxnodes=<count> maxtime=d-h:m:s|unlimited minnodes=<count> \ + allocnodes=<node_list> preemptmode=off|cancel|checkpoint|requeue|suspend \ + priority=count rootonly=yes|no shared=yes|no|exclusive|force \ + state=up|down|drain|inactive allowgroups=<name>" + + remainings=$(compute_set_diff "$parameters" "${COMP_WORDS[*]}") + # If a new named argument is about to be entered, serve the list of options + [[ $cur == "" && $prev != "=" ]] && { offer "$remainings" ; return ; } + + # Test all potential arguments and server corresponding values + if param "allocnodes" ; then offer_many "$(_nodes)" + elif param "alternate" ; then offer_many "$(_partitions)" + elif param "default" ; then offer_many "yes no" + elif param "preemptmode" ; then offer_many "off cancel checkpoint requeue suspend" + elif param "shared" ; then offer_many "yes no exclusive force" + elif param "state" ; then offer_many "up down drain inactive" + else offer "$(sed 's/\=[^ ]*/\=/g' <<< $remainings)" + fi + ;; + reservationname) + local parameters="users=<user_list> 
nodecnt=<count> \ + nodes=<node_list> starttime=yyyy-mm-dd[thh:mm[:ss]] \ + endtime=yyyy-mm-dd[thh:mm[:ss]] duration=[days-]hours:minutes:seconds \ + flags=maint,overlap,ignore_jobs,daily,weekly \ + partitionname=<partition_list> features=<feature_list> \ + accounts=<account_list> licenses=<license>" + + remainings=$(compute_set_diff "$parameters" "${COMP_WORDS[*]}") + # If a new named argument is about to be entered, serve the list of options + [[ $cur == "" && $prev != "=" ]] && { offer "$remainings" ; return ; } + + # test all potential arguments and server corresponding values + if param "accounts" ; then offer_many "$(_accounts)" + elif param "licences" ; then offer_many "$(_licenses)" + elif param "nodes" ; then offer_many "$(_nodes)" + elif param "features" ; then offer_many "$(_features)" + elif param "users" ; then offer_many "$(_users)" + elif param "flags" ; then offer_many " maint overlap ignore_jobs daily weekly " + else offer "$(sed 's/\=[^ ]*/\=/g' <<< $remainings)" + fi + ;; + esac + ;; + create) # command object attribute1=value1 etc. 
+ parameters="partition reservation" + + param=$(find_first_occurence "${COMP_WORDS[*]}" "$parameters") + [[ $param == "" ]] && { offer "$parameters" ; return ; } + + # Process object + case $param in + partition) + local parameters="partitionname=<name> nodes=<node_list> \ + alternate=<partition_name> default=yes|no \ + defaulttime=days-hours:minutes:seconds|unlimited \ + disablerootjobs=yes|no hidden=yes|no maxnodes=<count> \ + maxtime=days-hours:minutes:seconds|unlimited minnodes=<count> \ + allocnodes=<node_list> \ + preemptmode=off|cancel|checkpoint|requeue|suspend \ + priority=count rootonly=yes|no shared=yes|no|exclusive|force \ + state=up|down|drain|inactive allowgroups=<name>" + + remainings=$(compute_set_diff "$parameters" "${COMP_WORDS[*]}") + # If a new named argument is about to be entered, serve the list of options + [[ $cur == "" && $prev != "=" ]] && { offer "$remainings" ; return ; } + + if param "allocnodes" ; then offer_many "$(_nodes)" + elif param "alternate" ; then offer_many "$(_partitions)" + elif param "partitionname" ; then offer_many "$(_partitions)" + elif param "nodes" ; then offer_many "$(_nodes)" + elif param "preemptmode" ; then offer_many "off cancel checkpoint requeue suspend" + elif param "shared" ; then offer_many "yes no exclusive force" + else offer "$(sed 's/\=[^ ]*/\=/g' <<< $remainings)" + fi + ;; + reservation) + local parameters="reservation=<name> users=<user_list> nodecnt=<count> \ + nodes=<node_list> starttime=yyyy-mm-dd[thh:mm[:ss]] \ + endtime=yyyy-mm-dd[thh:mm[:ss]] duration=[days-]hours:minutes:seconds \ + flags=maint,overlap,ignore_jobs,daily,weekly \ + partitionname=<partition_list> features=<feature_list> \ + accounts=<account_list> licenses=<license>" + + remainings=$(compute_set_diff "$parameters" "${COMP_WORDS[*]}") + # If a new named argument is about to be entered, serve the list of options + [[ $cur == "" && $prev != "=" ]] && { offer "$remainings" ; return ; } + + # Test all potential arguments and server 
corresponding values + if param "accounts" ; then offer "$(_accounts)" + elif param "licences" ; then offer_many "$(_licenses)" + elif param "nodes" ; then offer_many "$(_nodes)" + elif param "features" ; then offer_many "$(_features)" + elif param "users" ; then offer_many "$(_users)" + elif param "flags" ; then offer_many " maint overlap ignore_jobs daily weekly " + else offer "$(sed 's/\=[^ ]*/\=/g' <<< $remainings)" + fi + ;; + esac + ;; + esac +} +complete -F _scontrol scontrol + +_squeue() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-A -i -j -M -n -o -p -q -s -S -t -u -a -h -l -s -V -v" + local longoptions="--help --hide --steps --start --usage --verbose \ + --version --noheader --account<account_list> \ + --iterate<seconds> --jobs<job_id_list> \ + --clusters<string> --nodes<hostlist> --format<fmtstring> \ + --partition<part_list> --qos<qos_list> --sort<sort_list> \ + --state<state_list> --user<user_list> " + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + if [[ $cur == *% ]] ; + then + offer "%a(Account) %A(NTasks) %b(gres) %c(mincpu) %C(Ncpus) %d(minTmp) \ + %D(NNodes) %e(end) %E(dependency) %f(features) %g(group) %G(gID) %h(shared) \ + %H(Nsockets) %i(id) %I(Ncores/socket) %j(name) %k(comment) %l(limit) \ + %L(timeleft) %m(mem) %M(time) %n(reqnodes) %N(alloc_nodes) %O(contiguous) \ + %p(priority) %r(reason) %R(reason) %s(selecplugin) %t(state) %T(state) \ + %u(user) %U(uID) %v(reservation) %x(excnodes)" ; + return; + fi + + case $prev in + --partition|-p) offer_list "$(_partitions)" ;; + --jobs|-j) offer_list "$(_jobs)" ;; + --account|-A) offer_list "$(_accounts)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --nodes|-N) offer_list "$(_nodes)" ;; + --qos) offer_list "$(_qos)" ;; + --user|-u) offer_list "$(_users)" ;; + --state|-s) offer_list "pending 
running suspended completing completed" ;; + --format|-o) offer "\\\"%" ;; + esac +} +complete -F _squeue squeue + +_scancel() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions=" -a -i -v -V -A -b -M -n -p -q -R -s -t -u -w" + local longoptions="--batch --ctld --help --quiet --usage --verbose --version \ + --account<account_list> --name<job_name> \ + --clusters<string> --nodelist<hostlist> --reservation<reservation_name>\ + --partition<part_list> --qos<qos_list> --signal<SIGXXX>\ + --state<state_list> --user<user_list> --wckeys<wckey>" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + case $prev in + --partition|-p) offer_list "$(_partitions)" ;; + --account|-A) offer_list "$(_accounts)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --qos) offer_list "$(_qos)" ;; + --wckeys) offer_list "$(_wckeys)" ;; + --user|-u) offer_list "$(_users)" ;; + --nodelist|-w) offer_list "$(_nodes)" ;; + --name) offer_list "$(_jobnames)" ;; + --reservation|-R) offer_list "$(_reservations)" ;; + --state) offer_list "pending running suspended completing completed" ;; + *) offer_list "$(_jobs)";; + esac +} +complete -F _scancel scancel + +_sshare() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-a -h -l -p -P -v -V -A -M -u" + local longoptions="--noheader --parsable --parsable2 --verbose --version \ + --accounts<accounts> --clusters<string> --users<user_list>" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + case $prev in + --accounts|-A) offer_list "$(_accounts)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --users|-u) offer_list "$(_users)" ;; + esac +} +complete -F _sshare sshare + 
+_sbcast() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-C -f -p -v -V -F -s -t" + local longoptions="--compress --force --preserve --verbose --version \ + fanout<number> --size<bytes> --timeout<seconds>" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + _filedir +} +complete -F _sbcast sbcast + +_sinfo() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-a -b -d -e -h -i -l -n -N -o -p -R -s -S -t -v -V" + local longoptions="--all --exact --noheader --help --hide --iterate<seconds> \ + --long --clusters<clusternames> --nodes<nodelist> --Node --format<fmtstr> \ + --partition<partition> --summarize --sort<sortlist> --states<statelist> \ + --usage --verbose --version" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + if [[ $cur == *% ]] ; + then + offer "%a(Availabilit) %A(cpu_usage) %c(cpus_per_node) %C(cpu_usage) %d(diskspace) \ + %D(NNodes) %E(reason) %f(features) %F(nodes_usage) %g(group) %G(Gres) %h(shared) \ + %H(timestamp) %l(time_limit) %L(default_time) %m(mem) %M(preemt_mode) \ + %N(node_names) %P(partition) %r(root_jobs) %R(reason) %s(max_job_size) \ + %S(allowed_allocating_nodes) %t(state) %T(state) %u(user) %U(uID) %w(weight)\ + %X(sockets_per_node) %Y(cores_per_socket) %Z(threads_per_core)" ; + return; + fi + + case $prev in + --partition|-p) offer_list "$(_partitions)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --nodes|-n) offer_list "$(_nodes)" ;; + --state) offer_list "pending running suspended completing completed" ;; + --format|-o) offer "\\\"%" ;; + esac +} +complete -F _sinfo sinfo + +_sprio() +{ + _get_comp_words_by_ref cur prev words cword + 
_split_long_opt + + local shortoptions="-h -j -l -M -n -o -u -v -V -w" + local longoptions="--noheader --help --jobs<jobids> --long --clusters<clustername> \ + --norm --format<fmtstr> --user<userlist> --usage --verbose --version --weights" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + if [[ $cur == *% ]] ; + then + offer "%a(n_age) %A(w_age) %f(n_fair-share) %F(w_fair-share) %i(JobId) \ + %j(n_job_size) %J(w_job_size) %N(Nice adjustment) %p(n_partition) \ + %P(w_partition) %q(n_qos) %Q(w_qos) %u(User) %Y(priority) %y(n_priority) " ; + return; + fi + + case $prev in + --jobs|-j) offer_list "$(_jobs)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --format|-o) offer "\\\"%" ;; + --user|-u) offer_list "$(_users)" ;; + esac +} +complete -F _sprio sprio + +_sacct() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-a -A -b -c -d -e -E -f -g -h -j -k -K -l -L -M -n \ + -N -o -O -p -P -q -r -s -S -T -u -v -V -W -x -X" + local longoptions="--allusers --accounts<accountlist> --brief --completion \ + --dump --duplicates --helpformat --endtime<time> --file<path> --group<gidlist> \ + --help --jobs<joblist> --timelimit-min<time> --timelimit-max<time> --long \ + --allclusters --clusters<clusterlist> --noheader --nodes<nodes> \ + --format<itemlist> --formatted_dump --parsable --parsable2 --qos<qos> \ + --partition<partitionlist> --state<statelist> --starttime<time> --truncate \ + --user<userlist> --usage --verbose --version --wckeys<wckeyslist> \ + --associations<assoclist> --allocations" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + case $prev in + --group|--gid|-g) _gids ;; + --partition) offer_list "$(_partitions)" ;; +
--jobs) offer_list "$(_jobs)" ;; + --accounts|-A) offer_list "$(_accounts)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --nodes) offer_list "$(_nodes)" ;; + --qos) offer_list "$(_qos)" ;; + --wckeys|-W) offer_list "$(_wckeys)" ;; + --associations|-x) offer_list "$(_associations)" ;; + --user|-u) offer_list "$(_users)" ;; + --state|-s) offer_list "pending running suspended completing completed" ;; + --format) offer_list "$(sacct -e)" ;; + esac +} +complete -F _sacct sacct + +_salloc() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-A -B -C -c -d -D -F -h -H -I -J -K -l -L -m -N \ + -n -O -Q -s -t -u -V -v -W -w -x" + local longoptions="--account<account> --acctg-freq<seconds> \ + --extra-node-info<sockets[:cores[:threads]]> --sockets-per-node<number>\ + --cores-per-socket<number> --threads-per-core<number> --begin<time> --bell \ + --comment<string> --constraint<list> --contiguous --cpu-bind<type> \ + --cpus-per-task<number> --dependency<deplist> --chdir<path> --exclusive \ + --nodefile<nodefile> --get-user-env --gid<group> --gres<list> --hold \ + --help --hint<type> --immediate<[seconds]> --jobid<jobid> --kill-command \ + --no-kill --licenses<licenses> --distribution<dist> --mail-type<type> \ + --mail-user<email> --mem<MB> --mem-per-cpu<MB> --mem-bind<type> \ + --min-cpus<number> --nodes<minnodes[-maxnodes]> --ntasks<number> \ + --network<type> --nice<[adjustment]> --ntasks-per-core<number> \ + --no-bell --no-shell --overcommit --partition<partitionname> --quiet \ + --qos<qos> --reservation<name> --share --signal<sig_num[@sig_time]> \ + --time<time> --time-min<time> --tmp<MB> --usage --uid<user> --version \ + --verbose --wait<seconds> --nodelist<nodelist> --wait-all-nodes<0|1> \ + --wckey<wckey> --exclude<nodelist>" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; }
+ + case $prev in + --account|-A) offer_list "$(_accounts)" ;; + --constraint|-C) offer_list "$(_features)" ;; + --cpu-bind) offer_list "none rank map_cpu: mask_cpu: sockets cores \ + threads ldoms" ;; + --dependency|-d) offer_list "after: afterany: afternotok: + afterok: singleton" ;; + --gid) _gids ;; + --partition|-p) offer_list "$(_partitions)" ;; + --gres) offer_list "$(_gres)" ;; + --hint) offer "compute_bound memory_bound multithread nomultithread" ;; + --jobid) offer_list "$(_jobs)" ;; + --licenses|-L) offer_list "$(_licenses)" ;; + --distribution|-m) offer "block cyclic plane arbitrary" ;; + --mail-type) offer_list "BEGIN END FAIL REQUEUE ALL" ;; + --mem-bind) offer "none rank local map_mem: mask_mem:" ;; + # TODO --network) _configured_interfaces ;; + --reservation) offer_list "$(_reservations)" ;; + --clusters) offer_list "$(_clusters)" ;; + --nodelist) offer_list "$(_nodes)" ;; + --exclude) offer_list "$(_nodes)" ;; + --qos) offer_list "$(_qos)" ;; + :|afterany|after|afternotok|afterok) offer_list "$(_jobs)" ;; + esac + #TODO options for blue gene systems +} +complete -F _salloc salloc + +_sbatch() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-A -B -C -c -d -D -F -h -H -I -J -K -l -L -m -N -n -O \ + -Q -s -t -u -V -v -W -w -x -q -r -T -X -Z" + local longoptions="--account<account> --acctg-freq<seconds> \ + --extra-node-info<sockets[:cores[:threads]]> --sockets-per-node<number> \ + --cores-per-socket<number> --threads-per-core<number> --begin<time> \ + --bell --comment<string> --constraint<list> --contiguous --cpu-bind<type> \ + --cpus-per-task<number> --dependency<deplist> --chdir<path> --exclusive \ + --nodefile<nodefile> --get-user-env --gid<group> --gres<list> --hold \ + --help --hint<type> --immediate<[seconds]> --jobid<jobid> --no-kill \ + --licenses<licenses> --distribution<dist> --mail-type<type> \ + --mail-user<email> --mem<MB> --mem-per-cpu<MB> --mem-bind<type> \ + --min-cpus<number>
--nodes<minnodes[-maxnodes]> --ntasks<number> \ + --network<type> --nice<[adjustment]> --ntasks-per-core<number> \ + --overcommit --partition<partitionname> --quiet --qos<qos> \ + --reservation<name> --share --signal<sig_num[@sig_time]> \ + --time<time> --time-min<time> --tmp<MB> --usage --uid<user> \ + --version --verbose --wait<seconds> --nodelist<nodelist> \ + --wait-all-nodes<0|1> --wckey<wckey> --exclude<nodelist> \ + --checkpoint<time> --checkpoint-dir<directory> --error<file> \ + --preserve-env --epilog<path> --input<file> --job-name<name> \ + --kill-on-bad-exit --label --msg-timeout --mpi<type> \ + --multi-prog --output<file> --open-mode<append|truncate> \ + --prolog<path> --propagate<rlimits> --pty --quit-on-interrupt \ + --relative<number> --resv-ports --restart-dir<dir> --slurmd-debug<level> \ + --threads<number> --task-epilog<path> --task-prolog<path> --test-only \ + --unbuffered --disable-status --no-allocate --export<env_var> \ + --ntasks-per-socket<number> --ntasks-per-node<number> --tasks-per-node<number>" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + case $prev in + --account|-A) offer_list "$(_accounts)" ;; + --constraint|-C) offer_list "$(_features)" ;; + --cpu-bind) offer "none rank map_cpu: mask_cpu: sockets \ + cores threads ldoms" ;; + --dependency|-d) offer "after: afterany: afternotok: \ + afterok: singleton" ;; + --gid) _gids ;; + --partition|-p) offer_list "$(_partitions)" ;; + --gres) offer_list "$(_gres)" ;; + --hint) offer "compute_bound memory_bound multithread \ + nomultithread" ;; + --jobid) offer_list "$(_jobs)" ;; + --licenses|-L) offer_list "$(_licenses)" ;; + --distribution|-m) offer_list "block cyclic plane arbitrary" ;; + --mail-type) offer_list "begin end fail requeue all" ;; + --mem-bind) offer "none rank local map_mem: mask_mem:" ;; + --mpi) offer "lam mpich1_shmem
mpichgm mvapich openmpi none" ;; + --propagate) offer_list "all as core cpu data fsize memlock \ + nofile nproc rss stack" ;; + # TODO --network) _configured_interfaces ;; + --reservation) offer_list "$(_reservations)" ;; + --clusters|-M) offer_list "$(_clusters)" ;; + --nodelist) offer_list "$(_nodes)" ;; + --exclude|-x) offer_list "$(_nodes)" ;; + --qos) offer_list "$(_qos)" ;; + :|afterany|after|afternotok|afterok) offer_list "$(_jobs)" ;; + *) _filedir + esac + #TODO options for blue gene systems +} +complete -o filenames -F _sbatch sbatch + +_srun() +{ + _get_comp_words_by_ref cur prev words cword + _split_long_opt + + local shortoptions="-A -B -C -c -d -D -F -h -H -I -J -K -l -L -m -N -n -O \ + -Q -s -t -u -V -v -W -w -x -q -r -T -X -Z" + local longoptions=" --open-mode<append|truncate> --account<account> --acctg-freq<seconds> \ + --extra-node-info<sockets[:cores[:threads]]> --sockets-per-node<number> \ + --cores-per-socket<number> --threads-per-core<number> --begin<time> \ + --bell --comment<string> --constraint<list> --contiguous --cpu-bind<type> \ + --cpus-per-task<number> --dependency<deplist> --chdir<path> --exclusive \ + --nodefile<nodefile> --get-user-env --gid<group> --gres<list> --hold \ + --help --hint<type> --immediate<[seconds]> --jobid<jobid> --no-kill \ + --licenses<licenses> --distribution<dist> --mail-type<type> \ + --mail-user<email> --mem<MB> --mem-per-cpu<MB> --mem-bind<type> \ + --min-cpus<number> --nodes<minnodes[-maxnodes]> --ntasks<number> \ + --network<type> --nice<[adjustment]> --ntasks-per-core<number> \ + --overcommit --partition<partitionname> --quiet --qos<qos> \ + --reservation<name> --share --signal<sig_num[@sig_time]> \ + --time<time> --time-min<time> --tmp<MB> --usage --uid<user> \ + --version --verbose --wait<seconds> --nodelist<nodelist> \ + --wait-all-nodes<0|1> --wckey<wckey> --exclude<nodelist> \ + --checkpoint<time> --checkpoint-dir<directory> --error<file> \ + --preserve-env --epilog<path> --input<file> --job-name<name>
\ + --kill-on-bad-exit --label --msg-timeout --mpi<type> \ + --multi-prog --output<file> \ + --prolog<path> --propagate<rlimits> --pty --quit-on-interrupt \ + --relative<number> --resv-ports --restart-dir<dir> --slurmd-debug<level> \ + --threads<number> --task-epilog<path> --task-prolog<path> --test-only \ + --unbuffered --disable-status --no-allocate --export<env_var> \ + --ntasks-per-socket<number> --ntasks-per-node<number> --tasks-per-node<number>" + + [[ $cur == - ]] && { offer "$shortoptions" ; return ; } + [[ $cur == -- ]] && { offer "$longoptions" ; return ; } + [[ $cur == --* ]] && { offer "$(sed 's/<[^>]*>//g' <<< $longoptions)"; return ; } + + case $prev in + --gid) _gids ;; + --nodefile) _filedir ;; + # TODO --network) _configured_interfaces ;; + --prolog|--task-epilog|--task-prolog) _filedir ;; + --chdir|--restart-dir|--checkpoint-dir) _filedir ;; + --reservation) offer_list "$(_reservations)" ;; + --constraint|-C) offer_list "$(_features)" ;; + --clusters) offer_list "$(_clusters)" ;; + --account|-A) offer_list "$(_accounts)" ;; + --qos) offer_list "$(_qos)" ;; + --gres) offer_list "$(_gres)" ;; + --jobid) offer_list "$(_jobs)" ;; + --exclude|-x) offer_list "$(_nodes)" ;; + --nodelist|-w) offer_list "$(_nodes)" ;; + --licenses|-L) offer_list "$(_licenses)" ;; + --partition|-p) offer_list "$(_partitions)" ;; + --begin) offer $(date -dtomorrow +"%Y-%m-%d");; + --open-mode) offer "append truncate" ;; + --mail-type) offer_list "begin end fail requeue all" ;; + --distribution|-m) offer "block cyclic plane arbitrary" ;; + --mpi) offer "lam mpich1_shmem mpichgm mvapich openmpi none" ;; + --hint) offer "compute_bound memory_bound multithread \ + nomultithread" ;; + --propagate) offer_list "all as core cpu data fsize memlock \ + nofile nproc rss stack" ;; + --mem-bind) offer "none rank local map_mem: mask_mem:" ;; + --cpu-bind) offer "none rank map_cpu: mask_cpu: sockets \ + cores threads ldoms" ;; + --dependency|-d) offer "after: afterany: afternotok: afterok: 
singleton" ;; + :|afterany|after|afternotok|afterok) offer_list "$(_jobs)" ;; + *) COMPREPLY=( $( compgen -c -- "$cur" ) ) ; return + esac + #TODO options for blue gene systems +} +complete -F _srun srun + +# vim: sw=4:ts=4:expandtab diff --git a/contribs/slurmdb-direct/Makefile.in b/contribs/slurmdb-direct/Makefile.in index c2bb9574cfa9df3692420cb371a40de52fb58ceb..a1aca42d58365db198357e2a9491c50b25549bcd 100644 --- a/contribs/slurmdb-direct/Makefile.in +++ b/contribs/slurmdb-direct/Makefile.in @@ -59,6 +59,7 @@ subdir = contribs/slurmdb-direct DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ 
CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/contribs/time_login.c b/contribs/time_login.c index b63c86e8aeffff1ca19d142eb634796b2e224915..d5140a13931330dbcab96b6e106db2c31af16d43 100644 --- a/contribs/time_login.c +++ b/contribs/time_login.c @@ -17,7 +17,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/torque/Makefile.am b/contribs/torque/Makefile.am index b5c1b26b3ff20218350e38e2ebfc44fade31dc91..690613ce6bf165540c74a89e056ddbaaf5af18cb 100644 --- a/contribs/torque/Makefile.am +++ b/contribs/torque/Makefile.am @@ -3,15 +3,18 @@ AUTOMAKE_OPTIONS = foreign -bin_SCRIPTS = pbsnodes qdel qhold qrls qstat qsub mpiexec +bin_SCRIPTS = pbsnodes qalter qdel qhold qrerun qrls qstat qsub mpiexec generate_pbs_nodefile pbsnodes: +qalter: qdel: qhold: +qrerun: qrls: qstat: qsub: mpiexec: +generate_pbs_nodefile: _perldir=$(exec_prefix)`perl -e 'use Config; $$T=$$Config{installsitearch}; $$P=$$Config{installprefix}; $$P1="$$P/local"; $$T =~ s/$$P1//; $$T =~ s/$$P//; print $$T;'` diff --git a/contribs/torque/Makefile.in b/contribs/torque/Makefile.in index 98542058dbcbacd40971e76aa7e4c566e3f3d3a4..a3793548e0a8618cbcaa25881e6501b5a5335aab 100644 --- a/contribs/torque/Makefile.in +++ b/contribs/torque/Makefile.in @@ -59,6 +59,7 @@ subdir = contribs/torque DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 
+280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -346,7 +372,7 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign -bin_SCRIPTS = pbsnodes qdel qhold qrls qstat qsub mpiexec +bin_SCRIPTS = pbsnodes qalter qdel qhold qrerun qrls qstat qsub mpiexec generate_pbs_nodefile _perldir = $(exec_prefix)`perl -e 'use Config; $$T=$$Config{installsitearch}; $$P=$$Config{installprefix}; $$P1="$$P/local"; $$T =~ s/$$P1//; $$T =~ s/$$P//; print $$T;'` all: all-am @@ -541,12 +567,15 @@ uninstall-am: uninstall-binSCRIPTS pbsnodes: +qalter: qdel: qhold: +qrerun: qrls: qstat: qsub: mpiexec: +generate_pbs_nodefile: install-binSCRIPTS: $(bin_SCRIPTS) @$(NORMAL_INSTALL) diff --git a/contribs/torque/generate_pbs_nodefile.pl b/contribs/torque/generate_pbs_nodefile.pl new file mode 100755 index 0000000000000000000000000000000000000000..ed364f619170352ed1ee9ef2da6f5dea3b4d5387 --- /dev/null +++ b/contribs/torque/generate_pbs_nodefile.pl @@ -0,0 +1,47 @@ +#!/usr/bin/perl -w + +# Copyright 2013 Brigham Young University +# Written by Ryan Cox <ryan_cox@byu.edu> +# +# Licensed under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) any later version. +# +# Uses SLURM environment variables to produce a $PBS_NODEFILE -style +# output file. The output goes in a temporary file and the name of +# the file is printed on stdout. 
Intended usage is: +# export PBS_NODEFILE=`generate_pbs_nodefile` + +use strict; +use File::Temp qw( tempfile ); +use FindBin; +use lib "${FindBin::Bin}/../lib/perl"; +use Slurm ':all'; + +my ($fh, $filename) = tempfile(UNLINK => 0); +die "No SLURM_NODELIST given, run generate_pbs_nodefile inside a " + . "Slurm allocation or batch script.\n" if (!$ENV{'SLURM_NODELIST'}); + +my $hl = Slurm::Hostlist::create($ENV{'SLURM_NODELIST'}); +my $tasks = $ENV{SLURM_TASKS_PER_NODE}; +my @counts = split(",", $tasks); + +foreach my $count(@counts) { + my $ppn; + my $nodes; + $count =~ /^(\d+)(\(x(\d+)\))?$/; + $ppn = $1; + if ($3) { + $nodes = $3; + } else { + $nodes = 1; + } + for (my $j = 0; $j < $nodes; $j++) { + my $node = Slurm::Hostlist::shift($hl); + foreach (my $i = 0; $i < $ppn; $i++) { + print $fh "$node\n"; + } + } +} +close($fh); + +print "$filename\n"; diff --git a/contribs/torque/mpiexec.pl b/contribs/torque/mpiexec.pl index 9f6791b5348e0974c94e54b6cd1d68f5c2500dcd..fea1b3e3f36144a625c40b74ce08bd0fdcdffdf0 100755 --- a/contribs/torque/mpiexec.pl +++ b/contribs/torque/mpiexec.pl @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/torque/pbsnodes.pl b/contribs/torque/pbsnodes.pl index 98b242cbc76a894409ed2c4dc97585a40649ae32..fdcaebcdd7ac36efb50e0128b54614d661b2553c 100755 --- a/contribs/torque/pbsnodes.pl +++ b/contribs/torque/pbsnodes.pl @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/torque/qalter.pl b/contribs/torque/qalter.pl new file mode 100644 index 0000000000000000000000000000000000000000..e5ac6a462b24809f0fc0b0cf353683b3d25f4a4e --- /dev/null +++ b/contribs/torque/qalter.pl @@ -0,0 +1,232 @@ +#! /usr/bin/perl -w +############################################################################### +# +# qalter - PBS wrapper for changing job status using scontrol +# +############################################################################### + +use strict; +use FindBin; +use Getopt::Long 2.24 qw(:config no_ignore_case); +use lib "${FindBin::Bin}/../lib/perl"; +use autouse 'Pod::Usage' => qw(pod2usage); +use Slurm ':all'; +use Slurmdb ':all'; # needed for getting the correct cluster dims +use Switch; + +# ------------------------------------------------------------------ +# This makes the assumption job_id will always be the last argument +# ------------------------------------------------------------------- +my $job_id = $ARGV[$#ARGV]; +my ( + $err, + $new_name, + $output, + $rerun, + $resp, + $slurm, + $man, + $help +); + +# Remove this +my $scontrol = "/usr/slurm/bin/scontrol"; + +# ------------------------------ +# Parse Command Line Arguments +# ------------------------------ +GetOptions( + 'N=s' => \$new_name, + 'r=s' => \$rerun, + 'o=s' => \$output, + 'help|?' 
=> \$help, + 'man' => \$man + ) + or pod2usage(2); + +pod2usage(0) if $help; + +if ($man) +{ + if ($< == 0) # Cannot invoke perldoc as root + { + my $id = eval { getpwnam("nobody") }; + $id = eval { getpwnam("nouser") } unless defined $id; + $id = -2 unless defined $id; + $< = $id; + } + $> = $<; # Disengage setuid + $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH + delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; + if ($0 =~ /^([-\/\w\.]+)$/) { + $0 = $1; # Untaint $0 + } else { + die "Illegal characters were found in \$0 ($0)\n"; + } + pod2usage(-exitstatus => 0, -verbose => 2); +} + +# ---------------------- +# Check input arguments +# ---------------------- +if (@ARGV < 1) { + pod2usage(-message=>"Missing Job ID", -verbose=>0); +} else { + $slurm = Slurm::new(); + $resp = $slurm->get_end_time($job_id); + if (not defined($resp)) { + pod2usage(-message=>"Job id $job_id not valid!", -verbose=>0); + } + if ((not defined($new_name)) and (not defined($rerun)) and (not defined($output))) { + pod2usage(-message=>"no argument given!", -verbose=>0); + } +} + +# -------------------------------------------- +# Use Slurm's Perl API to change name of a job +# -------------------------------------------- +if ($new_name) { + my %update = (); + + $update{job_id} = $job_id; + $update{name} = $new_name; + if (Slurm->update_job(\%update)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id name change error: $resp", -verbose=>0); + exit(1); + } +} + +# --------------------------------------------------- +# Use Slurm's Perl API to change the requeue job flag +# --------------------------------------------------- +if ($rerun) { + my %update = (); + + $update{job_id} = $job_id; + if (($rerun eq "n") || ($rerun eq "N")) { + $update{requeue} = 0; + } else { + $update{requeue} = 1; + } + if (Slurm->update_job(\%update)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id requeue error: 
$resp", -verbose=>0); + exit(1); + } +} + +# ------------------------------------------------------------ +# Use Slurm's Perl API to change Comment string +# Comment is used to relocate an output log file +# ------------------------------------------------------------ +if ($output) { + # Example: + # $comment="on:16337,stdout=/gpfsm/dhome/lgerner/tmp/slurm-16338.out,stdout=~lgerner/tmp/new16338.out"; + # + my $comment; + my %update = (); + + # --------------------------------------- + # Get current comment string from job_id + # --------------------------------------- + my($job) = $slurm->load_job($job_id); + $comment = $$job{'job_array'}[0]->{comment}; + + # ---------------- + # Split at stdout + # ---------------- + if ($comment) { + my(@outlog) = split("stdout", $comment); + + # --------------------------------- + # Only 1 stdout argument add a ',' + # --------------------------------- + if ($#outlog < 2) { + $outlog[1] .= "," + } + + # ------------------------------------------------ + # Add new log file location to the comment string + # ------------------------------------------------ + $outlog[2] = "=".$output; + $comment = join("stdout", @outlog); + } else { + $comment = "stdout=$output"; + } + + # ------------------------------------------------- + # Make sure that "%j" is changed to current $job_id + # ------------------------------------------------- + $comment =~ s/%j/$job_id/g ; + + # ----------------------------------------------------- + # Update comment and print usage if there is a response + # ----------------------------------------------------- + $update{job_id} = $job_id; + $update{comment} = $comment; + if (Slurm->update_job(\%update)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id comment change error: $resp", -verbose=>0); + exit(1); + } +} +exit(0); + +############################################################################## + +__END__ + +=head1 NAME + +B<qalter> - alter a job 
name, the job rerun flag or the job output file name. + +=head1 SYNOPSIS + +qalter [-N Name] + [-r y|n] + [-o output file] + <job ID> + +=head1 DESCRIPTION + +The B<qalter> command updates the job name, job rerun flag or job output (stdout) log location. + +It is aimed to be feature-compatible with PBS' qalter. + +=head1 OPTIONS + +=over 4 + +=item B<-N> + +Update job name in the queue + +=item B<-r> + +Alter a job's rerunnable flag. "y" allows a qrerun to be issued; "n" disables the qrerun option. + +=item B<-o> + +Alter a job output log file name (stdout). + +The job log will be moved/renamed after the job has B<terminated>. + +=item B<-?> | B<--help> + +brief help message + +=item B<-man> + +full documentation + +=back + +=head1 SEE ALSO + +qrerun(1) qsub(1) +=cut + diff --git a/contribs/torque/qdel.pl b/contribs/torque/qdel.pl index 410ac48e15ee7e5328326c884905c370eeab32d6..cf0693aa1da4601b7d46738a78dbfd81c6e61771 100755 --- a/contribs/torque/qdel.pl +++ b/contribs/torque/qdel.pl @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/torque/qhold.pl b/contribs/torque/qhold.pl index 4edb55edfd7c9050420efa8349b1160c1ac7f944..028dcf9bfcab991a117f90aadf0f5e7c1b4d2f8f 100755 --- a/contribs/torque/qhold.pl +++ b/contribs/torque/qhold.pl @@ -12,7 +12,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER.
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/torque/qrerun.pl b/contribs/torque/qrerun.pl new file mode 100644 index 0000000000000000000000000000000000000000..2cc1c5419ddc132b00e38e2649c4594fdc908473 --- /dev/null +++ b/contribs/torque/qrerun.pl @@ -0,0 +1,134 @@ +#! /usr/bin/perl -w +############################################################################### +# +# qrerun - PBS wrapper to cancel and resubmit a job +# +############################################################################### +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# In addition, as a special exception, the copyright holders give permission +# to link the code of portions of this program with the OpenSSL library under +# certain conditions as described in each individual source file, and +# distribute linked combinations including the two. You must obey the GNU +# General Public License in all respects for all of the code used other than +# OpenSSL. If you modify file(s) with this exception, you may extend this +# exception to your version of the file(s), but you are not obligated to do +# so. If you do not wish to do so, delete this exception statement from your +# version. If you delete this exception statement from all source files in +# the program, then also delete it here. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Based off code with permission copyright 2006, 2007 Cluster Resources, Inc. +############################################################################### +use strict; +use FindBin; +use Getopt::Long 2.24 qw(:config no_ignore_case); +use lib "${FindBin::Bin}/../lib/perl"; +use autouse 'Pod::Usage' => qw(pod2usage); +use Slurm ':all'; +use Slurmdb ':all'; # needed for getting the correct cluster dims +use Switch; + +# Parse Command Line Arguments +my ( + $help, $man, + $err, $pid, $resp +); + +GetOptions( + 'help|?' => \$help, + '--man' => \$man, + ) or pod2usage(2); + +pod2usage(2) if $help; +# Handle man page flag +if ($man) +{ + if ($< == 0) # Cannot invoke perldoc as root + { + my $id = eval { getpwnam("nobody") }; + $id = eval { getpwnam("nouser") } unless defined $id; + $id = -2 unless defined $id; + $< = $id; + } + $> = $<; # Disengage setuid + $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH + delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; + if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; } # Untaint $0 + else { die "Illegal characters were found in \$0 ($0)\n"; } + pod2usage(-exitstatus => 0, -verbose => 2); +} + + +# This makes the assumption JOBID will always be the last argument +my $job_id = $ARGV[$#ARGV]; + +if (@ARGV < 1) { + pod2usage(-message=>"Invalid Argument", -verbose=>1); + exit(1); +} + +if (Slurm->requeue($job_id)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id rerun error: $resp", -verbose=>0); + exit(1); +} +exit(0); + +__END__ + +=head1 NAME + +B<qrerun> - To rerun a job is to terminate the job and return the job to the queued state in the execution queue in which the job currently resides. +If a job is marked as not rerunable then the rerun request will fail for that job. 
+ +See the B<-r> option of the qsub and qalter commands. + +It is aimed to be feature-compatible with PBS' qrerun. + +=head1 SYNOPSIS + +B<qrerun> [-? | --help] [--man] [--verbose] <job_id> + +=head1 DESCRIPTION + +The B<qrerun> command directs that the specified job is to be rerun if possible. + +=head1 OPTIONS + +=over 4 + +=item B<-? | --help> + +a brief help message + +=item B<--man> + +full documentation + +=back + +=head1 EXIT STATUS + +On success, B<qrerun> will exit with a value of zero. On failure, B<qrerun> will exit with a value greater than zero. + +=head1 SEE ALSO + +qalter(1) qsub(1) +=cut diff --git a/contribs/torque/qrls.pl b/contribs/torque/qrls.pl index 9f2e32780aa500886c94d16526c4a49d28ea599e..9734d265d970beae449f39c9c99b43eee05761fd 100755 --- a/contribs/torque/qrls.pl +++ b/contribs/torque/qrls.pl @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/contribs/torque/qstat.pl b/contribs/torque/qstat.pl index 9c1c4733e479b134f5614abc8c95488ef1b6d98e..b8c4f89682f19e2deb8e21b13b19335ad81136e8 100755 --- a/contribs/torque/qstat.pl +++ b/contribs/torque/qstat.pl @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -412,7 +412,7 @@ sub get_exec_host my ($job) = @_; my $execHost = "--"; - if ($job->{'nodes'}) { + if ($job->{'nodes'} && $job->{'job_resrcs'}) { my @allocNodes = (); my $hl = Slurm::Hostlist::create($job->{'nodes'}); my $inx = 0; @@ -478,7 +478,7 @@ sub print_job_select $job->{'name'}, $sessID, $job->{'num_nodes'} || "--", - $job->{'num_procs'} || "--", + $job->{'num_cpus'} || "--", $job->{'job_min_memory'} || "--", hhmm($job->{'time_limit'} * 60), $job->{'stateCode'}, @@ -530,8 +530,8 @@ sub print_job_full printf("\tResource_List.walltime = %s\n", hhmmss($job->{'time_limit'} * 60)); printf("\tResource_List.nodect = %d\n", $job->{'num_nodes'}) if $job->{'num_nodes'}; - printf("\tResource_List.ncpus = %s\n", $job->{'num_procs'}) - if $job->{'num_procs'}; + printf("\tResource_List.ncpus = %s\n", $job->{'num_cpus'}) + if $job->{'num_cpus'}; if ($job->{'reqNodes'}) { my $nodeExpr = $job->{'reqNodes'}; @@ -597,7 +597,7 @@ sub print_part_limits printf("%-16.16s -- -- ", $part->{'name'}); if($part->{'max_time'} != INFINITE) { - printf("%8u ", $part->{'max_time'}); + print(hhmmss($part->{'max_time'}*60)); } else { printf(" -- "); diff --git a/contribs/torque/qsub.pl b/contribs/torque/qsub.pl index 366b3928ada9654d67c64c5f87b7d905efb1e248..5b06b4692d6566973d758b2690ae8e7ba4cd3709 100755 --- a/contribs/torque/qsub.pl +++ b/contribs/torque/qsub.pl @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -48,16 +48,13 @@ use lib "${FindBin::Bin}/../lib/perl"; use autouse 'Pod::Usage' => qw(pod2usage); use Slurm ':all'; use Switch; +use English; my ($start_time, $account, - # $checkpoint_interval, - $directive_prefix, $err_path, $interactive, $hold, -# $join, -# $keep, $resource_list, $mail_options, $mail_user_list, @@ -65,14 +62,10 @@ my ($start_time, $out_path, $priority, $destination, -# $rerunable, -# $script_path, -# $running_user_list, -# $variable_list, -# $all_env, - $additional_attributes, -# $no_std, + $variable_list, + @additional_attributes, $help, + $resp, $man); my $sbatch = "${FindBin::Bin}/sbatch"; @@ -81,14 +74,11 @@ my $srun = "${FindBin::Bin}/srun"; GetOptions('a=s' => \$start_time, 'A=s' => \$account, -# 'c=i' => \$checkpoint_interval, - 'C=s' => \$directive_prefix, 'e=s' => \$err_path, 'h' => \$hold, 'I' => \$interactive, 'j:s' => sub { warn "option -j is the default, " . "stdout/stderr go into the same file\n" }, -# 'k=s' => \$keep, 'l=s' => \$resource_list, 'm=s' => \$mail_options, 'M=s' => \$mail_user_list, @@ -96,17 +86,13 @@ GetOptions('a=s' => \$start_time, 'o=s' => \$out_path, 'p=i' => \$priority, 'q=s' => \$destination, -# 'r=s' => \$rerunable, -# 'S=s' => \$script_path, -# 'u=s' => \$running_user_list, - 'v=s' => sub { warn "option -v is not supported, " . - "since the current environment " . - "is exported by default\n" }, + 'S=s' => sub { warn "option -S is ignored, " . + "specify shell via #!<shell> in the job script\n" }, + 'v=s' => \$variable_list, 'V' => sub { warn "option -V is not necessary, " . "since the current environment " . "is exported by default\n" }, - 'W' => \$additional_attributes, -# 'z' => \$no_std, + 'W=s' => \@additional_attributes, 'help|?' 
=> \$help, 'man' => \$man, ) @@ -135,13 +121,40 @@ if ($ARGV[0]) { foreach (@ARGV) { $script .= "$_ "; } -} else { - pod2usage(2); } +my $block="false"; +my $depend; +my $group_list; +my $job_id; my %res_opts; my %node_opts; -if($resource_list) { +# remove PBS_NODEFILE environment as passed in to qsub. +if ($ENV{PBS_NODEFILE}) { + delete $ENV{PBS_NODEFILE}; +} + +# Process options provided with the -W name=value syntax. +my $W; +foreach $W (@additional_attributes) { + my($name, $value) = split('=', $W); + if ($name eq 'umask') { + $ENV{SLURM_UMASK} = $value; + } elsif ($name eq 'depend') { + $depend = $value; + } elsif ($name eq 'group_list') { + $group_list = $value; + } elsif (lc($name) eq 'block') { + if (defined $value) { + $block = $value; + } +# } else { +# print("Invalid attribute: $W!"); +# exit(1); + } +} + +if ($resource_list) { %res_opts = %{parse_resource_list($resource_list)}; # while((my $key, my $val) = each(%res_opts)) { @@ -156,6 +169,26 @@ if($resource_list) { if($res_opts{nodes}) { %node_opts = %{parse_node_opts($res_opts{nodes})}; } + if ($res_opts{select} && (!$node_opts{node_cnt} || ($res_opts{select} > $node_opts{node_cnt}))) { + $node_opts{node_cnt} = $res_opts{select}; + } + if ($res_opts{select} && $res_opts{ncpus} && $res_opts{mpiprocs}) { + my $cpus_per_task = int ($res_opts{ncpus} / $res_opts{mppnppn}); + if (!$res_opts{mppdepth} || ($cpus_per_task > $res_opts{mppdepth})) { + $res_opts{mppdepth} = $cpus_per_task; + } + } +} + +if($variable_list) { + $variable_list =~ s/\'/\"/g; + my @parts = $variable_list =~ m/(?:(?<=")[^"]*(?=(?:\s*"\s*,|\s*"\s*$)))|(?<=,)(?:[^",]*(?=(?:\s*,|\s*$)))|(?<=^)(?:[^",]+(?=(?:\s*,|\s*$)))|(?<=^)(?:[^",]*(?=(?:\s*,)))/g; + foreach my $part (@parts) { + my ($key, $value) = $part =~ /(.*)=(.*)/; + if ($key && $value) { + $ENV{$key} = $value; + } + } } my $command; @@ -163,18 +196,44 @@ my $command; if($interactive) { $command = "$salloc"; +# Always want at least one node in the allocation + if 
(!$node_opts{node_cnt}) { + $node_opts{node_cnt} = 1; + } + +# Calculate the task count based of the node cnt and the amount +# of ppn's in the request + if ($node_opts{task_cnt}) { + $node_opts{task_cnt} *= $node_opts{node_cnt}; + } + + if (!$node_opts{node_cnt} && !$node_opts{task_cnt} && !$node_opts{hostlist}) { + $node_opts{task_cnt} = 1; + } } else { + if (!$script) { + pod2usage(2); + } + $command = "$sbatch"; - $command .= " -D $directive_prefix" if $directive_prefix; $command .= " -e $err_path" if $err_path; $command .= " -o $out_path" if $out_path; + +# The job size specification may be within the batch script, +# Reset task count if node count also specified + if ($node_opts{task_cnt} && $node_opts{node_cnt}) { + $node_opts{task_cnt} *= $node_opts{node_cnt}; + } } $command .= " -N$node_opts{node_cnt}" if $node_opts{node_cnt}; $command .= " -n$node_opts{task_cnt}" if $node_opts{task_cnt}; $command .= " -w$node_opts{hostlist}" if $node_opts{hostlist}; +$command .= " --mincpus=$res_opts{ncpus}" if $res_opts{ncpus}; +$command .= " --ntasks-per-node=$res_opts{mppnppn}" if $res_opts{mppnppn}; + if($res_opts{walltime}) { $command .= " -t$res_opts{walltime}"; } elsif($res_opts{cput}) { @@ -183,14 +242,19 @@ if($res_opts{walltime}) { $command .= " -t$res_opts{pcput}"; } -$command .= " --tmp=$res_opts{file}" if $res_opts{file}; -$command .= " --mem=$res_opts{mem}" if $res_opts{mem}; +$command .= " --account='$group_list'" if $group_list; +$command .= " --constraint='$res_opts{proc}'" if $res_opts{proc}; +$command .= " --dependency=$depend" if $depend; +$command .= " --tmp=$res_opts{file}" if $res_opts{file}; +$command .= " --mem=$res_opts{mem}" if $res_opts{mem}; $command .= " --nice=$res_opts{nice}" if $res_opts{nice}; + +$command .= " --gres=gpu:$res_opts{naccelerators}" if $res_opts{naccelerators}; + # Cray-specific options $command .= " -n$res_opts{mppwidth}" if $res_opts{mppwidth}; $command .= " -w$res_opts{mppnodes}" if $res_opts{mppnodes}; $command .= " 
--cpus-per-task=$res_opts{mppdepth}" if $res_opts{mppdepth}; -$command .= " --ntasks-per-node=$res_opts{mppnppn}" if $res_opts{mppnppn}; $command .= " --begin=$start_time" if $start_time; $command .= " --account=$account" if $account; @@ -205,49 +269,127 @@ $command .= " --mail-user=$mail_user_list" if $mail_user_list; $command .= " -J $job_name" if $job_name; $command .= " --nice=$priority" if $priority; $command .= " -p $destination" if $destination; -$command .= " -C $additional_attributes" if $additional_attributes; +$command .= " $script" if $script; +# print "$command\n"; -$command .= " $script"; +# Execute the command and capture its stdout, stderr, and exit status. Note +# that if interactive mode was requested, the standard output and standard +# error are _not_ captured. +if ($interactive) { + my $ret = system($command); + exit ($ret >> 8); +} else { + # Capture stderr from the command to the stdout stream. + $command .= ' 2>&1'; + + # Execute the command and capture the combined stdout and stderr. + my @command_output = `$command 2>&1`; + + # Save the command exit status. + my $command_exit_status = $CHILD_ERROR; + + # If available, extract the job ID from the command output and print + # it to stdout, as done in the PBS version of qsub. + # The "Submitted batch job" header is for backward compatability + # with earlier versions of Slurm's qsub wrapper + if ($command_exit_status == 0) { + my @spcommand_output=split(" ", $command_output[$#command_output]); + $job_id= $spcommand_output[$#spcommand_output]; + print "Submitted batch job "; + print "$job_id\n"; + } else { + print("There was an error running the SLURM sbatch command.\n" . + "The command was:\n" . + "'$command'\n" . + "and the output was:\n" . 
+ "'@command_output'\n"); + } -system($command); + # If block is true wait for the job to finish + my($resp, $count); + my $slurm = Slurm::new(); + if ( (lc($block) eq "true" ) and ($command_exit_status == 0) ) { + sleep 2; + my($job) = $slurm->load_job($job_id); + $resp = $$job{'job_array'}[0]->{job_state}; + while ( $resp < JOB_COMPLETE ) { + $job = $slurm->load_job($job_id); + $resp = $$job{'job_array'}[0]->{job_state}; + sleep 1; + } + } + # Exit with the command return code. + exit($command_exit_status >> 8); +} sub parse_resource_list { my ($rl) = @_; - my %opt = ('arch' => "", + my %opt = ('accelerator' => "", + 'arch' => "", + 'block' => "", 'cput' => "", 'file' => "", 'host' => "", 'mem' => "", + 'mpiprocs' => "", + 'ncpus' => "", 'nice' => "", 'nodes' => "", + 'naccelerators' => "", 'opsys' => "", 'other' => "", 'pcput' => "", 'pmem' => "", + 'proc' => '', 'pvmem' => "", + 'select' => "", 'software' => "", 'vmem' => "", + 'walltime' => "", # Cray-specific resources 'mppwidth' => "", 'mppdepth' => "", 'mppnppn' => "", 'mppmem' => "", - 'mppnodes' => "", - 'walltime' => "" + 'mppnodes' => "" ); my @keys = keys(%opt); +# The select option uses a ":" separator rather than "," +# This wrapper currently does not support multiple select options + +# Protect the colons used to separate elements in walltime=hh:mm:ss. +# Convert to NNhNNmNNs format. + $rl =~ s/walltime=(\d{1,2}):(\d{2}):(\d{2})/walltime=$1h$2m$3s/; + + $rl =~ s/:/,/g; foreach my $key (@keys) { #print "$rl\n"; ($opt{$key}) = $rl =~ m/$key=([\w:\+=+]+)/; } + +# If needed, un-protect the walltime string. + if ($opt{walltime}) { + $opt{walltime} =~ s/(\d{1,2})h(\d{2})m(\d{2})s/$1:$2:$3/; +# Convert to minutes for SLURM. 
+ $opt{walltime} = get_minutes($opt{walltime}); + } + + if($opt{accelerator} && $opt{accelerator} =~ /^[Tt]/ && !$opt{naccelerators}) { + $opt{naccelerators} = 1; + } + if($opt{cput}) { $opt{cput} = get_minutes($opt{cput}); } + if ($opt{mpiprocs} && (!$opt{mppnppn} || ($opt{mpiprocs} > $opt{mppnppn}))) { + $opt{mppnppn} = $opt{mpiprocs}; + } + if($opt{mppmem}) { $opt{mem} = convert_mb_format($opt{mppmem}); } elsif($opt{mem}) { @@ -294,18 +436,6 @@ sub parse_node_opts { my $hl_cnt = Slurm::Hostlist::count($hl); $opt{node_cnt} = $hl_cnt if $hl_cnt > $opt{node_cnt}; - # we always want at least one here - if(!$opt{node_cnt}) { - - $opt{node_cnt} = 1; - } - - # figure out the amount of tasks based of the node cnt and the amount - # of ppn's in the request - if($opt{task_cnt}) { - $opt{task_cnt} *= $opt{node_cnt}; - } - return \%opt; } @@ -366,25 +496,24 @@ B<qsub> - submit a batch job in a familiar pbs format =head1 SYNOPSIS -qsub [-a date_time] - [-A account_string] - [-b secs] - [-C directive_prefix] - [-e path] +qsub [-a start_time] + [-A account] + [-e err_path] [-I] [-l resource_list] - [-m mail_options] [-M user_list] - [-N name] - [-o path] + [-m mail_options] [-M user_list] + [-N job_name] + [-o out_path] [-p priority] [-q destination] + [-v variable_list] [-W additional_attributes] [-h] [script] =head1 DESCRIPTION -The B<qsub> command displays information about nodes. It is aimed to be feature-compatible with PBS' qsub. +The B<qsub> submits batch jobs. It is aimed to be feature-compatible with PBS' qsub. =head1 OPTIONS @@ -392,24 +521,58 @@ The B<qsub> command displays information about nodes. It is aimed to be feature- =item B<-a> -Display information for all nodes. This is the default if no node name is specified. +Earliest start time of job. Format: [HH:MM][MM/DD/YY] + +=item B<-A account> + +Specify the account to which the job should be charged. + +=item B<-e err_path> + +Specify a new path to receive the standard error output for the job. 
=item B<-I> Interactive execution. -=item B<-j> join +=item B<-l resource_list> + +Specify an additional list of resources to request for the job. + +=item B<-m mail_options> + +Specify a list of events on which email is to be generated. + +=item B<-M user_list> + +Specify a list of email addresses to receive messages on specified events. + +=item B<-N job_name> + +Specify a name for the job. + +=item B<-o out_path> + +Specify the path to a file to hold the standard output from the job. + +=item B<-p priority> + +Specify the priority under which the job should run. + +=item B<-q destination> -It is not necessary (currently also not possible) since stderr/stdout are always joined. +Specify the destination (SLURM partition) to which the job is submitted. =item B<-v> [variable_list] -Exporting single variables via -v is not supported, since the entire login environment -is exported by the default. +Exporting single variables via -v is generally not required, since the entire +login environment is exported by default. However, this option can be used +to add newly defined environment variables to specific jobs. =item B<-V> -The -V option to export the current environment is not required since it is done by default. +The -V option to export the current environment is not required since it is +done by default. =item B<-?> | B<--help> diff --git a/contribs/web_apps/chart_stats.cgi b/contribs/web_apps/chart_stats.cgi index 22ffbf99fcdb38a45a9a225bcda6825ebf126148..e237784d21f2cee91188da68461e1af0def9936a 100755 --- a/contribs/web_apps/chart_stats.cgi +++ b/contribs/web_apps/chart_stats.cgi @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/doc/Makefile.in b/doc/Makefile.in index 92c9b85fa79d87f8ef59dc9de37bf7b40491ae49..935d0d3c98ff67c3e49adcff5ed40a751cc10d59 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -55,6 +55,7 @@ subdir = doc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ 
GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am index 0f53ddc423a2231e38dbc9b147108d329f8f2f10..0ba1001cc410a2f554d86814eeccf9f18322ce48 100644 --- a/doc/html/Makefile.am +++ b/doc/html/Makefile.am @@ -1,10 +1,12 @@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html generated_html = \ accounting.html \ accounting_storageplugins.html \ acct_gather_energy_plugins.html \ + acct_gather_profile_plugins.html \ + add.html \ api.html \ authplugins.html \ big_sys.html \ @@ -22,17 +24,21 @@ generated_html = \ dist_plane.html \ 
documentation.html \ download.html \ + dynalloc.html \ elastic_computing.html \ + ext_sensorsplugins.html \ faq.html \ gang_scheduling.html \ gres.html \ gres_design.html \ gres_plugins.html \ help.html \ + hdf5_profile_user_guide.html \ high_throughput.html \ ibm.html \ ibm-pe.html \ jobacct_gatherplugins.html \ + job_array.html \ job_exit_code.html \ job_launch.html \ job_submit_plugins.html \ @@ -71,6 +77,7 @@ generated_html = \ select_design.html \ selectplugins.html \ slurm.html \ + slurmctld_plugstack.html \ slurm_ug_agenda.html \ slurm_ug_cfp.html \ slurm_ug_registration.html \ @@ -94,6 +101,8 @@ html_DATA = \ configurator.easy.html \ entities.gif \ example_usage.gif \ + hdf5_task_attr.png \ + hdf5_job_outline.png \ ibm_pe_fig1.png \ ibm_pe_fig2.png \ linuxstyles.css \ @@ -106,7 +115,6 @@ html_DATA = \ plane_ex5.gif \ plane_ex6.gif \ plane_ex7.gif \ - rosetta.gif \ slurm_logo.png \ schedmd.png \ slurm_design.pdf \ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index 317b4e8dfb9c4fde3d08aa9c3c124306fd76f0b2..2cba7b2ac475fe78f743b9268471044445c36d0c 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -58,6 +58,7 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(srcdir)/configurator.html.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ 
$(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -159,6 +163,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -179,6 +185,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -188,6 +197,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -195,6 +206,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -229,6 +249,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -256,6 +279,9 @@ READLINE_LIBS = 
@READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -315,7 +341,7 @@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ @@ -348,6 +374,8 @@ generated_html = \ accounting.html \ accounting_storageplugins.html \ acct_gather_energy_plugins.html \ + acct_gather_profile_plugins.html \ + add.html \ api.html \ authplugins.html \ big_sys.html \ @@ -365,17 +393,21 @@ generated_html = \ dist_plane.html \ documentation.html \ download.html \ + dynalloc.html \ elastic_computing.html \ + ext_sensorsplugins.html \ faq.html \ gang_scheduling.html \ gres.html \ gres_design.html \ gres_plugins.html \ help.html \ + hdf5_profile_user_guide.html \ high_throughput.html \ ibm.html \ ibm-pe.html \ jobacct_gatherplugins.html \ + job_array.html \ job_exit_code.html \ job_launch.html \ job_submit_plugins.html \ @@ -414,6 +446,7 @@ generated_html = \ select_design.html \ selectplugins.html \ slurm.html \ + slurmctld_plugstack.html \ slurm_ug_agenda.html \ slurm_ug_cfp.html \ slurm_ug_registration.html \ @@ -437,6 +470,8 @@ html_DATA = \ configurator.easy.html \ entities.gif \ example_usage.gif \ + hdf5_task_attr.png \ + hdf5_job_outline.png \ ibm_pe_fig1.png \ ibm_pe_fig2.png \ linuxstyles.css \ @@ -449,7 +484,6 @@ html_DATA = \ plane_ex5.gif \ plane_ex6.gif \ plane_ex7.gif \ - rosetta.gif \ slurm_logo.png \ schedmd.png \ slurm_design.pdf \ diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index 29e0e39b75a192d36a637305523925b88b3ee61e..04ca1cc1fb65657d16762722469ca8c2dfdbe2b5 100644 --- 
a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -210,6 +210,13 @@ checking for mysql_config... /usr/bin/mysql_config MySQL test program built properly. </pre> +<p>Note that at least the first time running the slurmdbd with MySQL you need +to make sure your my.cnf file has innodb_buffer_pool_size equal to at least 64 +megabytes. You can accomplish this by adding the line:<br> +<i>innodb_buffer_pool_size=64M</i><br> +under the [mysqld] reference in the my.cnf file and restarting the mysqld. +This is needed when converting large tables over to the new database schema.</p> + <h2>SLURM Accounting Configuration After Build</h2> <p>For simplicity sake we are going to reference everything as if you @@ -239,6 +246,14 @@ prevent users from accessing invalid accounts. is used and has limits those as well. By setting this option, the 'associations' option is automatically set. </li> +<li>nojobs - This will make it so no job information is stored in + accounting. By setting this 'nosteps' is also set. +</li> +<li>nosteps - This will make it so no step information is stored in + accounting. Both nojobs and nosteps could be helpful in an + environment where you want to use limits but don't really care about + utilization. +</li> <li>qos - This will require all jobs to specify (either overtly or by default) a valid qos (Quality of Service). QOS values are defined for each association in the database. By setting this option, the @@ -692,7 +707,7 @@ in the <a href="resource_limits.html">Resource Limits</a> document.</p> <li><b>MaxCPUMinsPerJob=</b> A limit of cpu minutes to be used by jobs running from this association. If this limit is - reached the job will be killed will be allowed to run. + reached the job will be killed. 
</li> <li><b>MaxCPUsPerJob=</b> The maximum size in cpus any given job can diff --git a/doc/html/acct_gather_profile_plugins.shtml b/doc/html/acct_gather_profile_plugins.shtml new file mode 100644 index 0000000000000000000000000000000000000000..30d62b8ef161a7754fa09a955f73ff6ea65e9479 --- /dev/null +++ b/doc/html/acct_gather_profile_plugins.shtml @@ -0,0 +1,345 @@ +<!--#include virtual="header.txt"--> +<!-- Copyright (C) 2013 Bull S. A. S. + Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. --> + +<h1><a name="top">SLURM Profile Accounting Plugin API (AcctGatherProfileType) +</a></h1> + +<h2> Overview</h2> +<p> This document describes SLURM profile accounting plugins and the API that +defines them. It is intended as a resource to programmers wishing to write +their own SLURM profile accounting plugins. + +<p>A profiling plugin allows more detailed information on the execution of jobs +than can reasonably be kept in the accounting database. (All jobs may also not +be profiled.) + +A separate +<a href="hdf5_profile_user_guide.html">User Guide</a> documents how to use +the hdf5 version of the plugin. + +<p>The plugin provides an API for making calls to store data at various +points in a step's lifecycle. It collects data periodically from potentially +several sources. The periodic samples are eventually +consolidated into one <i>time series</i> dataset for each node of a job. + +<p>The plugin's primary work is done within slurmstepd on the compute nodes. +It assumes a shared file system, presumably on the management network. This +avoids having to transfer files back to the controller at step end. Data is +typically gathered at job_acct_gather interval or acct_gather_energy interval +and the volume is not expected to be burdensome. 
+ +<p>The <i>hdf5</i> implementation records I/O counts from the +network interface (Infiniband), I/O counts from the node from the Lustre +parallel file system, disk I/O counts, cpu and memory utilization +for each task, and a record of energy use. + +<p>This implementation stores this data in an HDF5 file for each step +on each node for the jobs. A separate program +(<a href="sh5util.html">sh5util</a>) is provided to +consolidate all the node-step files in one container for the job. +HDF5 is a well-known structured data format that allows different types of +related data to be stored in one file. Its internal structure resembles a +file system with <i>groups</i> being similar to <i>directories</i> and +<i>data sets</i> being similar to <i>files</i>. There are commodity programs, +notably <b>HDFView</b> for viewing and manipulating these files. +<b>sh5util</b> also provides some capability for extracting subsets of data +for import into other analysis tools like spreadsheets. + +<p>This plugin is incompatible with --enable-front-end. If you need to +simulate a large configuration, please use --enable-multiple-slurmd. +<p>SLURM profile accounting plugins must conform to the SLURM Plugin API with +the following specifications: +<p><span class="commandline">const char +plugin_name[]="<i>full text name</i>"</span> +<p style="margin-left:.2in"> +A free-formatted ASCII text string that identifies the plugin. + +<p><span class="commandline">const char +plugin_type[]="<i>major/minor</i>"</span><br> +<p style="margin-left:.2in"> +The major type must be "acct_gather_profile". +The minor type can be any suitable name +for the type of profile accounting. We currently use +<ul> +<li><b>none</b>— No profile data is gathered. +<li><b>hdf5</b>—Gets profile data about energy use, i/o sources +(Lustre, network) and task data such as local disk i/o, CPU and memory usage. 
+</ul> +<p>The programmer is urged to study +<span class="commandline"> +src/plugins/acct_gather_profile/acct_gather_profile_hdf5.c</span> and +<span class="commandline">src/common/slurm_acct_gather_profile.c</span> +for a sample implementation of a SLURM profile accounting plugin. +<p class="footer"><a href="#top">top</a> + +<h2>API Functions</h2> +<p>All of the following functions are required. Functions which are not +implemented must be stubbed. + +<p class="commandline"> +void acct_gather_profile_g_conf_options(s_p_options_t **full_options, +int *full_options_cnt) +<p style="margin-left:.2in"><b>Description</b>:<br> +Defines configuration options in acct_gather.conf<br /> +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">full(out) option definitions.</span> +<span class="commandline">full_options_cnt(out) number in full.</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +void acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl) +<p style="margin-left:.2in"><b>Description</b>:<br> +Set configuration options from acct_gather.conf<br /> +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">tbl -- hash table of options.</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +void acct_gather_profile_g_conf_get(s_p_hashtbl_t *tbl) +<p style="margin-left:.2in"><b>Description</b>:<br> +Gets configuration options from acct_gather.conf<br /> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">void* pointer to slurm_acct_gather_conf_t</span> + on success, or<br> <span class="commandline">NULL</span> on failure. 
+ +<p class="commandline"> +int acct_gather_profile_p_node_step_start(slurmd_job_t* job) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per step on each node from slurmstepd, before launching tasks. +<br /> +Provides an opportunity to create files and other node-step level +initialization. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slurmd_job_t structure containing information +about the step. </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_node_step_end(slurmd_job_t* job) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per step on each node from slurmstepd, after all tasks end. +<br /> +Provides an opportunity to close files, etc. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slurmd_job_t structure containing information +about the step. </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_task_start(slurmd_job_t* job, uint32_t taskid) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per task from slurmstepd, BEFORE node step start is called. +<br /> +Provides an opportunity to gather beginning values from node counters +(bytes_read ...) +<br /> +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slurmd_job_t structure containing information +about the step. </span> +<br /><span class="commandline">taskid -- SLURM taskid. </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. 
+ +<p class="commandline"> +int acct_gather_profile_p_task_end(slurmd_job_t* job, pid_t taskpid) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per task from slurmstepd. +<br /> +Provides an opportunity to put final data for a task. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slurmd_job_t structure containing information +about the step. </span> +<br /><span class="commandline">pid -- task process id (pid_t). </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_add_sample_data(uint32_t type, void* data); +<p style="margin-left:.2in"><b>Description</b>:<br> +Put data at the Node Samples level. Typically called from something called +at either job_acct_gather interval or acct_gather_energy interval. +<br /> +All samples in the same group will eventually be consolidated in one +time series. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<br /><span class="commandline">type -- identifies the type of data. </span> +<br /><span class="commandline">data -- data structure to be put to the file. +</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<h2>Parameters</h2> + +<p>These parameters can be used in the slurm.conf to configure the +plugin and the frequency at which to gather node profile data.</p> +<dl> +<dt><span class="commandline">AcctGatherProfileType</span> +<dd>Specifies which plugin should be used. +</dl> + +<p>The <a href="acct_gather.conf.html">acct_gather.conf</a> provides profile +configuration options. +<dl> +<dt><span class="commandline">ProfileDir</span> +<dd>Path to location in a shared file system in which to write profile data. 
+There is no default as there is no standard location for a shared file system. +If this parameter is not specified, no profiling will occur. +<dt><span class="commandline">ProfileDefaultProfile</span> +<dd>Default setting for --profile command line option for srun, salloc, sbatch. +</dl> +The default profile value is <b>none</b> which means no profiling will be done +for jobs. The hdf5 plugin also includes: +<ul> +<li> +<b>energy</b> sample energy use for the node. +</li> +<li> +<b>lustre</b> sample i/o to the Lustre file system for the node. +</li> +<li> +<b>network</b> sample i/o through the network (infiniband) interface +for the node. +</li> +<li> +<b>task</b> sample local disk I/O, cpu and memory use for each task. +</li> +<li> +<b>all</b> all of the above. +</li> +</ul> +Use caution when setting the default to values other than none as a file for +each job will be created. This option is provided for test systems. +<p>Most of the sources of profile data are associated with various +acct_gather plugins. The acct_gather.conf file has settings for various +sampling mechanisms that can be used to change the frequency at which +samples occur. + +<h2>Data Types</h2> +A plugin-like structure is implemented to generalize HDF5 data operations from +various sources. A <i>C</i> <b>typedef</b> is defined for each datatype. These +declarations are in /common/slurm_acct_gather_profile.h so the datatypes are +common to all profile plugins. +<p> +The operations are defined via structures of function pointers, and they are +defined in /plugins/acct_gather_profile/common/profile_hdf5.h and should work +on any HDF5 implementation, not only hdf5. +<p> +Functions must be implemented to perform various operations for the datatype. +The api for the plugin includes an argument for the datatype so that the +implementation of that api can call the specific operation for that datatype. 
+<p>Groups in the HDF5 file containing a dataset will include an attribute for +the datatype so that the program that merges step files into the job can +discover the type of the group and do the right thing. +<p> +For example, the typedef for the energy sample datatype; +<pre> +typedef struct profile_energy { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t watts; + uint64_t cpu_freq; +} profile_energy_t; +</pre> +<p> +A <i>factory</i> method is implemented for each type to construct a structure +with functions implementing various operations for the type. +The following structure of functions is required for each type. +<pre> +/* + * Structure of function pointers of common operations on a + * profile data type. (Some may be stubs, particularly if the data type + * does not represent a time series. + * dataset_size -- size of one dataset (structure size). + * create_memory_datatype -- creates hdf5 memory datatype + * corresponding to the datatype structure. + * create_file_datatype -- creates hdf5 file datatype + * corresponding to the datatype structure. + * create_s_memory_datatype -- creates hdf5 memory datatype + * corresponding to the summary datatype structure. + * create_s_file_datatype -- creates hdf5 file datatype + * corresponding to the summary datatype structure. + * init_job_series -- allocates a buffer for a complete time + * series (in job merge) and initializes each member + * merge_step_series -- merges all the individual time samples + * into a single data set with one item per sample. + * Data items can be scaled (e.g. subtracting beginning time) + * differenced (to show counts in interval) or other things + * appropriate for the series. + * series_total -- accumulate or average members in the entire + * series to be added to the file as totals for the node or + * task. 
+ * extract_series -- format members of a structure for putting + * to a file data extracted from a time series to be imported into + * another analysis tool. (e.g. format as comma separated value.) + * extract_totals -- format members of a structure for putting + * to a file data extracted from a time series total to be imported + * into another analysis tool. (e.g. format as comma separated value.) + */ +typedef struct profile_hdf5_ops { + int (*dataset_size) (); + hid_t (*create_memory_datatype) (); + hid_t (*create_file_datatype) (); + hid_t (*create_s_memory_datatype) (); + hid_t (*create_s_file_datatype) (); + void* (*init_job_series) (int, int); + void (*merge_step_series) (hid_t, void*, void*, void*); + void* (*series_total) (int, void*); + void (*extract_series) (FILE*, bool, int, int, char*, + char*, void*); + void (*extract_totals) (FILE*, bool, int, int, char*, + char*, void*); +} profile_hdf5_ops_t; +</pre> + +Note there are two different data types for supporting time series.<br> +1) A primary type is defined for gathering data in the node step file. +It is typically named profile_{series_name}_t.<br> +2) Another type is defined for summarizing series totals. +It is typically named profile_{series_name}_s_t. It does not have a 'factory'. +It is only used in the functions of the primary data type and the +primary's structure has operations to create appropriate hdf5 objects. + +<p>When adding a new type, the <b>profile_factory</b> function has to be +modified to return an <i>ops</i> for the type. + +<p>Interaction between type and hdf5. +<ul> +<li> +The profile_{type}_t structure is used by callers of the <b>add_sample_data</b> +functions. +</li> +<li> +HDF5 needs a <b>memory</b>_datatype to transform this structure into its +dataset object in memory. The <i>create_memory_datatype</i> function creates +the appropriate object. 
+</li> +<li> +HDF5 needs a <b>file</b>_datatype to transform the dataset into how it will be +written to the HDF5 file (or to transform what it reads from a file into a +dataset.) The <i>create_file_datatype</i> function creates +the appropriate object. +</li> +</ul> +<h2>Versioning</h2> +<p>This document describes version 1 of the SLURM Profile Accounting API. +Future releases of SLURM may revise this API. A profile accounting plugin +conveys its ability to implement a particular API version using the mechanism +outlined for SLURM plugins.</p> + +<p class="footer"><a href="#top">top</a> + +<p style="text-align:center;">Last modified 1 April 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/add.shtml b/doc/html/add.shtml new file mode 100644 index 0000000000000000000000000000000000000000..d7636792850cfc1ebf5e9b6f2e16935e98917387 --- /dev/null +++ b/doc/html/add.shtml @@ -0,0 +1,61 @@ +<!--#include virtual="header.txt"--> + +<h1>Adding Files or Plugins to Slurm</h1> + +<h2>Adding a File to Slurm</h2> + +<p>This is the procedure to follow in order to add a new C file to the Slurm +code base. We recommend using a git branch for this purpose.</p> + +<ol> +<li>Add your new file to the git repository.</li> + +<li>Modify the "Makefile.am" file in the file's parent directory.</li> + +<li>Execute "./autogen.sh" in Slurm's top level directory. +If you have older versions of the autoconf, automake, libtool or aclocal then +you may need to manually modify the Makefile.in file in the file's parent +directory. If you have different versions of the files than were originally +used by the Slurm team, this may rebuild all of the Makefile.in files in Slurm.</li> +</ol> + +<h2>Adding a Plugin to Slurm</h2> + +<p>This is the procedure to follow in order to add a new plugin to the Slurm +code base. We recommend using a git branch for this purpose. 
In this example, +we show which files would need to be modified in order to add a plugin named +"topology/4d_torus".</p> + +<ol> +<li>Create a new directory for this plugin +(e.g. "src/plugins/topology/4d_torus").</li> + +<li>Add this new directory to its parent directory's "Makefile.am" file +(e.g. "src/plugins/topology/Makefile.am").</li> + +<li>Put your new file(s) in the appropriate directory +(e.g. "src/plugins/topology/4d_torus/topology_4d_torus.c"). </li> + +<li>Create a "Makefile.am" file in the new directory identifying the new file(s) +(e.g. "src/plugins/topology/4d_torus/Makefile.am"). Use an existing "Makefile.am" +file as a model.</li> + +<li>Identify the new Makefile to be built at Slurm configure time in the file +"configure.ac". Please maintain the alphabetic ordering of entries.</li> + +<li>Execute "./autogen.sh" in Slurm's top level directory. +If you have older versions of the autoconf, automake, libtool or aclocal then +you may need to manually create or modify the Makefile.in files. +If you have different versions of the files than were originally used by the +Slurm team, this may rebuild all of the Makefile.in files in Slurm.</li> + +<li>Modify the "slurm.spec" file to include the new plugin file in an +appropriate RPM.</li> + +<li>Add the new files, including "Makefile.am" and "Makefile.in", to the git +repository.</li> +</ol> + +<p style="text-align:center;">Last modified 28 April 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/big_sys.shtml b/doc/html/big_sys.shtml index 9fa58938f92fd23783df149186c7ea9aad7055e7..7f302cfa7a314ab60ef941171657b326fc3dd8ae 100644 --- a/doc/html/big_sys.shtml +++ b/doc/html/big_sys.shtml @@ -2,16 +2,28 @@ <h1>Large Cluster Administration Guide</h1> -<p>This document contains SLURM administrator information specifically +<p>This document contains Slurm administrator information specifically for clusters containing 1,024 nodes or more. 
-Virtually all SLURM components have been validated (through emulation) -for clusters containing up to 65,536 compute nodes. +The largest system currently managed by Slurm is 122,880 compute nodes +and 1,966,080 cores (IBM Bluegene/Q at Lawrence Livermore National Laboratory). +Slurm operation on systems orders of magnitude larger has been validated +using emulation. Getting optimal performance at that scale does require some tuning and this document should help you off to a good start. -A working knowledge of SLURM should be considered a prerequisite +A working knowledge of Slurm should be considered a prerequisite for this material.</p> -<h2>System configuration</h2> +<h2>Performance</h2> + +<p>Times below are for execution of an MPI program printing "Hello world" and +exiting and includes the time for processing output. Your performance may +vary due to differences in hardware, software, and configuration.</p> +<ul> +<li>1,966,080 tasks on 122,880 compute nodes of a BlueGene/Q: 322 seconds</li> +<li>30,000 tasks on 15,000 compute nodes of a Linux cluster: 30 seconds</li> +</ul> + +<h2>System Configuration</h2> <p>Three system configuration parameters must be set to support a large number of open files and TCP connections with large bursts of messages. Changes can @@ -40,7 +52,7 @@ using the ifconfig command. A value of 4096 has been found to work well for one site with a very large cluster (e.g. <i>"ifconfig <interface> txqueuelen 4096"</i>).</p> -<h2>User limits</h2> +<h2>User Limits</h2> <p>The <b>ulimit</b> values in effect for the <b>slurmctld</b> daemon should be set quite high for memory size, open file count and stack size.</p> @@ -72,7 +84,7 @@ on data transmission.</p> <h2>Node Configuration</h2> -<p>While SLURM can track the amount of memory and disk space actually found +<p>While Slurm can track the amount of memory and disk space actually found on each compute node and use it for scheduling purposes, this entails extra overhead. 
Optimize performance by specifying the expected configuration using @@ -81,7 +93,7 @@ the available parameters (<i>RealMemory</i>, <i>CPUs</i>, and If the node is found to contain less resources than configured, it will be marked DOWN and not used. Also set the <i>FastSchedule</i> parameter. -While SLURM can easily handle a heterogeneous cluster, configuring +While Slurm can easily handle a heterogeneous cluster, configuring the nodes using the minimal number of lines in <i>slurm.conf</i> will both make for easier administration and better performance.</p> @@ -90,7 +102,7 @@ will both make for easier administration and better performance.</p> <p>If a high throughput of jobs is anticipated (i.e. large numbers of jobs with brief execution times) then configure <i>MinJobAge</i> to the smallest interval practical for your environment. <i>MinJobAge</i> specifies the -minimum number of seconds that a terminated job will be retained by SLURM's +minimum number of seconds that a terminated job will be retained by Slurm's control daemon before purging. After this time, information about terminated jobs will only be available through accounting records.</p> @@ -127,7 +139,7 @@ but this will result in a delay for reallocating resources to new jobs.</p> <h2>Other</h2> -<p>SLURM uses hierarchical communications between the slurmd daemons +<p>Slurm uses hierarchical communications between the slurmd daemons in order to increase parallelism and improve performance. The <i>TreeWidth</i> configuration parameter controls the fanout of messages. The default value is 50, meaning each slurmd daemon can communicate @@ -144,6 +156,6 @@ the hard limit in order to process all of the standard input and output connections to the launched tasks. 
It is recommended that you set the open file hard limit to 8192 across the cluster.</p> -<p style="text-align:center;">Last modified 2 November 2012</p> +<p style="text-align:center;">Last modified 5 August 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/bluegene.shtml b/doc/html/bluegene.shtml index 8d4bc4ebcae8a332f9fe6adeee4d3c92383950f5..58d9f5956efb0680a4cb79b3d9927334ea03240c 100644 --- a/doc/html/bluegene.shtml +++ b/doc/html/bluegene.shtml @@ -402,6 +402,9 @@ The value of <i>SelectType</i> must be set to "select/bluegene" (which happens automatically) in order to have node selection performed using a system aware of the system's topography and interfaces. +The value of <i>TopologyPlugin</i> must be set to "topology/none" (which +happens automatically) since topology information is managed by the +select/bluegene plugin. The value of <i>Prolog</i> should be set to the full pathname of a program that will delay execution until the job's block is ready for use by the user running the job. It is recommended that you construct a script @@ -874,6 +877,6 @@ scheduling logic, etc. 
</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 7 November 2012</p> +<p style="text-align:center;">Last modified 21 December 2012</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/checkpoint_blcr.shtml b/doc/html/checkpoint_blcr.shtml index c5ac40abfbc696e9913ac5204258106f9b9b12ca..1ba135a3a89209e562b946352bbb484c390bc243 100644 --- a/doc/html/checkpoint_blcr.shtml +++ b/doc/html/checkpoint_blcr.shtml @@ -61,7 +61,7 @@ files of a job step will be read from</li> <p>Environment variables are available for all of these options:</p> <ul> -<li<b>SLURM_CHECKPOINT</b> is equivalent to <b>--checkpoint</b>:</li> +<li><b>SLURM_CHECKPOINT</b> is equivalent to <b>--checkpoint</b>:</li> <li><b>SLURM_CHECKPOINT_DIR</b> is equivalent to <b>--checkpoint-dir</b></li> <li><b>SLURM_RESTART_DIR</b> is equivalent to <b>--restart-dir</b></li> </li> @@ -193,6 +193,6 @@ option at job submit time or scontrol's <b>ImageDir</b> option. <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 20 June 2012</p> +<p style="text-align:center;">Last modified 12 August 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/checkpoint_plugins.shtml b/doc/html/checkpoint_plugins.shtml index de5054a43dc6a91546371459d007e6f1746084c2..32fe7e5ac45e16682b4f1a2b362c392e365838c8 100644 --- a/doc/html/checkpoint_plugins.shtml +++ b/doc/html/checkpoint_plugins.shtml @@ -5,7 +5,7 @@ <h2> Overview</h2> <p> This document describes SLURM job checkpoint plugins and the API that defines them. It is intended as a resource to programmers wishing to write their own SLURM -job checkpoint plugins. This is version 0 of the API.</p> +job checkpoint plugins. This is version 100 of the API.</p> <p>SLURM job checkpoint plugins are SLURM plugins that implement the SLURM API for checkpointing and restarting jobs. 
@@ -75,26 +75,35 @@ checkpoint data that was previously allocated by slurm_ckpt_alloc_job.</p> the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> -<p class="commandline">int slurm_ckpt_pack_job (check_jobinfo_t jobinfo, Buf buffer);</p> +<p class="commandline">int slurm_ckpt_pack_job (check_jobinfo_t jobinfo, Buf buffer, uint16_t protocol_version);</p> <p style="margin-left:.2in"><b>Description</b>: Store job-step specific checkpoint data into a buffer.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> <b>jobinfo</b> (input) pointer to the previously allocated storage.<br> -<b>Buf</b> (input/output) buffer to which jobinfo has been appended.</p> +<b>Buf</b> (input/output) buffer to which jobinfo has been appended.<br> +<b>protocol_version</b> (input) communication protocol version.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> -<p class="commandline">int slurm_ckpt_unpack_job (check_jobinfo_t jobinfo, Buf buffer);</p> +<p class="commandline">int slurm_ckpt_unpack_job (check_jobinfo_t jobinfo, Buf buffer, uint16_t protocol_version);</p> <p style="margin-left:.2in"><b>Description</b>: Retrieve job-step specific checkpoint data from a buffer.</p> <p style="margin-left:.2in"><b>Arguments</b>:</br> <b>jobinfo</b> (output) pointer to the previously allocated storage.<br> -<b>Buf</b> (input/output) buffer from which jobinfo has been removed.</p> +<b>Buf</b> (input/output) buffer from which jobinfo has been removed.<br> +<b>protocol_version</b> (input) communication protocol version.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. 
On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> + +<p class="commandline">check_jobinfo_t slurm_ckpt_copy_job (check_jobinfo_t jobinfo);</p> +<p style="margin-left:.2in"><b>Description</b>: Duplicate job-step specific checkpoint data.</p> +<p style="margin-left:.2in"><b>Arguments</b>:<br> +<b>jobinfo</b> (input) pointer to the previously allocated storage.<br> +<p style="margin-left:.2in"><b>Returns</b>: copy of jobinfo if successful. NULL on failure.</p> + <p class="commandline">int slurm_ckpt_op ( uint32_t job_id, uint32_t step_id, struct step_record *step_ptr, uint16_t op, uint16_t data, char *image_dir, time_t *event_time, @@ -186,6 +195,6 @@ A checkpoint plugin conveys its ability to implement a particular API version using the mechanism outlined for SLURM plugins.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 20 June 2012</p> +<p style="text-align:center;">Last modified 7 January 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/configurator.easy.html.in b/doc/html/configurator.easy.html.in index 0f9982c91051f02a1cdb6af96eedab3822c97e13..ff2dd530cbfd1e4ac874d5c72df2536b5916f619 100644 --- a/doc/html/configurator.easy.html.in +++ b/doc/html/configurator.easy.html.in @@ -349,10 +349,10 @@ Select one value for <B>ReturnToService</B>:<BR> <H2>Scheduling</H2> Define the mechanism to be used for controlling job ordering.<BR> Select one value for <B>SchedulerType</B>:<BR> -<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In -First-Out (FIFO)<BR> <input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>: FIFO with backfill<BR> +<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In +First-Out (FIFO)<BR> <P> Define what node configuration (sockets, cores, memory, etc.) should be used. 
Using values defined in the configuration file will provide faster scheduling.<BR> diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in index a61647be938234c54bb8d35fe7718be551bee0b2..f753fd8700bfbaef93267d06fb8abcb213869671 100644 --- a/doc/html/configurator.html.in +++ b/doc/html/configurator.html.in @@ -226,6 +226,7 @@ function displayfile() "# <br>" + "# <br>" + "# JOB PRIORITY <br>" + + "#PriorityFlags= <br>" + "#PriorityType=priority/basic <br>" + "#PriorityDecayHalfLife= <br>" + "#PriorityCalcPeriod= <br>" + @@ -522,10 +523,10 @@ Select one value for <B>ReturnToService</B>:<BR> <H2>Scheduling</H2> Define the mechanism to be used for controlling job ordering.<BR> Select one value for <B>SchedulerType</B>:<BR> -<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In -First-Out (FIFO)<BR> <input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>: FIFO with backfill<BR> +<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In +First-Out (FIFO)<BR> <input type="radio" name="sched_type" value="gang"> <B>Gang</B>: Gang scheduling (time-slicing for parallel jobs)<BR> <input type="radio" name="sched_type" value="wiki"> <B>Wiki</B>: Wiki interface @@ -861,5 +862,5 @@ before terminating all remaining tasks. 
A value of zero indicates unlimited wait </FORM> <HR> <a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br> -Last modified 2 August 2012</P> +Last modified 14 January 2013</P> </BODY> diff --git a/doc/html/cons_res.shtml b/doc/html/cons_res.shtml index 29c1bcbeeea4c379885a3dcd96fe6af157f563b4..196d45bf74962237f6cd4731236753a5927362d4 100644 --- a/doc/html/cons_res.shtml +++ b/doc/html/cons_res.shtml @@ -1,213 +1,108 @@ <!--#include virtual="header.txt"--> -<h1><a name="top">Consumable Resources in SLURM</a></h1> +<h1><a name="top">Consumable Resources in Slurm</a></h1> -<p>SLURM, using the default node allocation plug-in, allocates nodes to jobs in +<p>Slurm, using the default node allocation plug-in, allocates nodes to jobs in exclusive mode. This means that even when all the resources within a node are not utilized by a given job, another job will not have access to these resources. Nodes possess resources such as processors, memory, swap, local disk, etc. and jobs consume these resources. The exclusive use default policy -in SLURM can result in inefficient utilization of the cluster and of its nodes +in Slurm can result in inefficient utilization of the cluster and of its nodes resources. +Slurm's <i>cons_res</i> or consumable resource plugin is available to +manage resources on a much more fine-grained basis as described below.</p> -<p>A plug-in supporting CPUs as a consumable resource is available in -SLURM 0.5.0 and newer versions of SLURM. Information on how to use -this plug-in is described below. 
-</p> +<h2>Using the Consumable Resource Allocation Plugin: <b>select/cons_res</b></h2> -<h2>Using the Consumable Resource Node Allocation Plugin: <b>select/cons_res</b></h2> - -<ol start=1 type=1> - <li><b>SLURM version 1.2 and newer</b></li> +<ul> +<li>Consumable resources has been enhanced with several new resources +--namely CPU (same as in previous version), Socket, Core, Memory +as well as any combination of the logical processors with Memory:</li> +<ul> + <li><b>CPU</b> (<i>CR_CPU</i>): CPU as a consumable resource.</li> <ul> - <li>Consumable resources has been enhanced with several new resources - --namely CPU (same as in previous version), Socket, Core, Memory - as well as any combination of the logical processors with Memory:</li> - <ul> - <li><b>CPU</b> (<i>CR_CPU</i>): CPU as a consumable resource. - <ul> - <li>No notion of sockets, cores, or threads.</li> - <li>On a multi-core system CPUs will be cores.</li> - <li>On a multi-core/hyperthread system CPUs will be threads.</li> - <li>On a single-core systems CPUs are CPUs. ;-) </li> - </ul> - <li><b>Socket</b> (<i>CR_Socket</i>): Socket as a consumable - resource.</li> - <li/><b>Core</b> (<i>CR_Core</i>): Core as a consumable - resource.</li> - <li><b>Memory</b> (<i>CR_Memory</i>) Memory <u>only</u> as a - consumable resource. Note! CR_Memory assumes Shared=Yes</li> - <li><b>Socket and Memory</b> (<i>CR_Socket_Memory</i>): Socket - and Memory as consumable resources.</li> - <li><b>Core and Memory</b> (<i>CR_Core_Memory</i>): Core and - Memory as consumable resources.</li> - <li><b>CPU and Memory</b> (<i>CR_CPU_Memory</i>) CPU and Memory - as consumable resources.</li> - </ul> - <li>In the cases where Memory is the consumable resource or one of - the two consumable resources the <b>RealMemory</b> parameter, which - defines a node's amount of real memory in slurm.conf, must be - set when FastSchedule=1. 
- <li>srun's <i>-E</i> extension for sockets, cores, and threads are - ignored within the node allocation mechanism when CR_CPU or - CR_CPU_MEMORY is selected. It is considered to compute the total - number of tasks when -n is not specified. </li> - <li>A new srun switch <i>--job-mem=MB</i> was added to allow users - to specify the maximum amount of real memory per node required - by their application. This switch is needed in the environments - were Memory is a consumable resource. It is important to specify - enough memory since slurmd will not allow the application to use - more than the requested amount of real memory per node. The - default value for --job-mem is 1 MB. see srun man page for more - details.</li> - <li><b>All CR_s assume Shared=No</b> or Shared=Force EXCEPT for - <b>CR_MEMORY</b> which <b>assumes Shared=Yes</b></li> - <li>The consumable resource plugin is enabled via SelectType and - SelectTypeParameter in the slurm.conf.</li> + <li>No notion of sockets, cores, or threads.</li> + <li>On a multi-core system CPUs will be cores.</li> + <li>On a multi-core/hyperthread system CPUs will be threads.</li> + <li>On a single-core systems CPUs are CPUs. ;-)</li> + </ul> + <li><b>Board</b> (<i>CR_Board</i>): Baseboard as a consumable resource.</li> + <li><b>Socket</b> (<i>CR_Socket</i>): Socket as a consumable resource.</li> + <li><b>Core</b> (<i>CR_Core</i>): Core as a consumable resource.</li> + <li><b>Memory</b> (<i>CR_Memory</i>) Memory <u>only</u> as a + consumable resource. Note! 
CR_Memory assumes Shared=Yes</li> + <li><b>Socket and Memory</b> (<i>CR_Socket_Memory</i>): Socket + and Memory as consumable resources.</li> + <li><b>Core and Memory</b> (<i>CR_Core_Memory</i>): Core and + Memory as consumable resources.</li> + <li><b>CPU and Memory</b> (<i>CR_CPU_Memory</i>) CPU and Memory + as consumable resources.</li> +</ul> + +<li>In the cases where Memory is the consumable resource or one of +the two consumable resources the <b>RealMemory</b> parameter, which +defines a node's amount of real memory in slurm.conf, must be +set when FastSchedule=1.</li> + +<li>srun's <i>-E</i> extension for sockets, cores, and threads are +ignored within the node allocation mechanism when CR_CPU or +CR_CPU_MEMORY is selected. It is considered to compute the total +number of tasks when -n is not specified.</li> + +<li>The job submission commands (salloc, sbatch and srun) support the options +<i>--mem=MB</i> and <i>--mem-per-cpu=MB</i> permitting users to specify +the maximum amount of real memory required per node or per allocated CPU. +This option is required in the environments where Memory is a consumable +resource. It is important to specify enough memory since Slurm will not allow +the application to use more than the requested amount of real memory. The +default value for --mem is 1 MB. See the srun man page for more details.</li> + +<li><b>All CR_s assume Shared=No</b> or Shared=Force EXCEPT for +<b>CR_MEMORY</b> which <b>assumes Shared=Yes</b></li> + +<li>The consumable resource plugin is enabled via SelectType and +SelectTypeParameter in the slurm.conf.</li> + <pre> # -# "SelectType" : node selection logic for scheduling. -# "select/bluegene" : the default on BlueGene systems, aware of -# system topology, manages bglblocks, etc. -# "select/cons_res" : allocate individual consumable resources -# (i.e. processors, memory, etc.) 
-# "select/linear" : the default on non-BlueGene systems, -# no topology awareness, oriented toward -# allocating nodes to jobs rather than -# resources within a node (e.g. CPUs) -# -# SelectType=select/linear -SelectType=select/cons_res +# Excerpts from sample slurm.conf file -# o Define parameters to describe the SelectType plugin. For -# - select/bluegene - this parameter is currently ignored -# - select/linear - this parameter is currently ignored -# - select/cons_res - the parameters available are -# - CR_CPU (1) - CPUs as consumable resources. -# No notion of sockets, cores, or threads. -# On a multi-core system CPUs will be cores -# On a multi-core/hyperthread system CPUs -# will be threads -# On a single-core systems CPUs are CPUs. -# - CR_Socket (2) - Sockets as a consumable resource. -# - CR_Core (3) - Cores as a consumable resource. -# - CR_Memory (4) - Memory as a consumable resource. -# Note! CR_Memory assumes Shared=Yes -# - CR_Socket_Memory (5) - Socket and Memory as consumable -# resources. -# - CR_Core_Memory (6) - Core and Memory as consumable -# resources. (Not yet implemented) -# - CR_CPU_Memory (7) - CPU and Memory as consumable -# resources. -# -# (#) refer to the output of "scontrol show config" -# -# NB!: The -E extension for sockets, cores, and threads -# are ignored within the node allocation mechanism -# when CR_CPU or CR_CPU_MEMORY is selected. -# They are considered to compute the total number of -# tasks when -n is not specified -# -# NB! All CR_s assume Shared=No or Shared=Force EXCEPT for -# CR_MEMORY which assumes Shared=Yes -# -#SelectTypeParameters=CR_CPU (default) +SelectType=select/cons_res +SelectTypeParameters=CR_Core_Memory </pre> - <li>Using <i>--overcommit</i> or <i>-O</i> is allowed in this new version - of consumable resources. When the process to logical processor pinning is - enabled (task/affinity plug-in) the extra processes will not affect - co-scheduled jobs other than other jobs started with the -O flag. 
- We are currently investigating alternative approaches of handling the - pinning of jobs started with <i>--overcommit</i></li> - <li><i>-c</i> or <i>--cpus-per-task</i> works in this version of - consumable resources</li> - </ul> - <li><b>General comments</b></li> - <ul> - <li>SLURM's default <b>select/linear</b> plugin is using a best fit algorithm based on - number of consecutive nodes. The same node allocation approach is used in - <b>select/cons_res</b> for consistency.</li> - <li>The <b>select/cons_res</b> plugin is enabled or disabled cluster-wide.</li> - <li>In the case where <b>select/cons_res</b> is not enabled, the normal SLURM behaviors - are not disrupted. The only changes, users see when using the <b>select/cons_res</b> - plug-in, are that jobs can be co-scheduled on nodes when resources permits it. - The rest of SLURM such as srun and switches (except srun -s ...), etc. are not - affected by this plugin. SLURM is, from a user point of view, working the same - way as when using the default node selection scheme.</li> - <li>The <i>--exclusive</i> srun switch allows users to request nodes in - exclusive mode even when consumable resources is enabled. see "man srun" - for details. </li> - <li>srun's <i>-s</i> or <i>--share</i> is incompatible with the consumable resource - environment and will therefore not be honored. Since in this environment nodes - are shared by default, <i>--exclusive</i> allows users to obtain dedicated nodes.</li> - </ul> -</ol> -<p class="footer"><a href="#top">top</a></p> +<li>Using <i>--overcommit</i> or <i>-O</i> is allowed. 
When the process to +logical processor pinning is enabled by using an appropriate TaskPlugin +configuration parameter, the extra processes will time share the allocated +resources.</li> +</ul> -<h2>Limitation and future work</h2> - -<p>We are aware of several limitations with the current consumable -resource plug-in and plan to make enhancement the plug-in as we get -time as well as request from users to help us prioritize the features. - -Please send comments and requests about the consumable resources to -<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>. - -<ol start=1 type=1> - <li><b>Issue with --max_nodes, --max_sockets_per_node, --max_cores_per_socket and --max_threads_per_core</b></li> - <ul> - <li><b>Problem:</b> The example below was achieve when using CR_CPU - (default mode). The systems are all "dual socket, dual core, - single threaded systems (= 4 cpus per system)".</li> - <li>The first 3 serial jobs are being allocated to node hydra12 - which means that one CPU is still available on hydra12.</li> - <li>The 4th job "srun -N 2-2 -E 2:2 sleep 100" requires 8 CPUs - and since the algorithm fills up nodes in a consecutive order - (when not in dedicated mode) the algorithm will want to use the - remaining CPUs on Hydra12 first. Because the user has requested - a maximum of two nodes the allocation will put the job on - hold until hydra12 becomes available or if backfill is enabled - until hydra12's remaining CPU gets allocated to another job - which will allow the 4th job to get two dedicated nodes</li> - <li><b>Note!</b> This problem is fixed in SLURM version 1.3.</li> - <li><b>Note!</b> If you want to specify <i>--max_????</i> this - problem can be solved in the current implementation by asking - for the nodes in dedicated mode using <i>--exclusive</i></li>. 
+<h2>General Comments</h2> -<pre> -# srun sleep 100 & -# srun sleep 100 & -# srun sleep 100 & -# squeue -JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1132 allNodes sleep sballe R 0:05 1 hydra12 - 1133 allNodes sleep sballe R 0:04 1 hydra12 - 1134 allNodes sleep sballe R 0:02 1 hydra12 -# srun -N 2-2 -E 2:2 sleep 100 & -srun: job 1135 queued and waiting for resources -#squeue -JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1135 allNodes sleep sballe PD 0:00 2 (Resources) - 1132 allNodes sleep sballe R 0:24 1 hydra12 - 1133 allNodes sleep sballe R 0:23 1 hydra12 - 1134 allNodes sleep sballe R 0:21 1 hydra12 -</pre> - <li><b>Proposed solution:</b> Enhance the selection mechanism to go through {node,socket,core,thread}-tuplets to find available match for specific request (bounded knapsack problem). </li> - </ul> - <li><b>Binding of processes in the case when <i>--overcommit</i> is specified.</b></li> - <ul> - <li>In the current implementation (SLURM 1.2) we have chosen not - to bind process that have been started with <i>--overcommit</i> - flag. The reasoning behind this decision is that the Linux - scheduler will move non-bound processes to available resources - when jobs with process pinning enabled are started. The - non-bound jobs do not affect the bound jobs but co-scheduled - non-bound job would affect each others runtime. We have decided - that for now this is an adequate solution. - </ul> - </ul> -</ol> +<ul> +<li>Slurm's default <b>select/linear</b> plugin is using a best fit algorithm +based on number of consecutive nodes. The same node allocation approach is used +in <b>select/cons_res</b> for consistency.</li> + +<li>The <b>select/cons_res</b> plugin is enabled or disabled cluster-wide.</li> + +<li>In the case where <b>select/cons_res</b> is not enabled, the normal Slurm +behaviors are not disrupted. 
The only changes, users see when using the +<b>select/cons_res</b> plugin, are that jobs can be co-scheduled on nodes when +resources permit it. +The rest of Slurm, such as srun and switches (except srun -s ...), etc. are not +affected by this plugin. Slurm is, from a user point of view, working the same +way as when using the default node selection scheme.</li> + +<li>The <i>--exclusive</i> srun switch allows users to request nodes in +exclusive mode even when consumable resources is enabled. see "man srun" +for details. </li> + +<li>srun's <i>-s</i> or <i>--share</i> is incompatible with the consumable +resource environment and will therefore not be honored. Since in this +environment nodes are shared by default, <i>--exclusive</i> allows users to +obtain dedicated nodes.</li> +</ul> <p class="footer"><a href="#top">top</a></p> @@ -282,12 +177,12 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) <h2>Example of Node Allocations Using Consumable Resource Plugin</h2> <p>The following example illustrates the different ways four jobs -are allocated across a cluster using (1) SLURM's default allocation +are allocated across a cluster using (1) Slurm's default allocation (exclusive mode) and (2) a processor as consumable resource approach.</p> <p>It is important to understand that the example listed below is a -contrived example and is only given here to illustrate the use of cpu as +contrived example and is only given here to illustrate the use of CPU as consumable resources. Job 2 and Job 3 call for the node count to equal the processor count. This would typically be done because that one task per node requires all of the memory, disk space, etc. The @@ -295,12 +190,12 @@ bottleneck would not be processor count.</p> <p>Trying to execute more than one job per node will almost certainly severely impact parallel job's performance. 
-The biggest beneficiary of cpus as consumable resources will be serial jobs or -jobs with modest parallelism, which can effectively share resources. On a lot -of systems with larger processor count, jobs typically run one fewer task than +The biggest beneficiary of CPUs as consumable resources will be serial jobs or +jobs with modest parallelism, which can effectively share resources. On many +systems with larger processor count, jobs typically run one fewer task than there are processors to minimize interference by the kernel and daemons.</p> -<p>The example cluster is composed of 4 nodes (10 cpus in total):</p> +<p>The example cluster is composed of 4 nodes (10 CPUs in total):</p> <ul> <li>linux01 (with 2 processors), </li> @@ -322,12 +217,12 @@ there are processors to minimize interference by the kernel and daemons.</p> <p class="footer"><a href="#top">top</a></p> -<h2>Using SLURM's Default Node Allocation (Non-shared Mode)</h2> +<h2>Using Slurm's Default Node Allocation (Non-shared Mode)</h2> <p>The four jobs have been launched and 3 of the jobs are now pending, waiting to get resources allocated to them. Only Job 2 is running -since it uses one cpu on all 4 nodes. This means that linux01 to linux03 each -have one idle cpu and linux04 has 3 idle cpus.</p> +since it uses one CPU on all 4 nodes. This means that linux01 to linux03 each +have one idle CPU and linux04 has 3 idle CPUs.</p> <pre> # squeue @@ -339,7 +234,7 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) </pre> <p>Once Job 2 is finished, Job 3 is scheduled and runs on -linux01, linux02, and linux03. Job 3 is only using one cpu on each of the 3 +linux01, linux02, and linux03. Job 3 is only using one CPU on each of the 3 nodes. Job 4 can be allocated onto the remaining idle node (linux04) so Job 3 and Job 4 can run concurrently on the cluster.</p> @@ -367,30 +262,29 @@ cannot be shared with other jobs.</p> <p>The output of squeue shows that we have 3 out of the 4 jobs allocated and running. 
This is a 2 running job -increase over the default SLURM approach.</p> +increase over the default Slurm approach.</p> <p> Job 2 is running on nodes linux01 -to linux04. Job 2's allocation is the same as for SLURM's default allocation -which is that it uses one cpu on each of the 4 nodes. Once Job 2 is scheduled -and running, nodes linux01, linux02 and linux03 still have one idle cpu each -and node linux04 has 3 idle cpus. The main difference between this approach and -the exclusive mode approach described above is that idle cpus within a node +to linux04. Job 2's allocation is the same as for Slurm's default allocation +which is that it uses one CPU on each of the 4 nodes. Once Job 2 is scheduled +and running, nodes linux01, linux02 and linux03 still have one idle CPU each +and node linux04 has 3 idle CPUs. The main difference between this approach and +the exclusive mode approach described above is that idle CPUs within a node are now allowed to be assigned to other jobs.</p> <p>It is important to note that <i>assigned</i> doesn't mean <i>oversubscription</i>. The consumable resource approach -tracks how much of each available resource (in our case cpus) must be dedicated +tracks how much of each available resource (in our case CPUs) must be dedicated to a given job. 
This allows us to prevent per node oversubscription of -resources (cpus).</p> +resources (CPUs).</p> <p>Once Job 2 is running, Job 3 is -scheduled onto node linux01, linux02, and Linux03 (using one cpu on each of the -nodes) and Job 4 is scheduled onto one of the remaining idle cpus on Linux04.</p> +scheduled onto node linux01, linux02, and Linux03 (using one CPU on each of the +nodes) and Job 4 is scheduled onto one of the remaining idle CPUs on Linux04.</p> <p>Job 2, Job 3, and Job 4 are now running concurrently on the cluster.</p> <pre> - # squeue JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) 5 lsf sleep root PD 0:00 1 (Resources) @@ -441,11 +335,11 @@ other jobs if they do not use all of the resources on the nodes.</p> to specify that they would like their allocated nodes in exclusive mode. For more information see "man srun". The reason for that is if users have mpi//threaded/openMP -programs that will take advantage of all the cpus within a node but only need +programs that will take advantage of all the CPUs within a node but only need one mpi process per node.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 3 February 2012</p> +<p style="text-align:center;">Last modified 14 August 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/cray.shtml b/doc/html/cray.shtml index ff503e2d0dd6c7d04e176d9ed2409988ca24c764..b6e73a8c1f24c5e0ebed6ada0b9b1b92b703449d 100644 --- a/doc/html/cray.shtml +++ b/doc/html/cray.shtml @@ -179,6 +179,13 @@ zero compute node job requests.</p> SLURM partitions explicitly configured with <b>MinNodes=0</b> (the default minimum node count for a partition is one compute node).</p> +<h3>External Node Use</h3> + +<p>Slurm interactive jobs are not supported from external nodes, however +batch job submissions and all of the other commands will work. 
+If desired, the sbatch command can be used to submit a batch job that creates +an xterm on an external node.</p> + <h3>srun options translations on a Cray</h3> <p>The following <i>srun</i> options are translated to these <i>aprun</i> options. <i>srun</i> options not listed below have no equivalent <i>aprun</i> @@ -568,6 +575,15 @@ located in the same directory as the <i>slurm.conf</i> file. Configuration parameters supported by <i>cray.conf</i> are listed below.</p> <p><dl> +<dt><b>AlpsEngine</b></dt> +<dd>Communication protocol version number to be used between Slurm and +ALPS/BASIL. The default value is BASIL's response to the ENGINE query. +Use with caution: Changes in ALPS communications which are not recognized +by Slurm could result in loss of jobs. +Currently supported values include +1.1, 1.2.0, 1.3.0, 3.1.0, 4.0, 4.1.0, 5.0.0, 5.0.1, 5.1.0 or "latest". +A value of "latest" will use the most current version of Slurm's logic and +can be useful for validation with new versions of ALPS.</dd> <dt><b>apbasil</b></dt> <dd>Fully qualified pathname to the apbasil command. The default value is <i>/usr/bin/apbasil</i>.</dd> @@ -657,6 +673,6 @@ login: # /etc/init.d/slurm start <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 17 December 2012</p> +<p style="text-align:center;">Last modified 3 October 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/disclaimer.shtml b/doc/html/disclaimer.shtml index d0acf192b88afe90d6d86b75b00ee53ea0466ce3..d59c68552d3e2a73eea9f74ee78c6e11d562270b 100644 --- a/doc/html/disclaimer.shtml +++ b/doc/html/disclaimer.shtml @@ -55,6 +55,7 @@ Copyright (C) 2008 Vijay Ramasubramanian<br> Copyright (C) 2007-2008 Red Hat, Inc.<br> Copyright (C) 2007-2009 National University of Defense Technology, China<br> Copyright (C) 2007-2012 Bull<br> +Copyright (c) 2005-2010 Troy D. 
Hanson http://uthash.sourceforge.net<br> Copyright (C) 2005-2008 Hewlett-Packard Development Company, L.P.<br> Copyright (C) 2004-2009, Marcus Holland-Moritz<br> Copyright (C) 2002-2007 The Regents of the University of California<br> @@ -86,7 +87,7 @@ necessarily state or reflect those of the United States government or Lawrence Livermore National Security, LLC, and shall not be used for advertising or product endorsement purposes.</p> -<p style="text-align: center;">Last modified 7 December 2012</p> +<p style="text-align: center;">Last modified 12 December 2012</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml index c7e68e36c3f497f29ab5a3a8e06c765f3344866e..f9f54125d56bda2ce94b931ef1e4882dbf1e0152 100644 --- a/doc/html/documentation.shtml +++ b/doc/html/documentation.shtml @@ -13,10 +13,12 @@ Documenation for other versions of Slurm is distributed with the code</b></p> <li><a href="quickstart.html">Quick Start User Guide</a></li> <li><a href="man_index.html">Man Pages</a></li> <li><a href="rosetta.html">Rosetta Stone of Workload Managers</a></li> +<li><a href="job_array.html">Job Array Support</a></li> <li><a href="cpu_management.html">CPU Management User and Administrator Guide</a></li> <li><a href="mpi_guide.html">MPI and UPC Users Guide</a></li> <li><a href="mc_support.html">Support for Multi-core/Multi-threaded Architectures</a></li> <li><a href="multi_cluster.html">Multi-Cluster Operation</a></li> +<li><a href="hdf5_profile_user_guide.html">Profiling Using HDF5 User Guide</a></li> <li><a href="checkpoint_blcr.html">SLURM Checkpoint/Restart with BLCR</a></li> <li><a href="job_exit_code.html">Job Exit Codes</a></li> <li>Specific Systems</li> @@ -43,12 +45,13 @@ Documenation for other versions of Slurm is distributed with the code</b></p> <li>SLURM Scheduling</li> <ul> <li><a href="cons_res.html">Consumable Resources Guide</a></li> +<li><a href="dynalloc.html">Dynamic Resources Allocation 
(dynalloc)</a></li> <li><a href="elastic_computing.html">Elastic Computing</a></li> <li><a href="gang_scheduling.html">Gang Scheduling</a></li> <li><a href="gres.html">Generic Resource (GRES) Scheduling</a></li> <li><a href="high_throughput.html">High Throughput Computing Guide</a></li> <li><a href="priority_multifactor.html">Multifactor Job Priority</a></li> -<li><a href="priority_multifactor2.html">Multifactor2 Job Priority (Ticket Based)</a></li> +<li><a href="priority_multifactor2.html">Ticket-Based Multifactor Job Priority</a></li> <li><a href="preempt.html">Preemption</a></li> <li><a href="qos.html">Quality of Service (QOS)</a></li> <li><a href="resource_limits.html">Resource Limits</a></li> @@ -77,6 +80,7 @@ Documenation for other versions of Slurm is distributed with the code</b></p> <li><a href="contributor.html">Contributor Agreement</a></li> <li><a href="programmer_guide.html">Programmer Guide</a></li> <li><a href="api.html">Application Programmer Interface (API) Guide</a></li> +<li><a href="add.html">Adding Files or Plugins to Slurm</a></li> <li>Design Information</li> <ul> <li><a href="gres_design.html">Generic Resource (GRES) Design Guide</a></li> @@ -88,6 +92,7 @@ Documenation for other versions of Slurm is distributed with the code</b></p> <ul> <li><a href="authplugins.html">Authentication Plugin Programmer Guide</a></li> <li><a href="crypto_plugins.html">Cryptographic Plugin Programmer Guide</a></li> +<li><a href="ext_sensorsplugins.html">External Sensors Plugin Programmer Guide</a></li> <li><a href="jobacct_gatherplugins.html">Job Accounting Gather Plugin Programmer Guide</a></li> <li><a href="accounting_storageplugins.html">Accounting Storage Plugin Programmer Guide</a></li> <li><a href="gres_plugins.html">Generic Resources (GRES) Plugin Programmer Guide</a></li> @@ -100,14 +105,16 @@ Documenation for other versions of Slurm is distributed with the code</b></p> <li><a href="preemption_plugins.html">Preemption Plugin Programmer Guide</a></li> 
<li><a href="priority_plugins.html">Priority Plugin Programmer Guide</a></li> <li><a href="proctrack_plugins.html">Process Tracking Plugin Programmer Guide</a></li> +<li><a href="acct_gather_profile_plugins.html">Profile Accounting Plugin Programmer Guide</a></li> <li><a href="schedplugins.html">Scheduler Plugin Programmer Guide</a></li> <li><a href="selectplugins.html">Resource Selection Plugin Programmer Guide</a></li> +<li><a href="slurmctld_plugstack.html">Slurmctld Generic Plugin Programmer Guide</a></li> <li><a href="switchplugins.html">Switch (Interconnect) Plugin Programmer Guide</a></li> <li><a href="taskplugins.html">Task Plugin Programmer Guide</a></li> <li><a href="topology_plugin.html">Topology Plugin Programmer Guide</a></li> </li> </ul> -<p style="text-align:center;">Last modified 17 January 2013</p> +<p style="text-align:center;">Last modified 6 June 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 6c74362bee88d300484c037d8f03b8da625e6991..12e881a0909dcfefd19ad7bbb3e57926e060ab6c 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -92,23 +92,19 @@ functions such as intersection and difference. Download the code from:<br> <a href="http://www.nsc.liu.se/~kent/python-hostlist"> http://www.nsc.liu.se/~kent/python-hostlist</a></li><br> +<li><b>Interactive Script</b><br> +A wrapper script that makes it very simple to get an interactive shell on a +cluster. Download the code from:<br> +<a href="https://github.com/alanorth/hpc_infrastructure_scripts/blob/master/slurm/interactive"> +https://github.com/alanorth/hpc_infrastructure_scripts/blob/master/slurm/interactive</a></li><br> + <li><b>Interconnect</b> plugins (Switch plugin)</li> <ul> <li><b>Infiniband</b><br> The <b>topology.conf</b> file for an Infiniband switch can be automatically generated using the <b>ib2slurm</b> tool found here: <a href="https://github.com/fintler/ib2slurm">https://github.com/fintler/ib2slurm</a>. 
- -<li><b>QsNet</b><br> -In order to build the "switch/elan" plugin for SLURM, you will need -the <b>qsnetlibs</b> development libraries from -<a href="http://www.quadrics.com">Quadrics</a>. The Elan -plugin also requires the <b>libelanhosts</b> library and -a corresponding /etc/elanhosts configuration file, used to map -hostnames to Elan IDs. The libelanhosts source is available from -<a href="http://www.schedmd.com/download/extras/libelanhosts-0.9-1.tgz"> -http://www.schedmd.com/download/extras/libelanhosts-0.9-1.tgz</a>. -</ul><br> +</ul> <li><b>I/O Watchdog</b><br> A facility for monitoring user applications, most notably parallel jobs, @@ -165,6 +161,11 @@ Maui Scheduler</a></li> Moab Cluster Suite</a></li> </ul><br> +<li><b>Command wrappers</b><br> +There is a wrapper for Maui/Moab's showq command at +<a href="https://github.com/pedmon/slurm_showq"> +https://github.com/pedmon/slurm_showq</a>. + <li><b>Scripting interfaces</b> <ul> <li>A <b>Perl</b> interface is included in the SLURM distribution in the @@ -223,8 +224,27 @@ there is <a href="http://warewulf.lbl.gov/trac/wiki/Node%20Health%20Check">Node Health Check</a>. It has integration with Slurm as well as Torque resource managers.</li><br> +<li><b>Accounting Tools</b><br> +UBMoD is a web based tool for displaying accounting data from various resource +managers. It aggregates the accounting data from sacct into a MySQL data +warehouse and provides a front end web interface for browsing the data. +For more information, see the +<a href="http://ubmod.sourceforge.net/resource-manager-slurm.html">UBMoD home page</a> and +<a href="https://github.com/ubccr/ubmod">source code</a>.</li><br> + +<li><b>MSlurm</b><br> +MSlurm provides a superstructure for the management of multiple Slurm +environments. Several Slurm clusters - even across multiple Slurm +databases - can run in parallel on a Slurm master and can be administered in an +easy and elegant manner. 
+<ul> +<li><a href="mslurm/mslurm_overview.pdf">Overview</a></li> +<li><a href="mslurm/mslurm_install_instructions.pdf">Installation Instructions</a></li> +<li><a href="mslurm/mslurm.tgz">Code</a></li> +</ul> + </ul> -<p style="text-align:center;">Last modified 16 January 2013</p> +<p style="text-align:center;">Last modified 30 October 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/dynalloc.shtml b/doc/html/dynalloc.shtml new file mode 100644 index 0000000000000000000000000000000000000000..72aa435453329c11dc23fc1cda5848a70e5c1b41 --- /dev/null +++ b/doc/html/dynalloc.shtml @@ -0,0 +1,124 @@ +<!--#include virtual="header.txt"--> + +<h1><a name="top">Dynamic Resource Allocation (dynalloc)</a></h1> + +<h2>Overview</h2> +<p>This document describes SLURM dynamic resource allocation (dynalloc) +and how to enable it.</p> + +<p>SLURM dynamic resource allocation (dynalloc) runs as an optional thread +started when SLURM's control daemon (slurmctld) starts up. Once spawned, the +dynalloc thread runs as a socket server to accept requests such as resource +query, allocation, and deallocation. After receiving such a request, the dynalloc +thread parses the message, performs the action, and then responds to the request.</p> + + +<h2>Configuration</h2> + +<p>To enable dynalloc, some configuration is required on the SLURM side.</p> + +<h3>SLURM Configuration</h3> + +<h4>configure</h4> +<p>When building from the SLURM source code, add <i>--enable-dynamic-allocation</i> +to the execute line of <i>./configure</i>.</p> + +<h4>slurm.conf</h4> +<p>After installation, set the config parameters in <i>slurm.conf</i> +as follows:</p> +<pre> +SlurmctldPlugstack=dynalloc +DynAllocPort=6820 +</pre> + +<p>The default value of <i>DynAllocPort</i> is 6820. 
You can change it if needed.</p> + +<p class="footer"><a href="#top">top</a> + +<h2>Functionalities</h2> + +<h3>Resource Query</h3> +<p>The client might send messages to query how many nodes and slots either +in total or available in SLURM.</p> + +<h4>Get Total Nodes and Slots</h4> +<p>the request message from client: "get total nodes and slots"<br> +the response message could be like: "total_nodes=4 total_slots=16"</p> + +<h4>Get Available Nodes and Slots</h4> +<p>the request message from client: "get available nodes and slots"<br> +the response message could be like: "avail_nodes=4 avail_slots=16"</p> + +<h3>Resource Allocation</h3> +<p>The client might send a request to SLURM for allocating resources.</p> +<p>An allocation request message will consist of two parts:</p> +<ol> +<li>Job part, like "jobid=100 return=all timeout=10"</li> +<li>App part, like "app=0 np=5 N=2 node_list=vm[2-3] flag=mandatory +cpu_bind=cores mem_per_cpu=100 resv_port_cnt=2"</li> +</ol> +<p>An allocation message might consist of one job part and at least one +app (application) part. +For example:<br> +"allocate jobid=100 return=all timeout=10:app=0 np=5 N=2 node_list=vm[2-3] +flag=mandatory cpu_bind=cores mem_per_cpu=100 resv_port_cnt=2:app=1 N=2"</p> + +<p>In the job part of the above message, <b>jobid</b> is optional and will +be sent back to client for identifying the allocation results; the <b>return</b> +flag is also optional, if the return flag ("return=all") is specified, the +allocation results of all apps will be sent back in ONE message, like +"jobid=100:app=0 slurm_jobid=679 allocated_node_list=vm2,vm3 +tasks_per_node=3,2:app=1 slurm_jobid=680 allocated_node_list=vm4,vm5 +tasks_per_node=4(x2)". Otherwise, the allocation result of each app will +be sent back separately, like, msg-1) "jobid=100:app=0 slurm_jobid=681 +allocated_node_list=vm2,vm3 tasks_per_node=3,2", and msg-2) "jobid=100:app=1 +slurm_jobid=682 allocated_node_list=vm4,vm5 tasks_per_node=4(x2)". +<b>timeout</b> (in sec.) 
is the time interval during which the client will +wait for the allocation response. +</p> + +<p>In the app part of the above message, <b>app</b> is the application/task +id which will be sent back to client for identifying the allocation result; +<b>np</b> is the number of processes that will run, namely the number of slots +that should be allocated for this app; <b>N</b> is the number of nodes that should +be allocated; <b>node_list</b> is the node pool from which to select nodes; +<b>flag</b> is the allocation requirement which can be "mandatory" or +"optional", if "flag=mandatory", all requested nodes must be allocated +from <b>node_list</b>; else if "flag=optional", try to allocate nodes +from <b>node_list</b>, and the allocation should include all nodes in the +given list that are currently available, if that is not enough to meet the +requested node number <b>N</b>, then take any other nodes that are available +to fill out the requested number. <b>cpu_bind</b> is to bind tasks to CPUs, +which is used only when the task/affinity or task/cgroup plugin is enabled +(please refer to 'man salloc'). <b>mem_per_cpu</b> is the minimum memory required +per allocated CPU in megabytes, which is used when the task/cgroup plugin is +enabled. <b>resv_port_cnt</b> is the port count required to be allocated, if +not specified, resv_port_cnt=1 by default. +</p> + +<p>A response message might be like "jobid=100:app=0 slurm_jobid=679 +allocated_node_list=vm2,vm3 tasks_per_node=3,2 resv_ports=12001-12002:app=1 +allocate_failure". In this example, 'app=0' gets a successful allocation +while the allocation for 'app=1' fails. <b>Note that</b> in the response +message with successful allocation for an app, a <b>slurm_jobid</b> is +returned for later operation, e.g., process launch, resource deallocation, etc. 
+</p> + +<p class="footer"><a href="#top">top</a> + +<h3>Resource Deallocation</h3> + +<p>After job execution, the client might release resources to SLURM.</p> +<p>A resource deallocation request message from client can be like: +"<b>deallocate</b> slurm_jobid=744 job_return_code=0:slurm_jobid=745 +job_return_code=-1". Note that it is possible to release a number of +allocations in ONE message, and each allocation is labeled by a +<b>slurm_jobid</b>. All resources related with the slurm_job will be +released, e.g., cores/nodes, memory, and ports. +</p> + +<p class="footer"><a href="#top">top</a> + +<p style="text-align:center;">Last modified 19 February 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/ext_sensorsplugins.shtml b/doc/html/ext_sensorsplugins.shtml new file mode 100644 index 0000000000000000000000000000000000000000..1daef16373c0c24341a1b374e974e2621fdaeaa2 --- /dev/null +++ b/doc/html/ext_sensorsplugins.shtml @@ -0,0 +1,112 @@ +<!--#include virtual="header.txt"--> + +<h1><a name="top">SLURM External Sensors Plugin API (ExtSensorsType) +</a></h1> + +<h2> Overview</h2> +<p> This document describes SLURM external sensors plugins and the API that +defines them. It is intended as a resource to programmers wishing to write +their own SLURM external sensors plugins. + +<p>SLURM external sensors plugins must conform to the +SLURM Plugin API with the following specifications: + +<p><span class="commandline">const char +plugin_name[]="<i>full text name</i>"</span> +<p style="margin-left:.2in"> +A free-formatted ASCII text string that identifies the plugin. + +<p><span class="commandline">const char +plugin_type[]="<i>major/minor</i>"</span><br> +<p style="margin-left:.2in"> +The major type must be "ext_sensors." +The minor type can be any suitable name +for the type of external sensors. We currently use +<ul> +<li><b>none</b>— No external sensors data is collected. +<li><b>rrd</b>—Gets external sensors data from the +RRD database. 
+</ul> +<p>The programmer is urged to study +<span class="commandline">src/plugins/ext_sensors/rrd</span> and +<span class="commandline">src/common/slurm_ext_sensors.c</span> +for a sample implementation of a SLURM external sensors plugin. +<p class="footer"><a href="#top">top</a> + +<h2>API Functions</h2> +<p>All of the following functions are required. Functions which are not +implemented must be stubbed. + +<p class="commandline">extern int ext_sensors_read_conf(void) +<p style="margin-left:.2in"><b>Description</b>:<br> +Reads the external sensors plugin configuration file (ext_sensors.conf) +and populates the configuration structure. +Called by the slurmctld daemon. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline"> None</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline">extern int ext_sensors_free_conf(void) +<p style="margin-left:.2in"><b>Description</b>:<br> +Frees the memory allocated for the external sensors configuration. +Called by the slurmctld daemon. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline"> None</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline">extern int ext_sensors_p_update_component_data(void) +<p style="margin-left:.2in"><b>Description</b>:<br> +Updates external sensors data for data types and component types as configured +in ext_sensors.conf. +Called by the slurmctld daemon. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline"> None</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. 
+ +<p class="commandline">extern int ext_sensors_p_get_stepstartdata(struct step_record *step_rec) +<p style="margin-left:.2in"><b>Description</b>:<br> +Sets external sensors data in the step record when a job step starts. +Called by slurmctld. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline"> step_rec</span> (input) pointer to step record.<br> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline">extern int ext_sensors_p_get_stependdata(struct step_record *step_rec) +<p style="margin-left:.2in"><b>Description</b>:<br> +Sets external sensors data in the step record when a job step ends. +Called by slurmctld. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline"> step_rec</span> (input) pointer to step record.<br> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<h2>Parameters</h2> +<p>These parameters can be used in the slurm.conf to configure the +plugin and the frequency at which to gather external sensors data.</p> +<dl> +<dt><span class="commandline">ExtSensorsType</span> +<dd>Specifies which external sensors plugin should be used. +<dt><span class="commandline">ExtSensorsFreq</span> +<dd>Time interval between pollings in seconds. +</dl> + +<h2>Versioning</h2> +<p>This document describes version 1 of the SLURM External Sensors Plugin API. +Future releases of SLURM may revise this API. 
An external sensors plugin +conveys its ability to implement a particular API version using the mechanism +outlined for SLURM plugins.</p> + +<p class="footer"><a href="#top">top</a> + +<p style="text-align:center;">Last modified 12 February 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml index cd35fd7c33ffc9af32bcee097a271191f502ba7b..11cd01b6ea0826ffeb061445d723dad39d57531a 100644 --- a/doc/html/faq.shtml +++ b/doc/html/faq.shtml @@ -1,6 +1,12 @@ <!--#include virtual="header.txt"--> <h1><a name="top">Frequently Asked Questions</a></h1> + +<h2>For Management</h2> +<ol> +<li><a href="#foss">Why should I use Slurm or other Free Open Source Software (FOSS)?</a></li> +</ol> + <h2>For Users</h2> <ol> <li><a href="#comp">Why is my job/node in COMPLETING state?</a></li> @@ -49,6 +55,7 @@ SLURM? Why does the DAKOTA program not run with SLURM?</a></li> <li><a href="#estimated_start_time">Why does squeue (and "scontrol show jobid") sometimes not display a job's estimated start time?</a></li> <li><a href="#ansys">How can I run an Ansys program with Slurm?</a></li> +<li><a href="#mic">How can I run programs on an Intel Phi (MIC) processor?</a></li> </ol> <h2>For Administrators</h2> @@ -148,8 +155,32 @@ priority/multifactor plugin?</a></li> script for Slurm?</a></li> <li><a href="#add_nodes">What process should I follow to add nodes to Slurm?</a></li> <li><a href="#licenses">Can Slurm be configured to manage licenses?</a></li> +<li><a href="#salloc_default_command">Can the salloc command be configured to +launch a shell on a node in the job's allocation?</a></li> +<li><a href="#upgrade">What should I be aware of when upgrading Slurm?</a></li> +<li><a href="#torque">How easy is it to switch from PBS or Torque to Slurm?</a></li> +<li><a href="#sssd">I am having trouble using SSSD with Slurm.</a></li> +<li><a href="#ha_db">How critical is configuring high availability for my + database?</a></li> +<li><a href="#sql">How
can I use double quotes in MySQL queries?</a></li> +<li><a href="#reboot">Why is a compute node down with the reason set to +"Node unexpectedly rebooted"?</a></li> </ol> +<h2>For Management</h2> +<p><a name="foss"><b>1. Why should I use Slurm or other Free Open Source Software (FOSS)?</b></a><br> +Free Open Source Software (FOSS) does not mean that it is without cost. +It does mean that you have access to the code so that you are free to +use it, study it, and/or enhance it. +If the software is large and complex, like Slurm or the Linux kernel, +then its use is not without cost. +If your work is important, you'll want the leading Slurm experts at your +disposal to keep your systems operating at peak efficiency. +While Slurm has a global development community incorporating leading edge +technology, <a href="http://www.schedmd.com">SchedMD</a> personnel have developed +most of the code and can provide competitively priced commercial support. +Customers switching from commercial workload managers to Slurm typically +report higher scalability, better performance and lower costs.</p> <h2>For Users</h2> <p><a name="comp"><b>1. Why is my job/node in COMPLETING state?</b></a><br> @@ -859,6 +890,54 @@ exit 0 $ srun -n <tasks> ./fluent-srun.sh </pre> +<p><a name="mic"><b>28. How can I run programs on an Intel Phi (MIC) +processor?</b></a><br> +Two programming models are supported, offloading and native mode. +For a good description of how to build and run applications, please see +<a href="https://confluence.csc.fi/display/HPCproto/HPC+Prototypes#HPCPrototypes-Nativeprogrammingmodel">CSC MIC documentation</a>. +Note that some of the information presented in this document is configuration +dependent. The <i>mpirun-mic</i> is included in the Slurm distribution in the +<i>contribs/mic</i> directory. Excerpts of the CSC documentation follow.</p> + +<p><b>Executable Auto-Offloading</b><br> +The Phi nodes have Executable Auto-Offloading (EAO) enabled by default.
+This feature is developed at CSC and is not currently in the standard Xeon Phi +distribution. +With this feature, any executable in the K1OM (MIC) binary format that the user +tries to run on the host, will transparently be executed on the Xeon Phi +coprocessor card instead. The execution is performed using the /usr/bin/micrun +script.</p> +<p>By default all environment variables with the MIC_ prefix will be passed to +the binary, with the prefix stripped away. For example +(MIC_LD_LIBRARY_PATH -> LD_LIBRARY_PATH).</p> +<p>EAO can be disabled by setting the environment variable MICRUN_DISABLE +(i.e. export MICRUN_DISABLE=1).</p> + +<p><b>Offload programming model</b><br> +The Intel compilers support offload compilation automatically. This means +either offloading a code section using offload pragmas or calling an +offload-enabled library (e.g. MKL).</p> +<p>In order to run offload jobs, one needs to set the GRES (Generic Resource +Scheduling) parameter '--gres=mic:1'. For example:</p> +<pre> +$ srun --gres=mic:1 ./hello +</pre> +<p>If this is not set, the user will see the following warning:</p> +<pre> +offload warning: OFFLOAD_DEVICES device number -1 does not correspond to a physical device +</pre> + +<p><b>Native OpenMP code</b><br> +To compile OpenMP code natively, you can use the -mmic flag.</p> +<pre> +$ module load intel +$ icc -mmic -openmp hello.c -o hello.mic +</pre> +<p>To run, use the srun command. You may need to explicitly specify a Slurm partition containing MIC processors, for example:</p> +<pre> +$ srun -p mic ./hello.mic +</pre> + <p class="footer"><a href="#top">top</a></p> @@ -1079,7 +1158,16 @@ slurm.conf (<i>SlurmdLogFile</i> and <i>SlurmdPidFile</i>). </li> <li>When starting the <i>slurmd</i> daemon, include the <i>NodeName</i> of the node that it is supposed to serve on the execute line (e.g. "slurmd -N hostname").</li> +<li> This is an example of the <i>slurm.conf</i> file with the emulated nodes +and ports configuration.
Any valid value for the CPUs, memory or other +valid node resources can be specified.</li> </ol> + +<pre> +NodeName=dummy26[1-100] NodeHostName=achille Port=[6001-6100] NodeAddr=127.0.0.1 CPUs=4 RealMemory=6000 +PartitionName=mira Default=yes Nodes=dummy26[1-100] +</pre> + <p>It is strongly recommended that SLURM version 1.2 or higher be used for this due to its improved support for multiple slurmd daemons. See the @@ -1523,6 +1611,24 @@ SLURM?</b></a></br> <p>The following lines should also be added to the global <i>.tvdrc</i> file for TotalView to operate with SLURM: <pre> +# Enable debug server bulk launch: Checked +dset -set_as_default TV::bulk_launch_enabled true + +# Command: +# Beginning with TV 7X.1, TV supports SLURM and %J. +# Specify --mem-per-cpu=0 in case Slurm configured with default memory +# value and we want TotalView to share the job's memory limit without +# consuming any of the job's memory so as to block other job steps. +dset -set_as_default TV::bulk_launch_string {srun --mem-per-cpu=0 -N%N -n%N -w`awk -F. 'BEGIN {ORS=","} {if (NR==%N) ORS=""; print $1}' %t1` -l --input=none %B/tvdsvr%K -callback_host %H -callback_ports %L -set_pws %P -verbosity %V -working_directory %D %F} + +# Temp File 1 Prototype: +# Host Lines: +# SLURM NodeNames need to be unadorned hostnames. In case %R returns +# fully qualified hostnames, list the hostnames in %t1 here, and use +# awk in the launch string above to strip away domain name suffixes. +dset -set_as_default TV::bulk_launch_tmpfile1_host_lines {%R} +</pre></p> +<!-- OLD FORMAT dset TV::parallel_configs { name: SLURM; description: SLURM; @@ -1533,7 +1639,7 @@ dset TV::parallel_configs { env: ; force_env: false; } -</pre></p> +!--> <p><a name="git_patch"><b>44. 
How can a patch file be generated from a SLURM commit in github?</b></a></br> @@ -1595,8 +1701,80 @@ without restarting the slurmctld daemon, but it is possible to dynamically reserve licenses and remove them from being available to jobs on the system (e.g. "scontrol update reservation=licenses_held licenses=foo:5,bar:2").</p> +<p><a name="salloc_default_command"><b>50. Can the salloc command be configured to +launch a shell on a node in the job's allocation?</b></a></br> +Yes, just use the SallocDefaultCommand configuration parameter in your +slurm.conf file as shown below.</p> +<pre> +SallocDefaultCommand="srun -n1 -N1 --mem-per-cpu=0 --pty --preserve-env --mpi=none $SHELL" +</pre> + +<p><a name="upgrade"><b>51. What should I be aware of when upgrading Slurm?</b></a></br> +See the Quick Start Administrator Guide <a href="quickstart_admin.html#upgrade">Upgrade</a> +section for details.</p> + +<p><a name="torque"><b>52. How easy is it to switch from PBS or Torque to Slurm?</b></a></br> +A lot of users don't even notice the difference. +Slurm has wrappers available for the mpiexec, pbsnodes, qdel, qhold, qrls, +qstat, and qsub commands (see contribs/torque in the distribution and the +"slurm-torque" RPM). +There is also a wrapper for the showq command at +<a href="https://github.com/pedmon/slurm_showq"> +https://github.com/pedmon/slurm_showq</a>. +Slurm recognizes and translates the "#PBS" options in batch scripts. +Most, but not all options are supported. +Please share any enhancements that you make.</p> + +<p><a name="sssd"><b>53. I am having trouble using SSSD with Slurm.</b></a></br> +SSSD or System Security Services Daemon does not allow enumeration of group +members by default. +SSSD is also case sensitive, which could possibly raise other issues. +Add the following lines to <i>/etc/sssd/sssd.conf</i> on your head node to +address these issues:</p> +<pre> +enumerate = True +case_sensitive = False +</pre> + +<p><a name="ha_db"><b>54.
How critical is configuring high availability for my +database?</b></a></br> +<ul> +<li>Consider if you really need mysql failover. A short outage of slurmdbd is not +a problem, because slurmctld will store all data in memory and send it to +slurmdbd when it's back operating. The slurmctld daemon will also cache all +user limits and fair share information.</li> +<li>You cannot use ndb, since slurmdbd/mysql uses keys on BLOB values (and +maybe something more from the incompatibility list).</li> +<li>You can set up "classical" Linux HA, with heartbeat/corosync to migrate +IP between master/backup mysql servers and:</li> +<ul> + <li>Configure one way replication of mysql, and change master/backup roles on + failure</li> + <li>Use shared storage for master/slave mysql servers database, and start + backup on master mysql failure.</li> +</ul> +</ul> + +<p><a name="sql"><b>55. How can I use double quotes in MySQL queries?</b></a></br> +Execute: +<pre> +SET session sql_mode='ANSI_QUOTES'; +</pre> +<p>This will allow double quotes in queries like this:</p> +<pre> +show columns from "tux_assoc_table" where Field='is_def'; +</pre> + +<p><a name="reboot"><b>56. Why is a compute node down with the reason set to +"Node unexpectedly rebooted"?</b></a></br> +This is indicative of the slurmctld daemon running on the cluster's head node +as well as the slurmd daemon on the compute node when the compute node reboots. +If you wish to prevent this condition from setting the node into a DOWN state +then configure ReturnToService to 2. See the slurm.conf man page for details.
+Otherwise use the scontrol or sview to manually return the node to service.</p> + <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 4 April 2013</p> +<p style="text-align:center;">Last modified 3 October 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/hdf5_job_outline.png b/doc/html/hdf5_job_outline.png new file mode 100644 index 0000000000000000000000000000000000000000..7a3844d59d9894cafce21a646956b7ac4189fea2 Binary files /dev/null and b/doc/html/hdf5_job_outline.png differ diff --git a/doc/html/hdf5_profile_user_guide.shtml b/doc/html/hdf5_profile_user_guide.shtml new file mode 100644 index 0000000000000000000000000000000000000000..fe5b73822b95aebf97e8fbb00798cfcd59a96839 --- /dev/null +++ b/doc/html/hdf5_profile_user_guide.shtml @@ -0,0 +1,346 @@ +<!--#include virtual="header.txt"--> +<!-- Copyright (C) 2013 Bull S. A. S. + Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. --> + +<h1>Profiling Using HDF5 User Guide</h1> + +<h2>Contents</h2> +<a href="#Overview">Overview</a><br> +<a href="#Administration">Administration</a><br> +<a href="#Profiling">Profiling Jobs</a><br> +<a href="#HDF5">HDF5</a><br> +<a href="#DataSeries">Data Structure</a><br> + + + +<a id="Overview"></a> +<h2>Overview</h2> +<p>The AcctGatherProfileType/hdf5 plugin allows SLURM to coordinate collecting +data on jobs it runs on a cluster that is more detailed than is practical to +include in its database. The data comes from periodically sampling various +performance data either collected by SLURM, the operating system, or +component software. 
The plugin will record the data from each source +as a <b>Time Series</b> and also accumulate totals for each statistic for +the job.</p> + +<p>Time Series are energy data collected by an acct_gather_energy plugin, +I/O data from a network interface collected by an acct_gather_infiniband plugin, +I/O data from parallel file systems such as Lustre collected by an +acct_gather_filesystem plugin, and task performance data such as local disk I/O, +cpu consumption, and memory use from a jobacct_gather plugin. +Data from other sources may be added in the future.</p> + +<p>The data is collected into a file on a shared file system for each step on +each allocated node of a job and then merged into a HDF5 file. +Individual files on a shared file system were chosen because it is possible +that the data is voluminous so solutions that pass data to the SLURM control +daemon via RPC may not scale to very large clusters or jobs with +many allocated nodes.</p> + +<p>A separate <a href="acct_gather_profile_plugins.html"> +SLURM Profile Accounting Plugin API (AcctGatherProfileType)</a> documents how to +write other Profile Accounting plugins.</P> + +<a id="Administration"></a> +<h2>Administration</h2> + +<h3>Shared File System</h3> +<div style="margin-left: 20px;"> +<p>The HDF5 Profile Plugin requires a common shared file system on all +the compute nodes. While a job is running, the plugin writes a +file into this file system for each step of the job on each node. When +the job ends, the merge process is launched and the node-step files +are combined into one HDF5 file for the job.</p> + +<p>The root of the directory structure is declared in the <b>ProfileHDF5Dir</b> +option in the acct_gather.conf file.
The directory will be created by SLURM +if it doesn't exist.</p> + +<p>Each user that creates a profile will have a subdirectory of the profile +directory that has read/write permission only for the user.</p> +</span> +</div> +<h3>Configuration parameters</h3> + +<p><div style="margin-left: 20px;"> +<p>The profile plugin is enabled in the +<a href="slurm.conf.html">slurm.conf</a> file and it is internally +configured in the +<a href="acct_gather.conf.html">acct_gather.conf</a> file.</p> +</div> +<div style="margin-left: 20px;"> +<h4>slurm.conf parameters</h4> +<div style="margin-left: 20px;"> +This enables the HDF5 plugin: +<p><b>AcctGatherProfileType</b> = acct_gather_profile/hdf5</p> +This sets the sampling frequency for data types: +<p><b>JobAcctGatherFrequency</b> = &lt;seconds&gt;</p> +</div> +</div> +<div style="margin-left: 20px;"> +<h4>acct_gather.conf parameters</h4> +<div style="margin-left: 20px;"> +<p>These parameters are directly used by the HDF5 Profile Plugin.</p> +<dl> +<dt><b>ProfileHDF5Dir</b> = &lt;path&gt;</dt> +<p> +This parameter is the path to the shared folder into which the +acct_gather_profile plugin will write detailed data as a HDF5 file. +The directory is assumed to be on a file system shared by the controller and +all compute nodes. This is a required parameter.</p> + +<dt><b>ProfileHDF5CollectDefault</b> = [options]</dt> +<p> +A comma delimited list of data types to be collected for each job submission. +Use this option with caution. A node-step file will be created on every +node for every step of every job. They will not automatically be merged +into job files. (Even job files for large numbers of small jobs would fill the +file system.) This option is intended for test environments where you +might want to profile a series of jobs but do not want to have to +add the --profile option to the launch scripts. +The options are described below and in the man pages for acct_gather.conf, +srun, salloc and sbatch commands.
+</dd> +</dl> +</div> +</div> + + +<div style="margin-left: 20px;"> +<h4>Time Series Control Parameters</h4> +<div style="margin-left: 20px;"> +<p>Other plugins add time series data to the HDF5 collection. They typically +have a default polling frequency specified in slurm.conf in the +JobAcctGatherFrequency parameter. The polling frequency can be overridden +using the --acctg-freq +<a href="srun.html">srun</a> parameter. +They are both of the form task=sec,energy=sec,luster=sec,network=sec.</p> + +<p>The IPMI energy plugin also needs the EnergyIPMIFrequency value set +in the acct_gather.conf file. This sets the rate at which the plugin samples +the external sensors. This value should be the same as the energy=sec in +either JobAcctGatherFrequency or --acctg-freq.</p> + +<p>Note that the IPMI and profile sampling are not synchronous. +The profile sample simply takes the last available IPMI sample value. +If the profile energy sample is more frequent than the IPMI sample rate, +the IPMI value will be repeated. If the profile energy sample is less frequent +than the IPMI sample rate, IPMI values will be lost.</p> + +<p>Also note that the smallest effective IPMI (EnergyIPMIFrequency) sample rate +for 2013 era Intel processors is 3 seconds.</p> + +</div> +</div> +<a id="Profiling"></a> +<h2>Profiling Jobs</h2> +<h3>Data Collection</h3> +<p>The --profile option on salloc|sbatch|srun controls whether data is +collected and what type of data is collected. If --profile is not specified +no data is collected unless the <B>ProfileHDF5CollectDefault</B> +option is used in acct_gather.conf. --profile on the command line overrides +any value specified in the configuration file.</p> + +<DT><b>--profile</b>=&lt;all|none|[energy[,|task[,|lustre[,|network]]]]&gt; +<DD> +<p>enables detailed data collection by the acct_gather_profile plugin.
+Detailed data are typically time-series that are stored in a HDF5 file for +the job.</p></DD> +</DT> +<div style="margin-left: 20px;"> +<DL> +<DT><B>All</B> +<DD>All data types are collected. (Cannot be combined with other values.) +</DD></DT> +<DT><B>None</B> +<DD>No data types are collected. This is the default. (Cannot be +combined with other values.) +</DD></DT> + +<DT><B>Energy</B> +<DD>Energy data is collected.</DD></DT> + +<DT><B>Lustre</B> +<DD>Lustre data is collected.</DD></DT> + +<DT><B>Network</B> +<DD>Network (InfiniBand) data is collected.</DD></DT> + +<DT><B>Task</B> +<DD>Task (I/O, Memory, ...) data is collected.</DD></DT> + +</DL> +</div> + +<h3>Data Consolidation</h3> +<p>The node-step files are merged into one HDF5 file for the job using the +<a href="sh5util.html">sh5util</a> command.</p> + +<p>If the job is started with sbatch, the command line may be added to the normal +launch script. For example:</p> +<pre> +sbatch -n1 -d$SLURM_JOB_ID --wrap="sh5util -j $SLURM_JOB_ID" +</pre> + +<h3>Data Extraction</h3> +<p>The <a href="sh5util.html">sh5util</a> program can also be used to extract +specific data from the HDF5 file and write it in <i>comma separated value (csv)</i> +form for importation into other analysis tools such as spreadsheets.</p> + +<a id="HDF5"></a> +<h2>HDF5</h2> +<p>HDF5 is a well known structured data format that allows heterogeneous but +related data to be stored in one file. +(i.e. sections for energy statistics, network I/O, Task data, etc.) +Its internal structure resembles a +file system with <b>groups</b> being similar to <i>directories</i> and +<b>data sets</b> being similar to <i>files</i>. It also allows <b>attributes</b> +to be attached to groups to store application defined properties.</p> + +<p>There are commodity programs, notably +<a href="http://www.hdfgroup.org/hdf-java-html/hdfview/index.html"> +HDFView</a> for viewing and manipulating these files.
+ +<p>Below is a screen shot from HDFView expanding the job tree and showing the +attributes for a specific task.</p> +<br> +<img src="hdf5_task_attr.png" width="275" height="275" > + + +<a id="DataSeries"></a> +<h2>Data Structure</h2> + +<table> +<tr> +<td><img src="hdf5_job_outline.png" width="205" height="570"></td> +<td style="vertical-align: top;"> +<div style="margin-left: 5px;"> +<p>In the job file, there will be a group for each <b>step</b> of the job. +Within each step, there will be a group for nodes, and a group for tasks.</p> +</div> +<ul> +<li> +The <b>nodes</b> group will have a group for each node in the step allocation. +For each node group, there is a sub-group for Time Series and another +for Totals. +<ul> +<li> +The <b>Time Series</b> group +contains a group/dataset containing the time series for each collector. +</li> +<li> +The <b>Totals</b> group contains a group/dataset that has corresponding +Minimum, Average, Maximum, and Sum Total for each item in the time series. +</li> +</ul> +<li> +The <b>Tasks</b> group will only contain a subgroup for each task. +It primarily contains an attribute stating the node on which the task was +executed. This set of groups is essentially a cross reference table. +</li> +</ul> +</td></tr> +</table> + +<h3>Energy Data</h3> +<p><b>AcctGatherEnergyType</b>=acct_gather_energy/ipmi<p> +is required in slurm.conf to collect energy data. +Appropriately set energy=freq in either JobAcctGatherFrequency in slurm.conf +or in --acctg-freq on the command line. +Also appropriately set EnergyIPMIFrequency in acct_gather.conf.</p> +<p>Each data sample in the Energy Time Series contains the following data items. +</p><DL> +<DT><B>Date Time</B> +<DD>Time of day at which the data sample was taken.
This can be used to +correlate activity with other sources such as logs.</DD></DT> +<DT><B>Time</B> +<DD>Elapsed time since the beginning of the step.</DD></DT> +<DT><B>Power</B> +<DD>Power consumption during the interval.</DD></DT> +<DT><B>CPU Frequency</B> +<DD>CPU Frequency at time of sample in kilohertz.</DD></DT> +</DL> + +<h3>Lustre Data</h3> +<p><b>AcctGatherFilesystemType</b>=acct_gather_filesystem/lustre<p> +is required in slurm.conf to collect Lustre data. +Appropriately set luster=freq in either JobAcctGatherFrequency in slurm.conf +or in --acctg-freq on the command line.</p> + +<p>Each data sample in the Lustre Time Series contains the following data items. +</p><DL> +<DT><B>Date Time</B> +<DD>Time of day at which the data sample was taken. This can be used to +correlate activity with other sources such as logs.</DD></DT> +<DT><B>Time</B> +<DD>Elapsed time since the beginning of the step.</DD></DT> +<DT><B>Reads</B> +<DD>Number of read operations.</DD></DT> +<DT><B>Megabytes Read</B> +<DD>Number of megabytes read.</DD></DT> +<DT><B>Writes</B> +<DD>Number of write operations.</DD></DT> +<DT><B>Megabytes Write</B> +<DD>Number of megabytes written.</DD></DT> +</DL> + +<h3>Network (InfiniBand) Data</h3> +<p><b>AcctGatherInfinibandType</b>=acct_gather_infiniband/ofed<p> +is required in slurm.conf to collect network data. +Appropriately set network=freq in either JobAcctGatherFrequency in slurm.conf +or in --acctg-freq on the command line.</p> +<p>Each data sample in the Network Time Series contains the following +data items.</p> +<DL> +<DT><B>Date Time</B> +<DD>Time of day at which the data sample was taken.
This can be used to +correlate activity with other sources such as logs.</DD></DT> +<DT><B>Time</B> +<DD>Elapsed time since the beginning of the step.</DD></DT> +<DT><B>Packets In</B> +<DD>Number of packets coming in.</DD></DT> +<DT><B>Megabytes Read</B> +<DD>Number of megabytes coming in through the interface.</DD></DT> +<DT><B>Packets Out</B> +<DD>Number of packets going out.</DD></DT> +<DT><B>Megabytes Write</B> +<DD>Number of megabytes going out through the interface.</DD></DT> +</DL> + +<h3>Task Data</h3> +<p><b>JobAcctGatherType</b>=jobacct_gather/linux<p> +is required in slurm.conf to collect task data. +Appropriately set task=freq in either JobAcctGatherFrequency in slurm.conf +or in --acctg-freq on the command line.</p> +<p>Each data sample in the Task Time Series contains the following data +items.</p> +<DL> +<DT><B>Date Time</B> +<DD>Time of day at which the data sample was taken. This can be used to +correlate activity with other sources such as logs.</DD></DT> +<DT><B>Time</B> +<DD>Elapsed time since the beginning of the step.</DD></DT> +<DT><B>CPU Frequency</B> +<DD>CPU Frequency at time of sample.</DD></DT> +<DT><B>CPU Time</B> +<DD>Seconds of CPU time used during the sample.</DD></DT> +<DT><B>CPU Utilization</B> +<DD>CPU Utilization during the interval.</DD></DT> +<DT><B>RSS</B> +<DD>Value of RSS at time of sample.</DD></DT> +<DT><B>VM Size</B> +<DD>Value of VM Size at time of sample.</DD></DT> +<DT><B>Pages</B> +<DD>Pages used in sample.</DD></DT> +<DT><B>Read Megabytes</B> +<DD>Number of megabytes read from local disk.</DD></DT> +<DT><B>Write Megabytes</B> +<DD>Number of megabytes written to local disk.</DD></DT> +</DL> + + +<p class="footer"><a href="#top">top</a></p> + +<p style="text-align:center;">Last modified 1 July 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/hdf5_task_attr.png b/doc/html/hdf5_task_attr.png new file mode 100644 index 0000000000000000000000000000000000000000..5b287fcef10c4160051907a5fb2c058ed9b2020a
Binary files /dev/null and b/doc/html/hdf5_task_attr.png differ diff --git a/doc/html/header.txt b/doc/html/header.txt index e41127fb6bfaae52f8c36607a12b1de2905b1679..74835bbf471b1b374c75b1d256cd51aab928a4b8 100644 --- a/doc/html/header.txt +++ b/doc/html/header.txt @@ -66,10 +66,9 @@ window.onresize = window_check; </ul> <div id="slurmh2">Installing</div> <ul> - <li><a href="platforms.shtml" class="nav">Platforms</a></li> <li><a href="download.shtml" class="nav">Download</a></li> - <li><a href="download.shtml#related" class="nav">Related Software</a></li> <li><a href="quickstart_admin.shtml" class="nav">Installation Guide</a></li> + <li><a href="platforms.shtml" class="nav">Platforms</a></li> </ul> <a href="http://www.schedmd.com"><img class="displayed" src="schedmd.png" width="130" usemap="#Map"></a> </div> diff --git a/doc/html/high_throughput.shtml b/doc/html/high_throughput.shtml index 9f6bfbb37ade62d14ef830a822741181ae8d5709..cf4f1486ccb38fddc3af971568f844f0f54e1a55 100644 --- a/doc/html/high_throughput.shtml +++ b/doc/html/high_throughput.shtml @@ -111,9 +111,11 @@ the default value of <b>default_queue_depth</b> should be fine in most cases).</li> <li>The <i>sched/backfill</i> plugin has relatively high overhead if used with large numbers of job. Configuring <b>max_job_bf</b> to a modest size (say 100 -jobs or less) and <b>interval</b> to 30 seconds or more will limit the +jobs or less) and <b>bf_interval</b> to 30 seconds or more will limit the overhead of backfill scheduling (NOTE: the default values are fine for both -of these parameters).</li> +of these parameters). Other backfill options available for tuning backfill +scheduling include <b>bf_max_job_user</b>, <b>bf_resolution</b> and +<b>bf_window</b>. 
See the slurm.conf man page for details.</li> </ul></li> <li><b>SchedulerType</b>: If most jobs are short lived then use of the <i>sched/builtin</i> plugin is @@ -156,6 +158,6 @@ not appear to add any measurable overhead.</li> appropriate for your environment.</li> </ul> -<p style="text-align:center;">Last modified 9 August 2012</p> +<p style="text-align:center;">Last modified 21 May 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/ibm-pe.shtml b/doc/html/ibm-pe.shtml index e2c7728248f20e591ab2d8e7c8c7752617a1b903..7604aeb3b0ceb48d41663549bccb04a6bfcbd553 100644 --- a/doc/html/ibm-pe.shtml +++ b/doc/html/ibm-pe.shtml @@ -1,6 +1,7 @@ <!--#include virtual="header.txt"--> -<h1>IBM Parallel Environment User and Administrator Guide</h1> +<h1>IBM Parallel Environment<br> +User and Administrator Guide</h1> <p> <a href="#overview">Overview</a><br> @@ -355,6 +356,12 @@ instead of the users tasks. The exit code of the job allocation if started with srun will be correct as we will grab the exit code from the wrapped poe.</p> +<p>Use of the srun command rather than the poe command is recommeded to launch +tasks. If the poe command is used with a hostlist file (e.g. "-hfile" option or +"MP_HOSTFILE" environment variable or a "host.list" file in the current working +directory), the resd option must be set to yes (e.g. "-resd yes" option or +"MP_RESD=yes" environment variable).</p> + <h2><a name="admin">System Administration</a></h2> <p>There are several critical SLURM configuration parameters for use with PE. @@ -375,7 +382,7 @@ must be terminated before the switch resources can be released for use by anothe program. This means that reliable tracking of all spawned processes is critical for switch use. Use of <b>ProctrackType=proctrack/cgroup</b> is strongly recommended. 
Use of any other process tracking plugin significantly increases -the likelyhood of orphan processes that must be manually identified and killed +the likelihood of orphan processes that must be manually identified and killed in order to release switch resources. While it is possible to to configure distinct <b>NodeName</b> and <b>NodeHostName</b> parameters for the compute nodes, this is discouraged @@ -415,27 +422,33 @@ For example:</p> %_with_libnrt "/opt/ibmhpc/pecurrent/base/intel/lib64" </pre> -<p>The poe command interacts with SLURM by loading a SLURM library providing -a variety of functions for its use. You must specify the location of that -library and note that SLURM is the workload manager in the file named -"/etc/poe.limits". -The library name is "libpermapi.so" and it is in installed with the other SLURM -libraries in the subdirectory "lib/slurm". A sample "/etc/poe.limits" file is -shown below. You will need to modify the value of MP_PE_RMLIB to match SLURM's -installation location on your system.<br> -<b>NOTE:</b> The poe command is loading and using the libpermapi.so library -initially from the /usr/lib64 directory. It later reads the /etc/poe.limits -file and loads the correct library. In order for poe to work with SLURM, it -needs to use the "libpermapi.so" generated by SLURM for all of its functions. -Until poe is modified to only load the correct library, it is necessary for -the file /usr/lib64/libpermapi.so to contain SLURM's library or a link to it.</p> -<pre> -# -# Sample /etc/poe.limits -# Modify the path below as appropriate -# -MP_PE_RMLIB=/usr/local/lib/slurm/libpermapi.so -</pre> +<p><b>IMPORTANT:</b> The poe command interacts with SLURM by loading a + SLURM library providing a variety of functions for its use. The + library name is <i>"libpermapi.so"</i> and it is installed with the + other SLURM libraries in the subdirectory "lib/slurm".
You must + modify the link of /usr/lib64/libpermapi.so to point to the location + of the slurm version of this library.</p> +<p>Modifying the "/etc/poe.limits" file is <b>not</b> enough. The poe + command is loading and using the libpermapi.so library initially + from the /usr/lib64 directory. It later reads the /etc/poe.limits + file and loads the library listed there. In order for poe to work + with SLURM, it needs to use the "libpermapi.so" generated by SLURM + for all of its functions. Until poe is modified to only load the + correct library, it is necessary for /usr/lib64/libpermapi.so to + contain SLURM's library or a link to it.</p> +<p>If you are having problems running on more than 32 nodes this is + most likely your issue.</p> + +<p>Changes to the count of dynamic switch windows necessitate cold-starting +Slurm (without jobs). The procedure is as follows:</p> +<ol> +<li>Prevent new jobs from being started (e.g. Drain the compute nodes).</li> +<li>Cancel all jobs.</li> +<li>Change the dynamic window count on the compute nodes.</li> +<li>Restart Slurm daemons without preserving state +(e.g. "/etc/init.d/slurm/startclean" or initiate the daemons using the +"-c" option).</li> +</ol> <h3>Job Scheduling</h3> @@ -504,15 +517,17 @@ In figure 2, note that the libpermapi library issues the job and job step creation requests.
The srun command is an optional front-end for the poe command and the poe command can be invoked directly by the user if desired.</p> +<hr width=80%> <img src="ibm_pe_fig1.png" width=600> <center> <p>Figure 1: Use of the switch/nrt plugin</p> </center> - +<hr width=80%> <img src="ibm_pe_fig2.png" width=600> <center> <p>Figure 2: Use of the launch/poe plugin</p> </center> +<hr width=80%> <h3>Debugging Notes</h3> @@ -535,6 +550,6 @@ startsrc -s pnsd -a -D <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 14 March 2013</p></td> +<p style="text-align:center;">Last modified 29 May 2013</p></td> <!--#include virtual="footer.txt"--> diff --git a/doc/html/job_array.shtml b/doc/html/job_array.shtml new file mode 100644 index 0000000000000000000000000000000000000000..3f5fc44979a759e950894d02db7ea8511be80b53 --- /dev/null +++ b/doc/html/job_array.shtml @@ -0,0 +1,266 @@ +<!--#include virtual="header.txt"--> + +<h1>Job Array Support</h1> + +<h2>Overview</h2> + +<p>Support for job arrays was added in Slurm version 2.6. +Job arrays offer a mechanism for submitting and managing collections of similar +jobs quickly and easily; tens of thousands of jobs can be submitted in under +one second. +All jobs must have the same initial options (e.g. size, time limit, etc.), +however it is possible to change some of these options after the job has begun +execution using the command<br> +<i>scontrol update job ...</i></p> + +<p>Job arrays are only supported for batch jobs and the array index values are +specified using the <i>--array</i> or <i>-a</i> option of the <i>sbatch</i> +command. The option argument can be specific array index values, a range of +index values, and an optional step size as shown in the examples below. +Note that the minimum index value is zero and the maximum value is a Slurm +configuration parameter (<i>MaxArraySize</i> minus one).
+Jobs which are part of a job array will have the environment variable +<i>SLURM_ARRAY_TASK_ID</i> set to its array index value.</p> + +<pre> +# Submit a job array with index values between 0 and 31 +$ sbatch --array=0-31 -N1 tmp + +# Submit a job array with index values of 1, 3, 5 and 7 +$ sbatch --array=1,3,5,7 -N1 tmp + +# Submit a job array with index values between 1 and 7 +# with a step size of 2 (i.e. 1, 3, 5 and 7) +$ sbatch --array=1-7:2 -N1 tmp +</pre> + +<h2>Job ID and Environment Variables</h2> + +<p>Job arrays will have two additional environment variables set. +<b>SLURM_ARRAY_JOB_ID</b> will be set to the first job ID of the array. +<b>SLURM_ARRAY_TASK_ID</b> will be set to the job array index value. +For example a job submission of this sort<br> +<i>sbatch --array=1-3 -N1 tmp</i><br> +will generate a job array containing three jobs. If the sbatch command responds<br> +<i>Submitted batch job 36</i><br> +then the environment variables will be set as +follows:<br> +<br> +SLURM_JOBID=36<br> +SLURM_ARRAY_JOB_ID=36<br> +SLURM_ARRAY_TASK_ID=1<br> +<br> +SLURM_JOBID=37<br> +SLURM_ARRAY_JOB_ID=36<br> +SLURM_ARRAY_TASK_ID=2<br> +<br> +SLURM_JOBID=38<br> +SLURM_ARRAY_JOB_ID=36<br> +SLURM_ARRAY_TASK_ID=3<br> +</p> + +<p>All Slurm commands and APIs recognize the SLURM_JOBID value. +Some commands also recognize the SLURM_ARRAY_JOB_ID plus SLURM_ARRAY_TASK_ID +values separated by an underscore as identifying an element of a job array. +Using the example above, "37" or "36_2" would be equivalent ways to identify +the second array element of job 36.</p> + +<h2>File Names</h2> + +<p>Two additional options are available to specify a job's stdin, stdout, and +stderr file names: +<b>%A</b> will be replaced by the value of SLURM_ARRAY_JOB_ID (as defined above) +and +<b>%a</b> will be replaced by the value of SLURM_ARRAY_TASK_ID (as defined above). +The default output file format for a job array is "slurm-%A_%a.out".
+An example of explicit use of the formatting is:<br> +<i>sbatch -o slurm-%A_%a.out --array=1-3 -N1 tmp</i><br> +which would generate +output file names of this sort "slurm-36_1.out", "slurm-36_2.out" and +"slurm-36_3.out". +If these file name options are used without being part of a job array then +"%A" will be replaced by the current job ID and "%a" will be replaced by +65534 (NO_VAL).</p> + +<h2>Scancel Command Use</h2> + +<p>If the job ID of a job array is specified as input to the scancel command +then all elements of that job array will be cancelled. +Alternately an array ID, optionally using regular expressions may be specified +for job cancellation.</p> + +<pre> +# Cancel array ID 1 to 3 from job array 20 +$ scancel 20_[1-3] + +# Cancel array ID 4 and 5 from job array 20 +$ scancel 20_4 20_5 + +# Cancel all elements from job array 20 +$ scancel 20 + +# Cancel the current job or job array element (if job array) +if [[ -z $SLURM_ARRAY_JOB_ID ]]; then + scancel $SLURM_JOB_ID +else + scancel ${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID} +fi +</pre> + +<h2>Squeue Command Use</h2> + +<p>By default, the squeue command will combine all pending elements of a job +array on one line and use a regular expression to indicate the +"array_task_id" values as shown below.</p> + +<pre> +$ squeue + JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) +1080_[5-1024] debug tmp mac PD 0:00 1 (Resources) +1080_1 debug tmp mac R 0:17 1 tux0 +1080_2 debug tmp mac R 0:16 1 tux1 +1080_3 debug tmp mac R 0:03 1 tux2 +1080_4 debug tmp mac R 0:03 1 tux3 +</pre> + +<p>An option of "--array" or "-r" has also been added to the squeue command +to print one job array element per line as shown below.
+The environment variable "SQUEUE_ARRAY" is equivalent to including the "--array" +option on the squeue command line.</p> + +<pre> +$ squeue -r + JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) +1082_3 debug tmp mac PD 0:00 1 (Resources) +1082_4 debug tmp mac PD 0:00 1 (Priority) + 1080 debug tmp mac R 0:17 1 tux0 + 1081 debug tmp mac R 0:16 1 tux1 +1082_1 debug tmp mac R 0:03 1 tux2 +1082_2 debug tmp mac R 0:03 1 tux3 +</pre> + +<p>The squeue --step/-s and --job/-j options can accept job or step +specifications of the same format.</p> + +<pre> +$ squeue -j 1234_2,1234_3 +... +$ squeue -s 1234_2.0,1234_3.0 +... +</pre> + +<p>Two additional job output format field options have been added to squeue:<br> +<b>%F</b> prints the array_job_id value<br> +<b>%K</b> prints the array_task_id value<br> +(all of the obvious letters to use were already assigned to other job fields).</p> + +<h2>Scontrol Command Use</h2> + +<p>When a job array is submitted using the <i>sbatch</i> command an independent +job is submitted for each element of the array, however substantial performance +improvement is realized through the use of a single job submit request and only +needing to validate the request options one time. +Use of the <i>scontrol show job</i> option shows two new fields related to +job array support. +The <i>JobID</i> is a unique identifier for the job. +The <i>ArrayJobID</i> is the <i>JobID</i> of the first element of the job +array. +The <i>ArrayTaskID</i> is the array index of this particular entry. +Neither field is displayed if the job is not part of a job array. +The optional job ID specified with the <i>scontrol show job</i> or +<i>scontrol show step</i> commands can identify job array elements by +specifying two numbers with an underscore between the two: +"<job_id>_<array_id>". +In order to modify a job, always use the <i>JobID</i> specification.</p> + +<pre> +$ scontrol show job +JobId=13 ArrayJobId=13 ArrayTaskId=1 Name=tmp +...
+JobId=14 ArrayJobId=13 ArrayTaskId=2 Name=tmp +... +JobId=15 ArrayJobId=13 ArrayTaskId=3 Name=tmp +.... + +$ scontrol update JobId=15 TimeLimit=30 +</pre> + +<p>The scontrol command will accept a job array element specification for +the update job command, but only operate on one job (or job array element). +The scontrol hold, holdu and release commands will operate on all +elements of a job array or individual elements as shown below.</p> +<pre> +# Modify a job +$ scontrol update jobid=13_1 ... + +# Hold all elements of job array 13 +$ scontrol hold 13 + +# Release element 2 of job array 13 +$ scontrol release 13_2 +</pre> + +<h2>Other Command Use</h2> + +<p>Job dependencies for individual job array elements are supported in Slurm +version 2.6.4 and later. +A job which is to be dependent upon an entire job array, should specify +itself dependent upon the ArrayJobID. Since each array element can have a +different exit code, the interpretation of the afterok and afternotok clauses +will be based upon the last element of the job array to exit. +Examples of use follow:</p> +<pre> +# Wait for specific job array elements +sbatch --depend=after:123_4 my.job +sbatch --depend=afterok:123_4:123_8 my.job2 + +# Wait for entire job array to complete +sbatch --depend=after:123 my.job +</pre> + +<p>The following Slurm commands do not currently recognize job arrays and their +use requires the use of Slurm job IDs, which are unique for each array element: +sacct, sbcast, smap, sreport, sshare, sstat, strigger, and sview. +The sattach, sprio and sstat commands have been modified to permit +specification of either job IDs or job array elements. +The sview command has been modified to permit display of a job's ArrayJobId +and ArrayTaskId fields. Both fields are displayed with a value of "N/A" if +the job is not part of a job array.</p> + +<h2>System Administration</h2> + +<p>A new configuration parameter has been added to control the maximum +job array size: <b>MaxArraySize</b>.
The smallest index that can be specified +by a user is zero and the maximum index is MaxArraySize minus one. +The default value of MaxArraySize is 1001. Be mindful about the value of +MaxArraySize as job arrays offer an easy way for users to submit large numbers +of jobs very quickly.</p> + +<p>The sched/backfill plugin has been modified to improve performance with +job arrays. Once one element of a job array is discovered to not be runnable +or impact the scheduling of pending jobs, the remaining elements of that job +array will be quickly skipped.</p> + +<p>Slurm support for job arrays at this time does not use a meta-job data +structure, but creates a separate job record for each element of the array. +Two additional fields were added to Slurm's job record for managing job arrays. +The first new field is internally called "array_job_id" and is the job ID of +the first job in the array. +Subsequent elements of the job array will have a unique Slurm "job_id", but +all will have the same "array_job_id" value. +Some Slurm commands interpret the array_job_id as representing all elements of +the job array, while other commands use the unique job_id assigned to each. +Support for Slurm job arrays can be expected to improve in later releases. +The second new field is called "array_task_id" which is the job array index +value of the job array element.</p> + +<h2>Future Work</h2> + +<p>There are scalability and performance improvements possible if a job +array data structure is added rather than the current logic that only +adds a new field to the existing job data structure.
+It is not certain when that work will occur.</p> + +<p style="text-align:center;">Last modified 28 October 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/job_submit_plugins.shtml b/doc/html/job_submit_plugins.shtml index 5c38da3809297425274fb5f995baccc56794d31d..6aa902b12145545b2daa5e57c53cd965250c1251 100644 --- a/doc/html/job_submit_plugins.shtml +++ b/doc/html/job_submit_plugins.shtml @@ -19,8 +19,7 @@ A free-formatted ASCII text string that identifies the plugin. plugin_type[]="<i>major/minor</i>"</span><br> <p style="margin-left:.2in"> The major type must be "job_submit." -The minor type can be any suitable name -for the type of accounting package. +The minor type can be any suitable name for the type of job submission package. We include samples in the SLURM distribution for <ul> <li><b>all_partitions</b>—Set default partition to all partitions on diff --git a/doc/html/man_index.shtml b/doc/html/man_index.shtml index f531c459f09e1844f902444356cec2df510494fe..5ff36eb5f3239bb8377eafdc34006bebeeeeedbf 100644 --- a/doc/html/man_index.shtml +++ b/doc/html/man_index.shtml @@ -18,6 +18,7 @@ Documentation for other versions of Slurm is distributed with the code</b></p> <tr><td><a href="slurm.html">slurm</a></td><td>SLURM system overview.</td></tr> <tr><td><a href="smap.html">smap</a></td><td>graphically view information about SLURM jobs, partitions, and set configurations parameters.</td></tr> <tr><td><a href="sprio.html">sprio</a></td><td>view the factors that comprise a job's scheduling priority</td></tr> +<tr><td><a href="sh5util.html">sh5util</a></td><td>merge utility for acct_gather_profile plugin.</td></tr> <tr><td><a href="squeue.html">squeue</a></td><td>view information about jobs located in the SLURM scheduling queue.</td></tr> <tr><td><a href="sreport.html">sreport</a></td><td>Generate reports from the slurm accounting data.</td></tr> <tr><td><a href="srun_cr.html">srun_cr</a></td><td>run parallel jobs with checkpoint/restart 
support</td></tr> @@ -26,8 +27,10 @@ Documentation for other versions of Slurm is distributed with the code</b></p> <tr><td><a href="sstat.html">sstat</a></td><td>Display various status information of a running job/step.</td></tr> <tr><td><a href="strigger.html">strigger</a></td><td>Used set, get or clear Slurm trigger information.</td></tr> <tr><td><a href="sview.html">sview</a></td><td>graphical user interface to view and modify SLURM state.</td></tr> +<tr><td><a href="acct_gather.conf.html">acct_gather.conf</a></td><td>Slurm configuration file for the acct_gather plugins</td></tr> <tr><td><a href="bluegene.conf.html">bluegene.conf</a></td><td>Slurm configuration file for BlueGene systems</td></tr> <tr><td><a href="cgroup.conf.html">cgroup.conf</a></td><td>Slurm configuration file for the cgroup support</td></tr> +<tr><td><a href="ext_sensors.conf.html">ext_sensors.conf</a></td><td>Slurm configuration file for the external sensor support</td></tr> <tr><td><a href="gres.conf.html">gres.conf</a></td><td>Slurm configuration file for generic resource management.</td></tr> <tr><td><a href="slurm.conf.html">slurm.conf</a></td><td>Slurm configuration file</td></tr> <tr><td><a href="slurmdbd.conf.html">slurmdbd.conf</a></td><td>Slurm Database Daemon (SlurmDBD) configuration file</td></tr> @@ -41,6 +44,6 @@ Documentation for other versions of Slurm is distributed with the code</b></p> </table> -<p style="text-align:center;">Last modified 29 November 2012</p> +<p style="text-align:center;">Last modified 10 July 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/meetings.shtml b/doc/html/meetings.shtml index b777243de9f36490a8ffd16cac13825a5b4e6b52..c858fa52135073524aa5691fbd81ff11c6eddce9 100644 --- a/doc/html/meetings.shtml +++ b/doc/html/meetings.shtml @@ -2,17 +2,22 @@ <h1>Meetings</h1> -<p><b>SLURM User Group Meeting 2013</b><br> -18-19 September 2013<br> -Oakland, California, USA<br> -Host: SchedMD<br><br> -<a href="slurm_ug_cfp.html">Call for 
Abstracts (Due 24 May)</a><br> -<a href="slurm_ug_registration.html">Registration information</a> -</p> +<p><b>SLURM User Group Meeting 2014</b><br> +23-24 September 2014<br> +Lugano, Switzerland<br> +Host: Swiss National Supercomputing Centre</p> +<!--<a href="slurm_ug_cfp.html">Call for Abstracts (Due 24 May)</a><br>--> +<!--<a href="slurm_ug_agenda.html">Meeting agenda</a><br>--> +<!--<a href="slurm_ug_registration.html">Registration information</a>--> <br> <h2>Past User Group Meetings</h2> +<p><b>SLURM User Group Meeting 2013</b><br> +18-19 September 2013<br> +Oakland, California, USA<br> +Host: SchedMD</p> + <p><b>SLURM User Group Meeting 2012</b><br> 9-10 October 2012<br> Barcelona, Spain<br> @@ -28,6 +33,6 @@ Host: Bull</p> Paris, France<br> Host: CEA</p> -<p style="text-align:center;">Last modified 29 April 2013</p> +<p style="text-align:center;">Last modified 20 September 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/mpi_guide.shtml b/doc/html/mpi_guide.shtml index 33643360ed4a72f45c0a3f270a5e37ebb38e61e9..a1f71cf8e5cda60b04654d73c7491a8a37905698 100644 --- a/doc/html/mpi_guide.shtml +++ b/doc/html/mpi_guide.shtml @@ -4,7 +4,7 @@ <p>MPI use depends upon the type of MPI being used. There are three fundamentally different modes of operation used -by these various MPI implementations. +by these various MPI implementations.</p> <ol> <li>SLURM directly launches the tasks and performs initialization of communications (UPC, Quadrics MPI, MPICH2, MPICH-GM, MPICH-MX, @@ -17,7 +17,11 @@ such as SSH or RSH (BlueGene MPI and some MPICH1 modes). These tasks initiated outside of SLURM's monitoring or control. SLURM's epilog should be configured to purge these tasks when the job's allocation is relinquished. </li> -</ol></p> +</ol> +<p><b>Note</b>: Slurm is not directly launching the user application in case 3, +which may prevent the desired behavior of binding tasks to CPUs and/or +accounting.
Some versions of some MPI implementations work, so testing your +particular installation may be required to determine the actual behavior.</p> <p>Two SLURM parameters control which MPI implementation will be supported. Proper configuration is essential for SLURM to establish the @@ -59,6 +63,9 @@ in the <i>slurm.conf</i> file using the <i>MpiParams</i> parameter. For example: <br> <i>MpiParams=ports=12000-12999</i></p> +<p>OpenMPI must also be configured with <i>--with-pmi</i> to support the port +reservation method.</p> + <p>Launch tasks using the <span class="commandline">srun</span> command plus the option <i>--resv-ports</i>. Alternately define the environment variable <i>SLURM_RESV_PORT</i>, which is equivalent to always including @@ -228,10 +235,48 @@ $MPI_ROOT/bin/mpirun -TCP -srun -N8 ./a.out <h2><a name="mpich2" href="http://www.mcs.anl.gov/research/projects/mpich2/"><b>MPICH2</b></a></h2> -<p>MPICH2 jobs can be launched using the <b>srun</b> command or <b>mpiexec</b>. -Both modes of operation are described below.</p> +<p>MPICH2 jobs can be launched using the <b>srun</b> command using + pmi 1 or 2, or <b>mpiexec</b>. +All modes of operation are described below.</p> -<h3>MPICH2 with srun</h3> +<h3>MPICH2 with srun and PMI version 2</h3> + +<p>MPICH2 must be built specifically for use with SLURM and PMI2 using a configure +line similar to that shown below.</p> +<pre> +./configure --with-slurm=<PATH> --with-pmi=pmi2 +</pre> +<p> +The PATH must point to the SLURM installation directory, in other words the parent +directory of bin and lib. +In addition, if SLURM is not configured with <i>MpiDefault=pmi2</i>, then +the srun command must be invoked with the option <i>--mpi=pmi2</i> as shown +in the example below.</p> +<pre> +srun -n4 --mpi=pmi2 ./a.out +</pre> + +<p> +The PMI2 support in SLURM works only if the MPI implementation supports it, in other words if the MPI has +the PMI2 interface implemented.
The <i>--mpi=pmi2</i> will load the library <i>lib/slurm/mpi_pmi2.so</i> +which provides the server side functionality but the client side must implement <i>PMI2_Init()</i> +and the other interface calls.<br> + +<p> +You can refer to the <i>mpich2-1.5</i> implementation and configure MPICH to use PMI2 with the <i>--with-pmi=pmi2</i> configure option.<br> + +<p> +To check if the MPI version you are using supports PMI2 check for PMI2_* symbols in the MPI library. +<p> +SLURM provides a version of the PMI2 client library in the contribs directory. This library gets +installed in the SLURM lib directory. If your MPI implementation supports PMI2 and you wish to use +the SLURM provided library you have to link the SLURM provided library explicitly: +<pre> +$ mpicc -L<path_to_pmi2_lib> -lpmi2 ... +$ srun -n20 a.out +</pre> + +<h3>MPICH2 with srun and PMI version 1</h3> <p>Link your program with SLURM's implementation of the PMI library so that tasks can communicate @@ -273,20 +318,6 @@ to launch the tasks.
A simple example is shown below.</p> <pre>salloc -N 2 mpiexec my_application</pre> <p>All MPI_comm_spawn work fine now going through hydra's PMI 1.1 interface.</p> -<h3>MPICH2 build and configuration</h3> - -<p>MPICH2 must be built specifically for use with SLURM using a configure -line similar to that shown below.</p> -<pre> -./configure --with-slurm=<PATH> --with-pm=none -</pre> -<p>In addition, if SLURM is not configured with <i>MpiDefault=pmi2</i>, then -the srun command must be invoked with the option <i>--mpi=pmi2</i> as shown -in the example below below.</p> -<pre> -srun -n4 --mpi=pmi2 ./a.out -</pre> - <hr size=4 width="100%"> @@ -476,6 +507,6 @@ $ srun -N4 -n16 a.out <hr size=4 width="100%"> -<p style="text-align:center;">Last modified 1 April 2013</p> +<p style="text-align:center;">Last modified 13 September 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/news.shtml b/doc/html/news.shtml index 02703df4c1d1c4b36b376fb10395f829934fe64f..5585f0e1ffd87fac5aa03222eea7ea4843f444b7 100644 --- a/doc/html/news.shtml +++ b/doc/html/news.shtml @@ -4,69 +4,99 @@ <h2>Index</h2> <ul> -<li><a href="#25">SLURM Version 2.5, November 2012</a></li> -<li><a href="#26">SLURM Version 2.6, scheduled for April 2013</a></li> -<li><a href="#27">SLURM Version 2.7 and beyond</a></li> +<li><a href="#26">Slurm Version 2.6, June 2013</a></li> +<li><a href="#1312">Slurm Version 13.12, December 2013</a></li> +<li><a href="#1406">Slurm Version 14.06 and beyond</a></li> <li><a href="#security">Security Patches</a></li> </ul> - -<h2><a name="25">Major Updates in SLURM Version 2.5</a></h2> -<p>SLURM Version 2.5 was release in November 2012. +<h2><a name="26">Major Updates in Slurm Version 2.6</a></h2> +<p>SLURM Version 2.6 was released in July 2013. 
Major enhancements include: <ul> -<li>Support for Intel® Many Integrated Core (MIC) processors.</li> -<li>User control over CPU frequency of each job step.</li> -<li>Recording power usage information for each job.</li> -<li>Advanced reservation of cores rather than whole nodes.</li> -<li>Integration with IBM's -<a href="http://www-03.ibm.com/systems/software/parallel/">Parallel Environment</a> -including POE (Parallel Operating Environment) and NRT (Network Resource Table) API.</li> -<li>Highly optimized throughput for serial jobs in a new "select/serial" -plugin.</li> -<li>CPU load is information available</li> -<li>Configurable number of CPUs available to jobs in each SLURM partition, -which provides a mechanism to reserve CPUs for use with GPUs.</li> -</ul> - -<h2><a name="26">Major Updates in SLURM Version 2.6</a></h2> -<p>SLURM Version 2.6 is scheduled for release in April 2013. -Major enhancements planned include: -<ul> <li>Support for job arrays, which increases performance and ease of use - for sets of similar jobs.</li> -<li><a href="http://en.wikipedia.org/wiki/MapReduce">MapReduce</a> support -(launches ~1000x faster, runs ~10x faster).</li> + for sets of similar jobs.</li> +<li><a href="hdf5_profile_user_guide.html">Job profiling</a> capability added + to record a wide variety of job characteristics for each task on a user + configurable periodic basis. Data currently available includes CPU use, + memory use, energy use, Infiniband network use, Lustre file system use, + etc.</li> +<li>Support for MPICH2 using PMI2 communications interface with much greater + scalability.</li> <li>Prolog and epilog support for advanced reservations.</li> <li>Much faster throughput for job step execution with --exclusive option. The -srun process is notified when resources become available rather than -periodic polling.</li> -<li>Faster and more powerful job step management support (e.g. 
step -dependencies).</li> + srun process is notified when resources become available rather than + periodic polling.</li> +<li>Support improved for Intel MIC (Many Integrated Core) processor.</li> <li>Advanced reservations with hostname and core counts now supports asymmetric -reservations (e.g. specific different core count for each node).</li> -<li>Support for Intel MIC (Many Integrated Core) processor.</li> -<li>Finer-grained BlueGene resource management (partitions/queues and advanced -reservations containing less than a whole midplane).</li> + reservations (e.g. specific different core count for each node).</li> +<li><a href="ext_sensorsplugins.html">External sensor plugin infrastructure</a> + added to record power consumption, temperature, etc.</li> +<li>Improved performance for high-throughput computing.</li> +<li><a href="http://en.wikipedia.org/wiki/MapReduce">MapReduce+</a> support + (launches ~1000x faster, runs ~10x faster).</li> +<li>Added "MaxCPUsPerNode" partition configuration parameter. This can be + especially useful to schedule GPUs. For example a node can be associated + with two Slurm partitions (e.g. "cpu" and "gpu") and the partition/queue + "cpu" could be limited to only a subset of the node's CPUs, insuring that + one or more CPUs would be available to jobs in the "gpu" partition/queue.</li> </ul> +</p> -<h2><a name="27">Major Updates in SLURM Version 2.7 and beyond</a></h2> +<h2><a name="1312">Major Updates in Slurm Version 13.12</a></h2> +<p>NOTICE: Starting with 13.12 we will be numbering Slurm versions +with a YEAR.MONTH format.<br> +SLURM Version 13.12 release is planned in December 2013. 
+Major enhancements include: +<ul> +<li>Integration with + <a href="http://en.wikipedia.org/wiki/FlexNet_Publisher">FLEXlm (Flexnet Publisher)</a> + license management.</li> +<li>Layouts framework, which will be the basis for further developments toward + optimizing scheduling with respect to additional parameters such as temperature + and power consumption.</li> +<li>Energy consumption added as a factor in fair-share scheduling.</li> +<li>Energy aware scheduling added with respect to power caps.</li> +<li>Improved user support for fault-tolerance (e.g. "hot spare" resources).</li> +<li>New partition configuration parameters: AllowAccounts, AllowQOS, + DenyAccounts and DenyQOS.</li> +<li>Scalability improvements for MPI initialization including communication of + the compute node network interface details.</li> +<li>Defer sending SIGKILL signal to processes while a core dump is in progress.</li> +<li>Other important enhancements that can not be made public at this time...</li> +</ul> +</p> + + +<h2><a name="1406">Major Updates in Slurm Version 14.06 and beyond</a></h2> <p> Detailed plans for release dates and contents of additional SLURM releases have not been finalized. Anyone desiring to perform SLURM development should notify <a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a> to coordinate activities. Future development plans includes: <ul> -<li>Improved user support for fault-tolerance (e.g. "hot spare" resources).</li> +<li>Distributed architecture to support the management of resources with Intel + MIC processors.</li> +<li>Support of I/O as a new resource, including proxy I/O nodes with data + locality.</li> +<li>Improved scheduling support for job dependencies (e.g. pre-processing, + post-processing, co-processing on I/O nodes, etc.) to optimize overall + system utilization.</li> +<li>IP communications over InfiniBand network for improved performance.</li> +<li>Support for heterogeneous GPU environments (i.e.
user specification of + desired GPU types).</li> +<li>Fault-tolerance and jobs dynamic adaptation through communication protocol + between Slurm, MPI libraries and the application.</li> +<li>Improved support for high-throughput computing (e.g. multiple slurmctld + daemons on a single cluster).</li> +<li>Scheduling fully optimized for energy efficiency.</li> <li>Numerous enhancements to advanced resource reservations (e.g. start or -end the reservation early depending upon the workload).</li> + end the reservation early depending upon the workload).</li> <li>Add Kerberos credential support including credential forwarding -and refresh.</li> + and refresh.</li> <li>Improved support for provisioning and virtualization.</li> <li>Provide a web-based SLURM administration tool.</li> -<li>Scheduling optimized for energy efficiency.</li> -<li>Integration with -<a href="http://en.wikipedia.org/wiki/FlexNet_Publisher">FLEXlm (Flexnet Publisher)</a> -license management.</li> +<li>Finer-grained BlueGene resource management (partitions/queues and advanced + reservations containing less than a whole midplane).</li> </ul> <h2><a name="security">Security Patches</a></h2> @@ -74,25 +104,25 @@ license management.</li> <a href="http://cve.mitre.org/">http://cve.mitre.org/</a>.</p> <ul> <li>CVE-2009-0128<br> -There is a potential security vulnerability in SLURM where a user could -build an invalid job credential in order to execute a job (under his -correct UID and GID) on resources not allocated to that user. This -vulnerability exists only when the crypto/openssl plugin is used and was -fixed in SLURM version 1.3.0.</li> + There is a potential security vulnerability in SLURM where a user could + build an invalid job credential in order to execute a job (under his + correct UID and GID) on resources not allocated to that user.
This + vulnerability exists only when the crypto/openssl plugin is used and was + fixed in SLURM version 1.3.0.</li> <li>CVE-2009-2084<br> -SLURM failed to properly set supplementary groups before invoking (1) sbcast -from the slurmd daemon or (2) strigger from the slurmctld daemon, which might -allow local SLURM users to modify files and gain privileges. This was fixed -in SLURM version 1.3.14.</li> + SLURM failed to properly set supplementary groups before invoking (1) sbcast + from the slurmd daemon or (2) strigger from the slurmctld daemon, which might + allow local SLURM users to modify files and gain privileges. This was fixed + in SLURM version 1.3.14.</li> <li>CVE-2010-3308<br> -There is a potential security vulnerability where if the init.d scripts are -executed by user root or SlurmUser to initiate the SLURM daemons and the -LD_LIBRARY_PATH is not set and the operating system interprets a blank entry -in the path as "." (current working directory) and that directory contains a -trojan library, then that library will be used by the SLURM daemon with -unpredictable results. This was fixed in SLURM version 2.1.14.</li> + There is a potential security vulnerability where if the init.d scripts are + executed by user root or SlurmUser to initiate the SLURM daemons and the + LD_LIBRARY_PATH is not set and the operating system interprets a blank entry + in the path as "." (current working directory) and that directory contains a + trojan library, then that library will be used by the SLURM daemon with + unpredictable results. 
This was fixed in SLURM version 2.1.14.</li> </ul> -<p style="text-align:center;">Last modified 6 March 2013</p> +<p style="text-align:center;">Last modified 31 July 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/platforms.shtml b/doc/html/platforms.shtml index 1f911422c61d85eefda02a46906c4b51eda26174..ad2072097790ef5f960098275cdf9c8ff72deb29 100644 --- a/doc/html/platforms.shtml +++ b/doc/html/platforms.shtml @@ -8,7 +8,7 @@ know of no AIX installations using Slurm after 2008. See <a href="ibm.html">IBM AIX User and Administrator Guide</a> for more information.</li> <li><b>FreeBSD</b>—Slurm support added in version 2.5.5.</li> <li><b>Linux</b>—Slurm has been thoroughly tested on most popular Linux -distributions using i386, ia64, and x86_64 architectures.</li>> +distributions using i386, ia64, and x86_64 architectures.</li> <li><b>NetBSD</b>—Slurm support added in version 2.4.</li> <li><b>OS X</b>—Slurm has run OS X in the past, but the current OS X linker does not support Slurm plugins. (OS X dynamically linked objects can be called by @@ -32,7 +32,7 @@ ALPS/BASIL software necessitates some changes in Slurm behavior. 
See been thoroughly tested.</li> <li><b>IBM Infiniband/Torrent</b>—Slurm supports IBM's NRT (Network Resource Table) interface and -<a href="ibm-pe.html">IBM's PE (Parallel Environment</a>).</li> +<a href="ibm-pe.html">IBM's PE (Parallel Environment)</a>.</li> <li><b>Infiniband</b>—Infiniband support has been thoroughly tested.</li> <li><b>Myrinet</b>—Myrinet, MPICH-GM and MPICH-MX are supported.</li> <li><b>Quadrics Elan</b>—Slurm support for Quadrics Elan 3 and Elan 4 switches diff --git a/doc/html/preempt.shtml b/doc/html/preempt.shtml index 66a09469ee73ef846d9bd1431799c7db0843ae4d..31c812119b3e8304414f6eb45e161bc54589c9af 100644 --- a/doc/html/preempt.shtml +++ b/doc/html/preempt.shtml @@ -15,29 +15,10 @@ Alternately, the low priority job(s) can be requeued and started using other resources if so configured in newer versions of SLURM. </P> <P> -In SLURM version 2.0 and earlier, high priority work is identified by the -priority of the job's partition and low priority jobs are always suspended. -The job preemption logic is within the <I>sched/gang</I> plugin. -In SLURM version 2.1 and higher, the job's partition priority or its -Quality Of Service (QOS) can be used to identify the which jobs can preempt -or be preempted by other jobs. -</P> -<P> -SLURM version 2.1 offers several options for the job preemption mechanism -including checkpoint, requeue, or cancel. -the option of requeuing low priority jobs -Checkpointed jobs are not automatically requeued or restarted. -Requeued jobs may restart faster by using different resources. -All of these new job preemption mechanisms release a job's memory space for -use by other jobs. -In SLURM version 2.1, some job preemption logic was moved into the -<I>select</I> plugin and main code base to permit use of both job preemption -plus the backfill scheduler plugin, <i>sched/backfill</I>. -</P> - -<P> -SLURM version 2.2 offers the ability to configure the preemption mechanism -used on a per partition or per QOS basis. 
+The job's partition priority or its Quality Of Service (QOS) can be used to +identify which jobs can preempt or be preempted by other jobs. +SLURM offers the ability to configure the preemption mechanism used on a per +partition or per QOS basis. For example, jobs in a low priority queue may get requeued, while jobs in a medium priority queue may get suspended. </P> @@ -392,6 +373,6 @@ order to support ideal placements such as this, which can quickly complicate the design. Any and all help is welcome here! </P> -<p style="text-align:center;">Last modified 10 December 2012</p> +<p style="text-align:center;">Last modified 17 July 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/priority_multifactor.shtml b/doc/html/priority_multifactor.shtml index 829ed96f6969e5384c3813df6e3ab2490633dc3c..9b3456c066d3f3423ea3e7af76e78ceb02bc92d8 100644 --- a/doc/html/priority_multifactor.shtml +++ b/doc/html/priority_multifactor.shtml @@ -105,6 +105,13 @@ its job size factor will be. A job that requests all the nodes on the machine will get a job size factor of 1.0. When the <i>PriorityFavorSmall</i> Boolean is YES, the single node job will receive the 1.0 job size factor.</P> +<p>The <i>PriorityFlags</i> value of <i>SMALL_RELATIVE_TO_TIME</i> alters this +behavior as follows. The job size in CPUs is divided by the time limit in +minutes. The result is divided by the total number of CPUs in the system. +Thus a full-system job with a time limit of one minute will receive a job size factor +of 1.0, while a tiny job with a large time limit will receive a job size factor +close to 0.0.</p> 
+ <!--------------------------------------------------------------------------> <a name=partition> <h2>Partition Factor</h2></a> diff --git a/doc/html/priority_multifactor2.shtml b/doc/html/priority_multifactor2.shtml index b4e82f79af34aaf2f66d49e07583dec5697dd28c..5ce5ddd4bce2586846a23e64df7a7cb6045e824b 100644 --- a/doc/html/priority_multifactor2.shtml +++ b/doc/html/priority_multifactor2.shtml @@ -1,6 +1,6 @@ <!--#include virtual="header.txt"--> -<h1>Multifactor 2 Priority Plugin</h1> +<h1>Ticket-Based Multifactor Priority Plugin</h1> <h2>Contents</h2> <ul> @@ -13,10 +13,9 @@ <a name=intro> <h2>Introduction</h2></a> -<p>The priority/multifactor2 is an enhanced version of the -priority/multifactor plugin. Only the differences are documented here; -the reader is assumed to be familiar with the priority/multifactor -plugin.</p> +<p>A ticket-based variant of the priority/multifactor plugin is available. +The reader is assumed to be familiar with the priority/multifactor plugin +and only the differences are documented here.</p> <!--------------------------------------------------------------------------> <a name=fairshare> @@ -27,7 +26,7 @@ and operation of the <a href="accounting.html">SLURM Accounting Database</a> to provide the assigned shares and the consumed, computing resources described below.</p> -<p>In the multifactor2 plugin, the fair-share component of the job +<p>In the ticket-based variant, the fair-share component of the job priority is calculated differently. 
The goal is to make sure that the priority strictly follows the account hierarchy, so that jobs under accounts with usage lower than their fair share will always have a @@ -90,7 +89,7 @@ T = T<sub>parent</sub> * S * F / SUM(S*F)<sub>active_siblings</sub> <h3>Example</h3> <p>Here the same example as in the multifactor plugin page is shown, -calculated using the multifactor2 algorithm.</p> +calculated using the ticket-based algorithm.</p> <ul> <li>User 1 normalized share: 0.3</li> @@ -128,7 +127,7 @@ F = S/U<sub>eff</sub> <ul> <li>Account A fair-share factor: 0.4 / 0.45 = 0.89</li> <li>Account B fair-share factor: 0.3 / 0.2 = 1.50</li> -<li>Account C fair-share factor: 0.1 / 0.25 = 0.4<priority_multifactor2.shtml /li> +<li>Account C fair-share factor: 0.1 / 0.25 = 0.4</li> <li>Account D fair-share factor: 0.6 / 0.25 = 2.40</li> <li>Account E fair-share factor: 0.25 / 0.25 = 1</li> <li>Account F fair-share factor: 0.35 / 0.0035 = 100</li> @@ -179,21 +178,20 @@ configure the Multi-factor Job Priority 2 Plugin. See slurm.conf(5) man page for more details.</p> <dl> +<dt>PriorityFlags +<dd>Set to "TICKET_BASED". <dt>PriorityType -<dd>Set this value to "priority/multifactor2" to enable the Multi-factor -Job Priority 2 Plugin. The default value for this variable is "priority/basic" +<dd>Set this value to "priority/multifactor". +The default value for this variable is "priority/basic" which enables simple FIFO scheduling. 
</dl> -<p>Note: The other configuration parameters are the same as for the -priority/multifactor plugin.</p> - -<p>Note: As the multifactor2 algorithm ensures that the highest +<p>Note: As the ticket-based algorithm ensures that the highest priority pending job will have the fair-share factor 1.0, there is a need to rebalance the relative weights of the different factors compared to the priority/multifactor plugin.</p> <!--------------------------------------------------------------------------> -<p style="text-align:center;">Last modified 17 October 2012</p> +<p style="text-align:center;">Last modified 14 January 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/priority_plugins.shtml b/doc/html/priority_plugins.shtml index 2a24c271dd251249fa8b0be5d9b2e500cd9553cd..7d0833f986eb8b15209275985bd67472c2f19541 100644 --- a/doc/html/priority_plugins.shtml +++ b/doc/html/priority_plugins.shtml @@ -83,11 +83,14 @@ last job<br> <span class="commandline">job_ptr</span> (input) pointer to the job record.</p> <p style="margin-left:.2in"><b>Returns</b>: the priority assigned to the job</p> -<p class="commandline">void priority_p_reconfig(void)</p> +<p class="commandline">void priority_p_reconfig(bool assoc_clear)</p> <p style="margin-left:.2in"><b>Description</b>: Refresh the plugin's configuration. Called whenever slurmctld is reconfigured.</p> <p style="margin-left:.2in"><b>Arguments</b>: -<span class="commandline">none</span></p> +<span class="commandline">assoc_clear</span> (input) true if association +and QOS used_cpu_run_secs field has been reset. 
This should be set to true +when Slurm is reconfigured, but false if an RPC is used to change only the +debug level or debug flags.</p> <p style="margin-left:.2in"><b>Returns</b>: void</p> <p class="commandline">void priority_p_set_assoc_usage(acct_association_rec_t *assoc)</p> @@ -106,14 +109,22 @@ that contains the specific jobs or users of interest (of any).</p> <p style="margin-left:.2in"><b>Returns</b>: a list of priority_factors_object_t's containing the requested job priority factors</p> +<p class="commandline">void priority_p_job_end(struct job_record *job_ptr)</p> +<p style="margin-left:.2in"><b>Description</b>: Handle ending of job + with decayable limits.</p> +<p style="margin-left:.2in"><b>Arguments</b>: +<span class="commandline">job_ptr</span> (input) pointer to the job record.</p> +<p style="margin-left:.2in"><b>Returns</b>: void</p> + + <h2>Versioning</h2> -<p> This document describes version 100 of the SLURM Priority API. Future +<p> This document describes version 101 of the SLURM Priority API. Future releases of SLURM may revise this API. 
A priority plugin conveys its ability to implement a particular API version using the mechanism outlined for SLURM plugins.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 20 February 2009</p> +<p style="text-align:center;">Last modified 1 November 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/proctrack_plugins.shtml b/doc/html/proctrack_plugins.shtml index b3b09c558762435f21f1b756f3ab1bf6968dd1ca..ef9d8b7fde09b79947a01c30fce8e86fb984aacb 100644 --- a/doc/html/proctrack_plugins.shtml +++ b/doc/html/proctrack_plugins.shtml @@ -143,9 +143,9 @@ with this process or zero if none is found.</p> Given a process container ID, fill in all the process IDs in the container.</p> <p style="margin-left:.2in"><b>Arguments</b>: <span class="commandline"> cont_id</span> (input) -A container ID.</p> +A container ID.<br> <span class="commandline"> pids</span> (output) -Array of process IDs in the container.</p> +Array of process IDs in the container.<br> <span class="commandline"> npids</span> (output) Count of process IDs in the container.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if diff --git a/doc/html/prolog_epilog.shtml b/doc/html/prolog_epilog.shtml index fbb3eb3d7290844cf6635ed4f286887fbc9b521f..1f185af570ae2360c4215d87de5ac83b061192e0 100644 --- a/doc/html/prolog_epilog.shtml +++ b/doc/html/prolog_epilog.shtml @@ -54,7 +54,7 @@ allocations, when and where they run.</p> </td> <td width="40%"> <p align="LEFT"><font style="font-size: 8pt" size="1"> - First job or job step initaion on that node</font></p> + First job or job step initiation on that node</font></p> </td> </tr> <tr> @@ -293,6 +293,13 @@ step allocations, when and where they run.</p> </tbody></table> </center> +<p>The task prolog is executed with the same environment as the user tasks to +be initiated. 
The standard output of that program is read and processed as +follows:<br> +<i>export name=value</i> sets an environment variable for the user task<br> +<i>unset name</i> clears an environment variable from the user task<br> +<i>print ...</i> writes to the task's standard output.</p> + <p>Plugins functions are may also be useful to execute logic at various well defined points.</p> @@ -315,6 +322,6 @@ PrologSlurmctld fails.</p> <p>Based upon work by Jason Sollom, Cray Inc. and used by permission.</p> -<p style="text-align:center;">Last modified 26 November 2012</p> +<p style="text-align:center;">Last modified 26 February 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/publications.shtml b/doc/html/publications.shtml index a4547afda5634b8d553e7026efc4432ac38ed59f..cf2bd0df49ea1d9486640230dcbd6c3d19b4d283 100644 --- a/doc/html/publications.shtml +++ b/doc/html/publications.shtml @@ -5,8 +5,87 @@ <p>Note that some of this material contains dated information.</p> <h2>Presentations</h2> + +<!--Slurm User Group Meeting 2013--> +<li><b>Presentations from SLURM User Group Meeting, September 2013</b></li> <ul> +<li>Welcome: <a href="SUG13/welcome.pdf">Welcome</a> +Morris Jette (SchedMD)</li> + +<li>Keynote: <a href="SUG13/keynote.pdf">Future Outlook for Advanced Computing</a> +Dona Crawford (LLNL)</li> + +<li>Technical: <a href="SUG13/version_26.pdf">Overview of Slurm version 2.6</a>, +Morris Jette and Danny Auble (SchedMD), Yiannis Georgiou (Bull) </li> + +<li>Tutorial: <a href="SUG13/energy_sensors.pdf">Energy Accounting and External Sensor Plugins</a>, +Yiannis Georgiou, Martin Perry, Thomas Cadeau (Bull), Danny Auble (SchedMD)</li> + +<!-- DO NOT POST +<li>Technical: <a href="SUG13/tbd.pdf">Evaluation of Monitoring and Control Features for Power Management</a>, +Yiannis Georgiou , Thomas Cadeau (Bull), Danny Auble, Moe Jette (SchedMD) Matthieu Hautreux (CEA)</li> --> + +<li>Technical: <a href="SUG13/debugging-slurm-0.4.pdf">Debugging Large 
Machines</a>, +Matthieu Hautreux (CEA)</li> + +<li>Technical: <a href="SUG13/nice_ef.pdf">Creating easy to use HPC portals with NICE EnginFrame and Slurm</a>, +Alberto Falzone, Paolo Maggi (Nice Software)</li> + +<!-- DO NOT POST +<li>Technical: <a href="SUG13/tbd.pdf">Slurm evaluation using emulation and replay of real workload traces</a>, +David Glesser, Yiannis Georgiou, Joseph Emeras, Olivier Richard (Bull)</li> --> + +<li>Tutorial: <a href="SUG13/profile_hdf5.pdf">Usage of new profiling functionalities</a>, + Rod Schultz, Yiannis Georgiou (Bull) Danny Auble (SchedMD)</li> + +<!-- day 2 --> + +<li>Technical: <a href="SUG13/nonstop.pdf">Fault Tolerant Workload Management</a>, +David Bigagli, Morris Jette (SchedMD)</li> + +<li>Technical: <a href="SUG13/layouts_framework.pdf">Slurm Layouts Framework</a>, +Yiannis Georgiou (Bull) Matthieu Hautreux (CEA)</li> + +<li>Technical: <a href="SUG13/license_management.pdf">License Management</a>, +Bill Brophy (Bull)</li> + +<li>Technical: <a href="SUG13/Mslurm.pdf">Multi-Cluster Management</a>, +Juan Pancorbo Armada (IRZ)</li> + +<!-- CANCELLED +<li>Technical: <a href="SUG13/cray_xc30.pdf">Preparing Slurm for use on the Cray XC30</a>, +Stephen Trofinoff, Colin McMurtrie (CSCS)</li> --> + +<li>Technical: <a href="SUG13/fairshare-improvement-0.4.pdf"> +Depth Oblivious Hierarchical Fairshare Priority Factor</a>, +Francois Diakhate, Matthieu Hautreux (CEA)</li> + +<li>Technical: <a href="SUG13/Refactor_ALPS.pdf">Refactoring ALPS</a>, +Dave Wallace (Cray)</li> + +<li>Site Report: <a href="SUG13/cea-site-report-0.6.pdf">CEA</a>, +Francois Diakhate, Francis Belot, Matthieu Hautreux (CEA)</li> + +<li>Site Report: <a href="SUG13/GWU_site.pdf">George Washington University</a>, +Tim Wickberg (George Washington University)</li> + +<li>Site Report: <a href="SUG13/BYU_site.pdf">Brigham Young University</a>, +Ryan Cox (BYU)</li> + +<!-- DO NOT POST +<li>Site Report: <a href="SUG13/desres-site.pdf">D.E. 
Shaw Research</a>, +Doug Hughes, Chris Harwell, Eric Radman, Goran Pocina, Michael Fenn + (D.E. Shaw Research)</li> --> + +<li>Site Report: <a href="SUG13/dresden_site.pdf">Technische Universitat Dresden</a>, +Dr. Ulf Markwardt (Technische Universitat Dresden)</li> + +<li>Technical: <a href="SUG13/roadmap.pdf">Slurm Roadmap</a>, +Morris Jette, Danny Auble (SchedMD), Yiannis Georgiou (Bull)</li> + +</ul> + <!-- SC12 Slurm BOF--> <li><b>Presentations from Slurm Birds of a Feather, SuperComputing 2012, November 2012</b></li> <ul> @@ -109,7 +188,7 @@ Joseph Emeras, INRIA/LIG</li> <li><a href="pdfs/LCS_cgroups_BULL.pdf">Resource Management with Linux Control Groups in HPC Clusters</a> Yiannis Georgiou, Bull -(6th Linux Collaboration Summit, April 2012)</li> +(6th Linux Collaboration Summit, April 2012)</li><br> <li><b>Presentations from SLURM Birds Of a Feather, SuperComputing 2011, November 2011</b></li> @@ -156,7 +235,7 @@ Moe Jette, SchedMD LLC <!-- day 2 --> -<li><a href="slurm_ug_2011/SLURM-Keynote-v.pdf">Challenges and Opportunities for Exscale +<li><a href="slurm_ug_2011/SLURM-Keynote-v.pdf">Challenges and Opportunities for Exascale Resource Management and How Today's Petascale Systems are Guiding the Way</a>, William Kramer, NCSA </li> @@ -200,7 +279,7 @@ Danny Auble, SchedMD LLC <!-- Use LLNL-PRES-461787 --> <li><a href="pdfs/slurm_sc10_bof.pdf">SLURM Version 2.2: Features and Release Plans</a>, Morris Jette, Danny Auble and Donald Lipari, Lawrence Livermore National Laboratory -(Supercomputing 2010, November 2010)</li> +(Supercomputing 2010, November 2010)</li><br> <li><b>Presentations from SLURM User Group Meeting, October 2010</b></li> <ul> @@ -245,33 +324,33 @@ Morris Jette and Danny Auble, Lawrence Livermore National Laboratory <!-- Use LLNL-PRES-402832 --> <li><a href="pdfs/slurm_sc09_bof.pdf">SLURM Community Meeting</a>, Morris Jette, Danny Auble and Don Lipari, Lawrence Livermore National Laboratory -(Supercomputing 2009, November 2009)</li> 
+(Supercomputing 2009, November 2009)</li><br> <!-- Use LLNL-PRES-408498 --> <li><a href="pdfs/slurm.sc08.bof.pdf">High Scalability Resource Management with SLURM</a>, Morris Jette, Lawrence Livermore National Laboratory -(Supercomputing 2008, November 2008)</li> +(Supercomputing 2008, November 2008)</li><br> <!-- Use LLNL-PRES-408510 --> <li><a href="pdfs/slurm.sc08.status.pdf">SLURM Status Report</a>, Morris Jette and Danny Auble, Lawrence Livermore National Laboratory -(Supercomputing 2008, November 2008)</li> +(Supercomputing 2008, November 2008)</li><br> <!-- Use LLNL-PRES-402832 --> <li><a href="pdfs/slurm_v1.3.pdf">SLURM Version 1.3</a>, Morris Jette and Danny Auble, Lawrence Livermore National Laboratory -(May 2008)</li> +(May 2008)</li><br> <!-- Use LLNL-PRES-403148 --> <li><a href="pdfs/slurm_moab.pdf">Managing Clusters with Moab and SLURM</a>, Morris Jette and Donald Lipari, Lawrence Livermore National Laboratory -(May 2008)</li> +(May 2008)</li><br> <!-- Use UCRL-PRES-230170 --> <li><a href="pdfs/slurm_v1.2.pdf">Resource Management at LLNL, SLURM Version 1.2</a>, Morris Jette, Danny Auble and Chris Morrone, Lawrence Livermore National Laboratory -(April 2007)</li> +(April 2007)</li><br> <!-- Use UCRL-PRES-219562 --> <li><a href="pdfs/lci.7.tutorial.pdf">Resource Management Using SLURM</a>, @@ -283,7 +362,11 @@ Morris Jette, Lawrence Livermore National Laboratory <ul> -<li>GreenSpot: Scheduling Energy Consumption in Green Datacenters, +<li>Energy Accounting and Control with SLURM Resource and Job Management System, +Yiannis Georgiou, et. al. +(ICDCN 2014, January 2014)</li> + +<li>GreenSlot: Scheduling Energy Consumption in Green Datacenters, Inigo Goiri, et. al. 
(SuperComputing 2011, November 2011)</li> @@ -331,6 +414,6 @@ Danny Auble of LLNL about SLURM.</p> Learning Chef: Compute Cluter with SLURM</a> A SLURM Cookbook by Adam DeConinck</p> -<p style="text-align:center;">Last modified 4 February 2013</p> +<p style="text-align:center;">Last modified 30 September 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/qos.shtml b/doc/html/qos.shtml index 3e1d3705d958b07c3ba7dacb2d8001265bc053b1..3b268d5dcdccb61b321993b9ce4d5c16dd6de47c 100644 --- a/doc/html/qos.shtml +++ b/doc/html/qos.shtml @@ -74,30 +74,95 @@ will take precedence over the association's limits. <P> Here are the limits that will be imposed on jobs running under a QOS</P> -<UL> -<LI><b>GrpCpus</b> Maximum number of CPU's all jobs with this QOS can be allocated. -<LI><b>GrpCPUMins</b> A hard limit of cpu minutes to be used by jobs +<ul> +<li><b>GrpCpus</b> The total count of cpus able to be used at any given + time from jobs running from this QOS. If this limit is reached new + jobs will be queued but only allowed to run after resources have been + relinquished from this group. +</li> + +<li><b>GrpCPUMins</b> A hard limit of cpu minutes to be used by jobs running from this QOS. If this limit is reached all jobs running in this group will be killed, and no new jobs will be allowed to run. -<LI><b>GrpCPURunMins</b> Maximum number of CPU minutes all jobs +</li> + +<li><b>GrpCPURunMins</b> Maximum number of CPU minutes all jobs running with this QOS can run at the same time. This takes into consideration time limit of running jobs. If the limit is reached no new jobs are started until other jobs finish to allow time to free up. -<LI><b>GrpJobs</b> Maximum number of jobs that can run with this QOS. -<LI><b>GrpMemory</b> Maximum amount of memory (MB) all jobs with this QOS can be allocated. -<LI><b>GrpNodes</b> Maximum number of nodes that can be allocated to all jobs with this QOS. 
-<LI><b>GrpSubmitJobs</b> Maximum number of jobs with this QOS that can be in the system (no matter what state). -<LI><b>GrpWall</b> Wall clock limit for all jobs running with this QOS. -<LI><b>MaxCpusPerJob</b> Maximum number of CPU's any job with this QOS can be allocated. -<LI><b>MaxCPUMinsPerJob</b> Maximum number of CPU*minutes any job with this QOS can run. -<LI><b>MaxNodesPerJob</b> Maximum number of nodes that can be allocated to any job with this QOS. -<LI><b>MaxWallDurationPerJob</b> Wall clock limit for any jobs running with this QOS. -<LI><b>MaxCpusPerUser</b> Maximum number of CPU's any user with this QOS can be allocated. -<LI><b>MaxJobsPerUser</b> Maximum number of jobs a user can run with this QOS. -<LI><b>MaxNodesPerUser</b> Maximum number of nodes that can be allocated to any user with this QOS. -<LI><b>MaxSubmitJobsPerUser</b> Maximum number of jobs with this QOS that can be in the system. -</UL> +</li> + +<li><b>GrpJobs</b> +The total number of jobs able to run at any given time from this QOS. +If this limit is reached new jobs will be queued but only allowed to +run after previous jobs complete from this group. +</li> + +<li><b>GrpMemory</b> +The total amount of memory (MB) able to be used at any given time from +jobs running from this QOS. If this limit is reached new jobs will be queued but only +allowed to run after resources have been relinquished from this group. +</li> + +<li><b>GrpNodes</b> +The total count of nodes able to be used at any given time from jobs +running from this QOS. If this limit is reached new jobs will be queued +but only allowed to run after resources have been relinquished from this group. +</li> + +<li><b>GrpSubmitJobs</b> +The total number of jobs able to be submitted to the system at any given time + from this QOS. If this limit is reached new submission requests will be denied +until previous jobs complete from this group. 
+</li> + +<li><b>GrpWall</b> +The maximum wall clock time any job submitted to this group can run for. +If this limit is reached submission requests will be denied and the +running jobs will be killed. +</li> + +<li><b>MaxCpusPerJob</b> +The maximum size in cpus any given job can have from this QOS. If this limit +is reached the job will be denied at submission. +</li> + +<li><b>MaxCPUMinsPerJob</b> +Maximum number of CPU*minutes any job with this QOS can run. Jobs submitted +requesting time bigger than the QOS limit will pend, possibly indefinitely. +</li> + +<li><b>MaxNodesPerJob</b> +The maximum size in nodes any given job can have from this QOS. +If this limit is reached the job will be denied at submission. +</li> + +<li><b>MaxWallDurationPerJob</b> +The maximum wall clock time any job submitted to this QOS can run for. +If this limit is reached the job will be denied at submission. +</li> + +<li><b>MaxCpusPerUser</b> +Maximum number of CPU's any user with this QOS can be allocated. +If this limit is reached the job will be denied at submission. +</li> + +<li><b>MaxJobsPerUser</b> +Maximum number of jobs a user can run with this QOS. +If this limit is reached the job will be denied at submission. +</li> + +<li><b>MaxNodesPerUser</b> +Maximum number of nodes that can be allocated to any user with this QOS. +If this limit is reached the job will be denied at submission. +</li> + +<li><b>MaxSubmitJobsPerUser</b> +Maximum number of jobs with this QOS that can be in the system. +If this limit is reached the job will be denied at submission. 
+</li> +</ul> <a name=qos_other> <h2>Other QOS Options</h2></a> diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml index 366933bf0ef3556d41d9c2039866e1d9ed00e245..348efab8a0cf68f32cd392913e61d6bea300b9c1 100644 --- a/doc/html/quickstart.shtml +++ b/doc/html/quickstart.shtml @@ -54,7 +54,7 @@ or several job steps may independently use a portion of the allocation.</p> <h2>Commands</h2> <p>Man pages exist for all SLURM daemons, commands, and API functions. The command option <span class="commandline">--help</span> also provides a brief summary of -options. Note that the command options are all case insensitive.</p> +options. Note that the command options are all case sensitive.</p> <p><span class="commandline"><b>sacct</b></span> is used to report job or job step accounting information about active or completed jobs.</p> diff --git a/doc/html/quickstart_admin.shtml b/doc/html/quickstart_admin.shtml index a17ff45e37ec9beec203af2c7962662339b9ea77..d6f573649c576983ee9e1ee505ddbd69ddc4b9e6 100644 --- a/doc/html/quickstart_admin.shtml +++ b/doc/html/quickstart_admin.shtml @@ -28,7 +28,13 @@ and must exist on all nodes of the cluster.<br> NOTE: The parent directories for SLURM's log files, process ID files, state save directories, etc. are not created by SLURM. They must be created and made writable by <i>SlurmUser</i> as needed prior to -starting SLURM daemons.</li> +starting SLURM daemons.<br> +NOTE: If any parent directories are created during the installation process +(for the executable files, libraries, etc.), +those directories will have access rights equal to read/write/execute for +everyone minus the umask value (e.g. 
umask=0022 generates directories with +permissions of "drwxr-xr-x" and umask=0000 generates directories with +permissions of "drwxrwxrwx" which is a security problem).</li> <li>Install the configuration file in <i><sysconfdir>/slurm.conf</i>.<br> NOTE: You will need to install this configuration file on all nodes of the cluster.</li> <li>Start the <i>slurmctld</i> and <i>slurmd</i> daemons.</li> @@ -79,29 +85,6 @@ and commands are denoted below. <li> <b>Authd</b> The auth/authd plugin will be built and installed if the libauth library and its dependency libe are installed. </li> -<li> <b>Federation</b> The switch/federation plugin will be built and installed - if the IBM Federation switch library is installed. -<li> <b>QsNet</b> support in the form of the switch/elan plugin requires - that the qsnetlibs package (from Quadrics) be installed along - with its development counterpart (i.e. the qsnetheaders - package.) The switch/elan plugin also requires the - presence of the libelanosts library and /etc/elanhosts - configuration file. (See elanhosts(5) man page in that - package for more details). Define the nodes in the SLURM - configuration file <i>slurm.conf</i> in the same order as - defined in the <i>elanhosts</i> configuration file so that - node allocation for jobs can be performed so as to optimize - their performance. We highly recommend assigning the nodes - a numeric suffix equal to its Elan address for ease of - administration and because the Elan driver does not seem - to function otherwise - (e.g. /etc/elanhosts to contain two lines of this sort:<br> - eip [0-15] linux[0-15]<br> - eth [0-15] linux[0-15]<br> - for fifteen nodes with a prefix of "linux" and - numeric suffix between zero and 15). Finally, the - "ptrack" kernel patch is required for process - tracking. 
<li> <b>sview</b> The sview command will be built only if and <i>gtk+-2.0</i> is installed</li> </ul> @@ -656,38 +639,54 @@ or the full test suite may be executed with the single command <i>regression</i>. See <i>testsuite/expect/README</i> for more information.</p> -<h2>Upgrades</h2> - -<p>Background: The SLURM version numbers contain three digits, which represent -the major, minor and micro release numbers in that order (e.g. 2.1.3 is -major=2, minor=1, micro=3). -Changes in the RPCs (remote procedure calls) will only be made if the major -and/or minor release number changes. +<a name="upgrade"><h2>Upgrades</h2></a> + +<p>Background: The Slurm version numbers contain three digits, which represent +the major, minor and micro release numbers in that order (e.g. 2.5.3 is +major=2, minor=5, micro=3). +Changes in the RPCs (remote procedure calls) and state files will only be made +if the major and/or minor release number changes. +Slurm daemons will support RPCs and state files from the two previous minor +releases (e.g. a version 2.6.x SlurmDBD will support slurmctld daemons and +commands with a version of 2.4.x or 2.5.x). +This means that upgrading at least once each year is strongly recommended. +Otherwise, intermediate upgrades will be required to preserve state information. Changes in the micro release number generally represent only bug fixes, -but may also include minor enhancements.</p> +but may also include very minor enhancements.</p> <p>If the SlurmDBD daemon is used, it must be at the same or higher minor release number as the Slurmctld daemons. In other words, when changing the version to a higher release number (e.g -from 2.0 to 2.1) <b>always upgrade the SlurmDBD daemon first</b>.</p> - -<p>When upgrading to a new major or minor release of SLURM <u>prior to version -2.2</u> (e.g. 2.0.x to 2.1.x) all running and pending jobs will be purged due to -changes in state save information. -When upgrading to a new micro release of SLURM (e.g. 
2.1.1 to 2.1.2) all -running and pending jobs will be preserved. Just install a new version of -SLURM and restart the daemons. -When going from version 2.1.x to version 2.2.x and higher version numbers, -we do not expect that any running or pending jobs will be lost although a -limited number of prior releases may be supported (e.g. 2.1.0 to 2.2.0 will -work fine, but 2.1.0 to 2.9.0 may not). +from 2.4 to 2.5) <b>always upgrade the SlurmDBD daemon first</b>. +The slurmctld daemon must also be upgraded before or at the same time as +the slurmd daemons on the compute nodes. +Generally upgrading Slurm on all of the login and compute nodes is recommended, +although rolling upgrades are also possible (i.e. upgrading the head node(s) +first then upgrading the compute and login nodes later at various times). +Also see the note above about reverse compatibility.</p> + +<p>Pretty much each new major and/or minor release of Slurm (e.g. 2.4.x to 2.5.x) +involves changes to the state files with new data structures, new options, etc. +Slurm permits upgrades of up to two major or minor updates (e.g. 2.4.x or 2.5.x +to 2.6.x) without loss of jobs or other state information, but the state +information from older state file versions will not be recognized and will be +discarded, resulting in loss of all running and pending jobs. +State files are not recognized when downgrading (e.g. from 2.5.x to 2.4.x) +and will be discarded, resulting in loss of all running and pending jobs. +Therefore when upgrading Slurm (more precisely, the slurmctld daemon), +saving the <i>StateSaveLocation</i> (as defined in <i>slurm.conf</i>) +directory contents with all state information is recommended. +If you need to downgrade, restoring that directory's contents will let you +recover the jobs. +Jobs submitted under the new version will not be in those state files, +but it can let you recover most jobs. An exception to this is that jobs may be lost when installing new pre-release -versions (e.g.
2.3.0-pre1 to 2.3.0-pre2). We'll try to note these cases -in the NEWS file. +versions (e.g. 2.5.0-pre1 to 2.5.0-pre2). +We'll try to note these cases in the NEWS file. Contents of major releases are also described in the RELEASE_NOTES file.</p> </pre> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 26 July 2012</p> +<p style="text-align:center;">Last modified 29 August 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/reservations.shtml b/doc/html/reservations.shtml index b288f7e040cf159f2bf96801d6af7b9fe4f878bd..e7a509b6fd7f6f131707bf71630c3984deff4dde 100644 --- a/doc/html/reservations.shtml +++ b/doc/html/reservations.shtml @@ -6,14 +6,15 @@ being executed by select users and/or select bank accounts. A resource reservation identifies the resources in that reservation and a time period during which the reservation is available. -The resources which can be reserved include nodes and/or licenses. +The resources which can be reserved include cores, nodes and/or licenses. Note that resource reservations are not compatible with SLURM's gang scheduler plugin since the termination time of running jobs -is not possible to accurately predict.</p> +cannot be accurately predicted.</p> <p>Note that reserved licenses are treated somewhat differently than reserved -nodes. When nodes are reserved, then jobs using that reservation can use only -those nodes and no other jobs can use those nodes. Reserved licenses can only +cores or nodes. When cores or nodes are reserved, then jobs using that +reservation can use only those resources and no other jobs can use those +resources. Reserved licenses can only be used by jobs associated with that reservation, but licenses not explicitly reserved are available to any job. 
This eliminates the need to explicitly put licenses into every advanced reservation created.</p> @@ -38,14 +39,13 @@ The "maint" flag is used to identify the reservation for accounting purposes as system maintenance. The "ignore_jobs" flag is used to indicate that we can ignore currently running jobs when creating this reservation. -By default, only nodes which are not expected to have a running job +By default, only resources which are not expected to have a running job at the start time can be reserved (the time limit of all running jobs will have been reached). In this case we can manually cancel the running jobs as needed to perform system maintenance. As the reservation time approaches, -only jobs that can complete by the reservation time will be -initiated.</p> +only jobs that can complete by the reservation time will be initiated.</p> <pre> $ scontrol create reservation starttime=2009-02-06T16:00:00 \ duration=120 user=root flags=maint,ignore_jobs nodes=ALL @@ -62,7 +62,7 @@ ReservationName=root_3 StartTime=2009-02-06T16:00:00 <p>A variation of this would be to configure license to represent system resources, such as a global file system. -The system resource may not may not require an actual license for use, but +The system resource may not require an actual license for use, but SLURM licenses can be used to prevent jobs needed the resource from being started when that resource is unavailable. One could create a reservation for all of those licenses in order to perform @@ -148,32 +148,25 @@ ReservationName=alan_8 StartTime=2011-12-05T12:00:00 </pre> <p>Note that specific nodes to be associated with the reservation are -made immediately after creation of the reservation. This permits +identified immediately after creation of the reservation. This permits users to stage files to the nodes in preparation for use during the reservation. 
Note that the reservation creation request can also identify the partition from which to select the nodes or _one_ feature that every selected node must contain.</p> -<p>On a smaller system, one might want to reserve specific CPUs rather than -whole nodes. While the resolution of SLURM's resource reservation is that of -whole nodes, one might configure each CPU as a license to SLURM and reserve -those instead (we understand this is a kludge, but it does provide a way to -work around this shortcoming in SLURM's code). Proper enforcement then requires -that each job request one "cpu" license for each CPU to be allocated, which -can be accomplished by an appropriate job_submit plugin. In the example below, -we configure the system with one license named "cpu" for each CPU in the -system, 64 in this example, then create a reservation for 32 CPUs. The -user developed job_submit plugin would then explicitly set the job's -licenses field to require one "cpu" for each physical CPU required to satisfy -the request.</p> +<p>On a smaller system, one might want to reserve cores rather than +whole nodes. Slurm provides a core reservation capability in version 2.6. +This capability permits the administrator to identify the core count to be +reserved on each node as shown in the examples below.</p> <pre> -$ scontrol show configuration | grep Licenses -Licenses = cpu:64 - -$ scontrol create reservation starttime=2009-04-06T16:00:00 \ - duration=120 user=bob flags=license_only \ - licenses=cpu:32 -Reservation created: bob_5 +# Create a two core reservation for user alan +$ scontrol create reservation StartTime=now Duration=60 \ + NodeCnt=1 CoreCnt=2 User=alan + +# Create a reservation for user brenda with two cores on +# node tux8 and 4 cores on node tux9 +$ scontrol create reservation StartTime=now Duration=60 \ + Nodes=tux8,tux9 CoreCnt=2,4 User=brenda </pre> <p>Reservations can not only be created for the use of specific accounts and @@ -207,7 +200,7 @@ reservation name.
The job must be contained completely within the named reservation. The job will be canceled after the reservation reaches its EndTime. If letting the job continue execution after the reservation EndTime, a configuration option <i>ResvOverRun</i> -can be set to control how long the job can continue execution.</p> +in slurm.conf can be set to control how long the job can continue execution.</p> <pre> $ sbatch --reservation=alan_6 -N4 my.script sbatch: Submitted batch job 65540 @@ -309,20 +302,23 @@ associated with the reservation on an equal basis (e.g. if two users are eligible to use a reservation and neither does, each user will be reported to have used half of the reserved resources).</p> +<h2>Prolog and Epilog</h2> + +<p>Slurm supports both a reservation prolog and epilog. +They may be configured using the <b>ResvProlog</b> and <b>ResvEpilog</b> +configuration parameters in the slurm.conf file. +These scripts can be used to cancel jobs, modify partition configuration, +etc.</p> + <h2>Future Work</h2> -<p>Several enhancements are anticipated at some point in the future. -<ol> -<li>Reservations made within a partition having gang scheduling assumes +<p>Reservations made within a partition having gang scheduling assumes the highest level rather than the actual level of time-slicing when considering the initiation of jobs. 
This will prevent the initiation of some jobs which would complete execution -before a reservation given fewer jobs to time-slice with.</li> -<li>Add support to reserve specific CPU counts rather than require whole -nodes be reserved (work around described above).</li> -</ol> +before a reservation given fewer jobs to time-slice with.</p> -<p style="text-align: center;">Last modified 29 October 2012</p> +<p style="text-align: center;">Last modified 13 August 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/resource_limits.shtml b/doc/html/resource_limits.shtml index cacdadc834233c59437979cbc8eaf377d8dabc5a..c233ef9684504e91bd49f5b5f6bb4981ff4c9946 100644 --- a/doc/html/resource_limits.shtml +++ b/doc/html/resource_limits.shtml @@ -2,16 +2,10 @@ <h1>Resource Limits</h1> -<p>SLURM scheduling policy support was significantly changed -in version 2.0 in order to take advantage of the database -integration used for storing accounting information. -This document describes the capabilities available in -SLURM version 2.0. -New features are under active development. -Familiarity with SLURM's <a href="accounting.html">Accounting</a> web page +<p>Familiarity with SLURM's <a href="accounting.html">Accounting</a> web page is strongly recommended before use of this document.</p> -<p>Note for users of Maui or Moab schedulers: <br> +<p><b>Note for users of Maui or Moab schedulers:</b><br> Maui and Moab are not integrated with SLURM's resource limits, but should use their own resource limits mechanisms.</p> @@ -160,8 +154,8 @@ specified then no limit will apply.</p> </li> <li><b>GrpWall=</b> The maximum wall clock time any job submitted to - this group can run for. If this limit is reached submission requests - will be denied. + this group can run for. If this limit is reached submission requests + will be denied and the running jobs will be killed. 
</li> <li><b>MaxCPUsPerJob=</b> The maximum size in cpus any given job can @@ -216,6 +210,6 @@ data maintained in the SLURM database. More information can be found in the <a href="priority_multifactor.html">priority/multifactor</a> plugin description.</p> -<p style="text-align: center;">Last modified 30 October 2012</p> +<p style="text-align: center;">Last modified 12 August 2013</p> </ul></body></html> diff --git a/doc/html/slurm.shtml b/doc/html/slurm.shtml index 14f2368d7beefddc9a4c58d98051d447475cbe66..16b09b67b0cd8d1530f1c2360cb6c3345394d9f0 100644 --- a/doc/html/slurm.shtml +++ b/doc/html/slurm.shtml @@ -16,10 +16,7 @@ pending work. </p> In its simplest configuration, it can be installed and configured in a couple of minutes (see <a href="http://www.linux-mag.com/id/7239/1/"> Caos NSA and Perceus: All-in-one Cluster Software Stack</a> -by Jeffrey B. Layton) and has been used by -<a href="http://www.intel.com/">Intel</a> for their 48-core -<a href="http://www.hpcwire.com/features/Intel-Unveils-48-Core-Research-Chip-78378487.html"> -"cluster on a chip"</a>. +by Jeffrey B. Layton). More complex configurations can satisfy the job scheduling needs of world-class computer centers and rely upon a <a href="http://www.mysql.com/">MySQL</a> database for archiving @@ -56,13 +53,23 @@ help identify load imbalances and other anomalies.</li> </ul></p> <p>Slurm provides workload management on many of the most powerful computers in -the world including: +the world. On the June 2013 <a href="http://www.top500.org">Top500</a> list, +five of the ten top systems use Slurm including the number one system. +These five systems alone contain over 5.7 million cores. 
+A few of the systems using Slurm are listed below: <ul> +<li><a href="http://www.top500.org/blog/lists/2013/06/press-release/"> +Tianhe-2</a> designed by +<a href="http://english.nudt.edu.cn">The National University of Defense Technology (NUDT)</a> +in China has 16,000 nodes, each with two Intel Xeon IvyBridge processors and +three Xeon Phi processors for a total of 3.1 million cores and a peak +performance of 33.86 Petaflops.</li> + <li><a href="https://asc.llnl.gov/computing_resources/sequoia/">Sequoia</a>, an <a href="http://www.ibm.com">IBM</a> BlueGene/Q system at <a href="https://www.llnl.gov">Lawrence Livermore National Laboratory</a> with 1.6 petabytes of memory, 96 racks, 98,304 compute nodes, and 1.6 -million cores, with a peak performance of over 20 Petaflops.</li> +million cores, with a peak performance of over 17.17 Petaflops.</li> <li><a href="http://www.tacc.utexas.edu/stampede">Stampede</a> at the <a href="http://www.tacc.utexas.edu">Texas Advanced Computing Center/University of Texas</a> @@ -70,13 +77,7 @@ is a <a herf="http://www.dell.com">Dell</a> with over 80,000 <a href="http://www.intel.com">Intel</a> Xeon cores, Intel Phi co-processors, plus 128 <a href="http://www.nvidia.com">NVIDIA</a> GPUs -delivering 2.66 Petaflops.</li> - -<li><a href="http://www.nytimes.com/2010/10/28/technology/28compute.html?_r=1&partner=rss&emc=rss"> -Tianhe-1A</a> designed by -<a href="http://english.nudt.edu.cn">The National University of Defense Technology (NUDT)</a> -in China with 14,336 Intel CPUs and 7,168 NVDIA Tesla M2050 GPUs, -with a peak performance of 2.507 Petaflops.</li> +delivering 5.17 Petaflops.</li> <li><a href="http://www-hpc.cea.fr/en/complexe/tgcc-curie.htm">TGCC Curie</a>, owned by <a href="http://www.genci.fr">GENCI</a> and operated in the TGCC by @@ -112,6 +113,6 @@ named after Monte Rosa in the Swiss-Italian Alps, elevation 4,634m. 
</ul> -<p style="text-align:center;">Last modified 7 December 2012</p> +<p style="text-align:center;">Last modified 1 July 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/slurm_ug_agenda.shtml b/doc/html/slurm_ug_agenda.shtml index ce914aa9dfe5a063ebf8ddac070613866ce35269..2bafc1d7c674bfa9594b676da83fe322a1982508 100644 --- a/doc/html/slurm_ug_agenda.shtml +++ b/doc/html/slurm_ug_agenda.shtml @@ -1,462 +1,603 @@ <!--#include virtual="header.txt"--> - <a href="http:///www.bull.com" target="_blank"><img src="bull.jpg" style="float: right;" border="0"></a></p> - - <h1>Slurm User Group Meeting 2011</h1> - - <p>Hosted by <a href="http:///www.bull.com">Bull</a> - - <h1>Agenda</h1> - - <p> - The 2011 SLURM User Group Meeting will be held on September 22 and 23 - in Phoenix, Arizona and will be hosted by Bull. - On September 22 there will be two parallel tracks of tutorials meeting in separate rooms. - One set of tutorials will be for users and the other will be for system administrators. - There will be a series of technical presentations on September 23. - The <a href="#schedule">Schedule</a> amd <a href="#abstracts">Abstracts</a> - are shown below. - </p> - - <h2>Hotel Information</h2> - <p>The meeting will be held at - <a href="http://embassysuites1.hilton.com/en_US/es/hotel/PHXNOES-Embassy-Suites-Phoenix-North-Arizona/index.do">Embassy Suites Phoenix - North</a> - 2577 West Greenway Road, Phoenix, Arizona, USA (Phone: 1-602-375-1777 Fax: 1-602-375-4012). - You may book your reservations on line at - <a href="http://embassysuites1.hilton.com/en_US/es/hotel/PHXNOES-Embassy-Suites-Phoenix-North-Arizona/index.do">Embassy Suites Phoenix - North</a><p></p> - - <p>Please reference Bull when making your reservations to receive a $79/room rate.</p> - - <h2>Directions and Transportation</h2> - <p>From Phoenix Sky Harbor Airport, take I-10 west to I-17 North. - Follow I-17 to the Greenway Road, exit 211 approximately 15 miles. 
- Exit and turn right, 1/8th of a mile on the right is the hotel entrance.</p> - <p><a href="http://embassysuites1.hilton.com/en_US/es/hotel/PHXNOES-Embassy-Suites-Phoenix-North-Arizona/directions.do;jsessionid=DDD31DD6EFFAF2D32299955C321976F3.etc83"> - View all directions, map, and airport information</a></p> - - <h2>Contact</h2> - <p>If you need further informations about the event, or the - registration protocols, contact the - <a href="mailto:Nancy.Kritkausky@bull.com?subject=Informations"> - <b>Slurm User Group 2011</b></a> organizers.<br> - - - <h2>Registration</h2> - <p>Please <a href="slurm_ug_registration.html">register</a> online no later - than August 22.</p> - - <a name="schedule"><h1>Schedule</h1></a> - - <h2>September 22: User Tutorials.</h2> - - <table width="100%" border=1 cellspacing=0 cellpadding=0> - - <tr> - <th width="15%">Time</th> - <th width="15%">Theme</th> - <th width="25%">Speaker</th> - <th width="45%">Title</th> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">08:30 - 09:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Registration</td> - </tr> - - <tr> - <td width="15%">09:00 - 10:30</td> - <td width="15%"> User Tutorial #1</td> - <td width="25%"> Don Albert and Rod Schultz (Bull)</td> - <td width="45%"> SLURM: Beginners Usage</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">10:30 - 11:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> - </tr> - - <tr> - <td width="15%">11:00 - 12:30</td> - <td width="15%"> User Tutorial #2</td> - <td width="25%"> Bill Brophy, Rod Schultz, Yiannis Georgiou (Bull)</td> - <td width="45%"> SLURM: Advanced Usage Usage</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">12:30 - 14:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Lunch at conference center</td> - </tr> - - <tr> - <td width="15%">14:00 - 15:30</td> - <td width="15%"> User Tutorial #3</td> - <td width="25%"> Martin Perry and Yiannis Georgiou (Bull)</td> - <td width="45%"> Resource 
Management for multicore/multi-threaded usage</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">15:30 - 16:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> - </tr> - - <tr> - <td width="15%">16:00 - 17:00</td> - <td width="15%"> Question and Answer</td> - <td width="25%"> Danny Auble and Morris Jette (SchedMD)</td> - <td width="45%"> Get your questions answered by the developers</td> - </tr> - - </table> - - <h2>September 22: System Administrator Tutorials.</h2> - - <table width="100%" border=1 cellspacing=0 cellpadding=0> - - <tr> - <th width="15%">Time</th> - <th width="15%">Theme</th> - <th width="25%">Speaker</th> - <th width="45%">Title</th> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">08:30 - 09:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Registration</td> - </tr> - - <tr> - <td width="15%">09:00 - 10:30</td> - <td width="15%"> Admin Tutorial #1</td> - <td width="25%"> David Egolf and Bill Brophy (Bull)</td> - <td width="45%"> SLURM High Availability</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">10:30 - 11:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> - </tr> - - <tr> - <td width="15%">11:00 - 12:30</td> - <td width="15%"> Admin Tutorial #2</td> - <td width="25%"> Dan Rusak (Bull)</td> - <td width="45%"> Power Management / sview</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">12:30 - 14:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Lunch at conference center</td> - </tr> - - <tr> - <td width="15%">14:00 - 15:30</td> - <td width="15%"> Admin Tutorial #3</td> - <td width="25%"> Don Albert and Rod Schultz (Bull)</td> - <td width="45%"> Accounting, limits and Priorities configurations</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">15:30 - 16:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> - </tr> - - <tr> - <td width="15%">16:00 - 17:30</td> - <td width="15%"> Admin Tutorial #4</td> - <td width="25%"> Matthieu 
Hautreux (CEA), Yiannis Georgiou and Martin Perry (Bull)</td> - <td width="45%"> Scalability, Scheduling and Task placement</td> - </tr> - - </table> - - <h2>September 23: Technical Session</h2> - - <table width="100%" border=1 cellspacing=0 cellpadding=0> - - <tr> - <th width="15%">Time</th> - <th width="15%">Theme</th> - <th width="25%">Speaker</th> - <th width="45%">Title</th> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">08:30 - 09:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Registration</td> - </tr> - - <tr> - <td width="15%" rowspan="4">09:00 - 10:40</td> - <td width="85%" colspan="3"> Welcome</td> - </tr> - - <tr> - <td width="15%"> Keynote</td> - <td width="25%"> William Kramer (NCSA)</td> - <td width="45%"> Challenges and Opportunities for Exscale Resource Management and how Today's Petascale Systems are Guiding the Way</td> - </tr> - <tr> - <td width="15%"> Session #1</td> - <td width="25%"> Matthieu Hautreux (CEA)</td> - <td width="45%"> SLURM at CEA</td> - </tr> - <tr> - <td width="15%"> Session #2</td> - <td width="25%"> Don Lipari (LLNL)</td> - <td width="45%"> LLNL site report</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">10:40 - 11:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> - </tr> - - <tr> - <td width="15%" rowspan="3">11:00 - 12:30</td> - <td width="15%"> Session #3</td> - <td width="25%"> Alejandro Lucero Palau (BSC)</td> - <td width="45%"> SLURM Simulator</td> - </tr> - <tr> - <td width="15%"> Session #4</td> - <td width="25%"> Danny Auble (SchedMD)</td> - <td width="45%"> SLURM operation on IBM BlueGene/Q</td> - </tr> - <tr> - <td width="15%"> Session #5</td> - <td width="25%"> Morris Jette (SchedMD)</td> - <td width="45%"> SLURM operation on Cray XT and XE</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">12:30 - 14:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Lunch at conference center</td> - </tr> - - <tr> - <td width="15%" rowspan="3">14:00 - 15:30</td> - 
<td width="15%"> Session #6</td> - <td width="25%"> Mariusz Mamoński (Poznań University)</td> - <td width="45%"> Introduction to SLURM DRMAA</td> - </tr> - <tr> - <td width="15%"> Session #7</td> - <td width="25%"> Robert Stober, Sr. (Bright Computing)</td> - <td width="45%"> Bright Cluster Manager & SLURM: Benefits of Seamless Integration</td> - </tr> - <tr> - <td width="15%"> Session #8</td> - <td width="25%"> Morris Jette (SchedMD)</td> - <td width="45%"> Proposed Design for Job Step Management in User Space</td> - </tr> - - <tr> - <td width="15%" bgcolor="#F0F1C9">15:30 - 16:00</td> - <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> - </tr> - - <tr> - <td width="15%" rowspan="3">16:00 - 17:30</td> - <td width="15%"> Session #9</td> - <td width="25%"> Don Lipari (LLNL)</td> - <td width="45%"> Proposed Design for Enhanced Enterprise-wide Scheduling</td> - </tr> - - <tr> - <td width="15%"> Session #10</td> - <td width="25%"> Danny Auble and Morris Jette (SchedMD)</td> - <td width="45%"> SLURM Version 2.3 and plans for future releases</td> - </tr> - - <tr> - <td width="85%" colspan="3"> Open discussion, feature requests, etc.</td> - </tr> - - </table> - - <br><br> - <a name="abstracts"><h1>Abstracts</h1></a> - - <h2>User Tutorial #1</h2> - SLURM Beginners Usage<br> - Don Albert and Rod Schultz (Bull) - <ul> - <li>Simple use of commands (submission/monitoring/result collection)</li> - <li>Reservations</li> - <li>Use of accounting and reporting</li> - <li>Scheduling techniques for smaller response time (setting of walltime for backfill , etc)</li> - </ul> - - <h2>User Tutorial #2</h2> - SLURM Advanced Usage<br> - Bill Brophy, Rod Schultz, Yiannis Georgiou (Bull) - <ul> - <li>MPI jobs</li> - <li>Checkpoint/Restart (BLCR or application level)</li> - <li>Preemption / Gang Scheduling Usage</li> - <li>Dynamic allocations (growing/shrinking)</li> - <li>Grace Time Delay with Preemption</li> - </ul> - - <h2>User Tutorial #3</h2> - Resource Management for 
multicore/multi-threaded usage<br> - Martin Perry and Yiannis Georgiou (Bull) - <ul> - <li>CPU allocation</li> - <li>CPU/tasks distribution</li> - <li>Task binding</li> - <li>Internals of the allocation procedures</li> - </ul> - - - <h2>Administrator Tutorial #1</h2> - SLURM High Availability<br> - David Egolf and Bill Brophy (Bull) - <ul> - <li>How to set up the High Availability SLURM</li> - <li>Event logging with striggers</li> - </ul> - - <h2>Administrator Tutorial #2</h2> - Power Management / Sview<br> - Dan Rusak (Bull) - <ul> - <li>Power Management configuration</li> - <li>sview presentation</li> - </ul> - - <h2>Administrator Tutorial #3</h2> - Accounting, limits and Priorities configurations<br> - Don Albert and Rod Schultz (Bull) - <ul> - <li>Accounting with slurmdbd configuration</li> - <li>Multifactor job priorities with examples considering all different factors</li> - <li>QOS configuration</li> - <li>Fairsharing setting</li> - </ul> - - <h2>Administrator Tutorial #4</h2> - Scalability, Scheduling and Task placement<br> - Matthieu Hautreux (CEA), Yiannis Georgiou and Martin Perry (Bull) - <ul> - <li>High Throughput Computing</li> - <li>Topology constraints config</li> - <li>Generic Resources and GPUs config</li> - <li>Task Placement with Cgroups</li> - </ul> - - <h2> Keynote Speaker</h2> - Challenges and Opportunities for Exscale Resource Management and how - Today's Petascale Systems are Guiding the Way<br> - William Kramer (NCSA)<br><br> - Resource management challenges currently experienced on the Blue Waters - computer will be described. These experiences will be extended to describe - the additional challenges faced in exascale and trans-petascale systems. - - <h2>Session #1</h2> - CEA Site report<br> - Matthieu Hautreux (CEA)<br><br> - Evolutions and feedback from Tera100. SLURM on Curie, the PRACE second Tier-0 - system that is planned to be installed by the end of the year in a new facility - hosted at CEA. 
Curie will be a 1.6 Petaflop system from Bull. - - <h2>Session #2</h2> - LLNL site report<br> - Don Lipari (LLNL)<br><br> - Don Lipari will provide an overview of the batch scheduling systems in use - at LLNL and an overview on how they are managed. - - <h2>Session #3</h2> - SLURM Simulator<br> - Alejandro Lucero Palau (BSC)<br><br> - Batch scheduling for high performance cluster installations has two main goals: - 1) to keep the whole machine working at full capacity at all times, and - 2) to respect priorities avoiding lower priority jobs jeopardizing higher - priority ones. Usually, batch schedulers allow different policies with - several variables to be tuned by policy. Other features like special job - requests, reservations or job preemption increase the complexity for achiev- - ing a fine-tuned algorithm. A local decision for a specific job can change - the full scheduling for a high number of jobs and what can be thought - as logical within a short term could make no sense for a long trace mea- - sured in weeks or months. Although it is possible to extract algorithms - from batch scheduling software to make simulations of large job traces, - this is not the ideal approach since scheduling is not an isolated part of - this type of tools and replicating same environment requires an important - effort plus a high maintenance cost. We present a method for obtaining a - special mode of operation for a real production-ready scheduling software, - SLURM, where we can simulate execution of real job traces to evaluate - impact of scheduling policies and policy tuning. - - <h2>Session #4</h2> - SLURM Operation on IBM BlueGene/Q<br> - Danny Auble (SchedMD)<br><br> - SLURM version 2.3 supports IBM BlueGene/Q. This presentation will report the - design and operation of SLURM with respect to BlueGene/Q systems. 
- - <h2>Session #5</h2> - SLURM Operation on Cray XT and XE systems<br> - Morris Jette (SchedMD)<br><br> - SLURM version 2.3 supports Cray XT and XE systems running over Cray's ALPS - (Application Level Placement Scheduler) resource manager. This presentation - will discuss the design and operation of SLURM with respect to Cray systems. - - <h2>Session #6</h2> - Introduction to SLURM DRMAA<br> - Mariusz Mamoński (Poznań University)<br><br> - DRMAA or Distributed Resource Management Application API is a high-level - Open Grid Forum API specification for the submission and control of jobs - in a Grid architecture. - - <h2>Session #7</h2> - Bright Cluster Manager & SLURM: Benefits of Seamless Integration<br> - Robert Stober, Sr. (Bright Computing)<br><br> - Bright Cluster Manager, tightly integrated with SLURM, simplifies HPC - cluster installation and management while boosting system throughput. Bright - automatically installs, configures and deploys SLURM so that clusters are - ready to use in minutes rather than days. Bright provides extensive and - extensible monitoring and management through its intuitive Bright Cluster - Manager GUI, powerful cluster management shell, and customizable web-based - user portal. - Additional integration benefits include sampling, analysis and visualization - of all key SLURM metrics from within the Bright GUI, automatic head node - failover, and extensive pre-job health checking capability. Regarding the - latter, say good-bye to the black hole node syndrome: Bright plus SLURM - effectively prevent this productivity-killing problem by identifying and - sidelining problematic nodes before the job is run. - - <h2>Session #8</h2> - Proposed Design for Job Step Management in User Space<br> - Morris Jette (SchedMD)<br><br> - SLURM currently creates and manages job steps using SLURM's control daemon, - slurmctld. Since some user jobs create thousands of job steps, the management - of those job steps accounts for most of slurmctld's work. 
It is possible to - move job step management from slurmctld into user space to improve SLURM - scalability and performance. A possible implementation of this will be - presented. - - <h2>Session #9</h2> - Proposed Design for Enhanced Enterprise-wide Scheduling<br> - Don Lipari (LLNL)<br><br> - SLURM currently supports the ability to submit and status jobs between - computers at site, however the current design has some limitations. When a job - is submitted with several possible computers usable for its execution, the - job is routed to the computer on which it is expected to start earliest. - Changes in the workload or system failures could make moving the job to another - computer result in faster initiation, but that is currently impossible. SLURM - is also unable to support dependencies between jobs executing on different - computers. The design of a SLURM meta-scheduler with enhanced enterprise-wide - scheduling capabilities will be presented. - - <h2>Session #10</h2> - Contents of SLURM Version 2.3 and plans for future releases<br> - Danny Auble and Morris Jette (SchedMD)<br><br> - An overview of the changes SLURM Version 2.3 will be presented along with - current plans for future releases. - - <h2>Open Discussion</h2> - All meeting attendees will be invited to provide input with respect to - SLURM's design and development work. - We also invite proposals for hosting the SLURM User Group Meeting in 2012. +<h1>Slurm User Group Meeting 2013</h1> + +<p>Hosted by <a href="http://www.schedmd.com">SchedMD</a></p> + +<h1>Agenda</h1> + +<p>The 2013 SLURM User Group Meeting will be held on September 18 and 19 +in Oakland, California, USA. +The meeting will include an assortment of tutorials, technical presentations, +and site reports.
+The <a href="#schedule">Schedule</a> and <a href="#abstracts">Abstracts</a> +are shown below.</p> + +<h2>Meeting Information</h2> +<p>The meeting will be held at +<a href="http://www.ce.csueastbay.edu/businessservices/conference_facilities/index.shtml"> +California State University's Conference Center</a>, +1000 Broadway Avenue, Suite 109, Oakland, California +(Phone 510-208-7001, access from 11th Street). +This state of the art facility is located adjacent to the 12th Street +<a href="http://www.bart.gov">BART</a> (Metro) station, with easy access to +the entire San Francisco area. +There is also frequent and free bus service to +<a href="http://www.jacklondonsquare.com">Jack London Square</a> using the +<a href="http://Bshuttle.com">Broadway Shuttle</a>.</p> + +<h2>Hotel Information</h2> +<p>Many hotel options are available in Oakland, San Francisco, and elsewhere in +the area. Just be sure that your hotel has easy access to BART. +Consider the hotels listed below as suggestions:</p> + +<p><a href="http://www.waterfronthoteloakland.com"><b>Waterfront Hotel</b></a><br> +Like it says in the name, on the waterfront, with several nice restaurants nearby. +About 1 mile (2 km) from the conference center via the +<a href="http://Bshuttle.com">Broadway Shuttle</a>. +Ferry service to San Francisco adjacent to the hotel.</p> + +<p><a href="http://www.marriott.com/hotels/travel/oakdt-oakland-marriott-city-center/"> +<b>Oakland Marriott City Center</b></a><br> +Across the street from the conference center. +Discounted rooms are available to government employees.</p> + +<h2>Registration</h2> +<p>The conference cost is $250 per person for registrations by 29 August and +$300 per person for late registration.
+This includes presentations, tutorials, lunch and snacks on both days, +plus dinner on Wednesday evening.<br><br> +<a href="http://sug2013.eventbrite.com">Register here.</a></p> + +<a name="schedule"><h1>Schedule</h1></a> + +<h2>September 18, 2013</h2> + +<table width="100%" border=1 cellspacing=0 cellpadding=0> + +<tr> + <th width="15%">Time</th> + <th width="15%">Theme</th> + <th width="25%">Speaker</th> + <th width="45%">Title</th> + </tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">08:00 - 09:00</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Registration / Breakfast</td> + </tr> + +<tr> + <td width="15%">09:00 - 09:15</td> + <td width="15%"> Welcome</td> + <td width="25%"> Morris Jette (SchedMD)</td> + <td width="45%"> Welcome to Slurm User Group Meeting</td> +</tr> + +<tr> + <td width="15%">09:15 - 10:00</td> + <td width="15%"> Keynote</td> + <td width="25%"> Dona Crawford (LLNL)</td> + <td width="45%"> Future Outlook for Advanced Computing</td> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">10:00 - 10:30</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> +</tr> + +<tr> + <td width="15%">10:30 - 11:00</td> + <td width="15%"> Technical</td> + <td width="25%"> Morris Jette, Danny Auble (SchedMD), Yiannis Georgiou (Bull)</td> + <td width="45%"> Overview of Slurm version 2.6</td> +</tr> +<tr> + <td width="15%">11:00 - 12:00</td> + <td width="15%"> Tutorial</td> + <td width="25%"> Yiannis Georgiou, Martin Perry, Thomas Cadeau (Bull), Danny Auble (SchedMD)</td> + <td width="45%"> Energy Accounting and External Sensor Plugins</td> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">12:00 - 13:00</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Lunch at conference center</td> +</tr> + + +<tr> + <td width="15%">13:00 - 13:30</td> + <td width="15%"> Technical</td> + <td width="25%"> Yiannis Georgiou , Thomas Cadeau (Bull), Danny Auble, Moe Jette (SchedMD) Matthieu Hautreux (CEA)</td> + <td width="45%"> Evaluation of Monitoring and 
Control Features for Power Management</td> +</tr> +<tr> + <td width="15%">13:30 - 14:00</td> + <td width="15%"> Technical</td> + <td width="25%"> Matthieu Hautreux (CEA)</td> + <td width="45%"> Debugging Large Machines</td></tr> +<tr> + <td width="15%">14:00 - 14:30</td> + <td width="15%"> Technical</td> + <td width="25%"> Alberto Falzone, Paolo Maggi (Nice)</td> + <td width="45%"> Creating easy to use HPC portals with NICE EnginFrame and Slurm</td> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">14:30 - 15:00</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> +</tr> + +<tr> + <td width="15%">15:00 - 15:30</td> + <td width="15%"> Technical</td> + <td width="25%"> David Glesser, Yiannis Georgiou, Joseph Emeras, Olivier Richard (Bull)</td> + <td width="45%"> Slurm evaluation using emulation and replay of real workload traces</td> +</tr> + +<tr> + <td width="15%">15:30 - 16:30</td> + <td width="15%"> Tutorial</td> + <td width="25%"> Rod Schultz, Yiannis Georgiou (Bull), Danny Auble (SchedMD)</td> + <td width="45%"> Usage of new profiling functionalities</td> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">18:00 - </td> + <td width="15%" bgcolor="#F0F1C9"> Dinner</td> + <td width="70%" colspan="2" bgcolor="#F0F1C9"> Lungomare, 1 Broadway Ave.</td> +</tr> +</table> + +<h2>September 19, 2013</h2> + +<table width="100%" border=1 cellspacing=0 cellpadding=0> + +<tr> + <th width="15%">Time</th> + <th width="15%">Theme</th> + <th width="25%">Speaker</th> + <th width="45%">Title</th> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">08:00 - 08:30</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Registration / Breakfast</td> + </tr> + +<tr> + <td width="15%">08:30 - 09:00</td> + <td width="15%"> Technical</td> + <td width="25%"> Morris Jette, David Bigagli, Danny Auble (SchedMD)</td> + <td width="45%"> Fault Tolerant Workload Management</td> +</tr> +<tr> + <td width="15%">09:00 - 09:30</td> + <td width="15%"> Technical</td> + <td width="25%"> Yiannis
Georgiou (Bull), Matthieu Hautreux (CEA)</td> + <td width="45%"> Slurm Layouts Framework</td> +</tr> + +<tr> + <td width="15%">09:30 - 10:00</td> + <td width="15%"> Technical</td> + <td width="25%"> Bill Brophy (Bull)</td> + <td width="45%"> License Management</td> +</tr> + + +<tr> + <td width="15%" bgcolor="#F0F1C9">10:00 - 10:30</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> +</tr> + +<tr> + <td width="15%">10:30 - 11:00</td> + <td width="15%"> Technical</td> + <td width="25%"> Juan Pancorbo Armada (LRZ)</td> + <td width="45%"> Multi-Cluster Management</td> +</tr> + +<tr> + <td width="15%">11:00 - 11:30</td> + <td width="15%"> Technical</td> + <td width="25%"> Francois Diakhate, Matthieu Hautreux (CEA)</td> + <td width="45%"> Depth Oblivious Hierarchical Fairshare Priority Factor</td> +</tr> + +<tr> + <td width="15%">11:30 - 12:00</td> + <td width="15%"> Technical</td> + <td width="25%"> Dave Wallace (Cray)</td> + <td width="45%"> Refactoring ALPS</td> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">12:00 - 13:00</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Lunch at conference center</td> +</tr> + +<tr> + <td width="15%">13:00 - 13:20</td> + <td width="15%"> Site Report</td> + <td width="25%"> Francois Diakhate, Francis Belot, Matthieu Hautreux (CEA)</td> + <td width="45%"> CEA Site Report</td> +</tr> +<tr> + <td width="15%">13:20 - 13:40</td> + <td width="15%"> Site Report</td> + <td width="25%"> Tim Wickberg (George Washington University)</td> + <td width="45%"> George Washington University Site Report</td> +</tr> +<tr> + <td width="15%">13:40 - 14:00</td> + <td width="15%"> Site Report</td> + <td width="25%"> Ryan Cox (BYU)</td> + <td width="45%"> Brigham Young University Site Report</td> +</tr> +<tr> + <td width="15%">14:00 - 14:20</td> + <td width="15%"> Site Report</td> + <td width="25%"> Doug Hughes, Chris Harwell, Eric Radman, Goran Pocina, Michael Fenn (D.E. Shaw Research)</td> + <td width="45%"> D.E.
Shaw Research Site Report</td> +</tr> +<tr> + <td width="15%">14:20 - 14:40</td> + <td width="15%"> Site Report</td> + <td width="25%"> Dr. Ulf Markwardt (Technische Universitat Dresden)</td> + <td width="45%"> Technische Universitat Dresden Site Report</td> +</tr> + +<tr> + <td width="15%" bgcolor="#F0F1C9">14:40 - 15:10</td> + <td width="85%" colspan="3" bgcolor="#F0F1C9"> Coffee break</td> +</tr> + +<tr> + <td width="15%">15:00 - 15:30</td> + <td width="15%"> Technical</td> + <td width="25%"> Morris Jette (SchedMD), Yiannis Georgiou (Bull)</td> + <td width="45%"> Slurm Roadmap</td> +</tr> +<tr> + <td width="15%">15:30 - 16:30</td> + <td width="15%"> Discussion</td> + <td width="25%"> Everyone</td> + <td width="45%"> Open Discussion</td> +</tr> + +</table> + +<br><br> +<a name="abstracts"><h1>Abstracts</h1></a> + +<h2>September 18, 2013</h2> + +<h3>Overview of Slurm Version 2.6</h3> +<p>Danny Auble, Morris Jette (SchedMD) +Yiannis Georgiou (Bull)</p> +<p>This presentation will provide an overview of Slurm enhancements in +version 2.6, released in May. Specific development to be described include:</p> +<ul> +<li>Support for job arrays, which increases performance and ease of use for +sets of similar jobs.</li> +<li>Support for MapReduce+.</li> +<li>Added prolog and epilog support for advanced reservations.</li> +<li>Much faster throughput for job step execution.</li> +<li>Advanced reservations now supports specific different core count for each node.</li> +<li>Added external sensors plugin to capture temperature and power data.</li> +<li>Added job profiling capability.</li> +<li>CPU count limits by partition.</li> +</ul> + +<h3>Usage of Energy Accounting and External Sensor Plugins</h3> +<p>Yiannis Georgiou, Martin Perry, Thomas Cadeau (Bull) +Danny Auble (SchedMD)</p> +<p>Power Management has gradually passed from a trend to an important need in +High Performance Computing. 
Slurm version 2.6 provides functionalities for +energy consumption recording and accounting per node and job following both +in-band and out-of-band strategies. The new implementations consist of two new +plugins: One plugin allowing in-band collection of energy consumption data from +the BMC of each node based on freeipmi library; Another plugin allowing +out-of-band collection from a centralized storage based on rrdtool library. +The second plugin allows the integration of external mechanisms like wattmeters +to be taken into account for the energy consumption recording and accounting +per node and job. The data can be used by users and administrators to improve +the energy efficiency of their applications and the whole clusters in general.</p> +<p>The tutorial will provide a brief description of the various power +management features in Slurm and will make a detailed review of the new plugins +introduced in 2.6, with configuration and usage details along with examples of +actual deployment.</p> + +<h3>Evaluation of Monitoring and Control Features for Power Management</h3> +<p>Yiannis Georgiou , Thomas Cadeau(Bull), Danny Auble, Moe Jette(SchedMD), +Matthieu Hautreux (CEA)</p> +<p>High Performance Computing platforms are characterized by their + increasing needs in power consumption. The Resource and Job + Management System (RJMS) is the HPC middleware responsible for + distributing computing resources to user applications. Appearance of + hardware sensors along with their support on the kernel/software side can be + taken into account by the RJMS in order to enhance the monitoring + and control of the executions with energy considerations. This + essentially enables the applications' execution statistics for + online energy profiling and gives the possibility to users to + control the tradeoffs between energy consumption and performance. 
In + this work we present the design and evaluation of a new framework, + developed upon the SLURM Resource and Job Management System, + which allows energy consumption recording and accounting per node + and job along with parameters for job energy control features based on static + frequency scaling of the CPUs. We evaluate the overhead of the design choices + and the precision of the energy consumption results with different + HPC benchmarks (IMB, stream, HPL) on real-scale platforms and + integrated wattmeters. Having as goal the deployment of the + framework on large petaflopic clusters such as Curie, scalability is + an important aspect.</p> + +<h3>Debugging Large Machines</h3> +<p>Matthieu Hautreux (CEA)</p> +<p>This talk will present some cases of particularly interesting bugs + that were studied/worked-around/corrected over the past few years + on the petaflopic machines installed and used at CEA. The goal + is to share with the administrator community some methods and tools + helping to identify and in some cases work around or correct + unexpected performance issues or bugs.</p> + +<h3>Creating easy to use HPC portals with NICE EnginFrame and Slurm</h3> +<p>Alberto Falzone, Paolo Maggi (Nice)</p> +<p>NICE EnginFrame is a popular framework to easily create HPC portals +that provide user-friendly application-oriented computing and data +services, hiding all the complexity of the underlying IT infrastructure. +Designed for technical computing users in a broad range of markets +(Oil&amp;Gas, Automotive, Aerospace, Medical, Finance, Research, and +more), EnginFrame simplifies engineers' and scientists' work +through its intuitive, self-documenting interfaces, increasing +productivity and streamlining data and resource +management.
Leveraging all the major HPC job schedulers and remote +visualization technologies, EnginFrame translates user clicks into the +appropriate actions to submit HPC jobs, create remote visualization +sessions, monitor workloads on distributed resources, manage data +and much more. In this work we describe the integration between the +SLURM Workload Manager and EnginFrame. We will then illustrate how +this integration can be leveraged to create easy to use HPC portals +for SLURM-based HPC infrastructures.</p> + +<h3>Slurm evaluation using emulation and replay of real workload traces</h3> +<p>David Glesser, Yiannis Georgiou, Joseph Emeras, Olivier Richard (Bull)</p> +<p>The experimentation and evaluation of Resource and Job Management + Systems in HPC supercomputers are characterized by important + complexities due to the inter-dependency of multiple parameters that + have to be taken into control. In our study we have developed a + methodology based upon emulated controlled experimentation, under + real conditions, with submission of workload traces extracted from a + production system. The methodology is used to perform comparisons of + different Slurm configurations in order to deduce the best + configuration for the typical workload that takes place on the + supercomputer, without disturbing the production. We will present + observations and evaluation results using real workload traces + extracted from the Curie supercomputer, a Top500 system with 80640 cores, + replayed upon only 128 cores of a machine with similar + architecture. Various interesting results are extracted and important + side effects are discussed along with proposed configurations for + each type of workloads. Ideas for improvements on Slurm are also + proposed.</p> + +<h3>Usage of new profiling functionalities</h3> +<p>Rod Schultz, Yiannis Georgiou (Bull), Danny Auble (SchedMD)</p> +<p>SLURM Version 2.6 includes the ability to gather detailed +performance data on jobs.
It has a plugin that stores the detailed +data in an HDF5 file. Other plugins gather data on task performance +such as cpu usage, memory usage, and local disk I/O; I/O to the +Lustre file system; traffic through an InfiniBand network +interface; and energy information collected from IPMI. +This tutorial will describe the new capability, show how to configure +the various data sources, show examples of different data streams, +and report on actual usage.</p> + +<h2>September 19, 2013</h2> + +<h3>Fault Tolerant Workload Management</h3> +<p>Morris Jette, David Bigagli, Danny Auble (SchedMD)</p> +<p>One of the major issues facing exascale computing is fault +tolerance; how can a computer be effectively used if the typical job +execution time exceeds its mean time between failures? Part of the +solution is providing users with means to address failures in a +coordinated fashion with a highly adaptable workload manager. Such a +solution would support coordinated recognition of failures, +notification of failing and failed components, replacement +resources, and extended job time limits using negotiated interactive +communications. This paper describes fault tolerance issues from the +perspective of a workload manager and the implementation of a solution +designed to optimize job fault tolerance based upon the popular open +source workload manager, Slurm.</p> + +<h3>Slurm Layouts Framework</h3> +<p>Yiannis Georgiou (Bull), Matthieu Hautreux (CEA)</p> +<p>This talk will describe the origins and goals of the study +concerning the Layouts Framework as well as first targets, current +developments and results. The layouts framework aims at providing a +uniform and generalized way to describe the hierarchical +relations between resources managed by an RM in order to use that +information in related RM internal logic.
Examples of +instantiated layouts could be the description of the network +connectivity of nodes for the Slurm internal communication, the +description of the power supply network and capacities per branch +powering up the nodes, the description of the racking of the nodes, ...</p> + +<h3>License Management</h3> +<p>Bill Brophy (Bull)</p> +<p>License management becomes an increasingly critical issue as the +size of systems increases. These valuable resources deserve the same +careful management as all other resources configured in a +cluster. When licenses are being utilized in both interactive and +batch execution environments with multiple resource managers +involved, the complexity of this task increases +significantly. Current license management within SLURM is not +integrated with any external license managers. This approach is +adequate if all jobs requiring licenses are submitted through SLURM +or if SLURM is given a subset of the licenses available on the +system to sub manage. However, the case of sub management can result +in underutilization of valuable license resources. Documentation for +other resource managers describes their interaction with external +license managers. For SLURM to become an active participant in +license management an evolution to its management approach must +occur. This article proposes a two-phased approach for accomplishing +that transformation. In the first phase, enhancements are proposed for +how SLURM internally deals with licenses: restriction of licenses to +specific accounts or users, recommendations for keeping +track of license information and suggestions for how this +information can be displayed for SLURM users or +administrators. The second phase of this effort, which is +considerably more ambitious, is to define an evolution of SLURM's +approach to license management. This phase introduces an interaction +between SLURM and external license managers.
The goal of this effort +is to increase SLURM's effectiveness in another area of resource +management, namely management of software licenses.</p> + +<h3>Multi-Cluster Management</h3> +<p>Juan Pancorbo Armada (LRZ)</p> +<p>As a service provider for scientific high performance computing, +Leibniz Rechen Zentrum (LRZ) operates compute systems for use by +educational institutions in Munich, Bavaria, as well as on the +national level. LRZ provides its own computing resources as well as +housing and managing computing resources from other institutions +such as Max Planck Institute, or Ludwig Maximilians University. +The tier 2 Linux cluster operated at LRZ is a heterogeneous system +with different types of compute nodes, divided into 13 different +partitions, each of which is managed by SLURM. The various +partitions are configured for the different needs and services +requested, ranging from single node multiple core NUMAlink shared +memory clusters, to a 16-way InfiniBand-connected cluster for +parallel job execution, or an 8-way Gbit Ethernet cluster for serial +job execution. The management of all partitions is centralized on a +single VM. In this VM one SLURM cluster for each of these Linux +cluster partitions is configured. The required SLURM control daemons +run concurrently on this VM. With the use of a wrapper script called +MSLURM, the SLURM administrator can send SLURM commands to any +cluster in an easy-to-use and flexible manner, including starting or +stopping the complete SLURM subsystem. Although such a setup may not +be desirable for large homogeneous supercomputing clusters, on small +heterogeneous clusters it has its own advantages. No separate control +node is required for each cluster for the slurmctld to run, so the +control of small clusters can be grouped in a single control +node.
This feature also helps to solve the restriction for some +parameters that cannot be set to different values for different +partitions in the same slurm.conf file; in that case it is possible +to move such parameters to partition-specific slurm.conf files.</p> + +<h3>Preparing Slurm for use on the Cray XC30</h3> +<p>Stephen Trofinoff, Colin McMurtrie (CSCS)</p> +<p>In this paper we describe the technical details associated with the +preparation of Slurm for use on an XC30 system installed at the Swiss +National Supercomputing Centre (CSCS). The system comprises external +login nodes, internal login nodes and a new ALPS/BASIL version so a +number of technical details needed to be overcome in order to have +Slurm working, as desired, on the system. Due to the backward +compatibility of ALPS/BASIL and the well-written code of Slurm, +Slurm was able to run, as it had in the past on previous Cray +systems, with little effort. However, some problems were encountered +and their identification and resolution are described in +detail. Moreover, we describe the work involved in enhancing Slurm +to utilize the new BASIL protocol. Finally, we provide detail on the +work done to improve the Slurm task affinity bindings on a +general-purpose Linux cluster so that they, as closely as possible, +match the Cray bindings, thereby providing our users with some +degree of consistency in application behavior between these systems.</p> + +<h3>Refactoring ALPS</h3> +<p>Dave Wallace (Cray)</p> +<p>One of the hallmarks of the Cray Linux Environment is the Cray +Application Level Placement Scheduler (ALPS). ALPS is a resource +placement infrastructure used on all Cray systems. Developed by +Cray, ALPS addresses the size, complexity, and unique resource +management challenges presented by Cray systems. It works in +conjunction with workload management tools such as SLURM to +schedule, allocate, and launch applications.
ALPS separates policy +from placement, so it launches applications but does not conflict +with batch system policies. The batch system interacts with ALPS via +an XML interface. Over time, the requirement to support more and +varied platform and processor capabilities, dynamic resource +management and new workload manager features has led Cray to +investigate alternatives to provide more flexible methods for +supporting expanding workload manager capabilities on Cray +systems. This presentation will highlight Cray's plans to expose low +level hardware interfaces by refactoring ALPS to allow 'native' +workload manager implementations that don't rely on the current ALPS +interface mechanism.</p> + +<h3>CEA Site Report</h3> +<p>Francois Diakhate, Francis Belot, Matthieu Hautreux (CEA)</p> +<p>The site report will detail the evolution of Slurm usage at CEA +as well as recent developments used on production systems. A +modification of the fairshare logic to better handle fair sharing of +resources between unbalanced group hierarchies will be detailed.</p> + +<h3>George Washington University Site Report</h3> +<p>Tim Wickberg (George Washington University)</p> +<p>The site report will detail the evaluation of Slurm usage at +George Washington University, and the new Colonial One System.</p> + +<h3>Brigham Young University Site Report</h3> +<p>Ryan Cox (BYU)</p> +<p>The site report will detail the evaluation of Slurm at Brigham Young +University.</p> + +<h3>D.E. Shaw Research Site Report</h3> +<p>Doug Hughes, Chris Harwell, Eric Radman, Goran Pocina, Michael Fenn +(D.E. Shaw Research)</p> +<p>DESRES uses SLURM to schedule Anton. Anton is a specialized +supercomputer which executes molecular dynamics (MD) simulations of +proteins and other biological macromolecules orders of magnitude +faster than was previously possible. In this report, we present the +current SLURM configuration for scheduling Anton and launching our +MD application.
We take advantage of the ability to run multiple +slurmd programs on a single node and use them as place-holders for +the Anton machines. We combine that with a pool of commodity Linux +nodes which act as frontends to any of the Anton machines where the +application is launched. We run a partition-specific prolog to ensure +machine health prior to starting a job and to reset ASICs if +necessary. We also periodically run health checks and set nodes to +drain or resume via scontrol. Recently we have also used the prolog +to set a specific QOS for jobs which run on an early (and slower) +version of the ASIC in order to adjust the fair-share UsageFactor.</p> +<p>DESRES also uses SLURM to schedule a cluster of commodity nodes for +running regressions, our DESMOND MD program and various other +computational chemistry software. The jobs are an interesting mix of +those with MPI required and those without, short (minutes) and long (weeks).</p> +<p>DESRES is also investigating using SLURM to schedule a small +cluster of 8-GPU nodes for a port of the DESMOND MD program to +GPUs. This workload includes both full node 8-GPU jobs and multi-node +full 8-GPU per node jobs, but also jobs with lower GPU requirements +such that multiple jobs would be on a single node. We've made use of +CPU affinity and binding. GRES was not quite flexible enough and we +ended up taking advantage of the 8 CPU to 8 GPU opting to assign +GPUs to specific CPUs.</p> + +<h3>Technische Universitat Dresden Site Report</h3> +<p>Dr. Ulf Markwardt (Technische Universitat Dresden)</p> +<p>This site report will detail the recent introduction of Slurm on a new +computer at Technische Universitat Dresden.</p> + +<h3>Depth Oblivious Hierarchical Fairshare Priority Factor</h3> +<p>Francois Diakhate, Matthieu Hautreux (CEA)</p> +<p>As High Performance Computing use becomes prevalent in increasingly varied +scientific and industrial fields, clusters often need to be shared by a growing +number of user communities.
One aspect of managing these heterogeneous groups +involves being able to schedule their jobs fairly according to their respective +machine shares. In this talk we look at how Slurm's hierarchical fairshare +algorithms handle this task when user groups form complex hierarchies. We +propose an alternative formula to compute job priorities which improves +fairness in this situation.</p> + +<h3>Slurm Roadmap</h3> +<p>Morris Jette (SchedMD), Yiannis Georgiou (Bull)</p> +<p>Slurm continues to evolve rapidly, with two major releases per +year. This presentation will outline Slurm development plans in the +coming years. Particular attention will be given to describing +anticipated workload management requirements for Exascale +computing. These requirements include not only scalability issues, +but a new focus on power management, fault tolerance, topology +optimized scheduling, and heterogeneous computing.</p> + +<p style="text-align:center;">Last modified 16 September 2013</p> <!--#include virtual="footer.txt"--> - diff --git a/doc/html/slurm_ug_registration.shtml b/doc/html/slurm_ug_registration.shtml index e310008738a8377f3f7e56db941f9a4839f4f550..4d95e3d63964ecbd707b52fb65ce1719cf0aba82 100644 --- a/doc/html/slurm_ug_registration.shtml +++ b/doc/html/slurm_ug_registration.shtml @@ -42,22 +42,8 @@ Ferry service to San Fransisco adjacent to the hotel.</p> Across the street from the conference center. Discounted rooms are available to government employees.</p> -<h2>Preliminary Agenda</h2> -<p><b>Tuesday 17 September</b><br> -No-host dinner for those interested at -<a href="http://www.lungomareoakland.com/">Lungomare Restaurant</a>, 7:00 PM. -You must register separetely for this event at TBD.<p> - -<p><b>Wednesday 18 September</b><br> -Meeting from 9:00 AM to 5:00 PM.<br> -Dinner at TBD.
-There is an additional fee of $50 for guests and they must register in advance -at TBD.<p> - -<p><b>Thursday 19 September</b><br> -Meeting from 9:00 AM to 3:00 PM.<p> - -<p>A complete agenda will be posted online when available.</p> +<h2>Agenda</h2> +<p>See <a href="slurm_ug_agenda.html">agenda</a>.</p> <h2>Contact</h2> <p>If you need further informations about the event, or the @@ -65,4 +51,12 @@ registration protocols, contact the <a href="mailto:jette@schedmd.com?subject=SUG2013"> <b>Slurm User Group 2013</b></a> organizers.<br> +<h2>Other Events</h2> + +<p>The <a href="http://www.americascup.com/en">America's Cup</a> yacht +races will be taking place in San Francisco Bay through the month of September +for those interested.</p> + +<p style="text-align:center;">Last modified 5 August 2013</p> + <!--#include virtual="footer.txt"--> diff --git a/doc/html/slurmctld_plugstack.shtml b/doc/html/slurmctld_plugstack.shtml new file mode 100644 index 0000000000000000000000000000000000000000..7f06c2e5e395b8b28e5ac7a05f536223c2bf225d --- /dev/null +++ b/doc/html/slurmctld_plugstack.shtml @@ -0,0 +1,42 @@ +<!--#include virtual="header.txt"--> + +<h1><a name="top">Slurmctld Generic Plugin Programmer Guide</a></h1> + +<h2> Overview</h2> + +<p> This document describes the slurmctld daemon's generic plugins and the API that +defines them. It is intended as a resource to programmers wishing to write +their own slurmctld generic plugins. This is version 100 of the API. + +<p>The slurmctld generic plugin must conform to the +Slurm Plugin API with the following specifications: + +<p><span class="commandline">const char +plugin_name[]="<i>full text name</i>"</span> +<p style="margin-left:.2in"> +A free-formatted ASCII text string that identifies the plugin. + +<p><span class="commandline">const char +plugin_type[]="<i>major/minor</i>"</span><br> +<p style="margin-left:.2in"> +The major type must be "slurmctld_plugstack."
+The minor type can be any suitable name for the type of slurmctld package. +Slurm can be configured to use multiple slurmctld_plugstack plugins if desired.</p> + +<h2>API Functions</h2> + +<p>Only the init and fini functions of the plugin will be called. +The init function will be called when the slurmctld daemon begins accepting RPCs. +The fini function will be called when the slurmctld daemon stops accepting RPCs. +In the case of the backup slurmctld daemon, the init and fini functions may +be called multiple times (when it assumes control functions and then when it +relinquishes them to the primary slurmctld daemon).</p> + +<h2>Versioning</h2> +<p> This document describes version 100 of the SLURM slurmctld generic plugin API. Future +releases of SLURM may revise this API. +<p class="footer"><a href="#top">top</a> + +<p style="text-align:center;">Last modified 23 January 2013</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/switchplugins.shtml b/doc/html/switchplugins.shtml index a1974528f3814e86c46c9ba8eebf7bb77bb0c95a..896432e83e1138693b1748158b5c67aa113495c3 100644 --- a/doc/html/switchplugins.shtml +++ b/doc/html/switchplugins.shtml @@ -36,8 +36,8 @@ for sample implementations of a SLURM switch plugin.</p> <h2>Data Objects</h2> <p> The implementation must support two opaque data classes. -One is used as an job's switch "credential." -This class must encapsulate all job-specific information necessary +One is used as a job step's switch "credential." +This class must encapsulate all job step specific information necessary for the operation of the API specification below. The second is a node's switch state record. Both data classes are referred to in SLURM code using an anonymous @@ -116,7 +116,7 @@ switch state information on a periodic basis.</p> <p class="commandline">int switch_p_clear_node_state (void);</p> <p style="margin-left:.2in"><b>Description</b>: Initialize node state.
-If any switch state has previously been established for a job, it will be cleared. +If any switch state has previously been established for a job step, it will be cleared. This will be used to establish a "clean" state for the switch on the node upon which it is executed.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, @@ -192,10 +192,10 @@ of buf in bytes.</p> <h3>Job's Switch Credential Management Functions</h3> <p class="commandline">int switch_p_alloc_jobinfo(switch_jobinfo_t *switch_job);</p> -<p style="margin-left:.2in"><b>Description</b>: Allocate storage for a job's switch credential. +<p style="margin-left:.2in"><b>Description</b>: Allocate storage for a job step's switch credential. It is recommended that the credential contain a magic number for validation purposes.</p> <p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span> - (output) location for writing location of job's switch credential.</p> + (output) location for writing location of job step's switch credential.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> @@ -208,14 +208,14 @@ It is recommended that the credential's magic number be validated.</p> <span class="commandline">switch_job</span> (input/output) Job's switch credential to be updated<br> <span class="commandline">nodelist</span> (input) List of nodes -allocated to the job. This may contain expressions to specify node ranges (e.g. +allocated to the job step. This may contain expressions to specify node ranges (e.g. 
"linux[1-20]" or "linux[2,4,6,8]").<br> <span class="commandline">tasks_per_node</span> (input) count -of processes per node to be initiated as part of the job.<br> +of processes per node to be initiated as part of the job step.<br> <span class="commandline">tids</span> (input) List of task IDs to be initiated. The first array index is the node ID. The second array index ranges from 0 to tasks_per_node of that node ID minus 1.<br> -<span class="commandline">network</span> (input) Job's network +<span class="commandline">network</span> (input) Job step's network specification from srun command. </p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value @@ -225,23 +225,23 @@ to indicate the reason for failure.</p> <p style="margin-left:.2in"><b>Description</b>: Allocate storage for a job's switch credential and copy an existing credential to that location.</p> <p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span> - (input) an existing job switch credential.</p> -<p style="margin-left:.2in"><b>Returns</b>: A newly allocated job switch credential containing a -copy of the function argument.</p> + (input) an existing job step switch credential.</p> +<p style="margin-left:.2in"><b>Returns</b>: A newly allocated job step switch +credential containing a copy of the function argument.</p> <p class="commandline">void switch_p_free_jobinfo (switch_jobinfo_t switch_job);</p> <p style="margin-left:.2in"><b>Description</b>: Release the storage associated with a job's switch credential.</p> <p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span> - (input) an existing job switch credential.</p> + (input) an existing job step switch credential.</p> <p style="margin-left:.2in"><b>Returns</b>: None</p> <p class="commandline">int switch_p_pack_jobinfo (switch_jobinfo_t switch_job, Buf buffer);</p> -<p 
style="margin-left:.2in"><b>Description</b>: Pack the data associated with a job's +<p style="margin-left:.2in"><b>Description</b>: Pack the data associated with a job step's switch credential into a buffer for network transmission.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> -<span class="commandline"> switch_job</span> (input) an existing job -switch credential.<br> +<span class="commandline"> switch_job</span> (input) an +existing job step switch credential.<br> <span class="commandline"> buffer</span> (input/output) buffer onto which the credential's contents are appended.</p> <p style="margin-left:.2in"><b>Returns</b>: @@ -254,7 +254,7 @@ to indicate the reason for failure.</p> switch credential from a buffer.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> <span class="commandline"> switch_job</span> (input/output) a previously -allocated job switch credential to be filled in with data read from the buffer.<br> +allocated job step switch credential to be filled in with data read from the buffer.<br> <span class="commandline"> buffer</span> (input/output) buffer from which the credential's contents are read.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, @@ -279,10 +279,10 @@ char *nodelist);</p> with the specified nodelist has completed execution.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> <span class="commandline"> switch_job</span> (input) -The completed job's switch credential.<br> +The completed job step's switch credential.<br> <span class="commandline"> nodelist</span> (input) A list of nodes -on which the job has completed. This may contain expressions to specify node ranges. -(e.g. "linux[1-20]" or "linux[2,4,6,8]").</p> +on which the job step has completed. This may contain expressions to specify +node ranges. (e.g. "linux[1-20]" or "linux[2,4,6,8]").</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. 
On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> @@ -335,11 +335,11 @@ bytes</p> <p class="commandline">int switch_p_get_data_jobinfo(switch_jobinfo_t switch_job, int key, void *resulting_data);</p> -<p style="margin-left:.2in"><b>Description</b>: Get data from a job's +<p style="margin-left:.2in"><b>Description</b>: Get data from a job step's switch credential.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> -<span class="commandline"> switch_job</span> (input) a job's -switch credential.<br> +<span class="commandline"> switch_job</span> (input) a job +step's switch credential.<br> <span class="commandline"> key</span> (input) identification of the type of data to be retrieved from the switch credential. NOTE: The interpretation of this key is dependent upon the switch type. <br> @@ -370,7 +370,7 @@ the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> <p class="footer"><a href="#top">top</a></p> -<h3>Job Management Functions</h3> +<h3>Job Step Management Functions</h3> <pre> ========================================================================= Process 1 (root) Process 2 (root, user) | Process 3 (user task) @@ -387,7 +387,7 @@ switch_p_job_postfini | <p class="commandline">int switch_p_job_preinit (switch_jobinfo_t jobinfo switch_job);</p> <p style="margin-left:.2in"><b>Description</b>: Preinit is run as root in the first slurmd process, -the so called job manager. This function can be used to perform any initialization +the so called job step manager. 
This function can be used to perform any initialization that needs to be performed in the same process as switch_p_job_fini().</p> <p style="margin-left:.2in"><b>Arguments</b>: <span class="commandline"> switch_job</span> (input) a job's @@ -402,10 +402,10 @@ This function is run from the second slurmd process (some interconnect implement may require the switch_p_job_init functions to be executed from a separate process than the process executing switch_p_job_fini() [e.g. Quadrics Elan]).</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> -<span class="commandline"> switch_job</span> (input) a job's -switch credential.<br> +<span class="commandline"> switch_job</span> (input) a job +step's switch credential.<br> <span class="commandline"> uid</span> (input) the user id -to execute a job.</p> +to execute a job step.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> @@ -419,16 +419,16 @@ environment variables here).</p> <span class="commandline"> switch_job</span> (input) a job's switch credential.<br> <span class="commandline"> env</span> (input/output) the -environment variables to be set upon job initiation. Switch specific environment -variables are added as needed.<br> +environment variables to be set upon job step initiation. 
Switch specific +environment variables are added as needed.<br> <span class="commandline"> nodeid</span> (input) zero-origin id of this node.<br> <span class="commandline"> procid</span> (input) zero-origin process id local to slurmd and <b>not</b> equivalent to the global task id or MPI rank.<br> <span class="commandline"> nnodes</span> (input) count of -nodes allocated to this job.<br> +nodes allocated to this job step.<br> <span class="commandline"> nprocs</span> (input) total count of -processes or tasks to be initiated for this job.<br> +processes or tasks to be initiated for this job step.<br> <span class="commandline"> rank</span> (input) zero-origin id of this task.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, @@ -438,10 +438,10 @@ to indicate the reason for failure.</p> <p class="commandline">int switch_p_job_fini (switch_jobinfo_t jobinfo switch_job);</p> <p style="margin-left:.2in"><b>Description</b>: This function is run from the same process as switch_p_job_init() after all job tasks have exited. It is *not* run as root, because -the process in question has already setuid to the job owner.</p> +the process in question has already setuid to the job step owner.</p> <p style="margin-left:.2in"><b>Arguments</b>: -<span class="commandline"> switch_job</span> (input) a job's -switch credential.</p> +<span class="commandline"> switch_job</span> (input) a job +step's switch credential.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> @@ -452,8 +452,8 @@ uid_t pgid, uint32_t job_id, uint32_t step_id );</p> process (same process as switch_p_job_preinit()), and is run as root. 
Any cleanup routines that need to be run with root privileges should be run from this function.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> -<span class="commandline"> switch_job</span> (input) a job's -switch credential.<br> +<span class="commandline"> switch_job</span> (input) a job +step's switch credential.<br> <span class="commandline"> pgid</span> (input) The process group id associated with this task.<br> <span class="commandline"> job_id</span> (input) the @@ -489,9 +489,9 @@ to indicate the reason for failure.</p> <p style="margin-left:.2in"><b>Description</b>: Determine if a specific job step can be preempted.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> -<span class="commandline"> switch_job</span> (input) a job's -switch credential.</p> -<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if the job can be +<span class="commandline"> switch_job</span> (input) a job +step's switch credential.</p> +<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if the job step can be preempted and SLURM_ERROR otherwise.</p> <p class="commandline">void switch_p_job_suspend_info_get(switch_jobinfo_t *switch_job, @@ -509,7 +509,7 @@ for addition function call (i.e. 
for each addition job step).</p> <p class="commandline">void switch_p_job_suspend_info_pack(void *suspend_info, Buf buffer);</p> <p style="margin-left:.2in"><b>Description</b>: Pack the information needed -for a job step to be preempted into a buffer</p> +for a job to be preempted into a buffer</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> <span class="commandline"> suspend_info</span> (input) information needed for a job to be preempted, including information for all @@ -519,7 +519,7 @@ the buffer that has suspend_info added to it.</p> <p class="commandline">int switch_p_job_suspend_info_unpack(void **suspend_info, Buf buffer);</p> <p style="margin-left:.2in"><b>Description</b>: Unpack the information needed -for a job step to be preempted from a buffer.<br> +for a job to be preempted from a buffer.<br> <b>NOTE</b>: Use switch_p_job_suspend_info_free() to free the opaque data structure.</p> <p style="margin-left:.2in"><b>Arguments</b>:<br> <span class="commandline"> suspend_info</span> (output) @@ -590,6 +590,6 @@ plugin that transmitted it. 
It is at the discretion of the plugin author whether to maintain data format compatibility across different versions of the plugin.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 6 August 2012</p> +<p style="text-align:center;">Last modified 26 June 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index e38bc2ae237a5f4b3e9980c4395f0816d4751a61..daa93ccd27d540147a1bfedf57ef6b35ffb9eb08 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -18,17 +18,19 @@ Lead Slurm developers are: <li><a href="http://www.fhcrc.org">Fred Hutchinson Cancer Research Center</a></li> <li><a href="http://www.hp.com">HP</a></li> <li><a href="http://www.llnl.gov">Lawrence Livermore National Laboratory</a></li> +<li><a href="http://www.lanl.gov">Los Alamos National Laboratory</a></li> <li><a href="http://english.nudt.edu.cn">National University of Defense Technology (NUDT, China)</a></li> <li><a href="http://www.nvidia.com">NVIDIA</a></li> <li><a href="http://www.ornl.gov">Oak Ridge National Laboratory</a></li> <li><a href="http://www.schedmd.com">SchedMD</a></li> <li><a href="http://www.cscs.ch">Swiss National Supercomputing Centre</a></li> -<br><!-- INDIVIDUALS, PLEASE KEEP IN ALPHABETICAL ORDER --> +<br><!-- INDIVIDUALS, PLEASE KEEP IN ALPHABETICAL ORDER --><br> <li>Ramiro Alba (Centre Tecnològic de Tranferència de Calor, Spain)</li> <li>Amjad Majid Ali (Colorado State University)</li> <li>Pär Andersson (National Supercomputer Centre, Sweden)</li> <li>Don Albert (Bull)</li> <li>Ernest Artiaga (Barcelona Supercomputing Center, Spain)</li> +<br> <li>Jason W. 
Bacon</li> <li>Susanne Balle (HP)</li> <li>Ralph Bean (Rochester Institute of Technology)</li> @@ -40,65 +42,94 @@ Lead Slurm developers are: <li>David Bremer (Lawrence Livermore National Laboratory)</li> <li>Jon Bringhurst (Los Alamos National Laboratory)</li> <li>Bill Brophy (Bull)</li> +<br> +<li>Luis Cabellos (Instituto de Fisica de Cantabria, Spain)</li> +<li>Thomas Cadeau (Bull)</li> <li>Hongjia Cao (National University of Defense Technology, China)</li> +<li>Jimmy Cao (Greenplum/EMC)</li> +<li>Ralph Castain (Intel, Greenplum/EMC, Los Alamos National Laboratory)</li> +<li>François Chevallier (CEA)</li> <li>Daniel Christians (HP)</li> <li>Gilles Civario (Bull)</li> <li>Chuck Clouston (Bull)</li> +<br> <li>Yuri D'Elia (Center for Biomedicine, EURAC Research, Italy)</li> <li>Francois Diakhate (CEA, France)</li> <li>Joseph Donaghy (Lawrence Livermore National Laboratory)</li> <li>Chris Dunlap (Lawrence Livermore National Laboratory)</li> +<br> <li>Phil Eckert (Lawrence Livermore National Laboratory)</li> <li>Joey Ekstrom (Lawrence Livermore National Laboratory/Bringham Young University)</li> <li>Josh England (TGS Management Corporation)</li> <li>Kent Engström (National Supercomputer Centre, Sweden)</li> +<br> <li>Carles Fenoy (Barcelona Supercomputing Center, Spain)</li> <li>Damien François (Université catholique de Louvain, Belgium)</li> +<br> <li>Jim Garlick (Lawrence Livermore National Laboratory)</li> <li>Didier Gazen (Laboratoire d'Aerologie, France)</li> <li>Raphael Geissert (Debian)</li> <li>Yiannis Georgiou (Bull)</li> +<li>Armin Größlinger (University Passau, Germany)</li> <li>Mark Grondona (Lawrence Livermore National Laboratory)</li> <li>Dmitri Gribenko</li> <li>Andriy Grytsenko (Massive Solutions Limited, Ukraine)</li> +<li>Michael Gutteridge (Fred Hutchinson Cancer Research Center)</li> +<br> +<li>Chris Harwell (D. E. 
Shaw Research)</li> <li>Takao Hatazaki (HP)</li> <li>Matthieu Hautreux (CEA, France)</li> +<li>Dave Henseler (Cray)</li> <li>Chris Holmes (HP)</li> <li>David Höppner</li> <li>Nathan Huff (North Dakota State University)</li> +<br> <li>David Jackson (Adaptive Computing)</li> <li>Alec Jensen (SchedMD)</li> <li>Klaus Joas (University Karlsruhe, Germany)</li> <li>Greg Johnson (Los Alamos National Laboratory)</li> -<li>Magnus Jonsson (Umeå University, Sweden) +<li>Magnus Jonsson (Umeå University, Sweden)</li> +<br> <li>Jason King (Lawrence Livermore National Laboratory)</li> <li>Yury Kiryanov (Intel)</li> <li>Aaron Knister (Environmental Protection Agency, UMBC)</li> <li>Nancy Kritkausky (Bull)</li> <li>Roman Kurakin (Institute of Natural Science and Ecology, Russia)</li> +<br> <li>Sam Lang</li> <li>Puenlap Lee (Bull)</li> <li>Dennis Leepow</li> <li>Olli-Pekka Lehto (CSC-IT Center for Science Ltd., Finland)</li> +<li>Piotr Lesnicki (Bull)</li> <li>Bernard Li (Genome Sciences Centre, Canada)</li> <li>Eric Lin (Bull)</li> <li>Donald Lipari (Lawrence Livermore National Laboratory)</li> +<br> <li>Komoto Masahiro</li> <li>Steven McDougall (SiCortex)</li> <li>Donna Mecozzi (Lawrence Livermore National Laboratory)</li> <li>Bjørn-Helge Mevik (University of Oslo, Norway)</li> <li>Chris Morrone (Lawrence Livermore National Laboratory)</li> <li>Pere Munt (Barcelona Supercomputing Center, Spain)</li> +<br> +<li>Denis Nadeau</li> <li>Mark Nelson (IBM)</li> <li>Michal Novotny (Masaryk University, Czech Republic)</li> +<br> <li>Bryan O'Sullivan (Pathscale)</li> <li>Gennaro Oliva (Institute of High Performance Computing and Networking, Italy)</li> +<li>Alan Orth (International Livestock Research Institute, Kenya)</li> +<br> +<li>Juan Pancorbo (Leibniz-Rechenzentrum, Germany)</li> +<li>Chrysovalantis Paschoulas (Juelich Supercomputing Centre, Germany)</li> <li>Rémi Palancher</li> <li>Alejandro Lucero Palau (Barcelona Supercomputing Center, Spain)</li> <li>Daniel Palermo (HP)</li> 
<li>Martin Perry (Bull)</li> <li>Dan Phung (Lawrence Livermore National Laboratory/Columbia University)</li> <li>Ashley Pittman (Quadrics, UK)</li> +<li>Ludovic Prevost (NEC, France)</li> +<br> <li>Vijay Ramasubramanian (University of Maryland)</li> <li>Krishnakumar Ravi[KK] (HP)</li> <li>Chris Read</li> @@ -112,30 +143,40 @@ Lead Slurm developers are: <li>Simon Ruderich</li> <li>Dan Rusak (Bull)</li> <li>Eygene Ryabinkin (Kurchatov Institute, Russia)</li> -<li>Federico Sacerdoti (D.E. Shaw)</li> +<br> +<li>Federico Sacerdoti (D. E. Shaw Research)</li> <li>Aleksej Saushev</li> <li>Rod Schultz (Bull)</li> +<li>Filip Skalski (University of Warsaw, Poland)</li> <li>Jason Sollom (Cray)</li> -<li>Eric Soyez (Science+Computing, Germany)</li> +<li>Eric Soyez (Science+Computing)</li> +<li>Marcin Stolarek</li> <li>Tyler Strickland (University of Florida)</li> <li>Jeff Squyres (LAM MPI)</li> +<br> <li>Prashanth Tamraparni (HP, India)</li> <li>Jimmy Tang (Trinity College, Ireland)</li> <li>Kevin Tew (Lawrence Livermore National Laboratory/Bringham Young University)</li> <li>John Thiltges (University of Nebraska-Lincoln)</li> <li>Adam Todorski (Rensselaer Polytechnic Institute)</li> <li>Stephen Trofinoff (Swiss National Supercomputing Centre)</li> +<br> +<li>Garrison Vaughan</li> +<br> +<li>Daniel M. 
Weeks (Rensselaer Polytechnic Institute)</li> <li>Nathan Weeks (Iowa State University)</li> <li>Andy Wettstein (University of Chicago)</li> <li>Tim Wickberg (Rensselaer Polytechnic Institute)</li> <li>Ramiro Brito Willmersdorf (Universidade Federal de Pemambuco, Brazil)</li> <li>Jay Windley (Linux NetworX)</li> +<li>Eric Winter</li> <li>Anne-Marie Wunderlin (Bull)</li> +<br> <li>Yair Yarom (The Hebrew University of Jerusalem, Israel)</li> <li>Nathan Yee (SchedMD)</li> <!-- INDIVIDUALS, PLEASE KEEP IN ALPHABETICAL ORDER --> </ul> -<p style="text-align:center;">Last modified 7 December 2012</p> +<p style="text-align:center;">Last modified 30 October 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/testimonials.shtml b/doc/html/testimonials.shtml index 9bfc1c0a227f2af4f3c092684e3971824bb509f1..792bf7755c36a61efbbad843fb47c181f30414bd 100644 --- a/doc/html/testimonials.shtml +++ b/doc/html/testimonials.shtml @@ -2,6 +2,25 @@ <h1>Customer Testimonials</h1> +<i> +"In 2010, when we embarked upon our mission to port SLURM to our Cray XT and XE +systems, we discovered first-hand the high quality software engineering that +has gone into the creation of this product. From its very core SLURM has been +designed to be extensible and flexible. Moreover, as our work progressed, we +discovered the high level of technical expertise possessed by SchedMD who was +very quick to respond to our questions with insightful advice, suggestions and +clarifications. In the end we arrived at a solution that more than satisfied +our needs. The project was so successful we have now migrated all our production +science systems to Slurm, including our 20 cabinet Cray XT5 system. The ease +with which we have made this transition is testament to the robustness and +high quality of the product but also to the no-fuss installation and +configuration procedure and the high quality documentation. 
We have no qualms +about recommending SLURM to any facility, large or small, who wish to make the +break from the various commercial options available today"<br><br> +Colin McMurtrie, Head of Systems, Swiss National Supercomputing Centre +</i> +<HR SIZE=4> + <i> "Thank you for SLURM! It is one of the nicest pieces of free software for managing HPC clusters we have come across in a long time. @@ -133,6 +152,6 @@ Bill Celmaster, XC Program Manager, Hewlett-Packard Company </i> <HR SIZE=4> -<p style="text-align:center;">Last modified 8 April 2011</p> +<p style="text-align:center;">Last modified 23 January 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/topology.shtml b/doc/html/topology.shtml index 4b36405e575a9377a5b63ba04ec2ef575a604eac..a3e4afee9a5827d98a61a96e147ed11821e55cf4 100644 --- a/doc/html/topology.shtml +++ b/doc/html/topology.shtml @@ -2,7 +2,7 @@ <h1>Topology Guide</h1> -<p>SLURM version 2.0 can be configured to support topology-aware resource +<p>SLURM can be configured to support topology-aware resource allocation to optimize job performance. There are two primary modes of operation, one to optimize performance on systems with a three-dimensional torus interconnect and another for @@ -19,12 +19,12 @@ allocated to the job.</p> <p>Some larger computers rely upon a three-dimensional torus interconnect. The IBM BlueGene computers is one example of this which has highly constrained resource allocation scheme, essentially requiring that -jobs be allocated a set of nodes logically having a rectangular shape. +jobs be allocated a set of nodes logically having a rectangular prism shape. 
SLURM has a plugin specifically written for BlueGene to select appropriate nodes for jobs, change network switch routing, boot nodes, etc as described in the <a href="bluegene.html">BlueGene User and Administrator Guide</a>.</p> -<p>The Sun Constellation and Cray XT systems also have three-dimensional +<p>The Sun Constellation and Cray systems also have three-dimensional torus interconnects, but do not require that jobs execute in adjacent nodes. On those systems, SLURM only needs to allocate resources to a job which are nearby on the network. @@ -36,7 +36,7 @@ SLURM's native best-fit algorithm is thus able to achieve a high degree of locality for jobs. For more information, see SLURM's documentation for <a href="sun_const.html">Sun Constellation</a> and -<a href="cray.html">Cray XT</a> systems.</p> +<a href="cray.html">Cray</a> systems.</p> <h2>Hierarchical Networks</h2> @@ -132,6 +132,16 @@ SwitchName=s3 Nodes=tux[12-15] SwitchName=s4 Switches=s[0-3] </pre> -<p style="text-align:center;">Last modified 27 March 2012</p> +<h2>User Options</h2> + +<p>For use with the topology/tree plugin, users can also specify the maximum +number of leaf switches to be used for their job, along with the maximum time the +job should wait for this optimized configuration. The syntax for this option +is "--switches=count[@time]".
+The system administrator can limit the maximum time that any job can +wait for this optimized configuration using the <i>SchedulerParameters</i> +configuration parameter with the <i>max_switch_wait</i> option.</p> + +<p style="text-align:center;">Last modified 13 August 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/tutorials.shtml b/doc/html/tutorials.shtml index 89ace8e7d215fa4e35d90222579b6b41c8e60be2..7dba652c1ba446b0b6f63c7ade3a5379d710a776 100644 --- a/doc/html/tutorials.shtml +++ b/doc/html/tutorials.shtml @@ -1,49 +1,54 @@ <!--#include virtual="header.txt"--> -<h1>SLURM Tutorials</h1> +<h1>Slurm Tutorials</h1> <h2>Slurm Workload Manager: Architecture, Configuration and Use</h2> -<p>Introduction to the SLURM Resource Manager for users and system +<p>Introduction to the Slurm Workload Manager for users and system administrators, plus some material for Slurm programmers: <a href="http://www.open-mpi.org/video/?category=slurm">Slurm Workload Manager</a></p> -<h2>Introduction to SLURM Tutorial</h2> -<p>Introduction to the SLURM Resource Manager for users and system -administrators. Tutorial covers SLURM architecture, daemons and commands. +<h2>Introduction to Slurm Tutorial</h2> +<p>Introduction to the Slurm Resource Manager for users and system +administrators. Tutorial covers Slurm architecture, daemons and commands. Learn how to use a basic set of commands. 
Learn how to build, configure, and install SLURM.</p> <ul> -<li>Introduction to SLURM video on <a href="http://www.youtube.com">YouTube</a> (in eight parts)</a></li> +<li>Introduction to Slurm video on <a href="http://www.youtube.com">YouTube</a> (in eight parts)</li> <ol> -<li><a href="http://www.youtube.com/watch?v=NH_Fb7X6Db0&feature=relmfu">Introduction to SLURM, Part 1</a></li> -<li><a href="http://www.youtube.com/watch?v=LJrY0AthLB8&feature=relmfu">Introduction to SLURM, Part 2</a></li> -<li><a href="http://www.youtube.com/watch?v=MI9jHavOt5o&feature=relmfu">Introduction to SLURM, Part 3</a></li> -<li><a href="http://www.youtube.com/watch?v=aljhVYwyAoM&feature=relmfu">Introduction to SLURM, Part 4</a></li> -<li><a href="http://www.youtube.com/watch?v=-4XFG5MoHQY&feature=relmfu">Introduction to SLURM, Part 5</a></li> -<li><a href="http://www.youtube.com/watch?v=PAYMsuqf-CA&feature=relmfu">Introduction to SLURM, Part 6</a></li> -<li><a href="http://www.youtube.com/watch?v=kFW0tk7aDDo&feature=relmfu">Introduction to SLURM, Part 7</a></li> -<li><a href="http://www.youtube.com/watch?v=VozfZGZIX8w&feature=relmfu">Introduction to SLURM, Part 8</a></li> +<li><a href="http://www.youtube.com/watch?v=NH_Fb7X6Db0&feature=relmfu">Introduction to Slurm, Part 1</a></li> +<li><a href="http://www.youtube.com/watch?v=LJrY0AthLB8&feature=relmfu">Introduction to Slurm, Part 2</a></li> +<li><a href="http://www.youtube.com/watch?v=MI9jHavOt5o&feature=relmfu">Introduction to Slurm, Part 3</a></li> +<li><a href="http://www.youtube.com/watch?v=aljhVYwyAoM&feature=relmfu">Introduction to Slurm, Part 4</a></li> +<li><a href="http://www.youtube.com/watch?v=-4XFG5MoHQY&feature=relmfu">Introduction to Slurm, Part 5</a></li> +<li><a href="http://www.youtube.com/watch?v=PAYMsuqf-CA&feature=relmfu">Introduction to Slurm, Part 6</a></li> +<li><a href="http://www.youtube.com/watch?v=kFW0tk7aDDo&feature=relmfu">Introduction to Slurm, Part 7</a></li> +<li><a
href="http://www.youtube.com/watch?v=VozfZGZIX8w&feature=relmfu">Introduction to Slurm, Part 8</a></li> </ol> -<li><a href="tutorial_slurm_intro.avi">Introduction to SLURM video (one 330 MB file)</a></li> +<li><a href="tutorial_slurm_intro.avi">Introduction to Slurm video</a> +(one 330 MB file, downloading recommended rather than trying to stream the file)</li> <li><a href="tutorial_intro_files.tar">Tar-ball</a> with sample configuration files</li> </ul> -<h2>SLURM Database Usage</h2> -<p>SLURM Resource Manager database for users and system administrators. -Tutorial covers SLURM architecture for database use, accounting commands, +<h2>Introduction to Slurm Tools</h2> + +<p>This video gives a basic introduction to using sbatch, squeue, scancel and +scontrol show job on the computers at +<a href="http://home.byu.edu">Brigham Young University</a>, +<a href="https://marylou.byu.edu/">Fulton Supercomputing Lab</a>.<br><br> +<a href="https://www.youtube.com/watch?v=U42qlYkzP9k&feature=player_embedded">Introduction to Slurm Tools</a> + +<h2>Slurm Database Usage</h2> +<p>Slurm Resource Manager database for users and system administrators. 
+Tutorial covers Slurm architecture for database use, accounting commands, resource limits, fair share scheduling, and accounting configuration.</p> -<ul> -<li>SLURM Database Usage video on <a href="http://www.youtube.com">YouTube</a> (in two parts)</a></li> +Slurm Database Usage video on <a href="http://www.youtube.com">YouTube</a> (in two parts) <ol> -<li><a href="http://www.youtube.com/watch?v=8UfzXnzSmL4&feature=relmfu">SLURM Database Usage, Part 1</a></li> -<li><a href="http://www.youtube.com/watch?v=wZrJQKRc2Vg&feature=relmfu">SLURM Database Usage, Part 2</a></li> +<li><a href="http://www.youtube.com/watch?v=8UfzXnzSmL4&feature=relmfu">Slurm Database Usage, Part 1</a></li> +<li><a href="http://www.youtube.com/watch?v=wZrJQKRc2Vg&feature=relmfu">Slurm Database Usage, Part 2</a></li> </ol> -</ul> - -<h2>More Tutorials Coming Soon...</h2> -<p style="text-align:center;">Last modified 18 January 2013</p> +<p style="text-align:center;">Last modified 10 September 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/Makefile.in b/doc/man/Makefile.in index 09a5357f0e46c0b762b87b27ec8ab081eabf099c..9c8f1a6d54d26c09366ff2f5bdcc59c52fcf2ef8 100644 --- a/doc/man/Makefile.in +++ b/doc/man/Makefile.in @@ -55,6 +55,7 @@ subdir = doc/man DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
$(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = 
@OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/doc/man/man1/Makefile.am b/doc/man/man1/Makefile.am index 7c54dfb8a1dc548ab8a41b3c65b9c2fbb2c4664b..57be1510f635fb93768274c8bee62f1dedf2e3a6 100644 --- a/doc/man/man1/Makefile.am +++ b/doc/man/man1/Makefile.am @@ -1,4 +1,4 @@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html man1_MANS = \ sacct.1 \ @@ -14,6 +14,7 @@ man1_MANS = \ slurm.1 \ smap.1 \ sprio.1 \ + sh5util.1 \ squeue.1 \ sreport.1 \ srun.1 \ @@ -40,6 +41,7 @@ html_DATA = \ sinfo.html \ smap.html \ sprio.html \ + sh5util.html \ squeue.html \ sreport.html \ srun.html \ diff --git a/doc/man/man1/Makefile.in b/doc/man/man1/Makefile.in index cdf31bef12b7b413b40660bae04dae11732dd9fd..be884edf85b5acab7959d367aae933ae2dde81e8 100644 --- a/doc/man/man1/Makefile.in +++ b/doc/man/man1/Makefile.in @@ -57,6 +57,7 @@ subdir = doc/man/man1 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 
+281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -317,7 +343,7 @@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ @@ -360,6 +386,7 @@ man1_MANS = \ slurm.1 \ smap.1 \ sprio.1 \ + sh5util.1 \ squeue.1 \ sreport.1 \ srun.1 \ @@ -383,6 +410,7 @@ EXTRA_DIST = $(man1_MANS) $(am__append_1) @HAVE_MAN2HTML_TRUE@ sinfo.html \ @HAVE_MAN2HTML_TRUE@ smap.html \ @HAVE_MAN2HTML_TRUE@ sprio.html \ +@HAVE_MAN2HTML_TRUE@ sh5util.html \ @HAVE_MAN2HTML_TRUE@ squeue.html \ @HAVE_MAN2HTML_TRUE@ sreport.html \ @HAVE_MAN2HTML_TRUE@ srun.html \ diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index 952de08874bbed3e429ef5b23355ef11ff737d9a..6962a74c66e4c7893612f52b92ff44d3f6b7155b 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -1,4 +1,4 @@ -.TH SACCT "1" "March 2010" "sacct 2.2" "Slurm components" +.TH SACCT "1" "August 2013" "sacct 2.6" "Slurm components" .SH "NAME" sacct \- displays accounting data for all jobs and job steps in the @@ -54,10 +54,9 @@ gather and report incomplete information for these calls; \f2getrusage (3)\fP man page for information about which data are actually available on your system. .IP -Elapsed time fields are presented as 2 fields, integral seconds and integral microseconds -.IP Elapsed time fields are presented as -[[days-]hours:]minutes:seconds.hundredths. +[days-]hours:minutes:seconds[.microseconds]. Only 'CPU' fields will +ever have microseconds. 
.IP The default input file is the file named in the \f3AccountingStorageLoc\fP parameter in slurm.conf. @@ -120,21 +119,24 @@ Print a list of fields that can be specified with the \f3\-\-format\fP option. .ft 3 Fields available: -AllocCPUS Account AssocID AveCPU -AveCPUFreq AvePages AveRSS AveVMSize -BlockID Cluster Comment ConsumedEnergy -CPUTime CPUTimeRAW DerivedExitCode Elapsed -Eligible End ExitCode GID -Group JobID JobName Layout -MaxPages MaxPagesNode MaxPagesTask MaxRSS -MaxRSSNode MaxRSSTask MaxVMSize MaxVMSizeNode -MaxVMSizeTask MinCPU MinCPUNode MinCPUTask -NCPUS NNodes NodeList NTasks -Priority Partition QOSRAW ReqCPUS -Reserved ResvCPU ResvCPURAW Start -State Submit Suspended SystemCPU -Timelimit TotalCPU UID User -UserCPU WCKey WCKeyID +AllocCPUS Account AssocID AveCPU +AveCPUFreq AveDiskRead AveDiskWrite AvePages +AveRSS AveVMSize BlockID Cluster +Comment ConsumedEnergy CPUTime CPUTimeRAW +DerivedExitCode Elapsed Eligible End +ExitCode GID Group JobID +JobName Layout MaxDiskRead MaxDiskReadNode +MaxDiskReadTask MaxDiskWrite MaxDiskWriteNode MaxDiskWriteTask +MaxPages MaxPagesNode MaxPagesTask MaxRSS +MaxRSSNode MaxRSSTask MaxVMSize MaxVMSizeNode +MaxVMSizeTask MinCPU MinCPUNode MinCPUTask +NCPUS NNodes NodeList NTasks +Priority Partition QOSRAW ReqCPUFreq +ReqCPUs ReqMem Reserved ResvCPU +ResvCPURAW Start State Submit +Suspended SystemCPU Timelimit TotalCPU +UID User UserCPU WCKey +WCKeyID .ft 1 .fi @@ -207,7 +209,11 @@ maximum timelimit of the range. Default is no restriction. 
Equivalent to specifying: .IP .na -\-\-format=jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode +\-\-format=jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask, +avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode, +maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks, +alloccpus,elapsed,state,exitcode,maxdiskread,maxdiskreadnode,maxdiskreadtask, +avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite .ad .TP @@ -397,8 +403,12 @@ Only show cumulative statistics for each job, not the intermediate steps. The following describes each job accounting field: .RS .TP "10" -\f3alloccpus\fP -Count of allocated processors. +\f3ALL\fP +Print all fields listed below. + +.TP +\f3AllocCPUs\fP +Count of allocated CPUs. Equivalent to \f3NCPUs\fP. .TP \f3account\fP @@ -416,6 +426,14 @@ Average (system + user) CPU time of all tasks in job. \f3AveCPUFreq\fP Average weighted CPU frequency of all tasks in job, in kHz. +.TP +\f3AveDiskRead\fP +Average number of bytes read by all tasks in job. + +.TP +\f3AveDiskWrite\fP +Average number of bytes written by all tasks in job. + .TP \f3AvePages\fP Average number of page faults of all tasks in job. @@ -562,6 +580,30 @@ in place of the space. What the layout of a step was when it was running. This can be used to give you an idea of which node ran which rank in your job. +.TP +\f3MaxDiskRead\fP +Maximum number of bytes read by all tasks in job. + +.TP +\f3MaxDiskReadNode\fP +The node on which the maxdiskread occurred. + +.TP +\f3MaxDiskReadTask\fP +The task ID where the maxdiskread occurred. + +.TP +\f3MaxDiskWrite\fP +Maximum number of bytes written by all tasks in job. + +.TP +\f3MaxDiskWriteNode\fP +The node on which the maxdiskwrite occurred.
+ +.TP +\f3MaxDiskWriteTask\fP +The task ID where the maxdiskwrite occurred. + .TP \f3MaxPages\fP Maximum number of page faults of all tasks in job. @@ -612,6 +654,8 @@ The task ID where the mincpu occurred. .TP \f3ncpus\fP +Count of allocated CPUs. Equivalent to \f3AllocCPUs\fP. + Total number of CPUs allocated to the job. .TP @@ -642,10 +686,21 @@ Name of Quality of Service. \f3qosraw\fP Id of Quality of Service. +.TP +\f3ReqCPUFreq\fP +Requested CPU frequency for the step, in kHz. +Note: This value applies only to a job step. No value is reported for the job. + .TP \f3reqcpus\fP Required CPUs. +.TP +\f3ReqMem\fP +Minimum required memory for the job, in MB. A 'c' at the end of +number represents Memory Per CPU, a 'n' represents Memory Per Node. +Note: This value is only from the job allocation, not the step. + .TP \f3reserved\fP How much wall clock time was used as reserved time for this job. This is @@ -810,11 +865,12 @@ on the command line. .fi .RE .SH "COPYING" - Copyright (C) 2005\-2007 Copyright Hewlett\-Packard Development Company L.P. -.LP -Copyright (C) 2008\-2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, -DISCLAIMER). CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 12d31914477a0a8fa5fe856570b0153e84cb01e2..7756f556e6e966c0ebee779f39c6dee250ab3b28 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -1010,8 +1010,8 @@ in aggregate for this QOS. .TP \fIGrpWall\fP Maximum wall clock time running jobs are able to be allocated in aggregate for -this QOS. - +this QOS.
If this limit is reached submission requests will be denied and the +running jobs will be killed. .TP \fIID\fP The id of the QOS. @@ -1790,9 +1790,10 @@ sacctmgr line. .ec .SH "COPYING" -Copyright (C) 2008\-2009 Lawrence Livermore National Security. +Copyright (C) 2008\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 746a4f0a81f1d075d03d856b90cb0b8385f45bf7..cc3bcd7402ea9dc84842c2259eab88fbfafa6311 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -1,4 +1,4 @@ -.TH "salloc" "1" "SLURM 2.5" "October 2012" "SLURM Commands" +.TH "salloc" "1" "SLURM 2.6" "January 2013" "SLURM Commands" .SH "NAME" salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, @@ -42,12 +42,56 @@ be changed after job submission using the \fBscontrol\fR command. .TP -\fB\-\-acctg\-freq\fR=<\fIseconds\fR> -Define the job accounting sampling interval. +\fB\-\-acctg\-freq\fR +Define the job accounting and profiling sampling intervals. This can be used to override the \fIJobAcctGatherFrequency\fR parameter in SLURM's configuration file, \fIslurm.conf\fR. -A value of zero disables real the periodic job sampling and provides accounting -information only on job termination (reducing SLURM interference with the job). +The supported format is as follows: +.RS +.TP 12 +\fB\-\-acctg\-freq=\fR\fI<datatype>\fR\fB=\fR\fI<interval>\fR +where \fI<datatype>\fR=\fI<interval>\fR specifies the task sampling +interval for the jobacct_gather plugin or a +sampling interval for a profiling type by the +acct_gather_profile plugin. Multiple, +comma-separated \fI<datatype>\fR=\fI<interval>\fR intervals +may be specified. 
Supported datatypes are as follows: +.RS +.TP +\fBtask=\fI<interval>\fR +where \fI<interval>\fR is the task sampling interval in seconds +for the jobacct_gather plugins and for task +profiling by the acct_gather_profile plugin. +.TP +\fBenergy=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for energy profiling using the acct_gather_energy plugin +.TP +\fBnetwork=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for infiniband profiling using the acct_gather_infiniband +plugin. +.TP +\fBfilesystem=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for filesystem profiling using the acct_gather_filesystem +plugin. +.TP +.RE +.RE +.br +The default value for the task sampling interval +is 30. The default value for all other intervals is 0. +An interval of 0 disables sampling of the specified type. +If the task sampling interval is 0, accounting +information is collected only at job termination (reducing SLURM +interference with the job). +.br +.br +Smaller (non\-zero) values have a greater impact upon job performance, +but a value of 30 seconds is not likely to be noticeable for +applications having less than 10,000 tasks. +.RE .TP \fB\-B\fR \fB\-\-extra\-node\-info\fR=<\fIsockets\fR[:\fIcores\fR[:\fIthreads\fR]]> @@ -405,17 +449,15 @@ by the user root. .TP \fB\-\-gres\fR=<\fIlist\fR> Specifies a comma delimited list of generic consumable resources. -The format of each entry on the list is "name[:count[*cpu]]". +The format of each entry on the list is "name[:count]". The name is that of the consumable resource. The count is the number of those resources with a default value of 1. -The specified resources will be allocated to the job on each node -allocated unless "*cpu" is appended, in which case the resources -will be allocated on a per cpu basis. +The specified resources will be allocated to the job on each node. 
The available generic consumable resources is configurable by the system administrator. A list of available generic consumable resources will be printed and the command will exit if the option argument is "help". -Examples of use include "\-\-gres=gpus:2*cpu,disk=40G" and "\-\-gres=help". +Examples of use include "\-\-gres=gpu:2,mic=1" and "\-\-gres=help". .TP \fB\-H, \-\-hold\fR @@ -557,7 +599,9 @@ of tasks is no larger than the number of allocated nodes. The tasks are distributed in blocks of a specified size. The options include a number representing the size of the task block. This is followed by an optional specification of the task distribution scheme -within a block of tasks and between the blocks of tasks. For more +within a block of tasks and between the blocks of tasks. The number of tasks +distributed to each node is the same as for cyclic distribution, but the +taskids assigned to each node depend on the plane size. For more details (including examples and diagrams), please see .br http://slurm.schedmd.com/mc_support.html @@ -925,6 +969,38 @@ per processor. By specifying \fB\-\-overcommit\fR you are explicitly allowing more than one task per processor. However no more than \fBMAX_TASKS_PER_NODE\fR tasks are permitted to execute per node. +.TP +\fB\-\-profile\fR=<all|none|[energy[,|task[,|lustre[,|network]]]]> +enables detailed data collection by the acct_gather_profile plugin. +Detailed data are typically time-series that are stored in an HDF5 file for +the job. + +.RS +.TP 10 +\fBAll\fR +All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR +No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR +Energy data is collected. + +.TP +\fBTask\fR +Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR +Lustre data is collected. + +.TP +\fBNetwork\fR +Network (InfiniBand) data is collected. 
+.RE .TP \fB\-p\fR, \fB\-\-partition\fR=<\fIpartition_names\fR> Request a specific partition for the resource allocation. If not specified, @@ -1233,6 +1309,9 @@ Same as \fB\-O, \-\-overcommit\fR \fBSALLOC_PARTITION\fR Same as \fB\-p, \-\-partition\fR .TP +\fBSALLOC_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSALLOC_QOS\fR Same as \fB\-\-qos\fR .TP @@ -1279,6 +1358,9 @@ CPU_ID = Board_ID x threads_per_board + Socket_ID x threads_per_socket + Core_ID x threads_per_core + Thread_ID). .TP +\fBSLURM_DISTRIBUTION\fR +Same as \fB\-m, \-\-distribution\fR +.TP \fBSLURM_JOB_ID\fR (and \fBSLURM_JOBID\fR for backwards compatibility) The ID of the job allocation. .TP @@ -1302,6 +1384,9 @@ Set to value of the \-\-mem_bind\fR option. \fBSLURM_SUBMIT_DIR\fR The directory from which \fBsalloc\fR was invoked. .TP +\fBSLURM_SUBMIT_HOST\fR +The hostname of the computer from which \fBsalloc\fR was invoked. +.TP \fBSLURM_NODE_ALIASES\fR Sets of node name, communication address and hostname for nodes allocated to the job from the cloud. Each element in the set if colon separated and each @@ -1314,6 +1399,9 @@ Same as \fB\-n, \-\-ntasks\fR \fBSLURM_NTASKS_PER_NODE\fR Set to value of the \-\-ntasks\-per\-node\fR option, if specified. .TP +\fBSLURM_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSLURM_TASKS_PER_NODE\fR Number of tasks to be initiated on each node. Values are comma separated and in the same order as SLURM_NODELIST. @@ -1364,9 +1452,11 @@ salloc \-N5 srun \-n10 myprogram .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. 
diff --git a/doc/man/man1/sattach.1 b/doc/man/man1/sattach.1 index 67d3a8f39c5240313189bc7594c8e2a7d468b2d0..f35ddf5fbbc7927a8ab9bc5981b3b5fca5b662e2 100644 --- a/doc/man/man1/sattach.1 +++ b/doc/man/man1/sattach.1 @@ -91,9 +91,11 @@ sattach \-\-output\-filter 5 65386.15 .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. -Copyright (C) 2008\-2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2009 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 1a8026cc4926d8cba5bf2545f78eb6192349874c..8e6242d14b8a8b916c24e28c24871b87e714e6a0 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -1,4 +1,4 @@ -.TH "sbatch" "1" "SLURM 2.5" "October 2012" "SLURM Commands" +.TH "sbatch" "1" "SLURM 2.6" "January 2013" "SLURM Commands" .SH "NAME" sbatch \- Submit a batch script to SLURM. @@ -33,6 +33,18 @@ http://slurm.schedmd.com/cpu_management.html .SH "OPTIONS" .LP +.TP +\fB\-a\fR, \fB\-\-array\fR=<\fIindexes\fR> +Submit a job array, multiple jobs to be executed with identical parameters. +The \fIindexes\fR specification identifies what array index values should +be used. Multiple values may be specified using a comma separated list and/or +a range of values with a "\-" separator. For example, "\-\-array=0\-15" or +"\-\-array=0,6,16\-32". +A step function can also be specified with a suffix containing a colon and +number. For example, "\-\-array=0\-15:4" is equivalent to "\-\-array=0,4,8,12". +The minimum index value is 0. +The maximum value is one less than the configuration parameter MaxArraySize. + .TP \fB\-A\fR, \fB\-\-account\fR=<\fIaccount\fR> Charge resources used by this job to specified account.
@@ -41,12 +53,56 @@ be changed after job submission using the \fBscontrol\fR command. .TP -\fB\-\-acctg\-freq\fR=<\fIseconds\fR> -Define the job accounting sampling interval. +\fB\-\-acctg\-freq\fR +Define the job accounting and profiling sampling intervals. This can be used to override the \fIJobAcctGatherFrequency\fR parameter in SLURM's configuration file, \fIslurm.conf\fR. -A value of zero disables the periodic job sampling and provides accounting -information only on job termination (reducing SLURM interference with the job). +The supported format is as follows: +.RS +.TP 12 +\fB\-\-acctg\-freq=\fR\fI<datatype>\fR\fB=\fR\fI<interval>\fR +where \fI<datatype>\fR=\fI<interval>\fR specifies the task sampling +interval for the jobacct_gather plugin or a +sampling interval for a profiling type by the +acct_gather_profile plugin. Multiple, +comma-separated \fI<datatype>\fR=\fI<interval>\fR intervals +may be specified. Supported datatypes are as follows: +.RS +.TP +\fBtask=\fI<interval>\fR +where \fI<interval>\fR is the task sampling interval in seconds +for the jobacct_gather plugins and for task +profiling by the acct_gather_profile plugin. +.TP +\fBenergy=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for energy profiling using the acct_gather_energy plugin +.TP +\fBnetwork=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for infiniband profiling using the acct_gather_infiniband +plugin. +.TP +\fBfilesystem=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for filesystem profiling using the acct_gather_filesystem +plugin. +.TP +.RE +.RE +.br +The default value for the task sampling interval is 30. +The default value for all other intervals is 0. +An interval of 0 disables sampling of the specified type. +If the task sampling interval is 0, accounting +information is collected only at job termination (reducing SLURM +interference with the job). 
+.br +.br +Smaller (non\-zero) values have a greater impact upon job performance, +but a value of 30 seconds is not likely to be noticeable for +applications having less than 10,000 tasks. +.RE .TP \fB\-B\fR \fB\-\-extra\-node\-info\fR=<\fIsockets\fR[:\fIcores\fR[:\fIthreads\fR]]> @@ -371,9 +427,11 @@ it is executed. \fB\-e\fR, \fB\-\-error\fR=<\fIfilename pattern\fR> Instruct SLURM to connect the batch script's standard error directly to the file name specified in the "\fIfilename pattern\fR". -By default both standard output and standard error are directed to a file of -the name "slurm\-%j.out", where the "%j" is replaced with the job allocation -number. +By default both standard output and standard error are directed to the same file. +For job arrays, the default file name is "slurm-%A_%a.out", "%A" is replaced +by the job ID and "%a" with the array index. +For other jobs, the default file name is "slurm-%j.out", where the "%j" is +replaced by the job ID. See the \fB\-\-input\fR option for filename specification options. .TP @@ -447,17 +505,15 @@ may be the group name or the numerical group ID. .TP \fB\-\-gres\fR=<\fIlist\fR> Specifies a comma delimited list of generic consumable resources. -The format of each entry on the list is "name[:count[*cpu]]". +The format of each entry on the list is "name[:count]". The name is that of the consumable resource. The count is the number of those resources with a default value of 1. -The specified resources will be allocated to the job on each node -allocated unless "*cpu" is appended, in which case the resources -will be allocated on a per cpu basis. +The specified resources will be allocated to the job on each node. The available generic consumable resources is configurable by the system administrator. A list of available generic consumable resources will be printed and the command will exit if the option argument is "help". -Examples of use include "\-\-gres=gpus:2*cpu,disk=40G" and "\-\-gres=help". 
+Examples of use include "\-\-gres=gpu:2,mic=1" and "\-\-gres=help". .TP \fB\-H, \-\-hold\fR @@ -498,6 +554,10 @@ job allocation will have to wait in a queue of pending jobs, the batch script will not be submitted. NOTE: There is limited support for this option with batch jobs. +.TP +\fB\-\-ignore\-pbs\fR +Ignore any "#PBS" options specified in the batch script. + .TP \fB\-i\fR, \fB\-\-input\fR=<\fIfilename pattern\fR> Instruct SLURM to connect the batch script's standard input @@ -515,12 +575,21 @@ Supported replacement symbols are: .PD .RS .TP +\fB%A\fR +Job array's master job allocation number. +.TP +\fB%a\fR +Job array ID (index) number. +.TP \fB%j\fR Job allocation number. .TP \fB%N\fR Node name. Only one file is created, so %N will be replaced by the name of the first node in the job, which is the one that runs the script. +.TP +\fB%u\fR +User name. .RE .TP @@ -614,7 +683,9 @@ of tasks is no larger than the number of allocated nodes. The tasks are distributed in blocks of a specified size. The options include a number representing the size of the task block. This is followed by an optional specification of the task distribution scheme -within a block of tasks and between the blocks of tasks. For more +within a block of tasks and between the blocks of tasks. The number of tasks +distributed to each node is the same as for cyclic distribution, but the +taskids assigned to each node depend on the plane size. For more details (including examples and diagrams), please see .br http://slurm.schedmd.com/mc_support.html @@ -977,9 +1048,11 @@ allowing more than one task per processor. However no more than \fB\-o\fR, \fB\-\-output\fR=<\fIfilename pattern\fR> Instruct SLURM to connect the batch script's standard output directly to the file name specified in the "\fIfilename pattern\fR". -By default both standard output and standard error are directed to a file of -the name "slurm\-%j.out", where the "%j" is replaced with the job allocation -number. 
+By default both standard output and standard error are directed to the same file. +For job arrays, the default file name is "slurm-%A_%a.out", "%A" is replaced +by the job ID and "%a" with the array index. +For other jobs, the default file name is "slurm-%j.out", where the "%j" is +replaced by the job ID. See the \fB\-\-input\fR option for filename specification options. .TP @@ -996,6 +1069,39 @@ partition as designated by the system administrator. If the job can use more than one partition, specify their names in a comma separate list and the one offering earliest initiation will be used. +.TP +\fB\-\-profile\fR=<all|none|[energy[,|task[,|lustre[,|network]]]]> +enables detailed data collection by the acct_gather_profile plugin. +Detailed data are typically time-series that are stored in an HDF5 file for +the job. + +.RS +.TP 10 +\fBAll\fR +All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR +No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR +Energy data is collected. + +.TP +\fBTask\fR +Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR +Lustre data is collected. + +.TP +\fBNetwork\fR +Network (InfiniBand) data is collected. 
+.RE + .TP \fB\-\-propagate\fR[=\fIrlimitfR] Allows users to specify which of the modifiable (soft) resource limits @@ -1299,6 +1405,9 @@ Same as \fB\-A, \-\-account\fR \fBSBATCH_ACCTG_FREQ\fR Same as \fB\-\-acctg\-freq\fR .TP +\fBSBATCH_ARRAY_INX\fR +Same as \fB\-a, \-\-array\fR +.TP \fBSBATCH_BLRTS_IMAGE\fR Same as \fB\-\-blrts\-image\fR .TP @@ -1344,6 +1453,9 @@ Same as \fB\-g, \-\-geometry\fR \fBSBATCH_GET_USER_ENV\fR Same as \fB\-\-get\-user\-env\fR .TP +\fBSBATCH_IGNORE_PBS\fR +Same as \fB\-\-ignore\-pbs\fR +.TP \fBSBATCH_IMMEDIATE\fR Same as \fB\-I, \-\-immediate\fR .TP @@ -1383,6 +1495,9 @@ Same as \fB\-O, \-\-overcommit\fR \fBSBATCH_PARTITION\fR Same as \fB\-p, \-\-partition\fR .TP +\fBSBATCH_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSBATCH_QOS\fR Same as \fB\-\-qos\fR .TP @@ -1414,6 +1529,10 @@ Max time waiting for requested switches. See \fB\-\-switches\fR .TP \fBSBATCH_WCKEY\fR Same as \fB\-\-wckey\fR +.TP +\fBSLURM_STEP_KILLED_MSG_NODE_ID\fR=ID +If set, only the specified node will log when the job or step are killed +by a signal. .SH "OUTPUT ENVIRONMENT VARIABLES" .PP @@ -1432,6 +1551,11 @@ Do not free a block on Blue Gene L/P systems only. \fBMPIRUN_PARTITION\fR The block name on Blue Gene systems only. .TP +\fBSLURM_ARRAY_TASK_ID\fR +Job array ID (index) number. +.TP +\fBSLURM_ARRAY_JOB_ID\fR +Job array's master job ID number. \fBSLURM_CHECKPOINT_IMAGE_DIR\fR Directory into which checkpoint images should be written if specified on the execute line. @@ -1448,6 +1572,9 @@ Core_ID x threads_per_core + Thread_ID). \fBSLURM_CPUS_ON_NODE\fR Number of CPUS on the allocated node. .TP +\fBSLURM_DISTRIBUTION\fR +Same as \fB\-m, \-\-distribution\fR +.TP \fBSLURM_GTIDS\fR Global task IDs running on this node. Zero origin and comma separated. .TP @@ -1514,6 +1641,9 @@ This value is propagated to the spawned processes. 
\fBSLURM_PROCID\fR The MPI rank (or relative process ID) of the current process .TP +\fBSLURM_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSLURM_RESTART_COUNT\fR If the job has been restarted due to system failure or has been explicitly requeued, this will be sent to the number of times @@ -1522,6 +1652,9 @@ the job has been restarted. \fBSLURM_SUBMIT_DIR\fR The directory from which \fBsbatch\fR was invoked. .TP +\fBSLURM_SUBMIT_HOST\fR +The hostname of the computer from which \fBsbatch\fR was invoked. +.TP \fBSLURM_TASKS_PER_NODE\fR Number of tasks to be initiated on each node. Values are comma separated and in the same order as SLURM_NODELIST. @@ -1606,9 +1739,11 @@ host4 .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sbcast.1 b/doc/man/man1/sbcast.1 index 0d0f1f257d4352ed2c826cfc82b6714f3288b1ab..02af7757aa596d8c21c7385040ba391c84ce786c 100644 --- a/doc/man/man1/sbcast.1 +++ b/doc/man/man1/sbcast.1 @@ -104,9 +104,10 @@ srun: jobid 12345 submitted .fi .SH "COPYING" -Copyright (C) 2006 The Regents of the University of California. +Copyright (C) 2006-2010 The Regents of the University of California. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. 
diff --git a/doc/man/man1/scancel.1 b/doc/man/man1/scancel.1 index ab6023567af31e99eb7f1dee289934b20636084a..dad7fdf1da08f6a17422a01a4d07ed2cbdd2a459 100644 --- a/doc/man/man1/scancel.1 +++ b/doc/man/man1/scancel.1 @@ -1,18 +1,23 @@ -.TH SCANCEL "1" "November 2012" "scancel 2.5" "Slurm components" +.TH SCANCEL "1" "January 2013" "scancel 2.6" "Slurm components" .SH "NAME" scancel \- Used to signal jobs or job steps that are under the control of Slurm. .SH "SYNOPSIS" -\fBscancel\fR [\fIOPTIONS\fR...] [\fIjob_id\fR[.\fIstep_id\fR]] [\fIjob_id\fR[.\fIstep_id\fR]...] +\fBscancel\fR [\fIOPTIONS\fR...] [\fIjob_id\fR[_\fIarray_id\fR][.\fIstep_id\fR]] [\fIjob_id\fR[_\fIarray_id\fR][.\fIstep_id\fR]...] .SH "DESCRIPTION" -\fBscancel\fR is used to signal or cancel jobs or job steps. An arbitrary number -of jobs or job steps may be signaled using job specification filters or a -space separated list of specific job and/or job step IDs. A job or job step -can only be signaled by the owner of that job or user root. If an attempt is -made by an unauthorized user to signal a job or job step, an error message will -be printed and the job will not be signaled. +\fBscancel\fR is used to signal or cancel jobs, job arrays or job steps. +An arbitrary number of jobs or job steps may be signaled using job +specification filters or a space separated list of specific job and/or +job step IDs. +If the job ID of a job array is specified with an array ID value then only that +job array element will be cancelled. +If the job ID of a job array is specified without an array ID value then all +job array elements will be cancelled. +A job or job step can only be signaled by the owner of that job or user root. +If an attempt is made by an unauthorized user to signal a job or job step, an +error message will be printed and the job will not be signaled. 
.SH "OPTIONS" @@ -224,11 +229,17 @@ scancel \-\-signal=USR1 \-\-batch 1236 Cancel job all pending jobs belonging to user "bob" in partition "debug": scancel \-\-state=PENDING \-\-user=bob \-\-partition=debug +.TP +Cancel only array ID 4 of job array 1237 +scancel 1237_4 + .SH "COPYING" Copyright (C) 2002-2007 The Regents of the University of California. -Copyright (C) 2008-2011 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008-2011 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index ffca9573e9908b678217e2ccb4201326a9c25f73..f903ba64efb2a21e35ffc99cc649b667e4091cb3 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -190,7 +190,7 @@ be released by the job's owner. .TP \fBnotify\fP \fIjob_id\fP \fImessage\fP -Send a message to standard error of the salloc or srun command or batch job +Send a message to standard error of the salloc or srun command or batch job associated with the specified \fIjob_id\fP. .TP @@ -243,6 +243,8 @@ rebooted. You can explicitly drain the nodes in order to reboot nodes as soon as possible, but the nodes must also explicitly be returned to service after being rebooted. You can alternately create an advanced reservation to prevent additional jobs from being initiated on nodes to be rebooted. +NOTE: Nodes will be placed in a state of "MAINT" until rebooted and returned +to service with a normal state. .TP \fBreconfigure\fP @@ -274,10 +276,10 @@ Resume a previously suspended job. Also see \fBsuspend\fR. .TP \fBschedloglevel\fP \fILEVEL\fP Enable or disable scheduler logging. -\fILEVEL\fP may be "0", "1", "disable" or "enable". "0" has the same +\fILEVEL\fP may be "0", "1", "disable" or "enable". 
"0" has the same effect as "disable". "1" has the same effect as "enable". -This value is temporary and will be overwritten when the slurmctld -daemon reads the slurm.conf configuration file (e.g. when the daemon +This value is temporary and will be overwritten when the slurmctld +daemon reads the slurm.conf configuration file (e.g. when the daemon is restarted or \fBscontrol reconfigure\fR is executed) if the SlurmSchedLogLevel parameter is present. @@ -393,7 +395,7 @@ system administrator (also see the \fBhold\fP command). .TP \fBupdate\fP \fISPECIFICATION\fP -Update job, step, node, partition, or reservation configuration per the +Update job, step, node, partition, or reservation configuration per the supplied specification. \fISPECIFICATION\fP is in the same format as the Slurm configuration file and the output of the \fIshow\fP command described above. It may be desirable to execute the \fIshow\fP command (described above) on the @@ -440,15 +442,16 @@ Possible values on Blue Gene are "MESH", "TORUS" and "NAV" \fIContiguous\fP=<yes|no> Set the job's requirement for contiguous (consecutive) nodes to be allocated. Possible values are "YES" and "NO". +Only the Slurm administrator or root can change this parameter. .TP \fIDependency\fP=<dependency_list> -Defer job's initiation until specified job dependency specification +Defer job's initiation until specified job dependency specification is satisfied. Cancel dependency with an empty dependency_list (e.g. "Dependency="). <\fIdependency_list\fR> is of the form <\fItype:job_id[:job_id][,type:job_id[:job_id]]\fR>. Many jobs can share the same dependency and these jobs may even belong to -different users. +different users. .PD .RS .TP @@ -534,18 +537,21 @@ of the cluster) as described in salloc/sbatch/srun man pages. .TP \fIMinCPUsNode\fP=<count> Set the job's minimum number of CPUs per node to the specified value. +Only the Slurm administrator or root can change this parameter. 
.TP \fIMinMemoryCPU\fP=<megabytes> Set the job's minimum real memory required per allocated CPU to the specified -value. +value. Only the Slurm administrator or root can change this parameter. Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. .TP \fIMinMemoryNode\fP=<megabytes> Set the job's minimum real memory required per node to the specified value. Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. +Only the Slurm administrator or root can change this parameter. .TP \fIMinTmpDiskNode\fP=<megabytes> Set the job's minimum temporary disk space required per node to the specified value. +Only the Slurm administrator or root can change this parameter. .TP \fIName\fP=<name> Set the job's name to the specified value. @@ -556,7 +562,7 @@ The adjustment range is from \-10000 (highest priority) to 10000 (lowest priority). Nice value changes are not additive, but overwrite any prior nice value and are applied to the job's base priority. -Only privileged users can specify a negative adjustment. +Only privileged users, Slurm administrator or root, can specify a negative adjustment. .TP \fINodeList\fP=<nodes> Change the nodes allocated to a running job to shrink it's size. @@ -592,6 +598,7 @@ Explicitly setting a job's priority clears any previously set nice value and removes the priority/multifactor plugin's ability to manage a job's priority. In order to restore the priority/multifactor plugin's ability to manage a job's priority, hold and then release the job. +Only the Slurm administrator or root can increase job's priority. .TP \fIQOS\fP=<name> Set the job's QOS (Quality Of Service) to the specified value. @@ -625,7 +632,7 @@ Possible values are "YES" and "NO". .TP \fIShared\fP=<yes|no> Set the job's ability to share nodes with other jobs. Possible values are -"YES" and "NO". +"YES" and "NO". Only the Slurm administrator or root can increase job's priority. 
.TP \fIStartTime\fP=<time_spec> Set the job's earliest initiation time. @@ -684,7 +691,7 @@ precede the time with a "+" or "\-" to increment or decrement the current time limit (e.g. "TimeLimit=+30"). In order to increment or decrement the current time limit, the \fIJobId\fP specification must precede the \fITimeLimit\fP specification. - +Only the Slurm administrator or root can increase job's TimeLimit. .TP \fIwait\-for\-switch\fP=<max\-time\-to\-wait> When a tree topology is used, this defines the maximum time to wait for the @@ -761,7 +768,7 @@ Time the job was last suspended or resumed. \fIUserId\fP \fIGroupId\fP The user and group under which the job was submitted. .TP -NOTE on information displayed for various job states: +NOTE on information displayed for various job states: When you submit a request for the "show job" function the scontrol process makes an RPC request call to slurmctld with a REQUEST_JOB_INFO message type. If the state of the job is PENDING, then it returns @@ -776,11 +783,54 @@ started. \fBSPECIFICATIONS FOR UPDATE COMMAND, STEPS\fR .TP \fIStepId\fP=<job_id>[.<step_id>] -Identify the step to be updated. -If the job_id is given, but no step_id is specified then all steps of +Identify the step to be updated. +If the job_id is given, but no step_id is specified then all steps of the identified job will be modified. This specification is required. .TP +\fICompFile\fP=<completion file> +Update a step with information about a step's completion. Can be +useful if step statistics aren't directly available through a +jobacct_gather plugin.
The file is a space\-delimited file with +format for Version 1 is as follows +.eo +.br +.sp +1 34461 0 2 0 3 1361906011 1361906015 1 1 3368 13357 /bin/sleep +.br +A B C D E F G H I J K L M +.br +.sp +Field Descriptions: +.br +.sp +A file version +.br +B ALPS apid +.br +C inblocks +.br +D outblocks +.br +E exit status +.br +F number of allocated CPUs +.br +G start time +.br +H end time +.br +I utime +.br +J stime +.br +K maxrss +.br +L uid +.br +M command name +.ec +.TP \fITimeLimit\fP=<time> The job's time limit. Output format is [days\-]hours:minutes:seconds or "UNLIMITED". @@ -805,10 +855,10 @@ simple node range expressions (e.g. "lx[10\-20]"). This specification is require \fIFeatures\fP=<features> Identify feature(s) to be associated with the specified node. Any previously defined feature(s) will be overwritten with the new value. -Features assigned via \fBscontrol\fR will only persist across the restart -of the slurmctld daemon with the \fI\-R\fR option and state files -preserved or slurmctld's receipt of a SIGHUP. -Update slurm.conf with any changes meant to be persistent across normal +Features assigned via \fBscontrol\fR will only persist across the restart +of the slurmctld daemon with the \fI\-R\fR option and state files +preserved or slurmctld's receipt of a SIGHUP. +Update slurm.conf with any changes meant to be persistent across normal restarts of slurmctld or the execution of \fBscontrol reconfig\fR. .TP @@ -868,10 +918,10 @@ systems. Use Cray tools such as \fIxtprocadmin\fR instead. Identify weight to be associated with specified nodes. This allows dynamic changes to weight associated with nodes, which will be used for the subsequent node allocation decisions. -Weight assigned via \fBscontrol\fR will only persist across the restart -of the slurmctld daemon with the \fI\-R\fR option and state files -preserved or slurmctld's receipt of a SIGHUP. 
-Update slurm.conf with any changes meant to be persistent across normal +Weight assigned via \fBscontrol\fR will only persist across the restart +of the slurmctld daemon with the \fI\-R\fR option and state files +preserved or slurmctld's receipt of a SIGHUP. +Update slurm.conf with any changes meant to be persistent across normal restarts of slurmctld or the execution of \fBscontrol reconfig\fR. .TP @@ -1050,7 +1100,7 @@ each resource. .TP \fIState\fP=<up|down|drain|inactive> -Specify if jobs can be allocated nodes or queued in this partition. +Specify if jobs can be allocated nodes or queued in this partition. Possible values are "UP", "DOWN", "DRAIN" and "INACTIVE". .RS .TP 10 @@ -1107,9 +1157,10 @@ not possible to also explicitly specify allowed accounts. .TP \fICoreCnt\fP=<num> -Identify number of cores to be reserved. This should only be used for -reservations that are less than one node in size. Otherwise use the -\fINodeCnt\fP option described below. +This option is only supported when SelectType=select/cons_res. Identify number of +cores to be reserved. If NodeCnt is used, this is the total number of cores to +reserve where cores per node is CoreCnt/NodeCnt. If a nodelist is used, this +should be an array of core numbers by node: Nodes=node[1\-5] CoreCnt=2,2,3,3,4 .TP \fILicenses\fP=<license> @@ -1324,6 +1375,29 @@ If the reported value is "n/s" (not supported), the node does not support the configured \fBAcctGatherEnergyType\fR plugin. If the reported value is zero, energy accounting for nodes is disabled. +.TP +The meaning of the external sensors information is as follows: + +.TP +\fIExtSensorsJoules\fP +The energy consumed by the node between the last time it was powered on +and the last external sensors plugin node sample, in joules. + + +.TP +\fIExtSensorsWatts\fP +The instantaneous power consumption of the node at the time of the last +external sensors plugin node sample, in watts.
+ +.TP +\fIExtSensorsTemp\fP +The temperature of the node at the time of the last external sensors plugin +node sample, in celsius. + +.PP +If the reported value is "n/s" (not supported), the node does not support the +configured \fBExtSensorsType\fR plugin. + .SH "ENVIRONMENT VARIABLES" .PP Some \fBscontrol\fR options may @@ -1485,10 +1559,11 @@ scontrol: quit .SH "COPYING" Copyright (C) 2002\-2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. -Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sdiag.1 b/doc/man/man1/sdiag.1 index 3448cfc1ec3925a854704f36234831ad0925522d..9c4970d82fbc3eee87564844a576d5578af864ac 100644 --- a/doc/man/man1/sdiag.1 +++ b/doc/man/man1/sdiag.1 @@ -198,6 +198,10 @@ Print list of options and exit. Print current version number and exit. .SH "COPYING" +Copyright (C) 2010-2011 Barcelona Supercomputing Center. +.br +Copyright (C) 2010\-2013 SchedMD LLC. 
+.LP SLURM is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) diff --git a/doc/man/man1/sh5util.1 b/doc/man/man1/sh5util.1 new file mode 100644 index 0000000000000000000000000000000000000000..b12d61f65093bd783fe53884a6c38884cf931041 --- /dev/null +++ b/doc/man/man1/sh5util.1 @@ -0,0 +1,187 @@ +.TH "sh5util" "1" "SLURM 2.6" "June 2013" "SLURM Commands" +.SH "NAME" +.LP +sh5util \- Tool for merging HDF5 files from the acct_gather_profile +plugin that gathers detailed data for jobs running under SLURM + +.SH "SYNOPSIS" +.LP +sh5util + +.SH "DESCRIPTION" +.LP +sh5util merges HDF5 files produced on each node for each step of a job into +one HDF5 file for the job. The resulting file can be viewed and manipulated +by common HDF5 tools such as HDF5View, h5dump, h5edit, or h5ls. +.LP +sh5util also has two extract modes. The first, writes a limited set of +data for specific nodes, steps, and data series in +"comma separated value" form to a file which can be imported into other +analysis tools such as spreadsheets. +.LP +The second, (Item-Extract) extracts one data item from one time series for all +the samples on all the nodes from a job's HDF5 profile. +.TP +\- Finds sample with maximum value of the item. +.TP +\- Write CSV file with min, ave, max, and item totals for each node for each +sample + + +.SH "OPTIONS" +.LP + +.TP +\fB\-E\fR, \fB\-\-extract\fR + +Extract data series from a merged job file. + +.RS +.TP 10 +Extract mode options + +.TP +\fB\-i\fR, \fB\-\-input\fR=\fIpath\fR +merged file to extract from (default ./job_$jobid.h5) + +.TP +\fB\-N\fR, \fB\-\-node\fR=\fInodename\fR +Node name to extract (default is all) + +.TP +\fB\-l\fR, \fB\-\-level\fR=\fI[Node:Totals|Node:TimeSeries]\fR +Level to which series is attached.
(default Node:Totals) + +.TP +\fB\-s\fR, \fB\-\-series\fR=\fI[Energy | Lustre | Network | Tasks | Task_#]\fR +\fBTasks\fR is all tasks, \fBTask_#\fR (# is a task id) (default is everything) + +.RE + +.TP +\fB\-I\fR, \fB\-\-item\-extract\fR + +Extract one data item from all samples of one data series from all nodes in a merged job file. + +.RS +.TP 10 +Item-Extract mode options + +.TP +\fB\-s\fR, \fB\-\-series\fR=[Energy | Lustre | Network | Task]\fR + +.TP +\fB\-d\fR, \fB\-\-data\fR +Name of data item in series (See note below). + +.RE + +.TP +\fB\-j\fR, \fB\-\-jobs\fR=\fI<job(.step)>\fR +Format is <job(.step)>. Merge this job/step +(or a comma-separated list of job steps). This option is required. +Not specifying a step will result in all steps found to be processed. + +.TP +\fB\-h\fR, \fB\-\-help\fR +Print this description of use. + +.TP +\fB\-o\fR, \fB\-\-output\fR=\fIpath\fR +.nf +Path to a file into which to write. +Default for merge is ./job_$jobid.h5 +Default for extract is ./extract_$jobid.csv +.fi + +.TP +\fB\-p\fR, \fB\-\-profiledir\fR=\fIdir\fR +Directory location where node-step files exist default is set in +acct_gather.conf. + +.TP +\fB\-S\fR, \fB\-\-savefiles\fR +Instead of removing node-step files after merging them into the job file, +keep them around. + +.TP +\fB\-\-user\fR=\fIuser\fR +User who profiled job. +(Handy for root user, defaults to user running this command.) + +.TP +\fB\-\-usage\fR +Display brief usage message. 
+ +.SH "Data Items per Series" + +.TP +\fBEnergy\fR +.nf +Power +CPU_Frequency +.fi + +.TP +\fBLustre\fR +.nf +Reads +Megabytes_Read +Writes +Megabytes_Write +.fi + +.TP +\fBNetwork\fR +.nf +Packets_In +Megabytes_In +Packets_Out +Megabytes_Out +.fi + +.TP +\fBTask\fR +.nf +CPU_Frequency +CPU_Time +CPU_Utilization +RSS +VM_Size +Pages +Read_Megabytes +Write_Megabytes +.fi + +.SH "Examples" + +.TP +Merge node-step files (as part of a sbatch script) +.LP +sbatch -n1 -d$SLURM_JOB_ID --wrap="sh5util --savefiles -j $SLURM_JOB_ID" + +.TP +Extract all task data from a node +.LP +sh5util -j 42 -N snowflake01 --level=Node:TimeSeries --series=Tasks + +.TP +Extract all energy data +sh5util -j 42 --series=Energy --data=power + +.SH "COPYING" +Copyright (C) 2013 Bull. +.br +Copyright (C) 2013 SchedMD LLC. +SLURM is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. +.LP +SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +.SH "SEE ALSO" +.LP diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index f54a373c100f3f0f7e93ede8708944390d50bf6e..2a938b1337d84f47123ada87a796385ff858880c 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -1,4 +1,4 @@ -.TH SINFO "1" "August 2012" "sinfo 2.5" "Slurm components" +.TH SINFO "1" "December 2012" "sinfo 2.6" "Slurm components" .SH "NAME" sinfo \- view information about SLURM nodes and partitions. @@ -13,7 +13,7 @@ system running SLURM. .TP \fB\-a\fR, \fB\-\-all\fR -Display information about all partions. This causes information to be +Display information about all partitions. 
This causes information to be displayed about partitions that are configured as hidden and partitions that are unavailable to user's group. @@ -69,6 +69,8 @@ Print information only about the specified node(s). Multiple nodes may be comma separated or expressed using a node range expression. For example "linux[00\-07]" would indicate eight nodes, "linux00" through "linux07." +Performance of the command can be measurably improved for systems with large +numbers of nodes when a single node name is specified. .TP \fB\-N\fR, \fB\-\-Node\fR @@ -120,6 +122,9 @@ Number of nodes by state in the format "allocated/idle". Do not use this with a node state option ("%t" or "%T") or the different node states will be placed on separate lines. .TP +\fB%B\fR +The max number of CPUs per node available to jobs in the partition. +.TP \fB%c\fR Number of CPUs per node .TP @@ -267,10 +272,10 @@ This is ignored if the \fB\-\-format\fR option is specified. .TP \fB\-S <sort_list>\fR, \fB\-\-sort=<sort_list>\fR Specification of the order in which records should be reported. -This uses the same field specifciation as the <output_format>. +This uses the same field specification as the <output_format>. Multiple sorts may be performed by listing multiple sort fields separated by commas. The field specifications may be preceded -by "+" or "\-" for assending (default) and desending order +by "+" or "\-" for ascending (default) and descending order respectively. The partition field specification, "P", may be preceded by a "#" to report partitions in the same order that they appear in SLURM's configuration file, \fBslurm.conf\fR. @@ -464,7 +469,8 @@ man page or the \fBslurm.conf\fR(5) man page for more information. The node is not allocated to any jobs and is available for use. .TP \fBMAINT\fR -The node is currently in a reservation with a flag value of "maintainence". +The node is currently in a reservation with a flag value of "maintenance" +or is scheduled to be rebooted. 
.TP \fBUNKNOWN\fR The SLURM controller has just started and the node's state @@ -574,10 +580,11 @@ Not Responding dev8 .SH "COPYING" Copyright (C) 2002\-2007 The Regents of the University of California. -Copyright (C) 2008\-2009 Lawrence Livermore National Security. -Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2009 Lawrence Livermore National Security. +.br +Copyright (C) 2010-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/slurm.1 b/doc/man/man1/slurm.1 index d82b3054bb4f4c4835ce933af92dbc11cb0f871d..65d9a97c5c0b96daa736292bb88b60447032abcc 100644 --- a/doc/man/man1/slurm.1 +++ b/doc/man/man1/slurm.1 @@ -41,9 +41,11 @@ Extensive documentation is also available on the internet at .SH "COPYING" Copyright (C) 2005\-2007 The Regents of the University of California. -Copyright (C) 2008\-2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2009 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/smap.1 b/doc/man/man1/smap.1 index 3fd53ce06dedc53e479ae6eb28541c67d93863ed..5ece4fc9c94346e10f5eba7c88a17f59b3469be0 100644 --- a/doc/man/man1/smap.1 +++ b/doc/man/man1/smap.1 @@ -495,9 +495,11 @@ The location of the SLURM configuration file. .SH "COPYING" Copyright (C) 2004\-2007 The Regents of the University of California. -Copyright (C) 2008\-2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. 
+.br +Copyright (C) 2008\-2009 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sprio.1 b/doc/man/man1/sprio.1 index 45c4d2f0f00f37b580dd5cabd5c51341e0692b06..5aee2709b1293627cfce5dfc6cc04cb6127e5754 100644 --- a/doc/man/man1/sprio.1 +++ b/doc/man/man1/sprio.1 @@ -219,7 +219,8 @@ Print the configured weights for each priority component .SH "COPYING" Copyright (C) 2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index 2acd8ac780e151ab3639c83e3ca3956c04ff416f..5f3c3b3a05fb42af64d5abf4d3bb2fdeaec39133 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -1,4 +1,4 @@ -.TH SQUEUE "1" "June 2011" "squeue 2.3" "Slurm components" +.TH SQUEUE "1" "January 2013" "squeue 2.6" "Slurm components" .SH "NAME" squeue \- view information about jobs located in the SLURM scheduling queue. @@ -19,10 +19,17 @@ list of account names. This has no effect when listing job steps. .TP \fB\-a\fR, \fB\-\-all\fR -Display information about jobs and job steps in all partions. +Display information about jobs and job steps in all partitions. This causes information to be displayed about partitions that are configured as hidden and partitions that are unavailable to user's group. +.TP +\fB\-r\fR, \fB\-\-array\fR +Display one job array element per line. +Without this option, the display will be optimized for use with job arrays +(pending job array elements will be combined on one line of output with the +array index values printed using a regular expression). 
+ .TP \fB\-h\fR, \fB\-\-noheader\fR Do not print a header on the output. @@ -33,7 +40,7 @@ Print a help message describing all options \fBsqueue\fR. .TP \fB\-\-hide\fR -Do not display information about jobs and job steps in all partions. By default, +Do not display information about jobs and job steps in all partitions. By default, information about partitions that are configured as hidden or are not available to the user's group will not be displayed (i.e. this is the default behavior). @@ -45,14 +52,17 @@ By default, prints a time stamp with the header. .TP \fB\-j <job_id_list>\fR, \fB\-\-jobs=<job_id_list>\fR -Requests a comma separated list of job ids to display. Defaults to all jobs. +Requests a comma separated list of job IDs to display. Defaults to all jobs. The \fB\-\-jobs=<job_id_list>\fR option may be used in conjunction with the \fB\-\-steps\fR option to print step information about specific jobs. -Note: If a list of job ids is provided, the jobs are displayed even if +Note: If a list of job IDs is provided, the jobs are displayed even if they are on hidden partitions. Since this option's argument is optional, for proper parsing the single letter option must be followed immediately with the value and not include a space between them. For example "\-j1008" -and not "\-j 1008". +and not "\-j 1008". +The job ID format is "job_id[_array_id]". +Performance of the command can be measurably improved for systems with large +numbers of jobs when a single job ID is specified. .TP \fB\-l\fR, \fB\-\-long\fR @@ -78,13 +88,13 @@ The default formats with various options are .RS .TP 15 \fIdefault\fR -"%.7i %.9P %.8j %.8u %.2t %.10M %.6D %R" +"%.18i %.9P %.8j %.8u %.2t %.10M %.6D %R" .TP \fI\-l, \-\-long\fR -"%.7i %.9P %.8j %.8u %.8T %.10M %.9l %.6D %R" +"%.18i %.9P %.8j %.8u %.8T %.10M %.9l %.6D %R" .TP \fI\-s, \-\-steps\fR -"%10i %.8j %.9P %.8u %.9M %N" +"%.15i %.8j %.9P %.8u %.9M %N" .RE .IP @@ -116,6 +126,11 @@ Number of tasks created by a job step. 
This reports the value of the \fBsrun \-\-ntasks\fR option. (Valid for job steps only) .TP +\fB%A\fR +Job id. +This will have a unique value for each element of job arrays. +(Valid for jobs only) +.TP \fB%b\fR Generic resources (gres) required by the job or step. (Valid for jobs and job steps) @@ -168,6 +183,10 @@ completes. A value of zero implies this job has no dependencies. Features required by the job. (Valid for jobs only) .TP +\fB%F\fR +Job array's job ID. This is the base job ID. +(Valid for jobs only) +.TP \fB%g\fR Group name of the job. (Valid for jobs only) @@ -188,6 +207,8 @@ When \-\-sockets\-per\-node has not been set, "*" is displayed. .TP \fB%i\fR Job or job step id. +In the case of job arrays, the job ID format will be of the form +"<base_job_id>_<index>". (Valid for jobs and job steps) .TP \fB%I\fR @@ -210,6 +231,10 @@ When \-\-threads\-per\-core has not been set, "*" is displayed. Comment associated with the job. (Valid for jobs only) .TP +\fB%K\fR +Job array index. +(Valid for jobs only) +.TP \fB%l\fR Time limit of the job or job step in days\-hours:minutes:seconds. The value may be "NOT_SET" if not yet established or "UNLIMITED" for no limit. @@ -357,7 +382,7 @@ Specify the reservation of the jobs to view. \fB\-s\fR, \fB\-\-steps\fR Specify the job steps to view. This flag indicates that a comma separated list of job steps to view follows without an equal sign (see examples). -The job step format is "job_id.step_id". Defaults to all job +The job step format is "job_id[_array_id].step_id". Defaults to all job steps. Since this option's argument is optional, for proper parsing the single letter option must be followed immediately with the value and not include a space between them. For example "\-s1008.0" and not @@ -400,13 +425,15 @@ jobs are reported. 
Valid states (in both extended and compact form) include: PENDING (PD), RUNNING (R), SUSPENDED (S), COMPLETING (CG), COMPLETED (CD), CONFIGURING (CF), CANCELLED (CA), FAILED (F), TIMEOUT (TO), PREEMPTED (PR) and NODE_FAIL (NF). Note the -\fB<state_list>\fR supplied is case insensitve ("pd" and "PD" work the same). +\fB<state_list>\fR supplied is case insensitive ("pd" and "PD" work the same). See the \fBJOB STATE CODES\fR section below for more information. .TP \fB\-u <user_list>\fR, \fB\-\-user=<user_list>\fR -Request jobs or job steps from a comma separated list of users. The -list can consist of user names or user id numbers. +Request jobs or job steps from a comma separated list of users. +The list can consist of user names or user id numbers. +Performance of the command can be measurably improved for systems with large +numbers of jobs when a single user is specified. .TP \fB\-\-usage\fR @@ -548,6 +575,9 @@ Another suggested value is "%a %T" for a day of week and time stamp (e.g. \fBSQUEUE_ALL\fR \fB\-a, \-\-all\fR .TP +\fBSQUEUE_ARRAY\fR +\fB\-r, \-\-array\fR +.TP \fBSQUEUE_NAMES\fR \fB\-\-name=<name_list>\fR .TP @@ -626,9 +656,11 @@ Print information only about job step 65552.1: .SH "COPYING" Copyright (C) 2002\-2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. 
diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index 55b5bfd58848d00e891170a0f2fdeb9b132b7dcd..5b72be46cdc9d7e4389d864c2e6cedc19233b448 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -429,7 +429,8 @@ Report top usage in percent of the lc account during the specified week .SH "COPYING" Copyright (C) 2009\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 59db00362e86fa4efaef8f033e3de72fe7e2147e..c303af88a858070ef75b3a2fd98d316a8baf5790 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,4 +1,4 @@ -.TH "srun" "1" "SLURM 2.5" "October 2012" "SLURM Commands" +.TH "srun" "1" "SLURM 2.6" "January 2013" "SLURM Commands" .SH "NAME" srun \- Run parallel jobs @@ -26,12 +26,56 @@ be changed after job submission using the \fBscontrol\fR command. .TP -\fB\-\-acctg\-freq\fR=<\fIseconds\fR> -Define the job accounting sampling interval. +\fB\-\-acctg\-freq\fR +Define the job accounting and profiling sampling intervals. This can be used to override the \fIJobAcctGatherFrequency\fR parameter in SLURM's configuration file, \fIslurm.conf\fR. -A value of zero disables real the periodic job sampling and provides accounting -information only on job termination (reducing SLURM interference with the job). +The supported format is as follows: +.RS +.TP 12 +\fB\-\-acctg\-freq=\fR\fI<datatype>\fR\fB=\fR\fI<interval>\fR +where \fI<datatype>\fR=\fI<interval>\fR specifies the task sampling +interval for the jobacct_gather plugin or a +sampling interval for a profiling type by the +acct_gather_profile plugin. Multiple, +comma-separated \fI<datatype>\fR=\fI<interval>\fR intervals +may be specified.
Supported datatypes are as follows: +.RS +.TP +\fBtask=\fI<interval>\fR +where \fI<interval>\fR is the task sampling interval in seconds +for the jobacct_gather plugins and for task +profiling by the acct_gather_profile plugin. +.TP +\fBenergy=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for energy profiling using the acct_gather_energy plugin +.TP +\fBnetwork=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for infiniband profiling using the acct_gather_infiniband +plugin. +.TP +\fBfilesystem=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for filesystem profiling using the acct_gather_filesystem +plugin. +.TP +.RE +.RE +.br +The default value for the task sampling interval +is 30. The default value for all other intervals is 0. +An interval of 0 disables sampling of the specified type. +If the task sampling interval is 0, accounting +information is collected only at job termination (reducing SLURM +interference with the job). +.br +.br +Smaller (non\-zero) values have a greater impact upon job performance, +but a value of 30 seconds is not likely to be noticeable for +applications having less than 10,000 tasks. +.RE .TP \fB\-B\fR \fB\-\-extra\-node\-info\fR=<\fIsockets\fR[:\fIcores\fR[:\fIthreads\fR]]> @@ -228,8 +272,9 @@ To ensure that distinct CPUs are allocated to each job step, use the If the job step allocation includes an allocation with a number of sockets, cores, or threads equal to the number of tasks to be started -then the tasks will by default be bound to the appropriate resources. -Disable this mode of operation by explicitly setting "-\-cpu\-bind=none". +then the tasks will by default be bound to the appropriate resources (auto +binding). Disable this mode of operation by explicitly setting +"-\-cpu\-bind=none". Note that a job step can be allocated different numbers of CPUs on each node or be allocated CPUs not starting at location zero. 
Therefore one of the @@ -253,7 +298,7 @@ Quietly bind before task runs (default) Verbosely report binding before task runs .TP .B no[ne] -Do not bind tasks to CPUs (default) +Do not bind tasks to CPUs (default unless auto binding is applied) .TP .B rank Automatically bind by task rank. @@ -446,8 +491,10 @@ from commandline parameters. \fBsrun\fR will run \fIexecutable\fR just after the job step completes. The command line arguments for \fIexecutable\fR will be the command and arguments of the job step. If \fIexecutable\fR is "none", then -no epilog will be run. This parameter overrides the SrunEpilog -parameter in slurm.conf. +no srun epilog will be run. This parameter overrides the SrunEpilog +parameter in slurm.conf. This parameter is completely independent from +the Epilog parameter in slurm.conf. + .TP \fB\-\-exclusive\fR @@ -481,17 +528,15 @@ may be the group name or the numerical group ID. .TP \fB\-\-gres\fR=<\fIlist\fR> Specifies a comma delimited list of generic consumable resources. -The format of each entry on the list is "name[:count[*cpu]]". +The format of each entry on the list is "name[:count]". The name is that of the consumable resource. The count is the number of those resources with a default value of 1. -The specified resources will be allocated to the job on each node -allocated unless "*cpu" is appended, in which case the resources -will be allocated on a per cpu basis. +The specified resources will be allocated to the job on each node. The available generic consumable resources is configurable by the system administrator. A list of available generic consumable resources will be printed and the command will exit if the option argument is "help". -Examples of use include "\-\-gres=gpu:2*cpu,disk=40G" and "\-\-gres=help". +Examples of use include "\-\-gres=gpu:2,mic=1" and "\-\-gres=help". NOTE: By default, a job step is allocated all of the generic resources that have allocated to the job. 
To change the behavior so that each job step is allocated no generic resources, explicitly set the value of \-\-gres to specify @@ -667,7 +712,9 @@ of tasks is no larger than the number of allocated nodes. The tasks are distributed in blocks of a specified size. The options include a number representing the size of the task block. This is followed by an optional specification of the task distribution scheme -within a block of tasks and between the blocks of tasks. For more +within a block of tasks and between the blocks of tasks. The number of tasks +distributed to each node is the same as for cyclic distribution, but the +taskids assigned to each node depend on the plane size. For more details (including examples and diagrams), please see .br http://slurm.schedmd.com/mc_support.html @@ -880,7 +927,8 @@ limit is changed. If a job node limit exceeds the number of nodes configured in the partition, the job will be rejected. Note that the environment -variable \fBSLURM_NNODES\fR will be set to the count of nodes actually +variable \fBSLURM_JOB_NUM_NODES\fR (and \fBSLURM_NNODES\fR for backwards compatibility) +will be set to the count of nodes actually allocated to the job. See the \fBENVIRONMENT VARIABLES\fR section for more information. If \fB\-N\fR is not specified, the default behavior is to allocate enough nodes to satisfy the requirements of @@ -924,7 +972,7 @@ order of preferences is IPONLY (which is not considered in User Space mode), HFI, IB, HPCE, and KMUX. .TP \fBCAU\fR=<\fIcount\fR> -Number of Collecitve Accelleration Units (CAU) required. +Number of Collective Acceleration Units (CAU) required. Applies only to IBM Power7-IH processors. Default value is zero. Independent CAU will be allocated for each programming interface (MPI, LAPI, etc.) @@ -1095,13 +1143,47 @@ partition as designated by the system administrator. If the job can use more than one partition, specify their names in a comma separate list and the one offering earliest initiation will be used.
+.TP +\fB\-\-profile\fR=<all|none|[energy[,|task[,|lustre[,|network]]]]> +enables detailed data collection by the acct_gather_profile plugin. +Detailed data are typically time-series that are stored in an HDF5 file for +the job. + +.RS +.TP 10 +\fBAll\fR +All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR +No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR +Energy data is collected. + +.TP +\fBTask\fR +Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR +Lustre data is collected. + +.TP +\fBNetwork\fR +Network (InfiniBand) data is collected. +.RE + .TP \fB\-\-prolog\fR=<\fIexecutable\fR> \fBsrun\fR will run \fIexecutable\fR just before launching the job step. The command line arguments for \fIexecutable\fR will be the command and arguments of the job step. If \fIexecutable\fR is "none", then -no prolog will be run. This parameter overrides the SrunProlog -parameter in slurm.conf. +no srun prolog will be run. This parameter overrides the SrunProlog +parameter in slurm.conf. This parameter is completely independent from +the Prolog parameter in slurm.conf. .TP \fB\-\-propagate\fR[=\fIrlimits\fR] @@ -1391,6 +1473,9 @@ The host list will be assumed to be a filename if it contains a "/" character. If you specify a max node count (\-N1\-2) if there are more than 2 hosts in the file only the first 2 nodes will be used in the request list. +Rather than repeating a host name multiple times, an asterisk and +a repetition count may be appended to a host name. For example +"host1,host1" and "host1*2" are equivalent. .TP \fB\-\-wckey\fR=<\fIwckey\fR> @@ -1593,6 +1678,12 @@ rather than the node where \fBsrun\fR executes, these format specifiers are not supported on a BGQ system. .RS 10 .TP +\fB%A\fR +Job array's master job allocation number. +.TP +\fB%a\fR +Job array ID (index) number. +.TP %J jobid.stepid of the running job. (e.g.
"128.0") .TP @@ -1612,6 +1703,9 @@ the running job) This will create a separate IO file per node. %t task identifier (rank) relative to current job. This will create a separate IO file per task. +.TP +%u +User name. .PP A number placed between the percent character and format specifier may be used to zero\-pad the result in the IO filename. This number is ignored if @@ -1771,6 +1865,9 @@ Same as \fB\-J, \-\-job\-name\fR except within an existing allocation, in which case it is ignored to avoid using the batch job's name as the name of each job step. .TP +\fBSLURM_JOB_NUM_NODES\fR (and \fBSLURM_NNODES\fR for backwards compatibility) +Total number of nodes in the job’s resource allocation. +.TP \fBSLURM_KILL_BAD_EXIT\fR Same as \fB\-K, \-\-kill\-on\-bad\-exit\fR .TP @@ -1833,6 +1930,9 @@ If set, then PMI key\-pairs will contain no duplicate keys. This is the case for MPICH2 and reduces overhead in testing for duplicates for improved performance .TP +\fBSLURM_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSLURM_PROLOG\fR Same as \fB\-\-prolog\fR .TP @@ -1868,10 +1968,18 @@ Same as \fB\-e, \-\-error\fR \fBSLURM_STDINMODE\fR Same as \fB\-i, \-\-input\fR .TP +\fBSLURM_SRUN_REDUCE_TASK_EXIT_MSG\fR +if set and non-zero, successive task exit messages with the same exit code will +be printed only once. +.TP \fBSLURM_STEP_GRES\fR Same as \fB\-\-gres\fR (only applies to job steps, not to job allocations). Also see \fBSLURM_GRES\fR .TP +\fBSLURM_STEP_KILLED_MSG_NODE_ID\fR=ID +If set, only the specified node will log when the job or step are killed +by a signal. +.TP \fBSLURM_STDOUTMODE\fR Same as \fB\-o, \-\-output\fR .TP @@ -1991,7 +2099,9 @@ Total number of nodes in the job's resource allocation Sets of node name, communication address and hostname for nodes allocated to the job from the cloud. Each element in the set if colon separated and each set is comma separated. 
For example: -SLURM_NODE_ALIASES=ec0:1.2.3.4:foo,ec1:1.2.3.5:bar +.na +SLURM_NODE_ALIASES\:=\:ec0:1.2.3.4:foo,ec1:1.2.3.5:bar +.ad .TP \fBSLURM_NODEID\fR The relative node ID of the current node @@ -2010,7 +2120,7 @@ This value is propagated to the spawned processes. The MPI rank (or relative process ID) of the current process .TP \fBSLURM_SRUN_COMM_HOST\fR -IP adress of srun communication host. +IP address of srun communication host. .TP \fBSLURM_SRUN_COMM_PORT\fR srun communication port. @@ -2030,12 +2140,15 @@ Number of processes in the step. \fBSLURM_STEP_TASKS_PER_NODE\fR Number of processes per node within the step. .TP -\fBSLURM_STEPID\fR (and \fBSLURM_STEP_ID\fR for backwards compatibility) +\fBSLURM_STEP_ID\fR (and \fBSLURM_STEPID\fR for backwards compatibility) The step ID of the current job .TP \fBSLURM_SUBMIT_DIR\fR The directory from which \fBsrun\fR was invoked. .TP +\fBSLURM_SUBMIT_HOST\fR +The hostname of the computer from which \fBsrun\fR was invoked. +.TP \fBSLURM_TASK_PID\fR The process ID of the task being started. .TP @@ -2319,9 +2432,11 @@ wait .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/srun_cr.1 b/doc/man/man1/srun_cr.1 index b0dd6f6bbf963eb1865894d97169c0ae1b4202d5..f77da432ab742375e64bdac2dd727496b196189f 100644 --- a/doc/man/man1/srun_cr.1 +++ b/doc/man/man1/srun_cr.1 @@ -54,7 +54,6 @@ execution of the tasks from the previous checkpoint. .SH "COPYING" Copyright (C) 2009 National University of Defense Technology, China.
Produced at National University of Defense Technology, China (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sshare.1 b/doc/man/man1/sshare.1 index 4d455ed8d336ad49d4a818d91792fbd467b5d4de..f191426f2ddb3438cd678f20fabca5d38e5bb6c7 100644 --- a/doc/man/man1/sshare.1 +++ b/doc/man/man1/sshare.1 @@ -129,7 +129,8 @@ the account. .SH "COPYING" Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sstat.1 b/doc/man/man1/sstat.1 index c0bda8b6d7933d7a4cdbd9fc9aacbfe7fbc3d244..e7c2f131ce3101c3eb9bd7915d267ff2f90de344 100644 --- a/doc/man/man1/sstat.1 +++ b/doc/man/man1/sstat.1 @@ -116,6 +116,14 @@ Average (system + user) CPU time of all tasks in job. \f3AveCPUFreq\fP Average weighted CPU frequency of all tasks in job, in kHz. +.TP +\f3AveDiskRead\fP +Average number of bytes read by all tasks in job. + +.TP +\f3AveDiskWrite\fP +Average number of bytes written by all tasks in job. + .TP \f3AvePages\fP Average number of page faults of all tasks in job. @@ -141,6 +149,30 @@ It is in the form: \f2job.jobstep\fP\c \&. +.TP +\f3MaxDiskRead\fP +Maximum number of bytes read by all tasks in job. + +.TP +\f3MaxDiskReadNode\fP +The node on which the maxdiskread occurred. + +.TP +\f3MaxDiskReadTask\fP +The task ID where the maxdiskread occurred. + +.TP +\f3MaxDiskWrite\fP +Maximum number of bytes written by all tasks in job. + +.TP +\f3MaxDiskWriteNode\fP +The node on which the maxdiskwrite occurred. + +.TP +\f3MaxDiskWriteTask\fP +The task ID where the maxdiskwrite occurred. + .TP \f3MaxPages\fP Maximum number of page faults of all tasks in job. 
@@ -193,6 +225,9 @@ The task ID where the mincpu occurred. \f3NTasks\fP Total number of tasks in a job or step. +.TP +\f3ReqCPUFreq\fP +Requested CPU frequency for the step, in kHz. .SH "EXAMPLES" @@ -207,7 +242,8 @@ Total number of tasks in a job or step. .SH "COPYING" Copyright (C) 2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/strigger.1 b/doc/man/man1/strigger.1 index 30ec9c077328d5714bbb0b7510e448f08f62e797..ef9d92e97eed96a31ca9a45842ee508d25e68448 100644 --- a/doc/man/man1/strigger.1 +++ b/doc/man/man1/strigger.1 @@ -361,9 +361,11 @@ Execute /home/joe/job_fini upon completion of job 1237. .SH "COPYING" Copyright (C) 2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man1/sview.1 b/doc/man/man1/sview.1 index c083e545b74f296718c712053d71d8a66c8842e0..711e6cf3540d484308bd185786756282925a5963 100644 --- a/doc/man/man1/sview.1 +++ b/doc/man/man1/sview.1 @@ -1,4 +1,4 @@ -.TH "sview" "1" "SLURM 2.3" "February 2011" "SLURM Commands" +.TH "sview" "1" "SLURM 2.6" "October 2013" "SLURM Commands" .SH "NAME" .LP sview \- graphical user interface to view and modify SLURM state. @@ -43,11 +43,20 @@ At least some gtk themes are unable to display large numbers of lines (jobs, nodes, etc). 
The information is still in gtk's internal data structures, but not visible by scrolling down the window. +On systems with the topology/tree plugin configured, the sview command will +attempt to display the nodes on each switch on a separate line. +Change the sview configuration for optimal viewing by selecting "Options" then +"Set Default Settings". +The "Nodes in Row" and "Node Button Size in Pixels" would be the most +commonly changed options. + .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. -Copyright (C) 2008\-2011 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2011 Lawrence Livermore National Security. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man3/Makefile.am b/doc/man/man3/Makefile.am index c9b4771be8be45a7bf6bf2012cd1bffe3017b358..d04ffcd7cc8186a704a11bd25de84f64b16b9de3 100644 --- a/doc/man/man3/Makefile.am +++ b/doc/man/man3/Makefile.am @@ -69,7 +69,9 @@ man3_MANS = slurm_hostlist_create.3 \ slurm_load_front_end.3 \ slurm_load_job.3 \ slurm_load_jobs.3 \ + slurm_load_job_user.3 \ slurm_load_node.3 \ + slurm_load_node_single.3 \ slurm_load_partitions.3 \ slurm_load_reservations.3 \ slurm_load_slurmd_status.3 \ diff --git a/doc/man/man3/Makefile.in b/doc/man/man3/Makefile.in index 099acbd6f4d8ccea796785d4d6611f981ab8e207..e8d52c502ea5776bab3520b698cee732427fa9be 100644 --- a/doc/man/man3/Makefile.in +++ b/doc/man/man3/Makefile.in @@ -55,6 +55,7 @@ subdir = doc/man/man3 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4
\ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -158,6 +162,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -178,6 +184,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -187,6 +196,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -194,6 +205,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ 
+HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -228,6 +248,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -255,6 +278,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -414,7 +440,9 @@ man3_MANS = slurm_hostlist_create.3 \ slurm_load_front_end.3 \ slurm_load_job.3 \ slurm_load_jobs.3 \ + slurm_load_job_user.3 \ slurm_load_node.3 \ + slurm_load_node_single.3 \ slurm_load_partitions.3 \ slurm_load_reservations.3 \ slurm_load_slurmd_status.3 \ diff --git a/doc/man/man3/slurm_free_job_info_msg.3 b/doc/man/man3/slurm_free_job_info_msg.3 index 79bc71c7a443915b72877aedc38cf72d7555d14c..ba89de9784496e1883c80dcc9fa9bb2f59ed70c4 100644 --- a/doc/man/man3/slurm_free_job_info_msg.3 +++ b/doc/man/man3/slurm_free_job_info_msg.3 @@ -1,8 +1,8 @@ -.TH "Slurm API" "3" "September 2006" "Morris Jette" "Slurm job information reporting functions" +.TH "Slurm API" "3" "January 2013" "Morris Jette" "Slurm job information reporting functions" .SH "NAME" slurm_free_job_alloc_info_response_msg, slurm_free_job_info_msg, slurm_get_end_time, slurm_get_rem_time, slurm_get_select_jobinfo, -slurm_load_jobs, slurm_pid2jobid, +slurm_load_jobs, slurm_load_job_user, slurm_pid2jobid, slurm_print_job_info, slurm_print_job_info_msg \- Slurm job information reporting functions .LP @@ -41,6 +41,16 @@ int \fBslurm_load_job\fR ( .br ); .LP +int \fBslurm_load_job_user\fR ( +.br + 
job_info_msg_t **\fIjob_info_msg_pptr\fP, +.br + uint32_t \fIuser_id\fP, +.br + uint16_t \fIshow_flags\fP +.br +); +.LP int \fBslurm_load_jobs\fR ( .br time_t \fIupdate_time\fP, @@ -222,6 +232,9 @@ For all of the following informational calls, if update_time is equal to or greater than the last time changes where made to that information, new information is not returned. Otherwise all the configuration. job, node, or partition records are returned. +.TP +\fIuser_id\fP +ID of user we want information for. .SH "DESCRIPTION" .LP @@ -251,6 +264,13 @@ record count, and array of job_table records for some specific job ID. \fBslurm_load_jobs\fR Returns a job_info_msg_t that contains an update time, record count, and array of job_table records for all jobs. .LP +\fBslurm_load_job_user\fR Returns a job_info_msg_t that contains an update +time, record count, and array of job_table records for all jobs associated +with a specific user ID. +.LP +\fBslurm_load_job_user\fR issues RPC to get slurm information about all jobs to +be run as the specified user. +.LP \fBslurm_notify_job\fR Sends the specified message to standard output of the specified job ID.
.LP diff --git a/doc/man/man3/slurm_free_node_info.3 b/doc/man/man3/slurm_free_node_info.3 index 981909fe348471c332489c8ce3fa4580496f059f..8eb4d04ce8b7c1eed5fbbd7de6efa08a2cd568b0 100644 --- a/doc/man/man3/slurm_free_node_info.3 +++ b/doc/man/man3/slurm_free_node_info.3 @@ -1,7 +1,8 @@ -.TH "Slurm API" "3" "January 2006" "Morris Jette" "Slurm node informational calls" +.TH "Slurm API" "3" "January 2013" "Morris Jette" "Slurm node informational calls" .SH "NAME" -slurm_free_node_info_msg, slurm_load_node, -slurm_print_node_info_msg, slurm_print_node_table, slurm_sprint_node_table +slurm_free_node_info_msg, slurm_load_node, slurm_load_node_single, +slurm_print_node_info_msg, slurm_print_node_table, +slurm_sprint_node_table \- Slurm node information reporting functions .SH "SYNTAX" .LP @@ -25,6 +26,16 @@ int \fBslurm_load_node\fR ( .br ); .LP +int \fBslurm_load_node_single\fR ( +.br + node_info_msg_t **\fInode_info_msg_pptr\fP, +.br + char *\fInode_name\fP, +.br + uint16_t \fIshow_flags\fP +.br +); +.LP void \fBslurm_print_node_info_msg\fR ( .br FILE *\fIout_file\fp, @@ -69,6 +80,9 @@ about each node. Detailed node information is written to fixed sized records and includes: name, state, processor count, memory size, etc. See slurm.h for full details on the data structure's contents. .TP +\fInode_name\fP +Name of the node for which information is requested. +.TP \fInode_ptr\fP Specifies a pointer to a single node record from the \fInode_info_msg_ptr\fP data structure. @@ -101,6 +115,9 @@ or partition records are returned. \fBslurm_free_node_info_msg\fR Release the storage generated by the \fBslurm_load_node\fR function. .LP +\fBslurm_load_node_single\fR issue RPC to get slurm configuration +information for a specific node. +.LP \fBslurm_load_node\fR Returns a \fInode_info_msg_t\fP that contains an update time, record count, and array of node_table records for all nodes. Note that nodes which are hidden for any reason will have a NULL node name. 
diff --git a/doc/man/man3/slurm_load_job_user.3 b/doc/man/man3/slurm_load_job_user.3 new file mode 100644 index 0000000000000000000000000000000000000000..836ffa79b478c25f79af77cc002dc816f26891c0 --- /dev/null +++ b/doc/man/man3/slurm_load_job_user.3 @@ -0,0 +1 @@ +.so man3/slurm_free_job_info_msg.3 diff --git a/doc/man/man3/slurm_load_node_single.3 b/doc/man/man3/slurm_load_node_single.3 new file mode 100644 index 0000000000000000000000000000000000000000..63979eec2ec9c8972d214f5e5a9dae44a8a88d17 --- /dev/null +++ b/doc/man/man3/slurm_load_node_single.3 @@ -0,0 +1 @@ +.so man3/slurm_free_node_info.3 diff --git a/doc/man/man5/Makefile.am b/doc/man/man5/Makefile.am index f08d16562f64b70ab83480ba9724a41cec0342ef..6e92b7738da607260939735e462c6ec962c9cddd 100644 --- a/doc/man/man5/Makefile.am +++ b/doc/man/man5/Makefile.am @@ -1,8 +1,11 @@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html -man5_MANS = bluegene.conf.5 \ +man5_MANS = \ + acct_gather.conf.5 \ + bluegene.conf.5 \ cgroup.conf.5 \ cray.conf.5 \ + ext_sensors.conf.5 \ gres.conf.5 \ slurm.conf.5 \ slurmdbd.conf.5 \ @@ -14,8 +17,11 @@ EXTRA_DIST = $(man5_MANS) if HAVE_MAN2HTML html_DATA = \ + acct_gather.conf.html \ bluegene.conf.html \ cgroup.conf.html \ + cray.conf.html \ + ext_sensors.conf.html \ gres.conf.html \ slurm.conf.html \ slurmdbd.conf.html \ diff --git a/doc/man/man5/Makefile.in b/doc/man/man5/Makefile.in index 6dd25e2ac9ffba228d52ec71a996f4d1aca7461c..abb6848e38ced7b06f5adee7e2cbda127c9c587d 100644 --- a/doc/man/man5/Makefile.in +++ b/doc/man/man5/Makefile.in @@ -57,6 +57,7 @@ subdir = doc/man/man5 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = 
$(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = 
@HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 +281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -317,7 +343,7 @@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ @@ -346,9 +372,12 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -man5_MANS = bluegene.conf.5 \ +man5_MANS = \ + acct_gather.conf.5 \ + bluegene.conf.5 \ cgroup.conf.5 \ cray.conf.5 \ + ext_sensors.conf.5 \ gres.conf.5 \ slurm.conf.5 \ slurmdbd.conf.5 \ @@ -357,8 +386,11 @@ man5_MANS = bluegene.conf.5 \ EXTRA_DIST = $(man5_MANS) $(am__append_1) @HAVE_MAN2HTML_TRUE@html_DATA = \ +@HAVE_MAN2HTML_TRUE@ acct_gather.conf.html \ @HAVE_MAN2HTML_TRUE@ bluegene.conf.html \ @HAVE_MAN2HTML_TRUE@ cgroup.conf.html \ +@HAVE_MAN2HTML_TRUE@ cray.conf.html \ +@HAVE_MAN2HTML_TRUE@ ext_sensors.conf.html \ @HAVE_MAN2HTML_TRUE@ gres.conf.html \ @HAVE_MAN2HTML_TRUE@ slurm.conf.html \ @HAVE_MAN2HTML_TRUE@ slurmdbd.conf.html \ diff --git a/doc/man/man5/acct_gather.conf.5 b/doc/man/man5/acct_gather.conf.5 new file mode 100644 index 
0000000000000000000000000000000000000000..32fbe46311bce9b3fa326f1b642f1e58cade1495 --- /dev/null +++ b/doc/man/man5/acct_gather.conf.5 @@ -0,0 +1,175 @@ +.TH "acct_gather.conf" "5" "May 2013" "acct_gather.conf 1.0" "Slurm acct_gather configuration file" + +.SH "NAME" +acct_gather.conf \- Slurm configuration file for the acct_gather plugins + +.SH "DESCRIPTION" + +\fBacct_gather.conf\fP is an ASCII file which defines parameters used by +Slurm's acct_gather related plugins. +The file location can be modified at system build time using the +DEFAULT_SLURM_CONF parameter or at execution time by setting the SLURM_CONF +environment variable. The file will always be located in the +same directory as the \fBslurm.conf\fP file. +.LP +Parameter names are case insensitive. +Any text following a "#" in the configuration file is treated +as a comment through the end of that line. +The size of each line in the file is limited to 1024 characters. +Changes to the configuration file take effect upon restart of +SLURM daemons, daemon receipt of the SIGHUP signal, or execution +of the command "scontrol reconfigure" unless otherwise noted. + +.LP +The following acct_gather.conf parameters are defined to control the general +behavior of various plugins in SLURM. + +.LP +The acct_gather.conf file is different than other Slurm .conf files. Each +plugin defines which options are available. So if you do not load the +respective plugin for an option that option will appear to be unknown by Slurm +and could cause Slurm not to load. If you decide to change plugin types you +might also have to change the related options as well. + +.TP +\fBEnergyIPMI\fR +Options used for AcctGatherEnergyType/ipmi are as follows: + +.RS +.TP 20 +\fBEnergyIPMIFrequency\fR=<number> +This parameter is the number of seconds between BMC access samples.
+ +.TP +\fBEnergyIPMICalcAdjustment\fR=<yes|no> +If set to "yes", the consumption between the last BMC access sample and +a step consumption update is approximated to get more accurate task consumption. +The adjustment is made at the step start and each time the +consumption is updated, including the step end. The approximations are not +accumulated; only the first and last adjustments are used to calculate the +consumption. The default is "no". + +.TP +\fBEnergyIPMIPowerSensor\fR=<number> +This parameter is optional. If the parameter is included, the plugin searches +the node for a "watt" sensor with the number specified by <number>. If a +matching sensor is found, that sensor is used for power data. If no matching +sensor is found, a value of zero is returned for power data. If the parameter +is omitted, the plugin will use the first "watt" sensor it finds. + +.LP +The following acct_gather.conf parameters are defined to control the +IPMI config default values for libipmiconsole. + +.TP +\fBEnergyIPMIUsername\fR=\fIUSERNAME\fR +Specify BMC Username. + +.TP +\fBEnergyIPMIPassword\fR=\fIPASSWORD\fR +Specify BMC Password. +.RE + +.TP +\fBProfileHDF5\fR +Options used for AcctGatherProfileType/hdf5 are as follows: + +.RS +.TP 20 +\fBProfileHDF5Dir\fR=<path> +This parameter is the path to the shared folder into which the +acct_gather_profile plugin will write detailed data (usually as an HDF5 file). +The directory is assumed to be on a file system shared by the controller and +all compute nodes. This is a required parameter. + +.TP +\fBProfileHDF5Default\fR +A comma delimited list of data types to be collected for each job submission. +Allowed values are: + +.RS +.TP +\fBAll\fR All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR Energy data is collected. + +.TP +\fBTask\fR Task (I/O, Memory, ...) data is collected.
+ +.TP +\fBLustre\fR Lustre data is collected. + +.TP +\fBNetwork\fR Network (InfiniBand) data is collected. +.RE +.RE + +.TP +\fBInfinibandOFED\fR +Options used for AcctGatherInfinibandType/ofed are as follows: + +.RS +.TP 20 +\fBInfinibandOFEDFrequency\fR=<number> +This parameter is the number of seconds between the calls to infiniband counters. + +.TP +\fBInfinibandOFEDPort\fR=<number> +This parameter represents the port number of the local Infiniband card that we are willing to monitor. +The default port is 1. +.RE +.RE +.SH "EXAMPLE" +.LP +.br +### +.br +# Slurm acct_gather configuration file +.br +### +.br +# Parameters for AcctGatherEnergy/ipmi plugin +.br +EnergyIPMIFrequency=10 +.br +EnergyIPMICalcAdjustment=yes +.br +# +.br +# Parameters for AcctGatherProfileType/hdf5 plugin +.br +ProfileHDF5Dir=/app/slurm/profile_data +.br +# Parameters for AcctGatherInfiniband/ofed plugin +.br +InfinibandOFEDFrequency=4 +.br +InfinibandOFEDPort=1 +.br + + +.SH "COPYING" +Copyright (C) 2012-2013 Bull. +Produced at Bull (cf, DISCLAIMER). +.LP +This file is part of SLURM, a resource management program. +For details, see <http://slurm.schedmd.com/>. +.LP +SLURM is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. +.LP +SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details.
+ +.SH "SEE ALSO" +.LP +\fBslurm.conf\fR(5) diff --git a/doc/man/man5/bluegene.conf.5 b/doc/man/man5/bluegene.conf.5 index be88a0e1e6f45e2f585ad17d8ce37eaba93d45db..59048ad7105a8cd0810e6ba2df219cc890192802 100644 --- a/doc/man/man5/bluegene.conf.5 +++ b/doc/man/man5/bluegene.conf.5 @@ -335,7 +335,8 @@ BPs=[333] Type=SMALL 32CNBlocks=4 128CNBlocks=3 # 32 * 4 + 128 * 3 = 512 .SH "COPYING" Copyright (C) 2006-2010 The Regents of the University of California. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man5/cgroup.conf.5 b/doc/man/man5/cgroup.conf.5 index bb6415ac50b06db72e172920a4384bd167648759..65e8eeac4a0556b73c36bb10000973237abce90d 100644 --- a/doc/man/man5/cgroup.conf.5 +++ b/doc/man/man5/cgroup.conf.5 @@ -1,4 +1,4 @@ -.TH "cgroup.conf" "5" "June 2012" "cgroup.conf 2.2" \ +.TH "cgroup.conf" "5" "July 2013" "cgroup.conf 2.6" \ "Slurm cgroup configuration file" .SH "NAME" @@ -68,11 +68,11 @@ The default value is "no". .TP \fBAllowedRAMSpace\fR=<number> Constrain the job cgroup RAM to this percentage of the allocated memory. -The default value is 100. If SLURM is not allocating memory to jobs, The percentage supplied may be expressed as floating point number, e.g. 98.5. If the \fBAllowedRAMSpace\fR limit is exceeded, the job steps will be killed and a warning message will be written to standard error. Also see \fBConstrainRAMSpace\fR. +The default value is 100. .TP \fBAllowedSwapSpace\fR=<number> @@ -86,8 +86,10 @@ Also see \fBConstrainSwapSpace\fR. .TP \fBConstrainRAMSpace\fR=<yes|no> If configured to "yes" then constrain the job's RAM usage. -The default value is "no". -Also see \fBAllowedRAMSpace\fR. +The default value is "no", +in which case the job's RAM limit will be set to its swap space limit. 
+Also see \fBAllowedSwapSpace\fR, \fBAllowedRAMSpace\fR and +\fBConstrainSwapSpace\fR. .TP \fBConstrainSwapSpace\fR=<yes|no> @@ -105,7 +107,8 @@ Also see \fBAllowedSwapSpace\fR. \fBMaxRAMPercent\fR=\fIPERCENT\fR Set an upper bound in percent of total RAM on the RAM constraint for a job. This will be the memory constraint applied to jobs that are not explicitly -allocated memory by SLURM. The \fIPERCENT\fR may be an arbitrary floating +allocated memory by Slurm (i.e. Slurm's select plugin is not configured to manage +memory allocations). The \fIPERCENT\fR may be an arbitrary floating point number. The default value is 100. .TP @@ -161,7 +164,8 @@ ConstrainCores=yes .SH "COPYING" Copyright (C) 2010-2012 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man5/cray.conf.5 b/doc/man/man5/cray.conf.5 index e4a251d944621d8186917e85ffe49d1bfa0ef150..5e7ef6bd9d560c1ef2152bd5187caac3b8de08d8 100644 --- a/doc/man/man5/cray.conf.5 +++ b/doc/man/man5/cray.conf.5 @@ -1,4 +1,4 @@ -.TH "cray.conf" "5" "August 2011" "cray.conf 2.3" "Slurm configuration file" +.TH "cray.conf" "5" "January 2015" "cray.conf 2.6" "Slurm configuration file" .SH "NAME" cray.conf \- Slurm configuration file for the Cray\-specific information @@ -22,6 +22,17 @@ of the command "scontrol reconfigure" unless otherwise noted. .LP The configuration parameters available include: +.TP +\fBAlpsEngine\fR=<engine_version> +Communication protocol version number to be used between Slurm and ALPS/BASIL. +The default value is BASIL's response to the ENGINE query. +Use with caution: Changes in ALPS communications which are not recognized +by Slurm could result in loss of jobs.
+Currently supported values include +1.1, 1.2.0, 1.3.0, 3.1.0, 4.0, 4.1.0, 5.0.0, 5.0.1, 5.1.0 or "latest". +A value of "latest" will use the most current version of Slurm's logic and +can be useful for validation with new versions of ALPS. + .TP \fBAlpsDir\fR=<pathname> Fully qualified pathname of the directory in which ALPS is installed. @@ -89,9 +100,7 @@ SDBuser=alps_user SDBdb=XT5istanbul .SH "COPYING" -Copyright (C) 2011 SchedMD LLC. -Produced at SchedMD LLC (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +Copyright (C) 2011-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man5/ext_sensors.conf.5 b/doc/man/man5/ext_sensors.conf.5 new file mode 100644 index 0000000000000000000000000000000000000000..46a94558ad752b0678c728746c8eb76d04de8468 --- /dev/null +++ b/doc/man/man5/ext_sensors.conf.5 @@ -0,0 +1,114 @@ +.TH "ext_sensors.conf" "5" "February 2013" "ext_sensors.conf 2.2" \ +"Slurm external sensors plugin configuration file" + +.SH "NAME" +ext_sensors.conf \- Slurm configuration file for the external sensors plugin + +.SH "DESCRIPTION" + +\fBext_sensors.conf\fP is an ASCII file which defines parameters used by +Slurm's external sensors plugins. +The file location can be modified at system build time using the +DEFAULT_SLURM_CONF parameter or at execution time by setting the SLURM_CONF +environment variable. The file will always be located in the +same directory as the \fBslurm.conf\fP file. +.LP +Parameter names are case insensitive. +Any text following a "#" in the configuration file is treated +as a comment through the end of that line. +The size of each line in the file is limited to 1024 characters. +Changes to the configuration file take effect upon restart of +SLURM daemons, daemon receipt of the SIGHUP signal, or execution +of the command "scontrol reconfigure" unless otherwise noted. 
+ +.LP +The following ext_sensors.conf parameters are defined to control data +collection by the ext_sensors plugins. All of these parameters are optional. +If a parameter is omitted, data collection of the omitted type is disabled. + +.TP +\fBJobData\fR=\fBenergy\fR +Specify the data types to be collected by the plugin for jobs/steps. + +.TP +\fBNodeData\fR=\fB[energy|temp][,temp|energy]\fR +Specify the data types to be collected by the plugin for nodes. + +.TP +\fBSwitchData\fR=\fBenergy\fR +Specify the data types to be collected by the plugin for switches. + +.TP +\fBColdDoorData\fR=\fBtemp\fR +Specify the data types to be collected by the plugin for cold doors. + +.TP +\fBMinWatt\fR=\fB<number>\fR +Minimum recorded power consumption, in watts. + +.TP +\fBMaxWatt\fR=\fB<number>\fR +Maximum recorded power consumption, in watts. + +.TP +\fBMinTemp\fR=\fB<number>\fR +Minimum recorded temperature, in degrees Celsius. + +.TP +\fBMaxTemp\fR=\fB<number>\fR +Maximum recorded temperature, in degrees Celsius. + +.TP +\fBEnergyRRA\fR=\fB<name>\fR +Energy RRA name. + +.TP +\fBTempRRA\fR=\fB<name>\fR +Temperature RRA name. + +.TP +\fBEnergyPathRRD\fR=\fB<path>\fR +Pathname of energy RRD file. + +.TP +\fBTempPathRRD\fR=\fB<path>\fR +Pathname of temperature RRD file. + +.SH "EXAMPLE" +.LP +.br +### +.br +# Slurm external sensors plugin configuration file +.br +### +.br +JobData=energy +.br +NodeData=energy,temp +.br +SwitchData=energy +.br +ColdDoorData=temp +.br +# + +.SH "COPYING" +Copyright (C) 2013 Bull +.LP +This file is part of SLURM, a resource management program. +For details, see <http://slurm.schedmd.com/>. +.LP +SLURM is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version.
+.LP +SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +.SH "SEE ALSO" +.LP +\fBslurm.conf\fR(5) diff --git a/doc/man/man5/gres.conf.5 b/doc/man/man5/gres.conf.5 index 960e70963f1ee986159198d395c14f2540e4c585..7e087c69a3e56f67636213b3d1ac7298c11198c1 100644 --- a/doc/man/man5/gres.conf.5 +++ b/doc/man/man5/gres.conf.5 @@ -107,7 +107,8 @@ Name=bandwidth Count=20M .SH "COPYING" Copyright (C) 2010 The Regents of the University of California. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 4c5d8c38b25a139f431a65c32b8d2e618c55de8a..c3e564d5a1f5c2b50adf0b6b3f780b148340a721 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1,4 +1,4 @@ -.TH "slurm.conf" "17" "December 2012" "slurm.conf 2.5" "Slurm configuration file" +.TH "slurm.conf" "5" "March 2013" "slurm.conf 2.6" "Slurm configuration file" .SH "NAME" slurm.conf \- Slurm configuration file @@ -53,22 +53,35 @@ Only used for database type storage plugins, ignored otherwise. \fBAccountingStorageEnforce\fR This controls what level of association\-based enforcement to impose on job submissions. Valid options are any combination of -\fIassociations\fR, \fIlimits\fR, \fIqos\fR, \fIsafe\fR, and \fIwckeys\fR, or -\fIall\fR for all things. If limits, qos, or wckeys are set, -associations will automatically be set. In addition, if wckeys is -set, TrackWCKey will automatically be set. If safe is set, limits and -associations will automatically be set. 
By enforcing Associations no -new job is allowed to run unless a corresponding association exists in -the system. If limits are enforced users can be limited by association -to whatever job size or run time limits are defined. If safe is -enforced a job will only be launched against an association or qos that -has a GrpCPUMins limit set if the job will be able to run to completion. +\fIassociations\fR, \fIlimits\fR, \fInojobs\fR, \fInosteps\fR, \fIqos\fR, \fIsafe\fR, and \fIwckeys\fR, or +\fIall\fR for all things. + +If limits, qos, or wckeys are set, associations will automatically be set. + +If wckeys is set, TrackWCKey will automatically be set. + +If safe is set, limits and associations will automatically be set. + +If nojobs is set nosteps will automatically be set. + +By enforcing Associations no new job is allowed to run unless a corresponding +association exists in the system. If limits are enforced users can be +limited by association to whatever job size or run time limits are defined. + +If nojobs is set, Slurm will not account for any jobs or steps on the system; +likewise, if nosteps is set, Slurm will not account for any steps run. In both +cases limits will still be enforced. + +If safe is enforced a job will only be launched against an association or qos +that has a GrpCPUMins limit set if the job will be able to run to completion. Without this option set, jobs will be launched as long as their usage hasn't reached the cpu-minutes limit which can lead to jobs being -launched but then killed when the limit is reached. With qos and/or -wckeys enforced jobs will not be scheduled unless a valid qos and/or -workload characterization key is specified. When -\fBAccountingStorageEnforce\fR is changed, a restart of the slurmctld +launched but then killed when the limit is reached. + +With qos and/or wckeys enforced jobs will not be scheduled unless a valid qos +and/or workload characterization key is specified.
+ +When \fBAccountingStorageEnforce\fR is changed, a restart of the slurmctld daemon is required (not just a "scontrol reconfig"). .TP @@ -128,7 +141,7 @@ information and will prevent some sacct options from proper operation. Note: The PostgreSQL plugin is not complete and should not be used if wanting to use associations. It will however work with basic accounting of jobs and job steps. If -interested in completing, please email slurm-dev@lists.llnl.gov. Also +interested in completing, please email slurm-dev@schedmd.com. Also see \fBDefaultStorageType\fR. .TP @@ -147,7 +160,7 @@ is "YES". \fBAcctGatherNodeFreq\fR The AcctGather plugins sampling interval for node accounting. For AcctGather plugin values of none, this parameter is ignored. -For all other values this parameter is the number +For all other values this parameter is the number of seconds between node accounting samples. For the acct_gather_energy/rapl plugin, set a value less than 300 because the counters may overflow beyond this rate. @@ -159,12 +172,12 @@ determined by the value of \fBJobAcctGatherFrequency\fR. \fBAcctGatherEnergyType\fR Identifies the plugin to be used for energy consumption accounting. The jobacct_gather plugin and slurmd daemon call this plugin to collect -energy consumption data for jobs and nodes. The collection of energy -consumption data takes place on node level, hence only in case of exclusive -job allocation the energy consumption measurements will reflect the jobs -real consumption. In case of node sharing between jobs the reported consumed -energy per job (through sstat or sacct) will not reflect the real energy -consumed by the jobs. +energy consumption data for jobs and nodes. The collection of energy +consumption data takes place on node level, hence only in case of exclusive +job allocation the energy consumption measurements will reflect the jobs +real consumption. 
In case of node sharing between jobs the reported consumed +energy per job (through sstat or sacct) will not reflect the real energy +consumed by the jobs. Configurable values at present are: .RS @@ -182,6 +195,70 @@ Average Power Limit (RAPL) mechanism. Note that enabling RAPL may require the execution of the command "sudo modprobe msr". .RE +.TP +\fBAcctGatherInfinibandType\fR +Identifies the plugin to be used for infiniband network traffic accounting. +The plugin is activated only when profiling on hdf5 files is activated and +the user asks for network data collection for jobs through \-\-profile=Network +(or =All). The collection of network traffic data takes place on node level, +hence only in case of exclusive job allocation the collected values will +reflect the job's real traffic. All network traffic data are logged on hdf5 files +per job on each node. No storage on the Slurm database takes place. + +Configurable values at present are: +.RS +.TP 20 +\fBacct_gather_infiniband/none\fR +No infiniband network data are collected. +.TP +\fBacct_gather_infiniband/ofed\fR +Infiniband network traffic data are collected from the hardware monitoring +counters of Infiniband devices through the OFED library. +.RE + +.TP +\fBAcctGatherFilesystemType\fR +Identifies the plugin to be used for filesystem traffic accounting. +The plugin is activated only when profiling on hdf5 files is activated and +the user asks for filesystem data collection for jobs through \-\-profile=Lustre +(or =All). The collection of filesystem traffic data takes place on node level, +hence only in case of exclusive job allocation the collected values will +reflect the job's real traffic. All filesystem traffic data are logged on hdf5 files +per job on each node. No storage on the Slurm database takes place. + +Configurable values at present are: +.RS +.TP 20 +\fBacct_gather_filesystem/none\fR +No filesystem data are collected.
+.TP +\fBacct_gather_filesystem/lustre\fR +Lustre filesystem traffic data are collected from the counters found in +/proc/fs/lustre/. +.RE + +.TP +\fBAcctGatherProfileType\fR +Identifies the plugin to be used for detailed job profiling. +The jobacct_gather plugin and slurmd daemon call this plugin to collect +detailed data such as I/O counts, memory usage, or energy consumption for jobs +and nodes. There are interfaces in this plugin to collect data as step start +and completion, task start and completion, and at the account gather +frequency. The data collected at the node level is related to jobs only in +case of exclusive job allocation. + +Configurable values at present are: +.RS +.TP 20 +\fBacct_gather_profile/none\fR +No profile data is collected. +.TP +\fBacct_gather_profile/hdf5\fR +This enables the HDF5 plugin. The directory where the profile files +are stored and which values are collected are configured in the +acct_gather.conf file. +.RE + .TP \fBAuthType\fR The authentication method for communications between SLURM @@ -354,6 +431,9 @@ CPU binding details for jobs and steps \fBEnergy\fR AcctGatherEnergy debug info .TP +\fBExtSensors\fR +External Sensors debug info +.TP \fBFrontEnd\fR Front end node details .TP @@ -386,6 +466,9 @@ Slurmctld triggers .TP \fBWiki\fR Sched/wiki and wiki2 communications +.TP +\fBThreadID\fR +Prints the thread ID in the log file .RE .TP @@ -473,6 +556,11 @@ If set to "YES" then user root will be prevented from running any jobs. The default value is "NO", meaning user root will be able to execute jobs. \fBDisableRootJobs\fR may also be set by partition. +.TP +\fBDynallocPort\fR +Socket port used for MapReduce dynamic allocation communications. +Used only by the slurmctld/dynalloc plugin. + .TP \fBEnforcePartLimits\fR If set to "YES" then jobs which exceed a partition's size and/or time limits @@ -510,11 +598,39 @@ upon termination of a job allocation (e.g. 
The program executes as SlurmUser, which gives it permission to drain nodes and requeue the job if a failure occurs or cancel the job if appropriate. Exactly what the program does and how it accomplishes this is completely at -the discression of the system administrator. +the discretion of the system administrator. Information about the job being initiated, it's allocated nodes, etc. are passed to the program using environment variables. See \fBProlog and Epilog Scripts\fR for more information. +.TP +\fBExtSensorsFreq\fR +The external sensors plugin sampling interval. +If \fBExtSensorsType=ext_sensors/none\fR, this parameter is ignored. +For all other values of \fBExtSensorsType\fR, this parameter is the number +of seconds between external sensors samples for hardware components (nodes, +switches, etc.) The default value is zero. This value disables external +sensors sampling. Note: This parameter does not affect external sensors +data collection for jobs/steps. + +.TP +\fBExtSensorsType\fR +Identifies the plugin to be used for external sensors data collection. +Slurmctld calls this plugin to collect external sensors data for jobs/steps +and hardware components. In case of node sharing between jobs the reported +values per job/step (through sstat or sacct) may not be accurate. See also +"man ext_sensors.conf". + +Configurable values at present are: +.RS +.TP 20 +\fBext_sensors/none\fR +No external sensors data is collected. +.TP +\fBext_sensors/rrd\fR +External sensors data is collected from the RRD database. +.RE + .TP \fBFastSchedule\fR Controls how a node's configuration specifications in slurm.conf are used. @@ -602,6 +718,26 @@ Also see the \fBGroupUpdateForce\fR parameter. The interval in seconds between executions of \fBHealthCheckProgram\fR. The default value is zero, which disables execution. +.TP +\fBHealthCheckNodeState\fR +Identify what node states should execute the \fBHealthCheckProgram\fR. 
+Multiple state values may be specified with a comma separator. +The default value is ANY to execute on nodes in any state. +.RS +.TP 12 +\fBALLOC\fR +Run on nodes in the ALLOC state (all CPUs allocated). +.TP +\fBANY\fR +Run on nodes in any state. +.TP +\fBIDLE\fR +Run on nodes in the IDLE state. +.TP +\fBMIXED\fR +Run on nodes in the MIXED state (some CPUs idle and other CPUs allocated). +.RE + .TP \fBHealthCheckProgram\fR Fully qualified pathname of a script to execute as user root periodically @@ -640,32 +776,76 @@ The default value is unlimited (zero) and may not exceed 65533 seconds. The job accounting mechanism type. Acceptable values at present include "jobacct_gather/aix" (for AIX operating system), "jobacct_gather/linux" (for Linux operating system), -"jobacct_gather/cgroup (experimental) and "jobacct_gather/none" +"jobacct_gather/cgroup" (experimental) and "jobacct_gather/none" (no accounting data collected). The default value is "jobacct_gather/none". "jobacct_gather/cgroup" is an experimental plugin for the Linux operating system that uses cgroups to collect accounting statistics. The plugin collects the -following statistics: From the cgroup memory subsystem: memory.usage_in_bytes +following statistics: From the cgroup memory subsystem: memory.usage_in_bytes (reported as 'pages') and rss from memory.stat (reported as 'rss'). From the cgroup cpuacct subsystem: user cpu time and system cpu time. No value is provided by cgroups for virtual memory size ('vsize'). In order to use the \fBsstat\fR tool, "jobacct_gather/aix", "jobacct_gather/linux", or "jobacct_gather/cgroup" must be configured. +.br +\fBNOTE:\fR Changing this configuration parameter changes the contents of +the messages between Slurm daemons. Any previously running job steps are +managed by a slurmstepd daemon that will persist through the lifetime of +that job step and not change its communication protocol. Only change this +configuration parameter when there are no running job steps.
.TP \fBJobAcctGatherFrequency\fR -The job accounting sampling interval. -For jobacct_gather/none this parameter is ignored. -For jobacct_gather/aix and jobacct_gather/linux the parameter is a number is -seconds between sampling job state. -The default value is 30 seconds. -A value of zero disables real the periodic job sampling and provides accounting -information only on job termination (reducing SLURM interference with the job). -Smaller (non\-zero) values have a greater impact upon job performance, but -a value of 30 seconds is not likely to be noticeable for applications having -less than 10,000 tasks. -Users can override this value on a per job basis using the \fB\-\-acctg\-freq\fR -option when submitting the job. +The job accounting and profiling sampling intervals. +The supported format is as follows: +.RS +.TP 12 +\fBJobAcctGatherFrequency=\fR\fI<datatype>\fR\fB=\fR\fI<interval>\fR +where \fI<datatype>\fR=\fI<interval>\fR specifies the task sampling +interval for the jobacct_gather plugin or a +sampling interval for a profiling type by the +acct_gather_profile plugin. Multiple, +comma-separated \fI<datatype>\fR=\fI<interval>\fR intervals +may be specified. Supported datatypes are as follows: +.RS +.TP +\fBtask=\fI<interval>\fR +where \fI<interval>\fR is the task sampling interval in seconds +for the jobacct_gather plugins and for task +profiling by the acct_gather_profile plugin. +.TP +\fBenergy=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for energy profiling using the acct_gather_energy plugin. +.TP +\fBnetwork=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for infiniband profiling using the acct_gather_infiniband +plugin. +.TP +\fBfilesystem=\fI<interval>\fR +where \fI<interval>\fR is the sampling interval in seconds +for filesystem profiling using the acct_gather_filesystem +plugin. +.TP +.RE +.RE +The default value for task sampling interval +is 30 seconds.
The default value for all other intervals is 0. +An interval of 0 disables sampling of the specified type. +If the task sampling interval is 0, accounting +information is collected only at job termination (reducing SLURM +interference with the job). +.br +.br +Smaller (non\-zero) values have a greater impact upon job performance, +but a value of 30 seconds is not likely to be noticeable for +applications having less than 10,000 tasks. +.br +.br +Users can independently override each interval on a per job basis using the +\fB\-\-acctg\-freq\fR option when submitting the job. +.RE .TP \fBJobCheckpointDir\fR @@ -776,6 +956,15 @@ and located in default script directory (typically the subdirectory "etc" of the installation directory). No job submission plugins are used by default. +.TP +\fBKeepAliveTime\fR +Specifies how long sockets communications used between the srun command and its +slurmstepd process are kept alive after disconnect. +Longer values can be used to improve reliability of communications in the +event of network failures. +The default value leaves the system default value. +The value may not exceed 65533. + .TP \fBKillOnBadExit\fR If set to 1, the job will be terminated immediately when one of the @@ -825,15 +1014,29 @@ not explicitly listed in the job submission specification. Fully qualified pathname to the program used to send email per user request. The default value is "/bin/mail". +.TP +\fBMaxArraySize\fR +The maximum job array size. +The maximum job array task index value will be one less than MaxArraySize +to allow for an index value of zero. +Configure MaxArraySize to 0 in order to disable job array use. +The value may not exceed 65533. +Default value is 1001. + .TP \fBMaxJobCount\fR The maximum number of jobs SLURM can have in its active database at one time. Set the values of \fBMaxJobCount\fR and \fBMinJobAge\fR to insure the slurmctld daemon does not exhaust its memory or other resources. 
Once this limit is reached, requests to submit additional -jobs will fail. The default value is 10000 jobs. This value may not -be reset via "scontrol reconfig". It only takes effect upon restart -of the slurmctld daemon. +jobs will fail. The default value is 10000 jobs. +Performance can suffer with more than a couple hundred thousand jobs. +Setting MaxSubmitJobs per user is generally valuable to prevent a single +user from filling the system with jobs. +This is accomplished using Slurm's database and configuring enforcement of +resource limits. +This value may not be reset via "scontrol reconfig". +It only takes effect upon restart of the slurmctld daemon. .TP \fBMaxJobId\fR @@ -999,8 +1202,8 @@ A suspended job will resume execution once the high priority job preempting it completes. The \fBSUSPEND\fR may only be used with the \fBGANG\fR option (the gang scheduler module performs the job resume operation) -and with \fBPreemptType=preempt/partition_prio\fR (the logic to -suspend and resume jobs current only has the data structures to +and with \fBPreemptType=preempt/partition_prio\fR (the logic to +suspend and resume jobs current only has the data structures to support partitions). .RE @@ -1055,11 +1258,20 @@ Supported values are "YES" and "NO". The default value is "NO". \fBPriorityFlags\fR Flags to modify priority behavior Applicable only if PriorityType=priority/multifactor. +The keywords below have no associated value +(e.g. "PriorityFlags=ACCRUE_ALWAYS,SMALL_RELATIVE_TO_TIME"). .RS .TP 17 \fBACCRUE_ALWAYS\fR -If set, priority age factor will be increased despite job dependencies +If set, priority age factor will be increased despite job dependencies or holds. +.TP +\fBSMALL_RELATIVE_TO_TIME\fR +If set, the job's size component will be based upon not the job size alone, but +the job's size divided by its time limit. +.TP +\fBTICKET_BASED\fR +If set, priority will be calculated based on the ticket system.
.RE .TP @@ -1112,8 +1324,8 @@ This specifies the plugin to be used in establishing a job's scheduling priority. Supported values are "priority/basic" (jobs are prioritized by order of arrival, also suitable for sched/wiki and sched/wiki2), "priority/multifactor" (jobs are prioritized based upon size, age, -fair\-share of allocation, etc) and "priority/multifactor2" (a ticket based -variation of priority/multifactor). +fair\-share of allocation, etc). +Also see \fBPriorityFlags\fR for configuration options. The default value is "priority/basic". .TP @@ -1161,30 +1373,31 @@ Acceptable values include: .RS .TP \fBaccounts\fR -(NON-SLURMDBD ACCOUNTING ONLY) prevents users from viewing any account +(NON-SLURMDBD ACCOUNTING ONLY) Prevents users from viewing any account definitions unless they are coordinators of them. .TP \fBjobs\fR -prevents users from viewing jobs or job steps belonging -to other users. (NON-SLURMDBD ACCOUNTING ONLY) prevents users from viewing +Prevents users from viewing jobs or job steps belonging +to other users. (NON-SLURMDBD ACCOUNTING ONLY) Prevents users from viewing job records belonging to other users unless they are coordinators of the association running the job when using sacct. .TP \fBnodes\fR -prevents users from viewing node state information. +Prevents users from viewing node state information. .TP \fBpartitions\fR -prevents users from viewing partition state information. +Prevents users from viewing partition state information. .TP \fBreservations\fR -prevents regular users from viewing reservations. +Prevents regular users from viewing reservations. .TP \fBusage\fR -(NON-SLURMDBD ACCOUNTING ONLY) prevents users from viewing -usage of any other user. This applies to sreport. +Prevents users from viewing usage of any other user, this applies to sshare. +(NON-SLURMDBD ACCOUNTING ONLY) Prevents users from viewing +usage of any other user, this applies to sreport. 
.TP \fBusers\fR -(NON-SLURMDBD ACCOUNTING ONLY) prevents users from viewing +(NON-SLURMDBD ACCOUNTING ONLY) Prevents users from viewing information of any user other than themselves, this also makes it so users can only see associations they deal with. Coordinators can see associations of all users they are coordinator of, @@ -1229,7 +1442,7 @@ which uses a site\-specific LUA script to track processes which uses Quadrics kernel patch and is the default if "SwitchType=switch/elan" .TP \fBproctrack/sgi_job\fR -which uses SGI's Process Aggregates (PAGG) kernel module, +which uses SGI's Process Aggregates (PAGG) kernel module, see \fIhttp://oss.sgi.com/projects/pagg/\fR for more information .TP \fBproctrack/pgid\fR @@ -1262,7 +1475,7 @@ nodes and requeue the job if a failure occurs or cancel the job if appropriate. The program can be used to reboot nodes or perform other work to prepare resources for use. Exactly what the program does and how it accomplishes this is completely at -the discression of the system administrator. +the discretion of the system administrator. Information about the job being initiated, it's allocated nodes, etc. are passed to the program using environment variables. While this program is running, the nodes associated with the job will be @@ -1340,7 +1553,7 @@ The maximum size of a process's data segment .TP \fBFSIZE\fR The maximum size of files created. Note that if the user sets FSIZE to less -than the current size of the slurmd.log, job launches will fail with +than the current size of the slurmd.log, job launches will fail with a 'File size limit exceeded' error. .TP \fBMEMLOCK\fR @@ -1450,6 +1663,14 @@ Related configuration options include \fBResumeProgram\fR, \fBResumeRate\fR, More information is available at the SLURM web site ( http://slurm.schedmd.com/power_save.html ). +.TP +\fBResvEpilog\fR +Fully qualified pathname of a program for the slurmctld to execute +when a reservation ends. 
The program can be used to cancel jobs, modify +partition configuration, etc. +The reservation name will be passed as an argument to the program. +By default there is no epilog. + .TP \fBResvOverRun\fR Describes how long a job already running in a reservation should be @@ -1461,6 +1682,14 @@ The value may not exceed 65533 minutes, although a value of "UNLIMITED" is supported to permit a job to run indefinitely after its reservation is terminated. +.TP +\fBResvProlog\fR +Fully qualified pathname of a program for the slurmctld to execute +when a reservation begins. The program can be used to cancel jobs, modify +partition configuration, etc. +The reservation name will be passed as an argument to the program. +By default there is no prolog. + .TP \fBReturnToService\fR Controls when a DOWN node will be returned to service. @@ -1519,10 +1748,17 @@ Multiple options may be comma separated. .TP \fBdefault_queue_depth=#\fR The default number of jobs to attempt scheduling (i.e. the queue depth) when a -running job completes or other routine actions occur. The full queue will be -tested on a less frequent basis. The default value is 100. +running job completes or other routine actions occur. +The full queue will be tested on a less frequent basis. +The default value is 100. In the case of large clusters (more than 1000 nodes), configuring a relatively small value may be desirable. +Specifying a large value (say 1000 or higher) can be expected to result in +poor system responsiveness since this scheduling logic will not release +locks for other events to occur. +It would be better to let the backfill scheduler process a larger number of jobs +(see \fBmax_job_bf\fR, \fBbf_continue\fR and other options here for more +information).
.TP \fBdefer\fR Setting this option will avoid attempting to schedule each job @@ -1532,12 +1768,31 @@ This option may improve system responsiveness when large numbers of jobs (many hundreds) are submitted at the same time, but it will delay the initiation time of individual jobs. Also see \fBdefault_queue_depth\fR above. .TP +\fBbf_continue\fR +The backfill scheduler periodically releases locks in order to permit other +operations to proceed rather than blocking all activity for what could be an +extended period of time. +Setting this option will cause the backfill scheduler to continue processing +pending jobs from its original job list after releasing locks even if job +or node state changes. +This can result in lower priority jobs being backfill scheduled instead +of newly arrived higher priority jobs, but will permit more queued jobs to be +considered for backfill scheduling. +.TP \fBbf_interval=#\fR The number of seconds between iterations. Higher values result in less overhead and better responsiveness. The default value is 30 seconds. This option applies only to \fBSchedulerType=sched/backfill\fR. .TP +\fBbf_max_job_part=#\fR +The maximum number of jobs per partition to attempt backfill scheduling for, +not counting jobs which cannot be started due to an association resource +limit. This can be especially helpful for systems with large numbers of +partitions and jobs. +The default value is 0, which means no limit. +This option applies only to \fBSchedulerType=sched/backfill\fR. +.TP \fBbf_max_job_user=#\fR The maximum number of jobs per user to attempt backfill scheduling for, not counting jobs which cannot be started due to an association resource @@ -1558,6 +1813,12 @@ This option applies only to \fBSchedulerType=sched/backfill\fR. The number of minutes into the future to look when considering jobs to schedule. Higher values result in more overhead and less responsiveness. The default value is 1440 minutes (one day).
+A value at least as long as the highest allowed time limit is generally +advisable to prevent job starvation. +In order to limit the amount of data managed by the backfill scheduler, +if the value of \fBbf_window\fR is increased, then it is generally advisable +to also increase \fBbf_resolution\fR. +if This option applies only to \fBSchedulerType=sched/backfill\fR. .TP \fBmax_job_bf=#\fR @@ -1567,9 +1828,6 @@ Higher values result in more overhead and less responsiveness. Until an attempt is made to backfill schedule a job, its expected initiation time value will not be set. The default value is 50. -In the case of large clusters (more than 1000 nodes) configured with -\fBSelectType=select/cons_res\fR, configuring a relatively small value may be -desirable. This option applies only to \fBSchedulerType=sched/backfill\fR. .TP \fBmax_depend_depth=#\fR @@ -1615,18 +1873,8 @@ if desired. Acceptable values include: .RS .TP -\fBsched/builtin\fR -Initiate jobs in priority order. -If any job in the partition can not be scheduled, no lower priority job in that -partition will be scheduled. -An exception is made for jobs can not run due to partition constraints -(e.g. the time limit) or down/drained nodes. -In that case, lower priority jobs can be initiated and not impact the higher -priority job. -This is the default configuration. -.TP \fBsched/backfill\fR -for a backfill scheduling module to augment the default FIFO scheduling. +For a backfill scheduling module to augment the default FIFO scheduling. Backfill scheduling will initiate lower\-priority jobs if doing so does not delay the expected initiation time of any higher priority job. @@ -1634,27 +1882,37 @@ Effectiveness of backfill scheduling is dependent upon users specifying job time limits, otherwise all jobs will have the same time limit and backfilling is impossible. Note documentation for the \fBSchedulerParameters\fR option above. +This is the default configuration.
+.TP +\fBsched/builtin\fR +This is the FIFO scheduler which initiates jobs in priority order. +If any job in the partition can not be scheduled, no lower priority job in that +partition will be scheduled. +An exception is made for jobs can not run due to partition constraints +(e.g. the time limit) or down/drained nodes. +In that case, lower priority jobs can be initiated and not impact the higher +priority job. .TP \fBsched/gang\fR Defunct option. See \fBPreemptType\fR and \fBPreemptMode\fR options. .TP \fBsched/hold\fR -to hold all newly arriving jobs if a file "/etc/slurm.hold" +To hold all newly arriving jobs if a file "/etc/slurm.hold" exists otherwise use the built\-in FIFO scheduler .TP \fBsched/wiki\fR -for the Wiki interface to the Maui Scheduler +For the Wiki interface to the Maui Scheduler .TP \fBsched/wiki2\fR -for the Wiki interface to the Moab Cluster Suite +For the Wiki interface to the Moab Cluster Suite .RE .TP \fBSelectType\fR Identifies the type of resource selection algorithm to be used. Changing this value can only be done by restarting the slurmctld daemon -and will result in the loss of all job information (running and pending) -since the job state save format used by each plugin is different. +and will result in the loss of all job information (running and pending) +since the job state save format used by each plugin is different. Acceptable values include .RS .TP @@ -1692,12 +1950,16 @@ The permitted values of \fBSelectTypeParameters\fR depend upon the configured value of \fBSelectType\fR. \fBSelectType=select/bluegene\fR supports no \fBSelectTypeParameters\fR. The only supported option for \fBSelectType=select/linear\fR are -\fBCR_ONE_TASK_PER_CORE\fR and +\fBCR_ONE_TASK_PER_CORE\fR and \fBCR_Memory\fR, which treats memory as a consumable resource and prevents memory over subscription with job preemption or gang scheduling. 
The following values are supported for \fBSelectType=select/cons_res\fR: .RS .TP +\fBCR_ALLOCATE_FULL_SOCKET\fR +Jobs are allocated whole sockets rather than individual cores. +Must be used with \fBCR_Socket\fR or \fBCR_Socket_Memory\fR option. +.TP \fBCR_CPU\fR CPUs are consumable resources. Configure the number of \fBCPUs\fR on each node, which may be equal to the @@ -1735,15 +1997,15 @@ Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_ONE_TASK_PER_CORE\fR Allocate one task per core by default. -Without this option, by default one task will be allocated per +Without this option, by default one task will be allocated per thread on nodes with more than one \fBThreadsPerCore\fR configured. .TP \fBCR_CORE_DEFAULT_DIST_BLOCK\fR Allocate cores within a node using block distribution by default. This is a pseudo\-best\-fit algorithm that minimizes the number of boards and minimizes the number of sockets (within minimum boards) -used for the allocation. -This default behavior can be overridden specifying a particular +used for the allocation. +This default behavior can be overridden specifying a particular "\-m" parameter with srun/salloc/sbatch. Without this option, cores will be allocated cyclicly across the sockets. .TP @@ -1835,6 +2097,12 @@ Fully qualified pathname of a file into which the \fBslurmctld\fR daemon may write its process id. This may be used for automated signal processing. The default value is "/var/run/slurmctld.pid". +.TP +\fBSlurmctldPlugstack\fR +A comma delimited list of SLURM controller plugins to be started when the +daemon begins and terminated when it ends. +Only the plugin's init and fini functions are called. + .TP \fBSlurmctldPort\fR The port number that the SLURM controller, \fBslurmctld\fR, listens @@ -1950,19 +2218,19 @@ The value may not exceed 65533 seconds. .TP \fBSlurmSchedLogFile\fR -Fully qualified pathname of the scheduling event logging file. 
-The syntax of this parameter is the same as for \fBSlurmctldLogFile\fR. +Fully qualified pathname of the scheduling event logging file. +The syntax of this parameter is the same as for \fBSlurmctldLogFile\fR. In order to configure scheduler logging, set both the \fBSlurmSchedLogFile\fR and \fBSlurmSchedLogLevel\fR parameters. .TP \fBSlurmSchedLogLevel\fR -The initial level of scheduling event logging, similar to the -\fBSlurmctlDebug\fR parameter used to control the initial level of -\fBslurmctld\fR logging. -Valid values for \fBSlurmSchedLogLevel\fR are "0" (scheduler logging -disabled) and "1" (scheduler logging enabled). -If this parameter is omitted, the value defaults to "0" (disabled). +The initial level of scheduling event logging, similar to the +\fBSlurmctlDebug\fR parameter used to control the initial level of +\fBslurmctld\fR logging. +Valid values for \fBSlurmSchedLogLevel\fR are "0" (scheduler logging +disabled) and "1" (scheduler logging enabled). +If this parameter is omitted, the value defaults to "0" (disabled). In order to configure scheduler logging, set both the \fBSlurmSchedLogFile\fR and \fBSlurmSchedLogLevel\fR parameters. The scheduler logging level can be changed dynamically using \fBscontrol\fR. @@ -2181,7 +2449,7 @@ variables and output for the user program. .RS .TP 20 \fBexport NAME=value\fR -Will set environment variables for the task being spawned. +Will set environment variables for the task being spawned. Everything after the equal sign to the end of the line will be used as the value for the environment variable. Exporting of functions is not currently supported. @@ -2191,7 +2459,7 @@ Will cause that line (without the leading "print ") to be printed to the job's standard output. .TP \fBunset NAME\fR -Will clear environment variables for the task being spawned. +Will clear environment variables for the task being spawned. 
.TP The order of task prolog/epilog execution is as follows: .TP @@ -2323,14 +2591,14 @@ lines (see above), where \fBslurm\fR is the service\-name, should be added. .TP \fBVSizeFactor\fR -Memory specifications in job requests apply to real memory size (also known -as resident set size). It is possible to enforce virtual memory limits for +Memory specifications in job requests apply to real memory size (also known +as resident set size). It is possible to enforce virtual memory limits for both jobs and job steps by limiting their virtual memory to some percentage -of their real memory allocation. The \fBVSizeFactor\fR parameter specifies -the job's or job step's virtual memory limit as a percentage of its real -memory limit. For example, if a job's real memory limit is 500MB and +of their real memory allocation. The \fBVSizeFactor\fR parameter specifies +the job's or job step's virtual memory limit as a percentage of its real +memory limit. For example, if a job's real memory limit is 500MB and VSizeFactor is set to 101 then the job will be killed if its real memory -exceeds 500MB or its virtual memory exceeds 505MB (101 percent of the +exceeds 500MB or its virtual memory exceeds 505MB (101 percent of the real memory limit). The default valus is 0, which disables enforcement of virtual memory limits. The value may not exceed 65533 percent. @@ -2372,10 +2640,12 @@ used for scheduling purposes (depending upon the value of \fBFastSchedule\fR in the configuration file. .LP Default values can be specified with a record in which -"NodeName" is "DEFAULT". +\fBNodeName\fR is "DEFAULT". The default entry values will apply only to lines following it in the configuration file and the default values can be reset multiple times in the configuration file with multiple entries where "NodeName=DEFAULT". +Each line where \fBNodeName\fR is "DEFAULT" will replace or add to previous +default values and not reinitialize the default values.
The "NodeName=" specification must be placed on every line describing the configuration of nodes. A single node name can not appear as a NodeName value in more than one line @@ -2437,6 +2707,8 @@ If the \fBNodeName\fR is "DEFAULT", the values specified with that record will apply to subsequent node specifications unless explicitly set to other values in that node record or replaced with a different set of default values. +Each line where \fBNodeName\fR is "DEFAULT" will replace or add to previous +default values and not reinitialize the default values. For architectures in which the node order is significant, nodes will be considered consecutive in the order defined. For example, if the configuration for "NodeName=charlie" immediately @@ -2526,7 +2798,7 @@ Also see \fBFeature\fR. \fBPort\fR The port number that the SLURM compute node daemon, \fBslurmd\fR, listens to for work on this particular node. By default there is a single port number -for all \fBslurmd\fR daemons on all compute nodes as defined by the +for all \fBslurmd\fR daemons on all compute nodes as defined by the \fBSlurmdPort\fR configuration parameter. Use of this option is not generally recommended except for development or testing purposes. If multiple \fBslurmd\fR daemons execute on a node this can specify a range of ports @@ -2707,6 +2979,30 @@ with the appropriate parameters (\-\-have\-front\-end, appropriate architecture by the configure script (BlueGene or Cray systems). The front end configuration specifies the following information: +.TP +\fBAllowGroups\fR +Comma separated list of group names which may execute jobs on this front end +node. By default, all groups may use this front end node. +May not be used with the \fBDenyGroups\fR option. + +.TP +\fBAllowUsers\fR +Comma separated list of user names which may execute jobs on this front end +node. By default, all users may use this front end node. +May not be used with the \fBDenyUsers\fR option.
+ +.TP +\fBDenyGroups\fR +Comma separated list of group names which are prevented from executing jobs on +this front end node. +May not be used with the \fBAllowGroups\fR option. + +.TP +\fBDenyUsers\fR +Comma separated list of user names which are prevented from executing jobs on +this front end node. +May not be used with the \fBAllowUsers\fR option. + .TP \fBFrontendName\fR Name that SLURM uses to refer to a frontend node. @@ -2721,6 +3017,8 @@ If the \fBFrontendName\fR is "DEFAULT", the values specified with that record will apply to subsequent node specifications unless explicitly set to other values in that frontend node record or replaced with a different set of default values. +Each line where \fBFrontendName\fR is "DEFAULT" will replace or add to previous +default values and not reinitialize the default values. Note that since the naming of front end nodes would typically not follow that of the compute nodes (e.g. lacking X, Y and Z coordinates found in the compute node naming scheme), each front end node name should be listed separately and @@ -2785,12 +3083,14 @@ partitions, each with different constraints (time limit, job sizes, groups allowed to use the partition, etc.). Jobs are allocated resources within a single partition. Default values can be specified with a record in which -"PartitionName" is "DEFAULT". +\fBPartitionName\fR is "DEFAULT". The default entry values will apply only to lines following it in the configuration file and the default values can be reset multiple times in the configuration file with multiple entries where "PartitionName=DEFAULT". The "PartitionName=" specification must be placed on every line describing the configuration of partitions. +Each line where \fBPartitionName\fR is "DEFAULT" will replace or add to previous +default values and not reinitialize the default values. A single partition name can not appear as a PartitionName value in more than one line (duplicate partition name records will be ignored).
If a partition that is in use is deleted from the configuration and slurm @@ -2900,6 +3200,15 @@ The default value is "NO". Note that partitions that a user lacks access to by virtue of the \fBAllowGroups\fR parameter will also be hidden by default. +.TP +\fBMaxCPUsPerNode\fR +Maximum number of CPUs on any node available to all jobs from this partition. +This can be especially useful to schedule GPUs. For example a node can be +associated with two Slurm partitions (e.g. "cpu" and "gpu") and the +partition/queue "cpu" could be limited to only a subset of the node's CPUs, +insuring that one or more CPUs would be available to jobs in the "gpu" +partition/queue. + .TP \fBMaxMemPerCPU\fR Maximum real memory size available per allocated CPU in MegaBytes. @@ -2970,6 +3279,8 @@ If the \fBPartitionName\fR is "DEFAULT", the values specified with that record will apply to subsequent partition specifications unless explicitly set to other values in that partition record or replaced with a different set of default values. +Each line where \fBPartitionName\fR is "DEFAULT" will replace or add to previous +default values and not reinitialize the default values. .TP \fBPreemptMode\fR @@ -2996,9 +3307,9 @@ The value may not exceed 65533. .TP \fBReqResv\fR Specifies users of this partition are required to designate a reservation -when submitting a job. This option can be useful in restricting usage +when submitting a job. This option can be useful in restricting usage of a partition that may have higher priority or additional resources to be -allowed only within a reservation. +allowed only within a reservation. Possible values are "YES" and "NO". The default value is "NO". @@ -3013,6 +3324,13 @@ users from directly using those resources. Possible values are "YES" and "NO". The default value is "NO". +.TP +\fBSelectTypeParameters\fR +Partition\-specific resource allocation type. +Supported values are \fBCR_Core\fR and \fBCR_Socket\fR.
+Use requires the system\-wide \fBSelectTypeParameters\fR value be set plus +\fBCR_ALLOCATE_FULL_SOCKET\fR. + .TP \fBShared\fR Controls the ability of the partition to execute more than one job at a @@ -3058,13 +3376,9 @@ small blocks or for systems running with gang scheduling (\fBSchedulerType=sched/gang\fR). .TP \fBYES\fR -Makes all resources in the partition available for sharing, -but honors a user's request for dedicated resources. -If \fBSelectType=select/cons_res\fR, then resources will be -over\-subscribed unless explicitly disabled in the job submit -request using the "\-\-exclusive" option. -With \fBSelectType=select/bluegene\fR or \fBSelectType=select/linear\fR, -resources will only be over\-subscribed when explicitly requested +Makes all resources in the partition available for sharing upon request by +the job. +Resources will only be over\-subscribed when explicitly requested by the user using the "\-\-share" option on job submission. May be followed with a colon and maximum number of jobs in running or suspended state. @@ -3498,7 +3812,7 @@ See the section \fBFILE AND DIRECTORY PERMISSIONS\fR for information about the various files and directories used by SLURM. .LP It is recommended that the logrotate utility be used to insure that -various log files do not become too large. +various log files do not become too large. This also applies to text files used for accounting, process tracking, and the slurmdbd log if they are used. .LP @@ -3506,53 +3820,54 @@ Here is a sample logrotate configuration. Make appropriate site modifications and save as /etc/logrotate.d/slurm on all nodes. See the \fBlogrotate\fR man page for more details. 
.LP -## +## .br -# SLURM Logrotate Configuration +# SLURM Logrotate Configuration .br -## +## .br /var/log/slurm/*log { .br - compress + compress .br - missingok + missingok .br - nocopytruncate + nocopytruncate .br - nocreate + nocreate .br - nodelaycompress + nodelaycompress .br - nomail + nomail .br - notifempty + notifempty .br - noolddir + noolddir .br - rotate 5 + rotate 5 .br - sharedscripts + sharedscripts .br - size=5M + size=5M .br - create 640 slurm root + create 640 slurm root .br - postrotate + postrotate .br - /etc/init.d/slurm reconfig + /etc/init.d/slurm reconfig .br - endscript + endscript .br } .br .SH "COPYING" Copyright (C) 2002\-2007 The Regents of the University of California. -Copyright (C) 2008\-2010 Lawrence Livermore National Security. -Portions Copyright (C) 2010-2012 SchedMD <http://www.sched\-md.com>. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2008\-2010 Lawrence Livermore National Security. +.br +Copyright (C) 2010-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man5/slurmdbd.conf.5 b/doc/man/man5/slurmdbd.conf.5 index 6f37d595ee15a0a2e27204059c3010a45ec69c08..cf30d5558b6a7c6bcc3e56fd8062cce4504c1736 100644 --- a/doc/man/man5/slurmdbd.conf.5 +++ b/doc/man/man5/slurmdbd.conf.5 @@ -1,4 +1,4 @@ -.TH "slurmdbd.conf" "5" "March 2012" "slurmdbd.conf 2.4" "Slurm configuration file" +.TH "slurmdbd.conf" "5" "Feb 2013" "slurmdbd.conf 2.6" "Slurm configuration file" .SH "NAME" slurmdbd.conf \- Slurm Database Daemon (SlurmDBD) configuration file @@ -36,13 +36,18 @@ $ArchiveDir/$ClusterName_$ArchiveObject_archive_$BeginTimeStamp_$endTimeStamp .TP \fBArchiveEvents\fR -When purging events also achive them. Boolean, yes to archive event data, -no other wise. Default is no. +When purging events also archive them. Boolean, yes to archive event data, +no otherwise.
Default is no. .TP \fBArchiveJobs\fR -When purging jobs also achive them. Boolean, yes to archive job data, -no other wise. Default is no. +When purging jobs also archive them. Boolean, yes to archive job data, +no otherwise. Default is no. + +.TP +\fBArchiveResvs\fR +When purging reservations also archive them. Boolean, yes to archive +reservation data, no otherwise. Default is no. .TP \fBArchiveScript\fR @@ -61,7 +66,7 @@ variables are set. Time of last event start to archive. .TP \fBSLURM_ARCHIVE_JOBS\fR -1 for achive jobs 0 otherwise. +1 for archive jobs 0 otherwise. .TP \fBSLURM_ARCHIVE_LAST_JOB\fR Time of last job submit to archive. @@ -82,13 +87,13 @@ Time of last suspend start to archive. .TP \fBArchiveSteps\fR -When purging steps also achive them. Boolean, yes to archive step data, -no other wise. Default is no. +When purging steps also archive them. Boolean, yes to archive step data, +no otherwise. Default is no. .TP \fBArchiveSuspend\fR -When purging suspend data also achive it. Boolean, yes to archive -suspend data, no other wise. Default is no. +When purging suspend data also archive it. Boolean, yes to archive +suspend data, no otherwise. Default is no. .TP \fBAuthInfo\fR @@ -268,6 +273,16 @@ those more frequent purges. (i.e. a value of '12hours' would purge everything older than 12 hours.) If not set (default), then job records are never purged. +.TP +\fBPurgeResvAfter\fR +Individual reservation records over this age are purged from the database. +Aggregated information will be preserved indefinitely. +The time is a numeric value and is a number of months. If you want to purge +more often you can include hours, or days behind the numeric value to get +those more frequent purges. (i.e. a value of '12hours' would purge +everything older than 12 hours.) +If not set (default), then reservation records are never purged. + .TP \fBPurgeStepAfter\fR Individual job step records over this age are purged from the database. 
@@ -331,23 +346,17 @@ with the database. \fBStorageType\fR Define the accounting storage mechanism type. Acceptable values at present include -"accounting_storage/gold", "accounting_storage/mysql", and +"accounting_storage/mysql", and "accounting_storage/pgsql". -The value "accounting_storage/gold" indicates that account records -will be written to Gold -.na -(http://www.clusterresources.com/pages/products/gold-allocation-manager.php), -.ad -which maintains its own database. The value "accounting_storage/mysql" indicates that accounting records should be written to a MySQL database specified by the \fBStorageLoc\fR parameter. The value "accounting_storage/pgsql" indicates that accounting records should be written to a PostgreSQL database specified by the -\fBStorageLoc\fR parameter. This plugin is not complete and -should not be used if wanting to use associations. It will however work with -basic accounting of jobs and job steps. If interested in -completing please email slurm-dev@lists.llnl.gov. +\fBStorageLoc\fR parameter. This plugin is not complete and has been +deprecated. In future versions of Slurm this plugin may be removed. +It should not be used if wanting to use associations. It will however work +with basic accounting of jobs and job steps. This value must be specified. .TP @@ -378,6 +387,8 @@ ArchiveEvents=yes .br ArchiveJobs=yes .br +ArchiveResvs=yes +.br ArchiveSteps=no .br ArchiveSuspend=no @@ -396,6 +407,8 @@ PurgeEventAfter=1month .br PurgeJobAfter=12month .br +PurgeResvAfter=1month +.br PurgeStepAfter=1month .br PurgeSuspendAfter=1month @@ -415,7 +428,8 @@ StorageUser=database_mgr .SH "COPYING" Copyright (C) 2008-2010 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>.
diff --git a/doc/man/man5/topology.conf.5 b/doc/man/man5/topology.conf.5 index 112e3fcba6443ae51a5d14d1c226acdbe0b5d29c..b15577f230444f0ca38d4d4b65d326ba2c3b34d7 100644 --- a/doc/man/man5/topology.conf.5 +++ b/doc/man/man5/topology.conf.5 @@ -72,7 +72,8 @@ SwitchName=s3 Switches=s[0\-2] .SH "COPYING" Copyright (C) 2009 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). -CODE\-OCEC\-09\-009. All rights reserved. +.br +Copyright (C) 2010\-2013 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. diff --git a/doc/man/man8/Makefile.am b/doc/man/man8/Makefile.am index 10d64d367fe1d13754f250aa537b430b0ef74f91..294d9de4610972ff418503db37358e2f69f8149e 100644 --- a/doc/man/man8/Makefile.am +++ b/doc/man/man8/Makefile.am @@ -1,4 +1,4 @@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html man8_MANS = slurmctld.8 \ slurmd.8 \ diff --git a/doc/man/man8/Makefile.in b/doc/man/man8/Makefile.in index 3db38b7ef0e05a2165cc7a6ec39fc58f503f302f..75dfcfcc119149eb8a42fcdd129d9876a5f3fe5b 100644 --- a/doc/man/man8/Makefile.in +++ b/doc/man/man8/Makefile.in @@ -57,6 +57,7 @@ subdir = doc/man/man8 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ 
$(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = 
@OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 +281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -317,7 +343,7 @@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ -htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@/html includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ diff --git a/doc/man/man8/spank.8 b/doc/man/man8/spank.8 index 2f8eaf32b615bc33219244847f0d24ebc51603ee..c34b16f3ab7c35afae31d169391042e109d867ef 100644 --- a/doc/man/man8/spank.8 +++ b/doc/man/man8/spank.8 @@ -357,7 +357,7 @@ This function returns \fBESPANK_SUCCESS\fR if the option defined in the struct spank_option \fIopt\fR has been used by the user. If \fIoptargp\fR is non-NULL then it is set to any option argument passed (if the option takes an argument). The use of this method is \fIrequired\fR to process -options in \fBjob_script\fR context (\fBslurm_spank_job_prolog\R and +options in \fBjob_script\fR context (\fBslurm_spank_job_prolog\fR and \fBslurm_spank_job_epilog\fR). .SH "CONFIGURATION" diff --git a/etc/init.d.slurm.in b/etc/init.d.slurm.in index ec5d534fa42092ed0217fc07549412cfb507597a..a854faa072e51a44eb5a12e72dee41cdac0a4407 100644 --- a/etc/init.d.slurm.in +++ b/etc/init.d.slurm.in @@ -5,10 +5,10 @@ # manages exclusive access to a set of compute \ # resources and distributes work to those resources. # -# processname: /usr/sbin/slurmd +# processname: @sbindir@/slurmd # pidfile: /var/run/slurmd.pid # -# processname: /usr/sbin/slurmctld +# processname: @sbindir@/slurmctld # pidfile: /var/run/slurmctld.pid # # config: /etc/sysconfig/slurm @@ -139,6 +139,7 @@ slurmstatus() { if [ $? 
= 0 ]; then pidfile=${pidfile##*=} pidfile=${pidfile%#*} + pidfile=${pidfile//\"/} else pidfile=/var/run/${base}.pid fi @@ -288,7 +289,9 @@ case "$1" in ;; reconfig) for prog in `$BINDIR/scontrol show daemons`; do + echo -n $"Reloading $prog daemon configuration: " killproc $prog -HUP + echo done ;; test) diff --git a/etc/init.d.slurmdbd.in b/etc/init.d.slurmdbd.in index 62ecf8a04eeb2a5287f8fd942e5e1db021efbf53..4a1626f7991a623b128604e67e45d406f705ee2f 100755 --- a/etc/init.d.slurmdbd.in +++ b/etc/init.d.slurmdbd.in @@ -4,7 +4,7 @@ # description: SLURMDBD is a database server interface for \ # SLURM (Simple Linux Utility for Resource Management). # -# processname: /usr/sbin/slurmdbd +# processname: @sbindir@/slurmdbd # pidfile: /var/run/slurmdbd.pid # # config: /etc/sysconfig/slurm @@ -101,6 +101,7 @@ slurmstatus() { if [ $? = 0 ]; then pidfile=${pidfile##*=} pidfile=${pidfile%#*} + pidfile=${pidfile//\"/} else pidfile=/var/run/slurmdbd.pid fi @@ -179,7 +180,9 @@ case "$1" in fi ;; reconfig) + echo -n $"Reloading slurmdbd daemon configuration: " killproc slurmdbd -HUP + echo ;; *) echo "Usage: $0 {start|stop|status|restart|condrestart|reconfig}" diff --git a/etc/slurm.epilog.clean b/etc/slurm.epilog.clean index e829554f7651cda64280b080cf591941e44f2108..15d435229b8b21e81ccea1c282244e3511ba251f 100644 --- a/etc/slurm.epilog.clean +++ b/etc/slurm.epilog.clean @@ -22,7 +22,7 @@ if [ $SLURM_UID -lt 100 ] ; then exit 0 fi -job_list=`${SLURM_BIN}squeue --noheader --format=%i --user=$SLURM_UID --node=localhost` +job_list=`${SLURM_BIN}squeue --noheader --format=%A --user=$SLURM_UID --node=localhost` for job_id in $job_list do if [ $job_id -ne $SLURM_JOB_ID ] ; then diff --git a/slurm.spec b/slurm.spec index 027c1ab9c857859a186a3c96f2b0fcdfab6cb43c..73aebb38a9dff666167870dfef93e3a7a4298740 100644 --- a/slurm.spec +++ b/slurm.spec @@ -5,6 +5,7 @@ # # build options .rpmmacros options change to default action # =============== ==================== ======================== +# 
--enable-multiple-slurmd %_with_multiple_slurmd 1 build with the multiple slurmd option. Typically used to simulate a larger system than one has access to. # --enable-salloc-background %_with_salloc_background 1 on a cray system alloc salloc to execute as a background process. # --prefix %_prefix path install path for commands, libraries, etc. # --with aix %_with_aix 1 build aix RPM @@ -44,6 +45,7 @@ %slurm_without_opt debug %slurm_without_opt sun_const %slurm_without_opt salloc_background +%slurm_without_opt multiple_slurmd # These options are only here to force there to be these on the build. # If they are not set they will still be compiled if the packages exist. @@ -91,16 +93,16 @@ %endif Name: slurm -Version: 2.5.7 +Version: 2.6.4 Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-2.5.7.tar.bz2 +Source: slurm-2.6.4.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} -URL: http://www.schedmd.com/slurmdocs/ +URL: http://slurm.schedmd.com/ Requires: slurm-plugins @@ -165,7 +167,7 @@ BuildRequires: perl(ExtUtils::MakeMaker) SLURM is an open source, fault-tolerant, and highly scalable cluster management and job scheduling system for Linux clusters containing up to 65,536 nodes. Components include machine status, -partition management, job management, scheduling and accounting modules. +partition management, job management, scheduling and accounting modules # Allow override of sysconfdir via _slurm_sysconfdir. # Note 'global' instead of 'define' needed here to work around apparent @@ -173,6 +175,20 @@ partition management, job management, scheduling and accounting modules. %{!?_slurm_sysconfdir: %global _slurm_sysconfdir /etc/slurm} %define _sysconfdir %_slurm_sysconfdir +# Allow override of datadir via _slurm_datadir. +%{!?_slurm_datadir: %global _slurm_datadir %{_prefix}/share} +%define _datadir %{_slurm_datadir} + +# Allow override of mandir via _slurm_mandir. 
+%{!?_slurm_mandir: %global _slurm_mandir %{_datadir}/man} +%define _mandir %{_slurm_mandir} + +# Allow override of infodir via _slurm_infodir. +# (Not currently used for anything) +%{!?_slurm_infodir: %global _slurm_infodir %{_datadir}/info} +%define _infodir %{_slurm_infodir} + + # # Never allow rpm to strip binaries as this will break # parallel debugging capability @@ -207,20 +223,20 @@ partition management, job management, scheduling and accounting modules. %define _php_extdir %(php-config --extension-dir 2>/dev/null || echo %{_libdir}/php5) %package perlapi -Summary: Perl API to SLURM. +Summary: Perl API to SLURM Group: Development/System Requires: slurm %description perlapi Perl API package for SLURM. This package includes the perl API to provide a -helpful interface to SLURM through Perl. +helpful interface to SLURM through Perl %package devel -Summary: Development package for SLURM. +Summary: Development package for SLURM Group: Development/System Requires: slurm %description devel Development package for SLURM. This package includes the header files -and static libraries for the SLURM API. +and static libraries for the SLURM API %if %{slurm_with auth_none} %package auth-none @@ -237,7 +253,8 @@ Summary: SLURM auth implementation using Brent Chun's authd Group: System Environment/Base Requires: slurm authd %description auth-authd -SLURM authentication module for Brent Chun's authd +SLURM authentication module for Brent Chun's authd. Used to +authenticate user originating an RPC %endif # This is named munge instead of auth-munge since there are 2 plugins in the @@ -250,7 +267,8 @@ Requires: slurm munge BuildRequires: munge-devel munge-libs Obsoletes: slurm-auth-munge %description munge -SLURM authentication module for Chris Dunlap's Munge +SLURM authentication and crypto implementation using Munge. 
Used to +authenticate user originating an RPC, digitally sign and/or encrypt messages %endif %if %{slurm_with bluegene} @@ -267,72 +285,77 @@ Summary: SLURM database daemon Group: System Environment/Base Requires: slurm-plugins slurm-sql %description slurmdbd -SLURM database daemon +SLURM database daemon. Used to accept and process database RPCs and upload +database changes to slurmctld daemons on each cluster %package sql Summary: SLURM SQL support Group: System Environment/Base %description sql -SLURM sql support +SLURM SQL support. Contains interfaces to MySQL and PostgreSQL %package plugins Summary: SLURM plugins (loadable shared objects) Group: System Environment/Base %description plugins -SLURM plugins (loadable shared objects) +SLURM plugins (loadable shared objects) supporting a wide variety of +architectures and behaviors. These basically provide the building blocks +with which Slurm can be configured. Note that some system specific plugins +are in other packages %package torque -Summary: Torque/PBS wrappers for transitition from Torque/PBS to SLURM. +Summary: Torque/PBS wrappers for transition from Torque/PBS to SLURM Group: Development/System Requires: slurm-perlapi %description torque -Torque wrapper scripts used for helping migrate from Torque/PBS to SLURM. +Torque wrapper scripts used for helping migrate from Torque/PBS to SLURM %package sjobexit -Summary: SLURM job exit code management tools. +Summary: SLURM job exit code management tools Group: Development/System Requires: slurm-perlapi %description sjobexit -SLURM job exit code management tools. +SLURM job exit code management tools. Enables users to alter job exit code +information for completed jobs %package slurmdb-direct Summary: Wrappers to write directly to the slurmdb. Group: Development/System Requires: slurm-perlapi %description slurmdb-direct -Wrappers to write directly to the slurmdb.
+Wrappers to write directly to the slurmdb %if %{slurm_with aix} %package aix -Summary: SLURM interfaces to IBM AIX. +Summary: SLURM interfaces to IBM AIX Group: System Environment/Base Requires: slurm BuildRequires: proctrack >= 3 Obsoletes: slurm-aix-federation %description aix -SLURM plugins for IBM AIX. +SLURM interfaces for IBM AIX systems %endif %if %{slurm_with percs} %package percs -Summary: SLURM plugins to run on an IBM PERCS system. +Summary: SLURM plugins to run on an IBM PERCS system Group: System Environment/Base Requires: slurm nrt BuildRequires: nrt %description percs -SLURM plugins to run on an IBM PERCS system, POE interface and NRT switch plugin. +SLURM plugins to run on an IBM PERCS system, POE interface and NRT switch plugin %endif %if %{slurm_with sgijob} %package proctrack-sgi-job -Summary: SLURM process tracking plugin for SGI job containers. +Summary: SLURM process tracking plugin for SGI job containers Group: System Environment/Base Requires: slurm BuildRequires: job %description proctrack-sgi-job -SLURM process tracking plugin for SGI job containers. -(See http://oss.sgi.com/projects/pagg). +SLURM process tracking plugin for SGI job containers +(See http://oss.sgi.com/projects/pagg) %endif %if %{slurm_with lua} @@ -347,15 +370,19 @@ Includes the SLURM proctrack/lua and job_submit/lua plugin %endif %package sjstat -Summary: Perl tool to print SLURM job state information. +Summary: Perl tool to print SLURM job state information Group: Development/System Requires: slurm %description sjstat -Perl tool to print SLURM job state information. +Perl tool to print SLURM job state information. The output is designed to give +information on the resource usage and availability, as well as information +about jobs that are currently active on the machine.
This output is built +using the SLURM utilities, sinfo, squeue and scontrol, the man pages for these +utilities will provide more information and greater depth of understanding %if %{slurm_with pam} %package pam_slurm -Summary: PAM module for restricting access to compute nodes via SLURM. +Summary: PAM module for restricting access to compute nodes via SLURM Group: System Environment/Base Requires: slurm slurm-devel BuildRequires: pam-devel @@ -379,10 +406,10 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart ############################################################################# %prep -%setup -n slurm-2.5.7 +%setup -n slurm-2.6.4 %build -%configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ +%configure \ %{?slurm_with_debug:--enable-debug} \ %{?slurm_with_partial_attach:--enable-partial-attach} \ %{?slurm_with_sun_const:--enable-sun-const} \ @@ -399,6 +426,7 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart %{?with_blcr} \ %{?slurm_with_salloc_background:--enable-salloc-background} \ %{!?slurm_with_readline:--without-readline} \ + %{?slurm_with_multiple_slurmd:--enable-multiple-slurmd} \ %{?with_cflags} make %{?_smp_mflags} @@ -439,6 +467,7 @@ install -D -m755 contribs/sjstat ${RPM_BUILD_ROOT}%{_bindir}/sjstat # Delete unpackaged files: rm -f $RPM_BUILD_ROOT/%{_libdir}/libpmi.a +rm -f $RPM_BUILD_ROOT/%{_libdir}/libpmi2.a rm -f $RPM_BUILD_ROOT/%{_libdir}/libslurm.a rm -f $RPM_BUILD_ROOT/%{_libdir}/libslurmdb.a rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/*.{a,la} @@ -475,9 +504,15 @@ rm -f $RPM_BUILD_ROOT/%{_perldir}/auto/Slurmdb/.packlist # remove these if they exist rm -f ${RPM_BUILD_ROOT}%{_mandir}/man1/srun_cr* rm -f ${RPM_BUILD_ROOT}%{_bindir}/srun_cr +rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/checkpoint_blcr.so rm -f ${RPM_BUILD_ROOT}%{_libexecdir}/slurm/cr_* %endif +%if !
%{slurm_with lua} +rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/job_submit_lua.so +rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/proctrack_lua.so +%endif + %if ! %{slurm_with sgijob} rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/proctrack_sgi_job.so %endif @@ -485,6 +520,8 @@ rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/proctrack_sgi_job.so # Build man pages that are generated directly by the tools rm -f $RPM_BUILD_ROOT/%{_mandir}/man1/sjobexitmod.1 ${RPM_BUILD_ROOT}%{_bindir}/sjobexitmod --roff > $RPM_BUILD_ROOT/%{_mandir}/man1/sjobexitmod.1 +rm -f $RPM_BUILD_ROOT/%{_mandir}/man1/sjstat.1 +${RPM_BUILD_ROOT}%{_bindir}/sjstat --roff > $RPM_BUILD_ROOT/%{_mandir}/man1/sjstat.1 # Build conditional file list for main package LIST=./slurm.files @@ -548,18 +585,30 @@ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/jobcomp_pgsql.so && echo %{_libdir}/slurm/jobcomp_pgsql.so >> $LIST LIST=./plugins.files +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/acct_gather_energy_ipmi.so && + echo %{_libdir}/slurm/acct_gather_energy_ipmi.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/acct_gather_energy_rapl.so && + echo %{_libdir}/slurm/acct_gather_energy_rapl.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/acct_gather_infiniband_ofed.so && + echo %{_libdir}/slurm/acct_gather_infiniband_ofed.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/acct_gather_profile_hdf5.so && + echo %{_libdir}/slurm/acct_gather_profile_hdf5.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/crypto_openssl.so && echo %{_libdir}/slurm/crypto_openssl.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/ext_sensors_rrd.so && + echo %{_libdir}/slurm/ext_sensors_rrd.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/launch_slurm.so && + echo %{_libdir}/slurm/launch_slurm.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/launch_aprun.so && + echo %{_libdir}/slurm/launch_aprun.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/select_bluegene.so && echo %{_libdir}/slurm/select_bluegene.so >> $LIST +test -f 
$RPM_BUILD_ROOT/%{_libdir}/slurm/slurmctld_dynalloc.so && + echo %{_libdir}/slurm/slurmctld_dynalloc.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/task_affinity.so && echo %{_libdir}/slurm/task_affinity.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/task_cgroup.so && echo %{_libdir}/slurm/task_cgroup.so >> $LIST -test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/launch_slurm.so && - echo %{_libdir}/slurm/launch_slurm.so >> $LIST -test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/launch_aprun.so && - echo %{_libdir}/slurm/launch_aprun.so >> $LIST LIST=./pam.files touch $LIST @@ -582,7 +631,7 @@ rm -rf $RPM_BUILD_ROOT %files -f slurm.files %defattr(-,root,root,0755) -%{_mandir}/../doc +%{_datadir}/doc %{_bindir}/s* %exclude %{_bindir}/sjobexitmod %exclude %{_bindir}/sjstat @@ -595,6 +644,8 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/*.so* %{_libdir}/slurm/src/* %{_mandir}/man1/* +%{_mandir}/man5/acct_gather.* +%{_mandir}/man5/ext_sensors.* %{_mandir}/man5/cgroup.* %{_mandir}/man5/cray.* %{_mandir}/man5/gres.* @@ -619,6 +670,7 @@ rm -rf $RPM_BUILD_ROOT %config %{_sysconfdir}/cgroup/release_memory %config %{_sysconfdir}/slurm.epilog.clean %exclude %{_mandir}/man1/sjobexit* +%exclude %{_mandir}/man1/sjstat* %if %{slurm_with blcr} %exclude %{_mandir}/man1/srun_cr* %exclude %{_bindir}/srun_cr @@ -627,9 +679,11 @@ rm -rf $RPM_BUILD_ROOT %files devel %defattr(-,root,root) -%dir %attr(0755,root,root) %{_prefix}/include/slurm +%dir %attr(0755,root,root) +%dir %{_prefix}/include/slurm %{_prefix}/include/slurm/* %{_libdir}/libpmi.la +%{_libdir}/libpmi2.la %{_libdir}/libslurm.la %{_libdir}/libslurmdb.la %{_mandir}/man3/slurm_* @@ -708,17 +762,22 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/slurm/accounting_storage_filetxt.so %{_libdir}/slurm/accounting_storage_none.so %{_libdir}/slurm/accounting_storage_slurmdbd.so -%{_libdir}/slurm/acct_gather_energy_ipmi.so +%{_libdir}/slurm/acct_gather_filesystem_lustre.so +%{_libdir}/slurm/acct_gather_filesystem_none.so 
+%{_libdir}/slurm/acct_gather_infiniband_none.so %{_libdir}/slurm/acct_gather_energy_none.so -%{_libdir}/slurm/acct_gather_energy_rapl.so +%{_libdir}/slurm/acct_gather_profile_none.so %{_libdir}/slurm/checkpoint_none.so %{_libdir}/slurm/checkpoint_ompi.so +%{_libdir}/slurm/ext_sensors_none.so %{_libdir}/slurm/gres_gpu.so %{_libdir}/slurm/gres_mic.so %{_libdir}/slurm/gres_nic.so +%{_libdir}/slurm/job_submit_all_partitions.so %{_libdir}/slurm/job_submit_defaults.so %{_libdir}/slurm/job_submit_logging.so %{_libdir}/slurm/job_submit_partition.so +%{_libdir}/slurm/job_submit_require_timelimit.so %{_libdir}/slurm/jobacct_gather_aix.so %{_libdir}/slurm/jobacct_gather_cgroup.so %{_libdir}/slurm/jobacct_gather_linux.so @@ -726,21 +785,22 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/slurm/jobcomp_filetxt.so %{_libdir}/slurm/jobcomp_none.so %{_libdir}/slurm/jobcomp_script.so +%if ! %{slurm_with bluegene} %{_libdir}/slurm/mpi_lam.so %{_libdir}/slurm/mpi_mpich1_p4.so %{_libdir}/slurm/mpi_mpich1_shmem.so %{_libdir}/slurm/mpi_mpichgm.so %{_libdir}/slurm/mpi_mpichmx.so %{_libdir}/slurm/mpi_mvapich.so -%{_libdir}/slurm/mpi_none.so %{_libdir}/slurm/mpi_openmpi.so %{_libdir}/slurm/mpi_pmi2.so +%endif +%{_libdir}/slurm/mpi_none.so %{_libdir}/slurm/preempt_none.so %{_libdir}/slurm/preempt_partition_prio.so %{_libdir}/slurm/preempt_qos.so %{_libdir}/slurm/priority_basic.so %{_libdir}/slurm/priority_multifactor.so -%{_libdir}/slurm/priority_multifactor2.so %{_libdir}/slurm/proctrack_cgroup.so %{_libdir}/slurm/proctrack_linuxproc.so %{_libdir}/slurm/proctrack_pgid.so @@ -764,12 +824,17 @@ rm -rf $RPM_BUILD_ROOT %files torque %defattr(-,root,root) %{_bindir}/pbsnodes +%{_bindir}/qalter %{_bindir}/qdel %{_bindir}/qhold +%{_bindir}/qrerun %{_bindir}/qrls %{_bindir}/qstat %{_bindir}/qsub %{_bindir}/mpiexec +%{_bindir}/generate_pbs_nodefile +%{_libdir}/slurm/job_submit_pbs.so +%{_libdir}/slurm/spank_pbs.so ############################################################################# %files sjobexit @@ 
-816,6 +881,7 @@ rm -rf $RPM_BUILD_ROOT %files sjstat %defattr(-,root,root) %{_bindir}/sjstat +%{_mandir}/man1/sjstat* ############################################################################# %if %{slurm_with pam} diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 43fab380c659d612ece2f0fdee3e4132c2cf94c9..2abf7ad6a3bead42bfb93d6551beea8e4a23dcf6 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -3,13 +3,14 @@ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2010 Lawrence Livermore National Security. - * Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>. + * Portions Copyright (C) 2010-2013 SchedMD <http://www.schedmd.com>. + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>, et. al. * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -372,6 +373,7 @@ enum select_nodedata_type { * freed with xfree */ SELECT_NODEDATA_RACK_MP, /* data-> char * needs to be * freed with xfree */ + SELECT_NODEDATA_MEM_ALLOC, /* data-> uint32_t */ }; enum select_print_mode { @@ -406,6 +408,24 @@ enum select_node_cnt { * base partition count */ }; +enum acct_gather_profile_info { + ACCT_GATHER_PROFILE_DIR, /* Give directory where profiling is stored */ + ACCT_GATHER_PROFILE_DEFAULT, /* What is being collected for + * profiling by default */ + ACCT_GATHER_PROFILE_RUNNING /* What is actually being collected + * whether it be user or + * default.
(Only works in the slurmstepd) + */ +}; + +#define ACCT_GATHER_PROFILE_NOT_SET 0x00000000 +#define ACCT_GATHER_PROFILE_NONE 0x00000001 +#define ACCT_GATHER_PROFILE_ENERGY 0x00000002 +#define ACCT_GATHER_PROFILE_TASK 0x00000004 +#define ACCT_GATHER_PROFILE_LUSTRE 0x00000008 +#define ACCT_GATHER_PROFILE_NETWORK 0x00000010 +#define ACCT_GATHER_PROFILE_ALL 0xffffffff + /* jobacct data types */ enum jobacct_data_type { JOBACCT_DATA_TOTAL, /* data-> jobacctinfo_t * */ @@ -425,13 +445,21 @@ enum jobacct_data_type { JOBACCT_DATA_MIN_CPU_ID, /* data-> jobacct_id_t psize */ JOBACCT_DATA_TOT_CPU, /* data-> uint32_t psize */ JOBACCT_DATA_ACT_CPUFREQ, /* data-> uint32_t psize hb*/ - JOBACCT_DATA_CONSUMED_ENERGY /* data-> uint32_t psize hb*/ + JOBACCT_DATA_CONSUMED_ENERGY, /* data-> uint32_t psize hb*/ + JOBACCT_DATA_MAX_DISK_READ, /* data->double psize */ + JOBACCT_DATA_MAX_DISK_READ_ID, /* data->jobacct_id_t psize */ + JOBACCT_DATA_TOT_DISK_READ, /* data->double psize */ + JOBACCT_DATA_MAX_DISK_WRITE, /* data->double psize */ + JOBACCT_DATA_MAX_DISK_WRITE_ID, /* data->jobacct_id_t psize */ + JOBACCT_DATA_TOT_DISK_WRITE /* data->double psize */ }; enum acct_energy_type { ENERGY_DATA_JOULES_TASK, ENERGY_DATA_STRUCT, ENERGY_DATA_RECONFIG, + ENERGY_DATA_PROFILE, + ENERGY_DATA_LAST_POLL, }; /* Possible task distributions across the nodes */ @@ -584,6 +612,8 @@ enum ctx_keys { #define CR_CORE_DEFAULT_DIST_BLOCK 0x1000 /* By default, distribute cores using a block * approach inside the nodes */ +#define CR_ALLOCATE_FULL_SOCKET 0x2000 +/* Allocate full sockets to jobs rather than individual cores */ #define MEM_PER_CPU 0x80000000 #define SHARED_FORCE 0x8000 @@ -610,10 +640,14 @@ enum ctx_keys { * value that is higher than slurmd */ #define PRIORITY_FLAGS_ACCRUE_ALWAYS 0x0001 /* Flag to always accrue age - * priority to pending jobs ignoring - * dependencies or holds */ - - + * priority to pending jobs ignoring + * dependencies or holds */ +#define PRIORITY_FLAGS_TICKET_BASED 0x0002 /* 
Enable the ticket based multifactor + * plugin. The default is usage based + * multifactor plugin. + */ +#define PRIORITY_FLAGS_SIZE_RELATIVE 0x0004 /* Enable job size measurement + * relative to its time limit */ /*****************************************************************************\ * SLURM HOSTLIST FUNCTIONS \*****************************************************************************/ @@ -916,16 +950,24 @@ typedef struct dynamic_plugin_data { } dynamic_plugin_data_t; typedef struct acct_gather_energy { - uint32_t previous_consumed_energy; uint32_t base_consumed_energy; uint32_t base_watts; /* lowest power consump of node, in watts */ uint32_t consumed_energy; /* total energy consumed by node, in joules */ uint32_t current_watts; /* current power consump of node, in watts */ + uint32_t previous_consumed_energy; + time_t poll_time; /* When information was last retrieved */ } acct_gather_energy_t; +typedef struct ext_sensors_data { + uint32_t consumed_energy; /* total energy consumed, in joules */ + uint32_t temperature; /* temperature, in celsius */ + time_t energy_update_time; /* last update time for consumed_energy */ + uint32_t current_watts; /* current power consumption, in watts */ +} ext_sensors_data_t; + typedef struct job_descriptor { /* For submit, allocate, and update requests */ char *account; /* charge to specified account */ - uint16_t acctg_freq; /* accounting polling interval (seconds) */ + char *acctg_freq; /* accounting polling intervals (seconds) */ char *alloc_node; /* node making resource allocation request * NOTE: Normally set by slurm_submit* or * slurm_allocate* function */ @@ -937,6 +979,8 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ * ALLOC_SID_* flags */ uint32_t argc; /* number of arguments to the script */ char **argv; /* arguments to the script */ + char *array_inx; /* job array index values */ + void *array_bitmap; /* NOTE: Set by slurmctld */ time_t begin_time; /* delay initiation until 
this time */ uint16_t ckpt_interval; /* periodically checkpoint this job */ char *ckpt_dir; /* directory to store checkpoint images */ @@ -986,6 +1030,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint32_t priority; /* relative priority of the job, * explicitly set only for user root, * 0 == held (don't initiate) */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *qos; /* Quality of Service */ char *resp_host; /* NOTE: Set by slurmctld */ char *req_nodes; /* comma separated list of required nodes @@ -1073,6 +1118,8 @@ typedef struct job_info { char *account; /* charge to specified account */ char *alloc_node; /* local node making resource alloc */ uint32_t alloc_sid; /* local sid making resource alloc */ + uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ + uint16_t array_task_id; /* task_id of a job array */ uint32_t assoc_id; /* association id for job */ uint16_t batch_flag; /* 1 if batch: queued job with script */ char *batch_host; /* name of host running batch script */ @@ -1126,6 +1173,7 @@ typedef struct job_info { time_t pre_sus_time; /* time job ran prior to last suspend */ uint32_t priority; /* relative priority of the job, * 0=held, 1=required nodes DOWN/DRAINED */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *qos; /* Quality of Service */ char *req_nodes; /* comma separated list of required nodes */ int *req_node_inx; /* required list index pairs into node_table: @@ -1177,7 +1225,12 @@ typedef struct job_info_msg { } job_info_msg_t; typedef struct step_update_request_msg { + time_t end_time; /* step end time */ + uint32_t exit_code; /* exit code for job (status from wait call) */ uint32_t job_id; + jobacctinfo_t *jobacct; + char *name; /* step name */ + time_t start_time; /* step start time */ uint32_t step_id; uint32_t time_limit; /* In minutes */ } step_update_request_msg_t; @@ -1282,12 +1335,14 @@ typedef struct { uint16_t immediate; /* 1 if 
allocate to run or fail immediately, * 0 if to be queued awaiting resources */ uint32_t job_id; /* job ID */ - uint32_t mem_per_cpu; /* memory required per CPU (MB), - * use job limit if 0 */ + uint32_t pn_min_memory; /* minimum real memory per node OR + * real memory per CPU | MEM_PER_CPU, + * default=0 (use job limit) */ char *ckpt_dir; /* directory to store checkpoint image files */ char *gres; /* generic resources needed */ char *name; /* name of the job step */ char *network; /* network use spec */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ uint8_t no_kill; /* 1 if no kill on node failure */ uint32_t min_nodes; /* minimum number of nodes required by job, * default=0 */ @@ -1334,6 +1389,7 @@ typedef struct { bool multi_prog; uint32_t slurmd_debug; /* remote slurmd debug level */ bool parallel_debug; + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *task_prolog; char *task_epilog; uint16_t cpu_bind_type; /* use cpu_bind_type_t */ @@ -1351,7 +1407,7 @@ typedef struct { char *mpi_plugin_name; uint8_t open_mode; - uint16_t acctg_freq; + char *acctg_freq; bool pty; char *ckpt_dir; char *restart_dir; @@ -1386,6 +1442,8 @@ typedef struct { } slurm_trigger_callbacks_t; typedef struct { + uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ + uint16_t array_task_id; /* task_id of a job array */ char *ckpt_dir; /* path to store checkpoint image files */ uint16_t ckpt_interval; /* checkpoint interval in minutes */ char *gres; /* generic resources required */ @@ -1400,6 +1458,7 @@ typedef struct { uint32_t cpu_freq; /* requested cpu frequency */ uint32_t num_tasks; /* number of tasks */ char *partition; /* name of assigned partition */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *resv_ports; /* ports allocated for MPI */ time_t run_time; /* net run time (factor out time suspended) */ dynamic_plugin_data_t *select_jobinfo; /* opaque data type, @@ -1407,6 +1466,7 @@ typedef struct { * 
slurm_get_select_jobinfo() */ time_t start_time; /* step start time */ + uint16_t state; /* state of the step, see enum job_states */ uint32_t step_id; /* step ID */ uint32_t time_limit; /* step time limit */ uint32_t user_id; /* user the job runs as */ @@ -1450,6 +1510,8 @@ typedef struct node_info { uint16_t cores; /* number of cores per socket */ uint16_t cpus; /* configured count of cpus running on * the node */ + acct_gather_energy_t *energy; /* energy data */ + ext_sensors_data_t *ext_sensors; /* external sensor data */ char *features; /* list of a node's features */ char *gres; /* list of a node's generic resources */ uint32_t cpu_load; /* CPU load * 100 */ @@ -1469,7 +1531,6 @@ typedef struct node_info { uint16_t threads; /* number of threads per core */ uint32_t tmp_disk; /* configured MB of total disk in TMP_FS */ uint32_t weight; /* arbitrary priority of node for scheduling */ - acct_gather_energy_t *energy; dynamic_plugin_data_t *select_nodeinfo; /* opaque data structure, * use * slurm_get_select_nodeinfo() @@ -1489,8 +1550,12 @@ typedef struct node_info_msg { } node_info_msg_t; typedef struct front_end_info { + char *allow_groups; /* allowed group string */ + char *allow_users; /* allowed user string */ time_t boot_time; /* Time of node boot, * computed from up_time */ + char *deny_groups; /* denied group string */ + char *deny_users; /* denied user string */ char *name; /* node name */ uint16_t node_state; /* see enum node_states */ char *reason; /* reason for node being DOWN or @@ -1530,6 +1595,10 @@ typedef struct acct_gather_node_resp_msg { acct_gather_energy_t *energy; } acct_gather_node_resp_msg_t; +typedef struct acct_gather_energy_req_msg { + uint16_t delta; +} acct_gather_energy_req_msg_t; + /* Current partition state information and used to set partition options * using slurm_update_partition(). 
*/ #define PART_FLAG_DEFAULT 0x0001 /* Set if default partition */ @@ -1552,10 +1621,12 @@ typedef struct partition_info { char *allow_groups; /* comma delimited list of groups, * null indicates all */ char *alternate; /* name of alternate partition */ + uint16_t cr_type; /* see CR_* values */ uint32_t def_mem_per_cpu; /* default MB memory per allocated CPU */ uint32_t default_time; /* minutes, NO_VAL or INFINITE */ uint16_t flags; /* see PART_FLAG_* above */ uint32_t grace_time; /* preemption grace time in seconds */ + uint32_t max_cpus_per_node; /* maximum allocated CPUs per node */ uint32_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ uint32_t max_nodes; /* per job or INFINITE */ uint16_t max_share; /* number of jobs to gang schedule */ @@ -1778,7 +1849,7 @@ typedef struct resv_desc_msg { uint16_t flags; /* see RESERVE_FLAG_* above */ char *licenses; /* names of licenses to be reserved */ char *name; /* name of reservation (optional on create) */ - uint32_t core_cnt; /* Count of cores required */ + uint32_t *core_cnt; /* Count of cores required */ uint32_t *node_cnt; /* Count of nodes required. 
Specify set of job * sizes with trailing zero to optimize layout * for those jobs just specify their total size @@ -1824,6 +1895,11 @@ typedef struct reservation_name_msg { * dies on a bluegene system */ #define DEBUG_FLAG_SWITCH 0x00020000 /* SwitchType plugin */ #define DEBUG_FLAG_ENERGY 0x00040000 /* AcctGatherEnergy plugin */ +#define DEBUG_FLAG_EXT_SENSORS 0x00080000 /* ExtSensorsType plugin */ +#define DEBUG_FLAG_THREADID 0x00100000 /* Print out the thread id */ +#define DEBUG_FLAG_PROFILE 0x00200000 /* AcctGatherProfile plugin */ +#define DEBUG_FLAG_INFINIBAND 0x00400000 /* AcctGatherInfiniband plugin */ +#define DEBUG_FLAG_FILESYSTEM 0x00800000 /* AcctGatherFilesystem plugin */ #define GROUP_FORCE 0x8000 /* if set, update group membership * info even if no updates to @@ -1844,6 +1920,12 @@ typedef struct reservation_name_msg { #define RECONFIG_KEEP_PART_INFO 0x0001 /* keep dynamic partition info on scontrol reconfig */ #define RECONFIG_KEEP_PART_STAT 0x0002 /* keep dynamic partition state on scontrol reconfig */ +#define HEALTH_CHECK_NODE_IDLE 0x0001 /* execute on idle nodes */ +#define HEALTH_CHECK_NODE_ALLOC 0x0002 /* execute on fully allocated nodes */ +#define HEALTH_CHECK_NODE_MIXED 0x0004 /* execute on partially allocated nodes */ +#define HEALTH_CHECK_NODE_ANY 0xffff /* execute on all node states */ + + typedef struct slurm_ctl_conf { time_t last_update; /* last update time of the build parameters */ uint16_t accounting_storage_enforce; /* job requires valid association: @@ -1860,6 +1942,9 @@ typedef struct slurm_ctl_conf { char *accounting_storage_user; /* accounting storage user */ uint16_t acctng_store_job_comment; /* send job comment to accounting */ char *acct_gather_energy_type; /* energy accounting type */ + char *acct_gather_profile_type; /* profile accounting type */ + char *acct_gather_infiniband_type; /* infiniband accounting type */ + char *acct_gather_filesystem_type; /* filesystem accounting type */ uint16_t acct_gather_node_freq; /* 
secs between node acct request */ char *authtype; /* authentication type */ char *backup_addr; /* comm path of slurmctld secondary server */ @@ -1876,12 +1961,15 @@ typedef struct slurm_ctl_conf { uint32_t debug_flags; /* see DEBUG_FLAG_* above for values */ uint32_t def_mem_per_cpu; /* default MB memory per allocated CPU */ uint16_t disable_root_jobs; /* if set then user root can't run jobs */ + uint16_t dynalloc_port; /* port for dynamic allocation connection */ uint16_t enforce_part_limits; /* if set, reject job exceeding * partition size and/or time limits */ char *epilog; /* pathname of job epilog */ uint32_t epilog_msg_time; /* usecs for slurmctld to process an * epilog complete message */ char *epilog_slurmctld; /* pathname of job epilog run by slurmctld */ + char *ext_sensors_type; /* external sensors plugin type */ + uint16_t ext_sensors_freq; /* secs between ext sensors sampling */ uint16_t fast_schedule; /* 1 to *not* check configurations by node * (only check configuration file, faster) */ uint32_t first_job_id; /* first slurm generated job_id to assign */ @@ -1890,10 +1978,13 @@ typedef struct slurm_ctl_conf { uint16_t group_info; /* see GROUP_* fields above */ uint32_t hash_val; /* Hash value of the slurm.conf file */ uint16_t health_check_interval; /* secs between health checks */ + uint16_t health_check_node_state; /* Node states on which to execute + * health check program, see + * HEALTH_CHECK_NODE_* above */ char * health_check_program; /* pathname of health check program */ uint16_t inactive_limit;/* seconds of inactivity before a * inactive resource allocation is released */ - uint16_t job_acct_gather_freq; /* poll frequency for job accounting + char *job_acct_gather_freq; /* poll frequency for job accounting * gather plugins */ char *job_acct_gather_type; /* job accounting gather type */ char *job_ckpt_dir; /* directory saving job record checkpoint */ @@ -1908,6 +1999,7 @@ typedef struct slurm_ctl_conf { uint16_t job_file_append; /* if set, 
append to stdout/err file */ uint16_t job_requeue; /* If set, jobs get requeued on node failre */ char *job_submit_plugins; /* List of job_submit plugins to use */ + uint16_t keep_alive_time; /* Keep alive time for srun I/O sockets */ uint16_t kill_on_bad_exit; /* If set, the job will be * terminated immediately when one of * the processes is aborted or crashed */ @@ -1917,6 +2009,7 @@ typedef struct slurm_ctl_conf { char *licenses; /* licenses available on this cluster */ char *licenses_used; /* licenses used on this cluster */ char *mail_prog; /* pathname of mail program */ + uint16_t max_array_sz; /* Maximum job array size */ uint32_t max_job_cnt; /* maximum number of active jobs */ uint32_t max_job_id; /* maximum job id before using first_job_id */ uint32_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ @@ -1933,8 +2026,7 @@ typedef struct slurm_ctl_conf { uint16_t over_time_limit; /* job's time limit can be exceeded by this * number of minutes before cancellation */ char *plugindir; /* pathname to plugins */ - char *plugstack; /* pathname to plugin stack config - * file */ + char *plugstack; /* pathname to plugin stack config file */ uint16_t preempt_mode; /* See PREEMPT_MODE_* in slurm/slurm.h */ char *preempt_type; /* job preemption selection plugin */ uint32_t priority_decay_hl; /* priority decay half life in @@ -1969,8 +2061,10 @@ typedef struct slurm_ctl_conf { uint16_t resume_rate; /* nodes to make full power, per minute */ uint16_t resume_timeout;/* time required in order to perform a node * resume operation */ + char *resv_epilog; /* path of reservation epilog run by slurmctld */ uint16_t resv_over_run; /* how long a running job can exceed * reservation time */ + char *resv_prolog; /* path of reservation prolog run by slurmctld */ uint16_t ret2service; /* 1 return DOWN node to service at * registration */ char *salloc_default_command; /* default salloc command */ @@ -1996,6 +2090,7 @@ typedef struct slurm_ctl_conf { uint16_t 
slurmctld_debug; /* slurmctld logging level */ char *slurmctld_logfile;/* where slurmctld error log gets written */ char *slurmctld_pidfile;/* where to put slurmctld pidfile */ + char *slurmctld_plugstack;/* generic slurmctld plugins */ uint32_t slurmctld_port; /* default communications port to slurmctld */ uint16_t slurmctld_port_count; /* number of slurmctld comm ports */ uint16_t slurmctld_timeout;/* seconds that backup controller waits @@ -2360,15 +2455,18 @@ extern void slurm_free_sbcast_cred_msg PARAMS((job_sbcast_cred_msg_t * msg)); * JOB/STEP SIGNALING FUNCTIONS \*****************************************************************************/ +#define KILL_JOB_BATCH 0x0001 /* signal batch shell only */ +#define KILL_JOB_ARRAY 0x0002 /* kill all elements of a job array */ + /* * slurm_kill_job - send the specified signal to all steps of an existing job * IN job_id - the job's id * IN signal - signal number - * IN batch_flag - 1 to signal batch shell only, otherwise 0 + * IN flags - see KILL_JOB_* flags above * RET 0 on success, otherwise return -1 and set errno to indicate the error */ extern int slurm_kill_job PARAMS((uint32_t job_id, uint16_t signal, - uint16_t batch_flag)); + uint16_t flags)); /* * slurm_kill_job_step - send the specified signal to an existing job step @@ -2457,6 +2555,17 @@ extern void slurm_step_ctx_params_t_init PARAMS((slurm_step_ctx_params_t *ptr)); extern slurm_step_ctx_t *slurm_step_ctx_create PARAMS( (const slurm_step_ctx_params_t *step_params)); +/* + * slurm_step_ctx_create_timeout - Create a job step and its context. + * IN step_params - job step parameters + * IN timeout - in milliseconds + * RET the step context or NULL on failure with slurm errno set + * NOTE: Free allocated memory using slurm_step_ctx_destroy. 
+ */ +extern slurm_step_ctx_t * +slurm_step_ctx_create_timeout PARAMS( + (const slurm_step_ctx_params_t *step_params, int timeout)); + /* * slurm_step_ctx_create_no_alloc - Create a job step and its context without * getting an allocation. @@ -2757,11 +2866,24 @@ extern int slurm_job_node_ready(uint32_t job_id); extern int slurm_load_job PARAMS((job_info_msg_t **resp, uint32_t job_id, uint16_t show_flags)); +/* + * slurm_load_job_user - issue RPC to get slurm information about all jobs + * to be run as the specified user + * IN/OUT job_info_msg_pptr - place to store a job configuration pointer + * IN user_id - ID of user we want information for + * IN show_flags - job filtering options + * RET 0 or -1 on error + * NOTE: free the response using slurm_free_job_info_msg + */ +extern int slurm_load_job_user PARAMS((job_info_msg_t **job_info_msg_pptr, + uint32_t user_id, + uint16_t show_flags)); + /* * slurm_load_jobs - issue RPC to get slurm all job configuration * information if changed since update_time * IN update_time - time of current configuration data - * IN job_info_msg_pptr - place to store a job configuration pointer + * IN/OUT job_info_msg_pptr - place to store a job configuration pointer * IN show_flags - job filtering options * RET 0 or -1 on error * NOTE: free the response using slurm_free_job_info_msg @@ -2827,6 +2949,15 @@ extern char *slurm_sprint_job_info PARAMS((slurm_job_info_t * job_ptr, */ extern int slurm_update_job PARAMS((job_desc_msg_t * job_msg)); +/* + * slurm_xlate_job_id - Translate a Slurm job ID string into a slurm job ID + * number. If this job ID contains an array index, map this to the + * equivalent Slurm job ID number (e.g. 
"123_2" to 124) + * + * IN job_id_str - String containing a single job ID number + * RET - equivalent job ID number or 0 on error + */ +extern uint32_t slurm_xlate_job_id PARAMS((char *job_id_str)); /*****************************************************************************\ * SLURM JOB STEP CONFIGURATION READ/PRINT/UPDATE FUNCTIONS \*****************************************************************************/ @@ -2949,14 +3080,37 @@ extern int slurm_update_step PARAMS((step_update_request_msg_t * step_msg)); * slurm_load_node - issue RPC to get slurm all node configuration information * if changed since update_time * IN update_time - time of current configuration data - * IN node_info_msg_pptr - place to store a node configuration pointer + * OUT resp - place to store a node configuration pointer * IN show_flags - node filtering options * RET 0 or a slurm error code * NOTE: free the response using slurm_free_node_info_msg */ -extern int slurm_load_node PARAMS( - (time_t update_time, node_info_msg_t **node_info_msg_pptr, - uint16_t show_flags)); +extern int slurm_load_node PARAMS((time_t update_time, node_info_msg_t **resp, + uint16_t show_flags)); + +/* + * slurm_load_node_single - issue RPC to get slurm configuration information + * for a specific node + * OUT resp - place to store a node configuration pointer + * IN node_name - name of the node for which information is requested + * IN show_flags - node filtering options + * RET 0 or a slurm error code + * NOTE: free the response using slurm_free_node_info_msg + */ +extern int slurm_load_node_single PARAMS((node_info_msg_t **resp, + char *node_name, uint16_t show_flags)); + +/* + * slurm_get_node_energy - issue RPC to get the energy data on this machine + * IN host - name of node to query, NULL if localhost + * IN delta - Use cache if data is newer than this in seconds + * OUT acct_gather_energy - acct_gather_energy_t structure on success or NULL otherwise + * RET 0 or a slurm error code + * NOTE: free the response using
slurm_acct_gather_energy_destroy + */ +extern int slurm_get_node_energy PARAMS( + (char *host, uint16_t delta, + acct_gather_energy_t **acct_gather_energy)); /* * slurm_free_node_info_msg - free the node information response message diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h index 7f8bb723ab902d3711cfc3c56396f280f158c392..bba94dc49f2c6054e7d857855a59ccd362395b83 100644 --- a/slurm/slurm_errno.h +++ b/slurm/slurm_errno.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -192,6 +192,8 @@ enum { ESLURM_CAN_NOT_START_IMMEDIATELY, ESLURM_INTERCONNECT_BUSY, ESLURM_RESERVATION_EMPTY, + ESLURM_INVALID_ARRAY, + /* switch specific error codes, specific values defined in plugin module */ ESLURM_SWITCH_MIN = 3000, ESLURM_SWITCH_MAX = 3099, @@ -257,7 +259,10 @@ enum { ESLURM_JOBS_RUNNING_ON_ASSOC, ESLURM_CLUSTER_DELETED, ESLURM_ONE_CHANGE, - ESLURM_BAD_NAME + ESLURM_BAD_NAME, + + /* plugin and custom errors */ + ESLURM_MISSING_TIME_LIMIT = 8000 }; /* look up an errno value */ diff --git a/slurm/slurmdb.h b/slurm/slurmdb.h index 9ee3560b3ec6f01096e1d2c7c5e87ebbfc947d92..43fd35d78029bbafb319a591e45508002b7754c3 100644 --- a/slurm/slurmdb.h +++ b/slurm/slurmdb.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -288,19 +288,27 @@ typedef struct { double consumed_energy; /* contains energy consumption in joules */ uint32_t cpu_min; uint32_t cpu_min_nodeid; /* contains which node number it was on */ - uint16_t cpu_min_taskid; /* contains which task number it was on */ + uint32_t cpu_min_taskid; /* contains which task number it was on */ + double disk_read_ave; /* average amount of disk read data, in mb */ + double disk_read_max; /* maximum amount of disk read data, in mb */ + uint32_t disk_read_max_nodeid; /* contains node number max was on */ + uint32_t disk_read_max_taskid;/* contains task number max was on */ + double disk_write_ave; /* average amount of disk write data, in mb */ + double disk_write_max; /* maximum amount of disk write data, in mb */ + uint32_t disk_write_max_nodeid; /* contains node number max was on */ + uint32_t disk_write_max_taskid;/* contains task number max was on */ double pages_ave; uint32_t pages_max; uint32_t pages_max_nodeid; /* contains which node number it was on */ - uint16_t pages_max_taskid; /* contains which task number it was on */ + uint32_t pages_max_taskid; /* contains which task number it was on */ double rss_ave; uint32_t rss_max; uint32_t rss_max_nodeid; /* contains which node number it was on */ - uint16_t rss_max_taskid; /* contains which task number it was on */ + uint32_t rss_max_taskid; /* contains which task number it was on */ double vsize_ave; uint32_t vsize_max; uint32_t vsize_max_nodeid; /* contains which node number it was on */ - uint16_t vsize_max_taskid; /* contains which task number it was on */ + uint32_t vsize_max_taskid; /* contains which task number it was on */ } slurmdb_stats_t; @@ -343,6 +351,10 @@ typedef struct { * by default set the * SLURMDB_PURGE_ARCHIVE bit for * archiving */ + uint32_t purge_resv; /* purge reservations older than this in months + * by default set the + * SLURMDB_PURGE_ARCHIVE bit for + * archiving */ 
uint32_t purge_step; /* purge steps older than this in months * by default set the * SLURMDB_PURGE_ARCHIVE bit for @@ -553,6 +565,7 @@ typedef struct { uint32_t priority; uint32_t qosid; uint32_t req_cpus; + uint32_t req_mem; uint32_t requid; uint32_t resvid; uint32_t show_full; diff --git a/slurm/spank.h b/slurm/spank.h index 3cafa2f4927b0791862502b6cfff4c57cdd3f010..8f80bd33a4a92ddd1b6183639de0ff82d4618ec1 100644 --- a/slurm/spank.h +++ b/slurm/spank.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/Makefile.am b/src/Makefile.am index d372dac582b0caa7de4e3b5e3ea57fc3c938ee9f..998515712922792d9bc2363fcb8fa9f06fe042ad 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -7,6 +7,3 @@ SUBDIRS = common api db_api database \ if !REAL_BG_L_P_LOADED SUBDIRS += srun endif - - - diff --git a/src/Makefile.in b/src/Makefile.in index 7b94a79948b4e954dd7cb2bfc98f77d51c7543d1..17eb3ce005e56fbe0d790f66190a970b158383a7 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -56,6 +56,7 @@ subdir = src DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -73,6 +74,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -81,11 +83,13 @@ 
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -171,6 +175,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -191,6 +197,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -200,6 +209,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -207,6 +218,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -241,6 +261,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = 
@OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -268,6 +291,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/api/Makefile.in b/src/api/Makefile.in index 3f256bb5d72ccf56b55c75264f5cde31b84ec75e..a14056465ea05001efd40225d0670100b23ac23f 100644 --- a/src/api/Makefile.in +++ b/src/api/Makefile.in @@ -61,6 +61,7 @@ subdir = src/api DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -78,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -86,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -210,6 +214,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ 
CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -231,6 +237,9 @@ EGREP = @EGREP@ # This is needed if compiling on windows EXEEXT = FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -240,6 +249,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -247,6 +258,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -281,6 +301,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -308,6 +331,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/api/allocate.c b/src/api/allocate.c index 4d3164ddccc36623bfad01de874842f9cf293a25..68bfe311a643bdb652536876ded3ae9a41295237 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -9,7 +9,7 @@ * 
CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -324,7 +324,7 @@ int slurm_job_will_run (job_desc_msg_t *req) return SLURM_PROTOCOL_ERROR; break; case RESPONSE_JOB_WILL_RUN: - if(cluster_flags & CLUSTER_FLAG_BG) + if (cluster_flags & CLUSTER_FLAG_BG) type = "cnodes"; will_run_resp = (will_run_response_msg_t *) resp_msg.data; slurm_make_time_str(&will_run_resp->start_time, @@ -562,6 +562,8 @@ char *slurm_read_hostfile(char *filename, int n) int line_num = 0; hostlist_t hostlist = NULL; char *nodelist = NULL; + char *asterisk, *tmp_text, *save_ptr = NULL, *host_name; + int total_file_len = 0; if (filename == NULL || strlen(filename) == 0) return NULL; @@ -581,6 +583,7 @@ char *slurm_read_hostfile(char *filename, int n) while (fgets(in_line, BUFFER_SIZE, fp) != NULL) { line_num++; line_size = strlen(in_line); + total_file_len += line_size; if (line_size == (BUFFER_SIZE - 1)) { error ("Line %d, of hostfile %s too long", line_num, filename); @@ -609,8 +612,22 @@ char *slurm_read_hostfile(char *filename, int n) break; } - hostlist_push(hostlist, in_line); - if (n != (int)NO_VAL && hostlist_count(hostlist) == n) + tmp_text = xstrdup(in_line); + host_name = strtok_r(tmp_text, ",", &save_ptr); + while (host_name) { + if ((asterisk = strchr(host_name, '*')) && + (i = atoi(asterisk + 1))) { + asterisk[0] = '\0'; + for (j = 0; j < i; j++) + hostlist_push(hostlist, host_name); + } else { + hostlist_push(hostlist, host_name); + } + host_name = strtok_r(NULL, ",", &save_ptr); + } + xfree(tmp_text); + + if ((n != (int)NO_VAL) && (hostlist_count(hostlist) == n)) break; } fclose(fp); @@ -624,13 +641,14 @@ char *slurm_read_hostfile(char *filename, int n) goto cleanup_hostfile; } - nodelist = (char 
*)malloc(0xffff); + total_file_len += 1024; + nodelist = (char *)malloc(total_file_len); if (!nodelist) { error("Nodelist xmalloc failed"); goto cleanup_hostfile; } - if (hostlist_ranged_string(hostlist, 0xffff, nodelist) == -1) { + if (hostlist_ranged_string(hostlist, total_file_len, nodelist) == -1) { error("Hostlist is too long for the allocate RPC!"); free(nodelist); nodelist = NULL; @@ -745,7 +763,7 @@ _accept_msg_connection(int listen_fd, msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(msg); - if((rc = slurm_receive_msg(conn_fd, msg, 0)) != 0) { + if ((rc = slurm_receive_msg(conn_fd, msg, 0)) != 0) { slurm_free_msg(msg); if (errno == EINTR) { diff --git a/src/api/allocate_msg.c b/src/api/allocate_msg.c index f01b3bb38e64ca7ed910407aea14e7b556423c06..475943a6df558d05991a92c01c4fd2967f048440 100644 --- a/src/api/allocate_msg.c +++ b/src/api/allocate_msg.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/block_info.c b/src/api/block_info.c index a622027b1fc2d8154423b1e034ffbb52b755d373..e6b414c11d7620fa87c753973f3ca64e94cf8f81 100644 --- a/src/api/block_info.c +++ b/src/api/block_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -155,14 +155,14 @@ char *slurm_sprint_block_info( tmp_char = conn_type_string_full(block_ptr->conn_type); xstrfmtcat(out, "ConnType=%s", tmp_char); xfree(tmp_char); - if(cluster_flags & CLUSTER_FLAG_BGL) + if (cluster_flags & CLUSTER_FLAG_BGL) xstrfmtcat(out, " NodeUse=%s", node_use_string(block_ptr->node_use)); xstrcat(out, line_end); /****** Line 3 ******/ - if(block_ptr->ionode_str) + if (block_ptr->ionode_str) xstrfmtcat(out, "MidPlanes=%s[%s] MPIndices=", block_ptr->mp_str, block_ptr->ionode_str); else diff --git a/src/api/cancel.c b/src/api/cancel.c index 5fc6309967cb7c7191954aecdac45f4b15dd3d85..bce2a11f81476da5a8d558fe66926b105d926cc0 100644 --- a/src/api/cancel.c +++ b/src/api/cancel.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -53,11 +53,11 @@ * slurm_kill_job - send the specified signal to all steps of an existing job * IN job_id - the job's id * IN signal - signal number - * IN batch_flag - 1 to signal batch shell only, otherwise 0 + * IN flags - see KILL_JOB_* flags above * RET 0 on success, otherwise return -1 and set errno to indicate the error */ extern int -slurm_kill_job (uint32_t job_id, uint16_t signal, uint16_t batch_flag) +slurm_kill_job (uint32_t job_id, uint16_t signal, uint16_t flags) { int rc; slurm_msg_t msg; @@ -70,7 +70,7 @@ slurm_kill_job (uint32_t job_id, uint16_t signal, uint16_t batch_flag) req.job_id = job_id; req.job_step_id = NO_VAL; req.signal = signal; - req.batch_flag = (uint16_t) batch_flag; + req.flags = flags; msg.msg_type = REQUEST_CANCEL_JOB_STEP; msg.data = &req; @@ -105,7 +105,7 @@ slurm_kill_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal) req.job_id = job_id; req.job_step_id = step_id; req.signal = signal; - req.batch_flag = false; + req.flags = 0; msg.msg_type = REQUEST_CANCEL_JOB_STEP; msg.data = &req; diff --git a/src/api/checkpoint.c b/src/api/checkpoint.c index 33c783b27da0ad0559b7bf764da4283929aa691e..9ac0e9c7edac467551dbdc293d51dcb50f8ba396 100644 --- a/src/api/checkpoint.c +++ b/src/api/checkpoint.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/complete.c b/src/api/complete.c index c124884ca0070978fdabe30f445185563ccd928a..4d59b5f6cd5fe9aa6bd3c8b4fd024a0d2573be53 100644 --- a/src/api/complete.c +++ b/src/api/complete.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/config_info.c b/src/api/config_info.c index 6729d66a4087f74967e55dea71e8f2b30a751ac8..f2744f4cd42539d523012b6a903eabccea23cc1b 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -105,11 +105,11 @@ void slurm_print_ctl_conf ( FILE* out, char *select_title = ""; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); - if(cluster_flags & CLUSTER_FLAG_BGL) + if (cluster_flags & CLUSTER_FLAG_BGL) select_title = "\nBluegene/L configuration\n"; - else if(cluster_flags & CLUSTER_FLAG_BGP) + else if (cluster_flags & CLUSTER_FLAG_BGP) select_title = "\nBluegene/P configuration\n"; - else if(cluster_flags & CLUSTER_FLAG_BGQ) + else if (cluster_flags & CLUSTER_FLAG_BGQ) select_title = "\nBluegene/Q configuration\n"; if ( slurm_ctl_conf_ptr == NULL ) @@ -121,7 +121,7 @@ void slurm_print_ctl_conf ( FILE* out, time_str); ret_list = slurm_ctl_conf_2_key_pairs(slurm_ctl_conf_ptr); - if(ret_list) { + if (ret_list) { slurm_print_key_pairs(out, ret_list, tmp_str); list_destroy((List)ret_list); @@ -197,6 +197,18 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_energy_type); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("AcctGatherFilesystemType"); + 
key_pair->value = + xstrdup(slurm_ctl_conf_ptr->acct_gather_filesystem_type); + list_append(ret_list, key_pair); + + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("AcctGatherInfinibandType"); + key_pair->value = + xstrdup(slurm_ctl_conf_ptr->acct_gather_infiniband_type); + list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->acct_gather_node_freq); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -204,6 +216,11 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("AcctGatherProfileType"); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_profile_type); + list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AuthType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->authtype); @@ -299,15 +316,22 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair = xmalloc(sizeof(config_key_pair_t)); list_append(ret_list, key_pair); key_pair->name = xstrdup("DisableRootJobs"); - if(slurm_ctl_conf_ptr->disable_root_jobs) + if (slurm_ctl_conf_ptr->disable_root_jobs) key_pair->value = xstrdup("YES"); else key_pair->value = xstrdup("NO"); + snprintf(tmp_str, sizeof(tmp_str), "%u", + slurm_ctl_conf_ptr->dynalloc_port); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("DynAllocPort"); + key_pair->value = xstrdup(tmp_str); + list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); list_append(ret_list, key_pair); key_pair->name = xstrdup("EnforcePartLimits"); - if(slurm_ctl_conf_ptr->enforce_part_limits) + if (slurm_ctl_conf_ptr->enforce_part_limits) key_pair->value = xstrdup("YES"); else key_pair->value = xstrdup("NO"); @@ -329,6 +353,18 @@ extern void *slurm_ctl_conf_2_key_pairs 
(slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->epilog_slurmctld); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("ExtSensorsType"); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->ext_sensors_type); + list_append(ret_list, key_pair); + + snprintf(tmp_str, sizeof(tmp_str), "%u sec", + slurm_ctl_conf_ptr->ext_sensors_freq); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("ExtSensorsFreq"); + key_pair->value = xstrdup(tmp_str); + list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->fast_schedule); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -393,12 +429,18 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("HealthCheckNodeState"); + key_pair->value = health_check_node_state_str(slurm_ctl_conf_ptr-> + health_check_node_state); + list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HealthCheckProgram"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->health_check_program); list_append(ret_list, key_pair); - if(cluster_flags & CLUSTER_FLAG_XCPU) { + if (cluster_flags & CLUSTER_FLAG_XCPU) { key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HAVE_XCPU"); key_pair->value = xstrdup("1"); @@ -412,11 +454,9 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); - snprintf(tmp_str, sizeof(tmp_str), "%u sec", - slurm_ctl_conf_ptr->job_acct_gather_freq); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobAcctGatherFrequency"); - key_pair->value = xstrdup(tmp_str); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_freq); 
list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -487,6 +527,17 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_submit_plugins); list_append(ret_list, key_pair); + if (slurm_ctl_conf_ptr->keep_alive_time == (uint16_t) NO_VAL) + snprintf(tmp_str, sizeof(tmp_str), "SYSTEM_DEFAULT"); + else { + snprintf(tmp_str, sizeof(tmp_str), "%u sec", + slurm_ctl_conf_ptr->keep_alive_time); + } + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("KeepAliveTime"); + key_pair->value = xstrdup(tmp_str); + list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->kill_on_bad_exit); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -521,6 +572,13 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->mail_prog); list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u", + slurm_ctl_conf_ptr->max_array_sz); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("MaxArraySize"); + key_pair->value = xstrdup(tmp_str); + list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_job_cnt); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -591,7 +649,7 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->mpi_params); list_append(ret_list, key_pair); - if(cluster_flags & CLUSTER_FLAG_MULTSD) { + if (cluster_flags & CLUSTER_FLAG_MULTSD) { key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MULTIPLE_SLURMD"); key_pair->value = xstrdup("1"); @@ -773,7 +831,8 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ReconfigFlags"); - key_pair->value = 
reconfig_flags2str(slurm_ctl_conf_ptr->reconfig_flags); + key_pair->value = + reconfig_flags2str(slurm_ctl_conf_ptr->reconfig_flags); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -795,6 +854,11 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("ResvEpilog"); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->resv_epilog); + list_append(ret_list, key_pair); + if (slurm_ctl_conf_ptr->resv_over_run == (uint16_t) INFINITE) snprintf(tmp_str, sizeof(tmp_str), "UNLIMITED"); else @@ -805,6 +869,11 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("ResvProlog"); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->resv_prolog); + list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->ret2service); key_pair = xmalloc(sizeof(config_key_pair_t)); @@ -966,6 +1035,11 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmctld_pidfile); list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("SlurmctldPlugstack"); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmctld_plugstack); + list_append(ret_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SLURM_CONF"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurm_conf); @@ -1180,8 +1254,8 @@ slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr) slurm_msg_t_init(&req_msg); slurm_msg_t_init(&resp_msg); - if(cluster_flags & CLUSTER_FLAG_MULTSD) { - if((this_addr = getenv("SLURMD_NODENAME"))) { + if (cluster_flags & CLUSTER_FLAG_MULTSD) { 
+ if ((this_addr = getenv("SLURMD_NODENAME"))) { slurm_conf_get_addr(this_addr, &req_msg.address); } else { this_addr = "localhost"; diff --git a/src/api/front_end_info.c b/src/api/front_end_info.c index dfa21d7190845e583209b9db04f1908e570dde9f..9fa9c61686a4511de1db5899b98c1d286ac2550e 100644 --- a/src/api/front_end_info.c +++ b/src/api/front_end_info.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -164,6 +164,32 @@ slurm_sprint_front_end_table (front_end_info_t * front_end_ptr, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "SlurmdStartTime=%s", time_str); xstrcat(out, tmp_line); + + /****** Line 3 (optional) ******/ + if (front_end_ptr->allow_groups || front_end_ptr->allow_users || + front_end_ptr->deny_groups || front_end_ptr->deny_users) { + if (one_liner) + xstrcat(out, " "); + else + xstrcat(out, "\n "); + if (front_end_ptr->allow_groups) { + xstrfmtcat(out, "AllowGroups=%s ", + front_end_ptr->allow_groups); + } + if (front_end_ptr->allow_users) { + xstrfmtcat(out, "AllowUsers=%s ", + front_end_ptr->allow_users); + } + if (front_end_ptr->deny_groups) { + xstrfmtcat(out, "DenyGroups=%s ", + front_end_ptr->deny_groups); + } + if (front_end_ptr->deny_users) { + xstrfmtcat(out, "DenyUsers=%s ", + front_end_ptr->deny_users); + } + } + if (one_liner) xstrcat(out, "\n"); else diff --git a/src/api/init_msg.c b/src/api/init_msg.c index d104580be6b3966a73f29812168b6afbb91a2a21..7f718ae631072db603a88854626da0270d7f0487 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -58,7 +58,6 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) { memset(job_desc_msg, 0, sizeof(job_desc_msg_t)); - job_desc_msg->acctg_freq = (uint16_t) NO_VAL; job_desc_msg->alloc_sid = NO_VAL; job_desc_msg->conn_type[0] = (uint16_t) NO_VAL; job_desc_msg->contiguous = (uint16_t) NO_VAL; @@ -124,6 +123,7 @@ void slurm_init_part_desc_msg (update_part_msg_t * update_part_msg) update_part_msg->default_time = (uint32_t) NO_VAL; update_part_msg->def_mem_per_cpu = (uint32_t) NO_VAL; update_part_msg->grace_time = (uint32_t) NO_VAL; + update_part_msg->max_cpus_per_node = NO_VAL; update_part_msg->max_mem_per_cpu = (uint32_t) NO_VAL; update_part_msg->max_nodes = NO_VAL; update_part_msg->max_share = (uint16_t) NO_VAL; diff --git a/src/api/job_info.c b/src/api/job_info.c index 6bf5761eccd6b18a7ff7923595791655c48646cb..c647592df74e6f70623fcab263282c0e354853cc 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -63,6 +63,44 @@ #include "src/common/uid.h" #include "src/common/xstring.h" +/* + * slurm_xlate_job_id - Translate a Slurm job ID string into a slurm job ID + * number. If this job ID contains an array index, map this to the + * equivalent Slurm job ID number (e.g. 
"123_2" to 124) + * + * IN job_id_str - String containing a single job ID number + * RET - equivalent job ID number or 0 on error + */ +extern uint32_t slurm_xlate_job_id(char *job_id_str) +{ + char *next_str; + uint32_t i, job_id; + uint16_t array_id; + job_info_msg_t *resp; + slurm_job_info_t *job_ptr; + + job_id = (uint32_t) strtol(job_id_str, &next_str, 10); + if (next_str[0] == '\0') + return job_id; + if (next_str[0] != '_') + return (uint32_t) 0; + array_id = (uint16_t) strtol(next_str + 1, &next_str, 10); + if (next_str[0] != '\0') + return (uint32_t) 0; + if (slurm_load_job(&resp, job_id, SHOW_ALL) != 0) + return (uint32_t) 0; + job_id = 0; + for (i = 0, job_ptr = resp->job_array; i < resp->record_count; + i++, job_ptr++) { + if (job_ptr->array_task_id == array_id) { + job_id = job_ptr->job_id; + break; + } + } + slurm_free_job_info_msg(resp); + return job_id; +} + /* * slurm_print_job_info_msg - output information about all Slurm * jobs based upon message as loaded using slurm_load_jobs @@ -92,14 +130,14 @@ static void _sprint_range(char *str, uint32_t str_size, char tmp[128]; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)lower, tmp, sizeof(tmp), UNIT_NONE); } else { snprintf(tmp, sizeof(tmp), "%u", lower); } if (upper > 0) { char tmp2[128]; - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)upper, tmp2, sizeof(tmp2), UNIT_NONE); } else { @@ -168,9 +206,16 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) } /****** Line 1 ******/ - snprintf(tmp_line, sizeof(tmp_line), - "JobId=%u Name=%s", job_ptr->job_id, job_ptr->name); + snprintf(tmp_line, sizeof(tmp_line), "JobId=%u ", job_ptr->job_id); out = xstrdup(tmp_line); + if (job_ptr->array_job_id) { + snprintf(tmp_line, sizeof(tmp_line), + "ArrayJobId=%u ArrayTaskId=%u ", + job_ptr->array_job_id, job_ptr->array_task_id); + 
xstrcat(out, tmp_line); + } + snprintf(tmp_line, sizeof(tmp_line), "Name=%s", job_ptr->name); + xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else @@ -195,7 +240,7 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) "Priority=%u Account=%s QOS=%s", job_ptr->priority, job_ptr->account, job_ptr->qos); xstrcat(out, tmp_line); - if(slurm_get_track_wckey()) { + if (slurm_get_track_wckey()) { snprintf(tmp_line, sizeof(tmp_line), " WCKey=%s", job_ptr->wckey); xstrcat(out, tmp_line); @@ -402,7 +447,7 @@ line6: /****** Line 13 ******/ xstrfmtcat(out, "%s=", nodelist); xstrcat(out, job_ptr->nodes); - if(job_ptr->nodes && ionodes) { + if (job_ptr->nodes && ionodes) { snprintf(tmp_line, sizeof(tmp_line), "[%s]", ionodes); xstrcat(out, tmp_line); xfree(ionodes); @@ -431,10 +476,22 @@ line6: if ((min_nodes == 0) || (min_nodes == NO_VAL)) { min_nodes = job_ptr->num_nodes; max_nodes = job_ptr->max_nodes; - } else if(job_ptr->max_nodes) + } else if (job_ptr->max_nodes) max_nodes = min_nodes; } else { min_nodes = job_ptr->num_nodes; + if ((min_nodes == 1) && (job_ptr->num_cpus > 1) + && job_ptr->ntasks_per_node + && (job_ptr->ntasks_per_node != (uint16_t) NO_VAL)) { + int num_tasks = job_ptr->num_cpus; + if (job_ptr->cpus_per_task != (uint16_t) NO_VAL) + num_tasks /= job_ptr->cpus_per_task; + min_nodes = (num_tasks + 1) / job_ptr->ntasks_per_node; + if (min_nodes > num_tasks) + min_nodes = num_tasks; + else if (!min_nodes) + min_nodes = 1; + } max_nodes = job_ptr->max_nodes; } @@ -464,7 +521,7 @@ line6: if (!job_resrcs) goto line15; - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { if ((job_resrcs->cpu_array_cnt > 0) && (job_resrcs->cpu_array_value) && (job_resrcs->cpu_array_reps)) { @@ -511,10 +568,10 @@ line6: if (last == -1) goto line15; - hl = hostlist_create(job_ptr->nodes); + hl = hostlist_create(job_resrcs->nodes); if (!hl) { error("slurm_sprint_job_info: hostlist_create: %s", - job_ptr->nodes); + job_resrcs->nodes); 
return NULL; } hl_last = hostlist_create(NULL); @@ -544,13 +601,6 @@ line6: job_resrcs->cores_per_socket[sock_inx]; core_bitmap = bit_alloc(bit_reps); - if (core_bitmap == NULL) { - error("bit_alloc malloc failure"); - hostlist_destroy(hl_last); - hostlist_destroy(hl); - return NULL; - } - for (j=0; j < bit_reps; j++) { if (bit_test(job_resrcs->core_bitmap, bit_inx)) bit_set(core_bitmap, j); @@ -635,7 +685,7 @@ line15: } else tmp6_ptr = "Node"; - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)job_ptr->pn_min_cpus, tmp1, sizeof(tmp1), UNIT_NONE); snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%s", tmp1); @@ -694,7 +744,7 @@ line15: job_ptr->work_dir); xstrcat(out, tmp_line); - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { /****** Line 20 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), @@ -721,7 +771,7 @@ line15: xstrcat(out, select_buf); } - if(cluster_flags & CLUSTER_FLAG_BGL) { + if (cluster_flags & CLUSTER_FLAG_BGL) { /****** Line 22 (optional) ******/ select_g_select_jobinfo_sprint( job_ptr->select_jobinfo, @@ -746,7 +796,7 @@ line15: xstrcat(out, " "); else xstrcat(out, "\n "); - if(cluster_flags & CLUSTER_FLAG_BGL) + if (cluster_flags & CLUSTER_FLAG_BGL) snprintf(tmp_line, sizeof(tmp_line), "LinuxImage=%s", select_buf); else @@ -777,7 +827,7 @@ line15: xstrcat(out, " "); else xstrcat(out, "\n "); - if(cluster_flags & CLUSTER_FLAG_BGL) + if (cluster_flags & CLUSTER_FLAG_BGL) snprintf(tmp_line, sizeof(tmp_line), "RamDiskImage=%s", select_buf); else @@ -836,13 +886,13 @@ line15: * slurm_load_jobs - issue RPC to get all job configuration * information if changed since update_time * IN update_time - time of current configuration data - * IN job_info_msg_pptr - place to store a job configuration pointer + * IN/OUT job_info_msg_pptr - place to store a job configuration pointer * IN show_flags - job filtering option: 0, 
SHOW_ALL or SHOW_DETAIL * RET 0 or -1 on error * NOTE: free the response using slurm_free_job_info_msg */ extern int -slurm_load_jobs (time_t update_time, job_info_msg_t **resp, +slurm_load_jobs (time_t update_time, job_info_msg_t **job_info_msg_pptr, uint16_t show_flags) { int rc; @@ -854,7 +904,7 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp, slurm_msg_t_init(&resp_msg); req.last_update = update_time; - req.show_flags = show_flags; + req.show_flags = show_flags; req_msg.msg_type = REQUEST_JOB_INFO; req_msg.data = &req; @@ -863,7 +913,7 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp, switch (resp_msg.msg_type) { case RESPONSE_JOB_INFO: - *resp = (job_info_msg_t *)resp_msg.data; + *job_info_msg_pptr = (job_info_msg_t *)resp_msg.data; break; case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; @@ -876,7 +926,54 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp, break; } - return SLURM_PROTOCOL_SUCCESS ; + return SLURM_PROTOCOL_SUCCESS; +} + +/* + * slurm_load_job_user - issue RPC to get slurm information about all jobs + * to be run as the specified user + * IN/OUT job_info_msg_pptr - place to store a job configuration pointer + * IN user_id - ID of user we want information for + * IN show_flags - job filtering options + * RET 0 or -1 on error + * NOTE: free the response using slurm_free_job_info_msg + */ +extern int slurm_load_job_user (job_info_msg_t **job_info_msg_pptr, + uint32_t user_id, + uint16_t show_flags) +{ + int rc; + slurm_msg_t resp_msg; + slurm_msg_t req_msg; + job_user_id_msg_t req; + + slurm_msg_t_init(&req_msg); + slurm_msg_t_init(&resp_msg); + + req.show_flags = show_flags; + req.user_id = user_id; + req_msg.msg_type = REQUEST_JOB_USER_INFO; + req_msg.data = &req; + + if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) + return SLURM_ERROR; + + switch (resp_msg.msg_type) { + case RESPONSE_JOB_INFO: + *job_info_msg_pptr = (job_info_msg_t *)resp_msg.data; + break; + case 
RESPONSE_SLURM_RC: + rc = ((return_code_msg_t *) resp_msg.data)->return_code; + slurm_free_return_code_msg(resp_msg.data); + if (rc) + slurm_seterrno_ret(rc); + break; + default: + slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR); + break; + } + + return SLURM_PROTOCOL_SUCCESS; } /* @@ -944,8 +1041,8 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid) slurm_msg_t_init(&req_msg); slurm_msg_t_init(&resp_msg); - if(cluster_flags & CLUSTER_FLAG_MULTSD) { - if((this_addr = getenv("SLURMD_NODENAME"))) { + if (cluster_flags & CLUSTER_FLAG_MULTSD) { + if ((this_addr = getenv("SLURMD_NODENAME"))) { slurm_conf_get_addr(this_addr, &req_msg.address); } else { this_addr = "localhost"; @@ -974,13 +1071,13 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid) rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); - if(rc != 0 || !resp_msg.auth_cred) { + if (rc != 0 || !resp_msg.auth_cred) { error("slurm_pid2jobid: %m"); - if(resp_msg.auth_cred) + if (resp_msg.auth_cred) g_slurm_auth_destroy(resp_msg.auth_cred); return SLURM_ERROR; } - if(resp_msg.auth_cred) + if (resp_msg.auth_cred) g_slurm_auth_destroy(resp_msg.auth_cred); switch (resp_msg.msg_type) { case RESPONSE_JOB_ID: @@ -1009,7 +1106,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid) extern long slurm_get_rem_time(uint32_t jobid) { time_t now = time(NULL); - time_t end_time; + time_t end_time = 0; long rc; if (slurm_get_end_time(jobid, &end_time) != SLURM_SUCCESS) @@ -1025,7 +1122,7 @@ extern long slurm_get_rem_time(uint32_t jobid) extern int32_t islurm_get_rem_time__(uint32_t *jobid) { time_t now = time(NULL); - time_t end_time; + time_t end_time = 0; int32_t rc; if ((jobid == NULL) @@ -1186,7 +1283,7 @@ extern int slurm_job_cpus_allocated_on_node_id( for (i = 0; i < job_resrcs_ptr->cpu_array_cnt; i++) { start_node += job_resrcs_ptr->cpu_array_reps[i]; - if(start_node >= node_id) + if (start_node >= node_id) break; } diff --git a/src/api/job_step_info.c b/src/api/job_step_info.c index 
c1dd5de218a98bb2cd65880330db3fccae135954..1344f9c9cbd941536ff98854cff5ece14d5b5570 100644 --- a/src/api/job_step_info.c +++ b/src/api/job_step_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -66,7 +66,7 @@ static int _sort_pids_by_name(job_step_pids_t *rec_a, job_step_pids_t *rec_b) { int diff = 0; - if(!rec_a->node_name || !rec_b->node_name) + if (!rec_a->node_name || !rec_b->node_name) return 0; diff = strcmp(rec_a->node_name, rec_b->node_name); @@ -80,7 +80,7 @@ static int _sort_pids_by_name(job_step_pids_t *rec_a, job_step_pids_t *rec_b) static int _sort_stats_by_name(job_step_stat_t *rec_a, job_step_stat_t *rec_b) { - if(!rec_a->step_pids || !rec_b->step_pids) + if (!rec_a->step_pids || !rec_b->step_pids) return 0; return _sort_pids_by_name(rec_a->step_pids, rec_b->step_pids); @@ -157,17 +157,30 @@ slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr, else secs2time_str ((time_t)job_step_ptr->time_limit * 60, limit_str, sizeof(limit_str)); + if (job_step_ptr->array_job_id) { + snprintf(tmp_line, sizeof(tmp_line), "StepId=%u_%u.%u ", + job_step_ptr->array_job_id, + job_step_ptr->array_task_id, job_step_ptr->step_id); + out = xstrdup(tmp_line); + } else { + snprintf(tmp_line, sizeof(tmp_line), "StepId=%u.%u ", + job_step_ptr->job_id, job_step_ptr->step_id); + out = xstrdup(tmp_line); + } snprintf(tmp_line, sizeof(tmp_line), - "StepId=%u.%u UserId=%u StartTime=%s TimeLimit=%s", - job_step_ptr->job_id, job_step_ptr->step_id, - job_step_ptr->user_id, time_str, limit_str); - out = xstrdup(tmp_line); + "UserId=%u StartTime=%s TimeLimit=%s", + job_step_ptr->user_id, time_str, limit_str); + xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else 
xstrcat(out, "\n "); /****** Line 2 ******/ + snprintf(tmp_line, sizeof(tmp_line), + "State=%s ", + job_state_string(job_step_ptr->state)); + xstrcat(out, tmp_line); if (cluster_flags & CLUSTER_FLAG_BG) { char *io_nodes; select_g_select_jobinfo_get(job_step_ptr->select_jobinfo, @@ -199,7 +212,6 @@ slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr, xstrcat(out, "\n "); /****** Line 3 ******/ - if (cluster_flags & CLUSTER_FLAG_BGQ) { uint32_t nodes = 0; select_g_select_jobinfo_get(job_step_ptr->select_jobinfo, @@ -235,10 +247,10 @@ slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr, xstrcat(out, "\n "); /****** Line 5 ******/ - if (job_step_ptr->cpu_freq == NO_VAL) + if (job_step_ptr->cpu_freq == NO_VAL) { snprintf(tmp_line, sizeof(tmp_line), "CPUFreqReq=Default\n\n"); - else if (job_step_ptr->cpu_freq & CPU_FREQ_RANGE_FLAG) { + } else if (job_step_ptr->cpu_freq & CPU_FREQ_RANGE_FLAG) { switch (job_step_ptr->cpu_freq) { case CPU_FREQ_LOW : @@ -257,10 +269,10 @@ slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr, snprintf(tmp_line, sizeof(tmp_line), "CPUFreqReq=Unknown\n\n"); } - } - else + } else { snprintf(tmp_line, sizeof(tmp_line), "CPUFreqReq=%u\n\n", job_step_ptr->cpu_freq); + } xstrcat(out, tmp_line); return out; @@ -494,8 +506,8 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id, xassert(resp); - if(!node_list) { - if(!(step_layout = + if (!node_list) { + if (!(step_layout = slurm_job_step_layout_get(job_id, step_id))) { rc = errno; error("slurm_job_step_get_pids: " @@ -506,7 +518,7 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id, node_list = step_layout->node_list; } - if(!*resp) { + if (!*resp) { resp_out = xmalloc(sizeof(job_step_pids_response_msg_t)); *resp = resp_out; created = 1; @@ -526,11 +538,11 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id, req_msg.msg_type = REQUEST_JOB_STEP_PIDS; req_msg.data = &req; - if(!(ret_list = slurm_send_recv_msgs(node_list, + 
if (!(ret_list = slurm_send_recv_msgs(node_list, &req_msg, 0, false))) { error("slurm_job_step_get_pids: got an error no list returned"); rc = SLURM_ERROR; - if(created) { + if (created) { slurm_job_step_pids_response_msg_free(resp_out); *resp = NULL; } @@ -541,7 +553,7 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id, while((ret_data_info = list_next(itr))) { switch (ret_data_info->type) { case RESPONSE_JOB_STEP_PIDS: - if(!resp_out->pid_list) + if (!resp_out->pid_list) resp_out->pid_list = list_create( slurm_free_job_step_pids); list_push(resp_out->pid_list, @@ -568,7 +580,7 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id, list_iterator_destroy(itr); list_destroy(ret_list); - if(resp_out->pid_list) + if (resp_out->pid_list) list_sort(resp_out->pid_list, (ListCmpF)_sort_pids_by_name); cleanup: slurm_step_layout_destroy(step_layout); @@ -590,8 +602,8 @@ extern void slurm_job_step_pids_response_msg_free(void *object) { job_step_pids_response_msg_t *step_pids_msg = (job_step_pids_response_msg_t *) object; - if(step_pids_msg) { - if(step_pids_msg->pid_list) + if (step_pids_msg) { + if (step_pids_msg->pid_list) list_destroy(step_pids_msg->pid_list); xfree(step_pids_msg); } @@ -606,8 +618,8 @@ extern void slurm_job_step_stat_response_msg_free(void *object) { job_step_stat_response_msg_t *step_stat_msg = (job_step_stat_response_msg_t *) object; - if(step_stat_msg) { - if(step_stat_msg->stats_list) + if (step_stat_msg) { + if (step_stat_msg->stats_list) list_destroy(step_stat_msg->stats_list); xfree(step_stat_msg); } diff --git a/src/api/node_info.c b/src/api/node_info.c index 47082ff782d2ca51cc760d29f668c0031bfd1035..06ec301896183392c84f39ed0d27421d4d685d19 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -58,8 +58,10 @@ #include "slurm/slurm.h" #include "src/common/parse_time.h" -#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_auth.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_ext_sensors.h" +#include "src/common/slurm_protocol_api.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -131,6 +133,7 @@ slurm_sprint_node_table (node_info_t * node_ptr, int cpus_per_node = 1; int total_used = node_ptr->cpus; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); + uint32_t alloc_memory; if (node_scaling) cpus_per_node = node_ptr->cpus / node_scaling; @@ -147,6 +150,10 @@ slurm_sprint_node_table (node_info_t * node_ptr, my_state &= (~NODE_STATE_DRAIN); drain_str = "+DRAIN"; } + if (my_state & NODE_STATE_FAIL) { + my_state &= (~NODE_STATE_FAIL); + drain_str = "+FAIL"; + } if (my_state & NODE_STATE_POWER_SAVE) { my_state &= (~NODE_STATE_POWER_SAVE); power_str = "+POWER"; @@ -235,7 +242,7 @@ slurm_sprint_node_table (node_info_t * node_ptr, snprintf(tmp_line, sizeof(tmp_line), "NodeAddr=%s NodeHostName=%s", node_ptr->node_addr, node_ptr->node_hostname); - xstrcat(out, tmp_line); + xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else @@ -247,9 +254,14 @@ slurm_sprint_node_table (node_info_t * node_ptr, snprintf(tmp_line, sizeof(tmp_line), "OS=%s ", node_ptr->os); xstrcat(out, tmp_line); } + slurm_get_select_nodeinfo(node_ptr->select_nodeinfo, + SELECT_NODEDATA_MEM_ALLOC, + NODE_STATE_ALLOCATED, + &alloc_memory); snprintf(tmp_line, sizeof(tmp_line), - "RealMemory=%u Sockets=%u Boards=%u", - node_ptr->real_memory, node_ptr->sockets, node_ptr->boards); + "RealMemory=%u AllocMem=%u Sockets=%u Boards=%u", + node_ptr->real_memory, alloc_memory, + node_ptr->sockets, node_ptr->boards); xstrcat(out, 
tmp_line); if (one_liner) xstrcat(out, " "); @@ -301,7 +313,31 @@ slurm_sprint_node_table (node_info_t * node_ptr, "LowestJoules=%u ConsumedJoules=%u", node_ptr->energy->current_watts, node_ptr->energy->base_watts, - node_ptr->energy->consumed_energy); + node_ptr->energy->consumed_energy); + xstrcat(out, tmp_line); + if (one_liner) + xstrcat(out, " "); + else + xstrcat(out, "\n "); + + /****** external sensors Line ******/ + if (node_ptr->ext_sensors->consumed_energy == NO_VAL) + snprintf(tmp_line, sizeof(tmp_line), "ExtSensorsJoules=n/s "); + else + snprintf(tmp_line, sizeof(tmp_line), "ExtSensorsJoules=%u ", + node_ptr->ext_sensors->consumed_energy); + xstrcat(out, tmp_line); + if (node_ptr->ext_sensors->current_watts == NO_VAL) + snprintf(tmp_line, sizeof(tmp_line), "ExtSensorsWatts=n/s "); + else + snprintf(tmp_line, sizeof(tmp_line), "ExtSensorsWatts=%u ", + node_ptr->ext_sensors->current_watts); + xstrcat(out, tmp_line); + if (node_ptr->ext_sensors->temperature == NO_VAL) + snprintf(tmp_line, sizeof(tmp_line), "ExtSensorsTemp=n/s"); + else + snprintf(tmp_line, sizeof(tmp_line), "ExtSensorsTemp=%u", + node_ptr->ext_sensors->temperature); xstrcat(out, tmp_line); if (one_liner) @@ -362,7 +398,7 @@ slurm_sprint_node_table (node_info_t * node_ptr, * slurm_load_node - issue RPC to get slurm all node configuration information * if changed since update_time * IN update_time - time of current configuration data - * IN node_info_msg_pptr - place to store a node configuration pointer + * OUT resp - place to store a node configuration pointer * IN show_flags - node filtering options * RET 0 or a slurm error code * NOTE: free the response using slurm_free_node_info_msg @@ -403,3 +439,131 @@ extern int slurm_load_node (time_t update_time, return SLURM_PROTOCOL_SUCCESS; } + +/* + * slurm_load_node_single - issue RPC to get slurm configuration information + * for a specific node + * OUT resp - place to store a node configuration pointer + * IN node_name - name of the node 
for which information is requested + * IN show_flags - node filtering options + * RET 0 or a slurm error code + * NOTE: free the response using slurm_free_node_info_msg + */ +extern int slurm_load_node_single (node_info_msg_t **resp, + char *node_name, uint16_t show_flags) +{ + int rc; + slurm_msg_t req_msg; + slurm_msg_t resp_msg; + node_info_single_msg_t req; + + slurm_msg_t_init(&req_msg); + slurm_msg_t_init(&resp_msg); + req.node_name = node_name; + req.show_flags = show_flags; + req_msg.msg_type = REQUEST_NODE_INFO_SINGLE; + req_msg.data = &req; + + if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) + return SLURM_ERROR; + + switch (resp_msg.msg_type) { + case RESPONSE_NODE_INFO: + *resp = (node_info_msg_t *) resp_msg.data; + break; + case RESPONSE_SLURM_RC: + rc = ((return_code_msg_t *) resp_msg.data)->return_code; + slurm_free_return_code_msg(resp_msg.data); + if (rc) + slurm_seterrno_ret(rc); + *resp = NULL; + break; + default: + slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR); + break; + } + + return SLURM_PROTOCOL_SUCCESS; +} + +/* + * slurm_node_energy - issue RPC to get the energy data on this machine + * IN host - name of node to query, NULL if localhost + * IN delta - Use cache if data is newer than this in seconds + * OUT acct_gather_energy_t structure on success or NULL other wise + * RET 0 or a slurm error code + * NOTE: free the response using slurm_acct_gather_energy_destroy + */ +extern int slurm_get_node_energy(char *host, uint16_t delta, + acct_gather_energy_t **acct_gather_energy) +{ + int rc; + slurm_msg_t req_msg; + slurm_msg_t resp_msg; + acct_gather_energy_req_msg_t req; + uint32_t cluster_flags = slurmdb_setup_cluster_flags(); + char *this_addr; + + slurm_msg_t_init(&req_msg); + slurm_msg_t_init(&resp_msg); + + if (host) + slurm_conf_get_addr(host, &req_msg.address); + else if (cluster_flags & CLUSTER_FLAG_MULTSD) { + if ((this_addr = getenv("SLURMD_NODENAME"))) { + slurm_conf_get_addr(this_addr, &req_msg.address); + } else { + 
this_addr = "localhost"; + slurm_set_addr(&req_msg.address, + (uint16_t)slurm_get_slurmd_port(), + this_addr); + } + } else { + char this_host[256]; + /* + * Set request message address to slurmd on localhost + */ + gethostname_short(this_host, sizeof(this_host)); + this_addr = slurm_conf_get_nodeaddr(this_host); + if (this_addr == NULL) + this_addr = xstrdup("localhost"); + slurm_set_addr(&req_msg.address, + (uint16_t)slurm_get_slurmd_port(), + this_addr); + xfree(this_addr); + } + + req.delta = delta; + req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY; + req_msg.data = &req; + + rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); + + if (rc != 0 || !resp_msg.auth_cred) { + error("slurm_get_node_energy: %m"); + if (resp_msg.auth_cred) + g_slurm_auth_destroy(resp_msg.auth_cred); + return SLURM_ERROR; + } + if (resp_msg.auth_cred) + g_slurm_auth_destroy(resp_msg.auth_cred); + switch (resp_msg.msg_type) { + case RESPONSE_ACCT_GATHER_ENERGY: + *acct_gather_energy = ((acct_gather_node_resp_msg_t *) + resp_msg.data)->energy; + ((acct_gather_node_resp_msg_t *) resp_msg.data)->energy = NULL; + slurm_free_acct_gather_node_resp_msg(resp_msg.data); + break; + case RESPONSE_SLURM_RC: + rc = ((return_code_msg_t *) resp_msg.data)->return_code; + slurm_free_return_code_msg(resp_msg.data); + if (rc) + slurm_seterrno_ret(rc); + break; + default: + slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR); + break; + } + + return SLURM_PROTOCOL_SUCCESS; +} diff --git a/src/api/partition_info.c b/src/api/partition_info.c index c1a920caea01a001c7fb794e0316d5143d66ea0a..2acdb9a40bbce4b585edd3ddd3db109a7ccaa7cd 100644 --- a/src/api/partition_info.c +++ b/src/api/partition_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -157,7 +157,7 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, /****** Line added here for BG partitions to keep with alphabetized output******/ - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { snprintf(tmp_line, sizeof(tmp_line), "BasePartitions=%s", part_ptr->nodes); xstrcat(out, tmp_line); @@ -202,7 +202,7 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, if (part_ptr->max_nodes == INFINITE) sprintf(tmp_line, "MaxNodes=UNLIMITED"); else { - if(cluster_flags & CLUSTER_FLAG_BG) + if (cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)part_ptr->max_nodes, tmp1, sizeof(tmp1), UNIT_NONE); else @@ -220,14 +220,20 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, sprintf(tmp_line, " MaxTime=%s", time_line); } xstrcat(out, tmp_line); - if(cluster_flags & CLUSTER_FLAG_BG) + if (cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)part_ptr->min_nodes, tmp1, sizeof(tmp1), UNIT_NONE); else snprintf(tmp1, sizeof(tmp1), "%u", part_ptr->min_nodes); - sprintf(tmp_line, " MinNodes=%s", tmp1); xstrcat(out, tmp_line); + if (part_ptr->max_cpus_per_node == INFINITE) + sprintf(tmp_line, " MaxCPUsPerNode=UNLIMITED"); + else { + sprintf(tmp_line, " MaxCPUsPerNode=%u", + part_ptr->max_cpus_per_node); + } + xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); @@ -237,7 +243,7 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, /****** Line added here for non BG nodes to keep with alphabetized output******/ - if(!(cluster_flags & CLUSTER_FLAG_BG)) { + if (!(cluster_flags & CLUSTER_FLAG_BG)) { snprintf(tmp_line, sizeof(tmp_line), "Nodes=%s", part_ptr->nodes); xstrcat(out, tmp_line); @@ -301,7 +307,7 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, xstrcat(out, tmp_line); - if(cluster_flags & CLUSTER_FLAG_BG) + if (cluster_flags & CLUSTER_FLAG_BG) 
convert_num_unit((float)part_ptr->total_cpus, tmp1, sizeof(tmp1), UNIT_NONE); else @@ -309,7 +315,8 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, sprintf(tmp_line, " TotalCPUs=%s", tmp1); xstrcat(out, tmp_line); - if(cluster_flags & CLUSTER_FLAG_BG) + + if (cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)part_ptr->total_nodes, tmp2, sizeof(tmp2), UNIT_NONE); else @@ -318,15 +325,28 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, sprintf(tmp_line, " TotalNodes=%s", tmp2); xstrcat(out, tmp_line); + if (part_ptr->cr_type & CR_CORE) + sprintf(tmp_line, " SelectTypeParameters=CR_CORE"); + else if (part_ptr->cr_type & CR_SOCKET) + sprintf(tmp_line, " SelectTypeParameters=CR_SOCKET"); + else + sprintf(tmp_line, " SelectTypeParameters=N/A"); + xstrcat(out, tmp_line); + if (one_liner) + xstrcat(out, " "); + else + xstrcat(out, "\n "); + + /****** Line 8 ******/ if (part_ptr->def_mem_per_cpu & MEM_PER_CPU) { - snprintf(tmp_line, sizeof(tmp_line), " DefMemPerCPU=%u", + snprintf(tmp_line, sizeof(tmp_line), "DefMemPerCPU=%u", part_ptr->def_mem_per_cpu & (~MEM_PER_CPU)); xstrcat(out, tmp_line); } else if (part_ptr->def_mem_per_cpu == 0) { - xstrcat(out, " DefMemPerNode=UNLIMITED"); + xstrcat(out, "DefMemPerNode=UNLIMITED"); } else { - snprintf(tmp_line, sizeof(tmp_line), " DefMemPerNode=%u", + snprintf(tmp_line, sizeof(tmp_line), "DefMemPerNode=%u", part_ptr->def_mem_per_cpu); xstrcat(out, tmp_line); } diff --git a/src/api/pmi.c b/src/api/pmi.c index d1b26b9d028262262d9dba01db96eedfaea846ed..f2278387d8728fb11a4c0d4106fedc3cad4f0f3e 100644 --- a/src/api/pmi.c +++ b/src/api/pmi.c @@ -53,7 +53,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -119,6 +119,20 @@ struct kvs_rec { #define _DEBUG 0 +#ifdef WITH_PMI_NOMEM_ERROR_FUNC +# undef pmi_nomem_error + extern void pmi_nomem_error(char *file, int line, char *mesg); +#else /* !WITH_PMI_NOMEM_ERROR_FUNC */ +# ifndef pmi_nomem_error + static void pmi_nomem_error(char *file, int line, char *mesg) + { + fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", + file, line, mesg, strerror(errno)); + abort(); + } +# endif /* !pmi_nomem_error */ +#endif /* !WITH_PMI_NOMEM_ERROR_FUNC */ + static void _del_kvs_rec( struct kvs_rec *kvs_ptr ); static void _init_kvs( char kvsname[] ); inline static void _kvs_dump(void); @@ -867,10 +881,13 @@ static void _init_kvs( char kvsname[] ) i = kvs_rec_cnt; kvs_rec_cnt++; kvs_recs = realloc(kvs_recs, (sizeof(struct kvs_rec) * kvs_rec_cnt)); + if (!kvs_recs) + pmi_nomem_error(__FILE__, __LINE__, "_init_kvs"); /* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */ kvs_recs[i].kvs_name = malloc(PMI_MAX_KVSNAME_LEN); - if (kvs_recs[i].kvs_name) - strncpy(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN); + if (!kvs_recs[i].kvs_name) + pmi_nomem_error(__FILE__, __LINE__, "_init_kvs"); + strncpy(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN); kvs_recs[i].kvs_state = KVS_STATE_LOCAL; kvs_recs[i].kvs_cnt = 0; kvs_recs[i].kvs_inx = 0; @@ -1120,9 +1137,10 @@ static int _kvs_put( const char kvsname[], const char key[], const char value[], /* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */ if (kvs_recs[i].kvs_values[j] == NULL) kvs_recs[i].kvs_values[j] = malloc(PMI_MAX_VAL_LEN); - if (kvs_recs[i].kvs_values[j] == NULL) - rc = PMI_FAIL; /* malloc error */ - else { + if (kvs_recs[i].kvs_values[j] == NULL) { + pmi_nomem_error(__FILE__, __LINE__, "_kvs_put"); + rc = PMI_FAIL; + } else { rc = PMI_SUCCESS; strncpy(kvs_recs[i].kvs_values[j], value, PMI_MAX_VAL_LEN); @@ -1138,10 +1156,11 @@ no_dup: (sizeof (char *) * kvs_recs[i].kvs_cnt)); kvs_recs[i].kvs_keys = 
realloc(kvs_recs[i].kvs_keys, (sizeof (char *) * kvs_recs[i].kvs_cnt)); - if ((kvs_recs[i].kvs_key_states == NULL) - || (kvs_recs[i].kvs_values == NULL) - || (kvs_recs[i].kvs_keys == NULL)) { - rc = PMI_FAIL; /* malloc error */ + if ((kvs_recs[i].kvs_key_states == NULL) || + (kvs_recs[i].kvs_values == NULL) || + (kvs_recs[i].kvs_keys == NULL)) { + pmi_nomem_error(__FILE__, __LINE__, "_kvs_put"); + rc = PMI_FAIL; goto fini; } if (local) @@ -1151,10 +1170,11 @@ no_dup: /* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */ kvs_recs[i].kvs_values[j] = malloc(PMI_MAX_VAL_LEN); kvs_recs[i].kvs_keys[j] = malloc(PMI_MAX_KEY_LEN); - if ((kvs_recs[i].kvs_values[j] == NULL) - || (kvs_recs[i].kvs_keys[j] == NULL)) - rc = PMI_FAIL; /* malloc error */ - else { + if ((kvs_recs[i].kvs_values[j] == NULL) || + (kvs_recs[i].kvs_keys[j] == NULL)) { + pmi_nomem_error(__FILE__, __LINE__, "_kvs_put"); + rc = PMI_FAIL; + } else { rc = PMI_SUCCESS; strncpy(kvs_recs[i].kvs_values[j], value, PMI_MAX_VAL_LEN); @@ -1210,6 +1230,8 @@ int PMI_KVS_Commit( const char kvsname[] ) * rather than the full set. 
*/ kvs_set.host_cnt = 1; kvs_set.kvs_host_ptr = malloc(sizeof(struct kvs_hosts)); + if (!kvs_set.kvs_host_ptr) + pmi_nomem_error(__FILE__, __LINE__, "PMI_KVS_Commit"); kvs_set.kvs_host_ptr->task_id = pmi_rank; kvs_set.kvs_host_ptr->port = 0; kvs_set.kvs_host_ptr->hostname = NULL; @@ -1235,8 +1257,12 @@ int PMI_KVS_Commit( const char kvsname[] ) kvs_set.kvs_comm_ptr = realloc(kvs_set.kvs_comm_ptr, (sizeof(struct kvs_comm *) * (kvs_set.kvs_comm_recs+1))); + if (!kvs_set.kvs_comm_ptr) + pmi_nomem_error(__FILE__, __LINE__, "PMI_KVS_Commit"); kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs] = malloc(sizeof(struct kvs_comm)); + if (!kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]) + pmi_nomem_error(__FILE__, __LINE__, "PMI_KVS_Commit"); kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]->kvs_name = kvs_recs[i].kvs_name; kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]->kvs_cnt = @@ -1617,8 +1643,10 @@ int PMI_Parse_option(int num_args, char *args[], int *num_parsed, cp = args[0]; temp = (PMI_keyval_t *) malloc(num_args * (sizeof (PMI_keyval_t))); - if (temp == NULL) + if (temp == NULL) { + pmi_nomem_error(__FILE__, __LINE__, "PMI_Parse_option"); return PMI_FAIL; + } cp = args[0]; while (i < num_args) { @@ -1635,6 +1663,7 @@ int PMI_Parse_option(int num_args, char *args[], int *num_parsed, len = cp - kp; temp[s].key = (char *) malloc((len+1) * sizeof (char)); if (temp[s].key == NULL) { + pmi_nomem_error(__FILE__, __LINE__, "PMI_Parse_option"); temp[s].val = NULL; PMI_Free_keyvals(temp, s); return PMI_FAIL; @@ -1652,6 +1681,7 @@ int PMI_Parse_option(int num_args, char *args[], int *num_parsed, len = cp - vp + 1; temp[s].val = (char *) malloc((len+1) * sizeof (char)); if (temp[s].val == NULL) { + pmi_nomem_error(__FILE__, __LINE__, "PMI_Parse_option"); PMI_Free_keyvals(temp, s+1); return PMI_FAIL; } @@ -1728,8 +1758,10 @@ int PMI_Args_to_keyval(int *argcp, char *((*argvp)[]), PMI_keyval_t **keyvalp, return PMI_ERR_INVALID_ARG; temp = (PMI_keyval_t *) malloc(cnt * (sizeof (PMI_keyval_t))); - if 
(temp == NULL) + if (temp == NULL) { + pmi_nomem_error(__FILE__, __LINE__, "PMI_Args_to_keyval"); return PMI_FAIL; + } j = 0; i = 0; @@ -1737,6 +1769,8 @@ int PMI_Args_to_keyval(int *argcp, char *((*argvp)[]), PMI_keyval_t **keyvalp, if (argv[i][0] != '-') { temp[j].val = (char *) malloc((strlen(argv[i])+1) * sizeof (char)); if (temp[j].val == NULL) { + pmi_nomem_error(__FILE__, __LINE__, + "PMI_Args_to_keyval"); temp[j].key = NULL; PMI_Free_keyvals(temp, j); return PMI_FAIL; @@ -1753,6 +1787,8 @@ int PMI_Args_to_keyval(int *argcp, char *((*argvp)[]), PMI_keyval_t **keyvalp, temp[j].key = (char *) malloc((strlen(argv[i])+1) * sizeof (char)); if (temp[j].key == NULL) { + pmi_nomem_error(__FILE__, __LINE__, + "PMI_Args_to_keyval"); temp[j].val = NULL; PMI_Free_keyvals(temp, j); return PMI_FAIL; @@ -1765,6 +1801,8 @@ int PMI_Args_to_keyval(int *argcp, char *((*argvp)[]), PMI_keyval_t **keyvalp, (strlen(argv[i])+1) * sizeof (char)); if (temp[j].val == NULL) { + pmi_nomem_error(__FILE__, __LINE__, + "PMI_Args_to_keyval"); PMI_Free_keyvals(temp, j+1); return PMI_FAIL; } diff --git a/src/api/pmi_server.c b/src/api/pmi_server.c index d4998b7ba2023434a327f20c8d700a9bf05c26c4..d135ad91c278f3667b4f27cd9e2818ce0e83ee3f 100644 --- a/src/api/pmi_server.c +++ b/src/api/pmi_server.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -150,7 +150,7 @@ static void *_msg_thread(void *x) slurm_msg_t_init(&msg_send); - debug2("KVS_Barrier msg to %s:%u", + debug2("KVS_Barrier msg to %s:%hu", msg_arg_ptr->bar_ptr->hostname, msg_arg_ptr->bar_ptr->port); msg_send.msg_type = PMI_KVS_GET_RESP; @@ -161,7 +161,9 @@ static void *_msg_thread(void *x) timeout = slurm_get_msg_timeout() * 10000; if (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { - error("slurm_send_recv_rc_msg_only_one: %m"); + error("slurm_send_recv_rc_msg_only_one to %s:%hu : %m", + msg_arg_ptr->bar_ptr->hostname, + msg_arg_ptr->bar_ptr->port); } else if (rc != SLURM_SUCCESS) { error("KVS_Barrier confirm from %s, rc=%d", msg_arg_ptr->bar_ptr->hostname, rc); @@ -463,7 +465,7 @@ extern int pmi_kvs_get(kvs_get_msg_t *kvs_get_ptr) #endif #if _DEBUG - info("pmi_kvs_get: rank:%u size:%u port:%u, host:%s", + info("pmi_kvs_get: rank:%u size:%u port:%hu, host:%s", kvs_get_ptr->task_id, kvs_get_ptr->size, kvs_get_ptr->port, kvs_get_ptr->hostname); #endif diff --git a/src/api/pmi_server.h b/src/api/pmi_server.h index ce4cca0645c245ba2d3afd8abe704cd3be3955a0..075b4df9756c0284a3d85429e65cb0b780493e22 100644 --- a/src/api/pmi_server.h +++ b/src/api/pmi_server.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c index 2fd0e027c4efc64a0562c497b8641f6b6f68c75b..cdbdf405e54fd4cadb8ec7f863bca053aef63910 100644 --- a/src/api/reconfigure.c +++ b/src/api/reconfigure.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -170,7 +170,7 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req) resp_msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(resp_msg); - if((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) { + if ((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) { slurm_shutdown_msg_conn(fd); return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR; } diff --git a/src/api/reservation_info.c b/src/api/reservation_info.c index 12df91caf1bd603a89d3045cfc5e0808fcdbf26b..1d84f33214a8c8d40fbc445f6b79fef1f7d2e577 100644 --- a/src/api/reservation_info.c +++ b/src/api/reservation_info.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/signal.c b/src/api/signal.c index 4d214fef883f37e660b98a7a7f8e8ab2825f1d6b..97e17475118f9d67cd72776a75e4bdeeaa84f306 100644 --- a/src/api/signal.c +++ b/src/api/signal.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/slurm_get_statistics.c b/src/api/slurm_get_statistics.c index 11391550e8f84d0cd47b7d7b3894d3e397e4c0d7..14a383c92ac0a4aff49e25c272424d37ae679fd8 100644 --- a/src/api/slurm_get_statistics.c +++ b/src/api/slurm_get_statistics.c @@ -5,7 +5,7 @@ * Written by Alejandro Lucero <alucero@bsc.es> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/slurm_hostlist.c b/src/api/slurm_hostlist.c index 94d9a642d65d0865dcc0e5fd9f760306200350d0..de8f16e221a887380f4a34e3b6ffe185393aa449 100644 --- a/src/api/slurm_hostlist.c +++ b/src/api/slurm_hostlist.c @@ -8,7 +8,7 @@ * LLNL-CODE-402394. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/slurm_pmi.c b/src/api/slurm_pmi.c index 6dab963710b8452243fb7680664ece480579c5e6..74744c53d1494a6f9cb371523734ec67658ca3a0 100644 --- a/src/api/slurm_pmi.c +++ b/src/api/slurm_pmi.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -307,7 +307,7 @@ int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr, slurm_close_accepted_conn(srun_fd); return errno; } - if(msg_rcv.auth_cred) + if (msg_rcv.auth_cred) (void)g_slurm_auth_destroy(msg_rcv.auth_cred); if (msg_rcv.msg_type != PMI_KVS_GET_RESP) { diff --git a/src/api/slurm_pmi.h b/src/api/slurm_pmi.h index 083b1721473f3f6d34d82103e647c741deec4574..ac940c32b559fdce5aca78ad834739d644c6c437 100644 --- a/src/api/slurm_pmi.h +++ b/src/api/slurm_pmi.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c index f7e038a3f290b50bbf77c7d5da7b58a11c6eea66..0bf06890ed68b6a654458b08f2b49cb313598d3c 100644 --- a/src/api/step_ctx.c +++ b/src/api/step_ctx.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -42,6 +42,7 @@ #endif #include <errno.h> +#include <poll.h> #include <pthread.h> #include <stdarg.h> #include <stdlib.h> @@ -63,10 +64,24 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" #include "src/common/xmalloc.h" +#include "src/common/xsignal.h" #include "src/common/xstring.h" -#include "src/common/slurm_cred.h" #include "src/api/step_ctx.h" +int step_signals[] = { + SIGINT, SIGQUIT, SIGCONT, SIGTERM, SIGHUP, + SIGALRM, SIGUSR1, SIGUSR2, SIGPIPE, 0 }; +static int destroy_step = 0; + +static void _signal_while_allocating(int signo) +{ + debug("Got signal %d", signo); + if (signo == SIGCONT) + return; + + destroy_step = 1; +} + static void _job_fake_cred(struct slurm_step_ctx_struct *ctx) { @@ -126,7 +141,7 @@ static job_step_create_request_msg_t *_create_step_request( step_req->name = xstrdup(step_params->name); step_req->no_kill = step_params->no_kill; step_req->overcommit = step_params->overcommit ? 1 : 0; - step_req->mem_per_cpu = step_params->mem_per_cpu; + step_req->pn_min_memory = step_params->pn_min_memory; step_req->time_limit = step_params->time_limit; return step_req; @@ -148,8 +163,7 @@ slurm_step_ctx_create (const slurm_step_ctx_params_t *step_params) short port = 0; int errnum = 0; - /* First copy the user's step_params into a step request - * struct */ + /* First copy the user's step_params into a step request struct */ step_req = _create_step_request(step_params); /* We will handle the messages in the step_launch.c mesage handler, @@ -189,6 +203,90 @@ fail: return (slurm_step_ctx_t *)ctx; } +/* + * slurm_step_ctx_create - Create a job step and its context. + * IN step_params - job step parameters + * IN timeout - in milliseconds + * RET the step context or NULL on failure with slurm errno set + * NOTE: Free allocated memory using slurm_step_ctx_destroy. 
+ */ +extern slurm_step_ctx_t * +slurm_step_ctx_create_timeout (const slurm_step_ctx_params_t *step_params, + int timeout) +{ + struct slurm_step_ctx_struct *ctx = NULL; + job_step_create_request_msg_t *step_req = NULL; + job_step_create_response_msg_t *step_resp = NULL; + int i, rc, time_left = timeout; + int sock = -1; + short port = 0; + int errnum = 0; + + /* First copy the user's step_params into a step request struct */ + step_req = _create_step_request(step_params); + + /* We will handle the messages in the step_launch.c mesage handler, + * but we need to open the socket right now so we can tell the + * controller which port to use. + */ + if (net_stream_listen(&sock, &port) < 0) { + errnum = errno; + error("unable to initialize step context socket: %m"); + slurm_free_job_step_create_request_msg(step_req); + goto fail; + } + step_req->port = port; + step_req->host = xshort_hostname(); + + rc = slurm_job_step_create(step_req, &step_resp); + if ((rc < 0) && + ((errno == ESLURM_NODES_BUSY) || + (errno == ESLURM_PORTS_BUSY) || + (errno == ESLURM_INTERCONNECT_BUSY))) { + struct pollfd fds; + fds.fd = sock; + fds.events = POLLIN; + xsignal_unblock(step_signals); + for (i = 0; step_signals[i]; i++) + xsignal(step_signals[i], _signal_while_allocating); + while ((rc = poll(&fds, 1, time_left)) <= 0) { + if (destroy_step) + break; + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + break; + } + xsignal_block(step_signals); + if (destroy_step) { + info("Cancelled pending job step"); + errno = ESLURM_ALREADY_DONE; + } else + rc = slurm_job_step_create(step_req, &step_resp); + } + + if ((rc < 0) || (step_resp == NULL)) { + errnum = errno; + slurm_free_job_step_create_request_msg(step_req); + close(sock); + goto fail; + } + + ctx = xmalloc(sizeof(struct slurm_step_ctx_struct)); + ctx->launch_state = NULL; + ctx->magic = STEP_CTX_MAGIC; + ctx->job_id = step_req->job_id; + ctx->user_id = step_req->user_id; + ctx->step_req = step_req; + ctx->step_resp = step_resp; + 
ctx->verbose_level = step_params->verbose_level; + + ctx->launch_state = step_launch_state_create(ctx); + ctx->launch_state->slurmctld_socket_fd = sock; +fail: + errno = errnum; + return (slurm_step_ctx_t *)ctx; +} + /* * slurm_step_ctx_create_no_alloc - Create a job step and its context without * getting an allocation. diff --git a/src/api/step_ctx.h b/src/api/step_ctx.h index f80bd3c08b60d64badba6bf0cf81c3c269e3e8af..33f1cc48f436947b3ca14e15a64befa9717f3546 100644 --- a/src/api/step_ctx.h +++ b/src/api/step_ctx.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/step_io.c b/src/api/step_io.c index 4cd0247c1ee405e0694484da938ef990dfe8c41d..d01949b79fe32acdfbe7cbedc3d553caca89cc99 100644 --- a/src/api/step_io.c +++ b/src/api/step_io.c @@ -1,6 +1,5 @@ /****************************************************************************\ * step_io.c - process stdin, stdout, and stderr for parallel jobs. - * $Id$ ***************************************************************************** * Copyright (C) 2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -8,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -1373,10 +1372,3 @@ done: pthread_mutex_unlock(&cio->ioservers_lock); return rc; } - - - - - - - diff --git a/src/api/step_io.h b/src/api/step_io.h index 5a9023ab51a74dfa254197c77b96b641e2e0dab0..876533843be1d3eeb1d80a791aa466903ced372c 100644 --- a/src/api/step_io.h +++ b/src/api/step_io.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 1fd335766140014600059760fa7e360aa968967f..9e24597e01de0df558ef9a58747a55b3b91c4218 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -137,7 +137,6 @@ void slurm_step_launch_params_t_init (slurm_step_launch_params_t *ptr) ptr->buffered_stdio = true; memcpy(&ptr->local_fds, &fds, sizeof(fds)); ptr->gid = getgid(); - ptr->acctg_freq = (uint16_t) NO_VAL; ptr->cpu_freq = NO_VAL; } @@ -247,6 +246,7 @@ int slurm_step_launch (slurm_step_ctx_t *ctx, launch.ntasks = ctx->step_resp->step_layout->task_cnt; launch.slurmd_debug = params->slurmd_debug; launch.switch_job = ctx->step_resp->switch_job; + launch.profile = params->profile; launch.task_prolog = params->task_prolog; launch.task_epilog = params->task_epilog; launch.cpu_bind_type = params->cpu_bind_type; @@ -420,6 +420,7 @@ int slurm_step_launch_add (slurm_step_ctx_t *ctx, launch.ntasks = ctx->step_resp->step_layout->task_cnt; launch.slurmd_debug = params->slurmd_debug; launch.switch_job = ctx->step_resp->switch_job; + launch.profile = params->profile; launch.task_prolog = params->task_prolog; launch.task_epilog = params->task_epilog; launch.cpu_bind_type = params->cpu_bind_type; @@ -1009,7 +1010,7 @@ static int _msg_thr_create(struct step_launch_state *sls, int num_nodes) /* multiple jobs (easily induced via no_alloc) and highly * parallel jobs using PMI sometimes result in slow message * responses and timeouts. Raise the default timeout for srun. */ - if(!message_socket_ops.timeout) + if (!message_socket_ops.timeout) message_socket_ops.timeout = slurm_get_msg_timeout() * 8000; for (i = 0; i < sls->num_resp_port; i++) { diff --git a/src/api/step_launch.h b/src/api/step_launch.h index c5123e9f4c8ca5b606b056f871d5ac0626ccf977..277cc7c012f4ce6d1643da256032ad0dbb93d6eb 100644 --- a/src/api/step_launch.h +++ b/src/api/step_launch.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/submit.c b/src/api/submit.c index ed38d5da99ebaf9545734440054f9944837e9949..cb61fcb96745f262b1e0fe44989b819192d14356 100644 --- a/src/api/submit.c +++ b/src/api/submit.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/suspend.c b/src/api/suspend.c index 5624fcacc19879e1ecd11a63a41473cb6649cf8f..342eecc3c8c670639142479bcb9ce05e6fa3ce2f 100644 --- a/src/api/suspend.c +++ b/src/api/suspend.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/topo_info.c b/src/api/topo_info.c index ec8ab0edb26ed3aedb92101101ea2c7846e9b79a..c4bc55e20fff5bcc068c18254c7be53eefaad584 100644 --- a/src/api/topo_info.c +++ b/src/api/topo_info.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/triggers.c b/src/api/triggers.c index b255c230c009203b6dda04c189c014ae63e6958c..c97e906c8d45bf9b24f1f53cdb8e700d625c60e2 100644 --- a/src/api/triggers.c +++ b/src/api/triggers.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/api/update_config.c b/src/api/update_config.c index 31ebc32634704a1dccbdb450ce2fecc760eb34ef..c12d760e9640ba750f74c972dec084ee7287c4f9 100644 --- a/src/api/update_config.c +++ b/src/api/update_config.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/Makefile.am b/src/common/Makefile.am index d6aca3f32f6737115672b7d90e978ba90b25e12c..1ff1fe6d8b1d5b0ae3b760a07ad9096d52de4295 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -8,7 +8,6 @@ # only be 1 address per symbol. If you link to the libcommon.la in # a plugin you will get 2 addresses for one symbol which could lead to problems. 
-AM_CPPFLAGS = -DGRES_CONFIG_FILE=\"$(sysconfdir)/gres.conf\" AUTOMAKE_OPTIONS = foreign if HAVE_UNSETENV @@ -45,6 +44,8 @@ libcommon_la_SOURCES = \ forward.c forward.h \ strlcpy.c strlcpy.h \ list.c list.h \ + xtree.c xtree.h \ + xhash.c xhash.h \ net.c net.h \ log.c log.h \ cbuf.c cbuf.h \ @@ -64,6 +65,7 @@ libcommon_la_SOURCES = \ slurm_cred.h \ slurm_cred.c \ slurm_errno.c \ + slurm_ext_sensors.c slurm_ext_sensors.h \ slurm_priority.c \ slurm_priority.h \ slurm_protocol_api.c \ @@ -87,9 +89,13 @@ libcommon_la_SOURCES = \ uid.c uid.h \ util-net.c util-net.h \ slurm_auth.c slurm_auth.h \ + slurm_acct_gather.c slurm_acct_gather.h \ slurm_accounting_storage.c slurm_accounting_storage.h \ slurm_jobacct_gather.c slurm_jobacct_gather.h \ slurm_acct_gather_energy.c slurm_acct_gather_energy.h \ + slurm_acct_gather_profile.c slurm_acct_gather_profile.h \ + slurm_acct_gather_infiniband.c slurm_acct_gather_infiniband.h \ + slurm_acct_gather_filesystem.c slurm_acct_gather_filesystem.h \ slurm_jobcomp.c slurm_jobcomp.h \ slurm_topology.c slurm_topology.h \ switch.c switch.h \ @@ -119,8 +125,11 @@ libcommon_la_SOURCES = \ node_conf.h node_conf.c \ gres.h gres.c -EXTRA_libcommon_la_SOURCES = \ - $(extra_unsetenv_src) +EXTRA_libcommon_la_SOURCES = \ + $(extra_unsetenv_src) \ + uthash/LICENSE \ + uthash/README \ + uthash/uthash.h libdaemonize_la_SOURCES = \ daemonize.c \ diff --git a/src/common/Makefile.in b/src/common/Makefile.in index 21b6c158dd5827dc36da2132f8838950d7149601..00a8bf06aca60a97be5a4a65133d636af0067287 100644 --- a/src/common/Makefile.in +++ b/src/common/Makefile.in @@ -69,6 +69,7 @@ subdir = src/common DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -86,6 +87,7 @@ am__aclocal_m4_deps = 
$(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -94,11 +96,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -120,14 +124,15 @@ am__libcommon_la_SOURCES_DIST = xcgroup_read_config.c \ xcpuinfo.h cpu_frequency.c cpu_frequency.h assoc_mgr.c \ assoc_mgr.h xmalloc.c xmalloc.h xassert.c xassert.h xstring.c \ xstring.h xsignal.c xsignal.h strnatcmp.c strnatcmp.h \ - forward.c forward.h strlcpy.c strlcpy.h list.c list.h net.c \ - net.h log.c log.h cbuf.c cbuf.h safeopen.c safeopen.h \ - bitstring.c bitstring.h mpi.c mpi.h pack.c pack.h \ - parse_config.c parse_config.h parse_spec.c parse_spec.h \ - plugin.c plugin.h plugrack.c plugrack.h print_fields.c \ - print_fields.h read_config.c read_config.h node_select.c \ - node_select.h env.c env.h fd.c fd.h slurm_cred.h slurm_cred.c \ - slurm_errno.c slurm_priority.c slurm_priority.h \ + forward.c forward.h strlcpy.c strlcpy.h list.c list.h xtree.c \ + xtree.h xhash.c xhash.h net.c net.h log.c log.h cbuf.c cbuf.h \ + safeopen.c safeopen.h bitstring.c bitstring.h mpi.c mpi.h \ + pack.c pack.h parse_config.c parse_config.h parse_spec.c \ + parse_spec.h plugin.c plugin.h plugrack.c plugrack.h \ + print_fields.c print_fields.h read_config.c read_config.h \ 
+ node_select.c node_select.h env.c env.h fd.c fd.h slurm_cred.h \ + slurm_cred.c slurm_errno.c slurm_ext_sensors.c \ + slurm_ext_sensors.h slurm_priority.c slurm_priority.h \ slurm_protocol_api.c slurm_protocol_api.h \ slurm_protocol_pack.c slurm_protocol_pack.h \ slurm_protocol_util.c slurm_protocol_util.h \ @@ -138,14 +143,17 @@ am__libcommon_la_SOURCES_DIST = xcgroup_read_config.c \ slurm_rlimits_info.c slurmdb_defs.c slurmdb_defs.h \ slurmdb_pack.c slurmdb_pack.h slurmdbd_defs.c slurmdbd_defs.h \ working_cluster.c working_cluster.h uid.c uid.h util-net.c \ - util-net.h slurm_auth.c slurm_auth.h \ - slurm_accounting_storage.c slurm_accounting_storage.h \ - slurm_jobacct_gather.c slurm_jobacct_gather.h \ - slurm_acct_gather_energy.c slurm_acct_gather_energy.h \ - slurm_jobcomp.c slurm_jobcomp.h slurm_topology.c \ - slurm_topology.h switch.c switch.h arg_desc.c arg_desc.h \ - macros.h malloc.c malloc.h getopt.h getopt.c getopt1.c \ - unsetenv.c unsetenv.h slurm_selecttype_info.c \ + util-net.h slurm_auth.c slurm_auth.h slurm_acct_gather.c \ + slurm_acct_gather.h slurm_accounting_storage.c \ + slurm_accounting_storage.h slurm_jobacct_gather.c \ + slurm_jobacct_gather.h slurm_acct_gather_energy.c \ + slurm_acct_gather_energy.h slurm_acct_gather_profile.c \ + slurm_acct_gather_profile.h slurm_acct_gather_infiniband.c \ + slurm_acct_gather_infiniband.h slurm_acct_gather_filesystem.c \ + slurm_acct_gather_filesystem.h slurm_jobcomp.c slurm_jobcomp.h \ + slurm_topology.c slurm_topology.h switch.c switch.h arg_desc.c \ + arg_desc.h macros.h malloc.c malloc.h getopt.h getopt.c \ + getopt1.c unsetenv.c unsetenv.h slurm_selecttype_info.c \ slurm_selecttype_info.h slurm_resource_info.c \ slurm_resource_info.h hostlist.c hostlist.h \ slurm_step_layout.c slurm_step_layout.h checkpoint.c \ @@ -159,24 +167,29 @@ am__libcommon_la_SOURCES_DIST = xcgroup_read_config.c \ am_libcommon_la_OBJECTS = xcgroup_read_config.lo xcgroup.lo \ xcpuinfo.lo cpu_frequency.lo assoc_mgr.lo 
xmalloc.lo \ xassert.lo xstring.lo xsignal.lo strnatcmp.lo forward.lo \ - strlcpy.lo list.lo net.lo log.lo cbuf.lo safeopen.lo \ - bitstring.lo mpi.lo pack.lo parse_config.lo parse_spec.lo \ - plugin.lo plugrack.lo print_fields.lo read_config.lo \ - node_select.lo env.lo fd.lo slurm_cred.lo slurm_errno.lo \ - slurm_priority.lo slurm_protocol_api.lo slurm_protocol_pack.lo \ + strlcpy.lo list.lo xtree.lo xhash.lo net.lo log.lo cbuf.lo \ + safeopen.lo bitstring.lo mpi.lo pack.lo parse_config.lo \ + parse_spec.lo plugin.lo plugrack.lo print_fields.lo \ + read_config.lo node_select.lo env.lo fd.lo slurm_cred.lo \ + slurm_errno.lo slurm_ext_sensors.lo slurm_priority.lo \ + slurm_protocol_api.lo slurm_protocol_pack.lo \ slurm_protocol_util.lo slurm_protocol_socket_implementation.lo \ slurm_protocol_defs.lo slurm_rlimits_info.lo slurmdb_defs.lo \ slurmdb_pack.lo slurmdbd_defs.lo working_cluster.lo uid.lo \ - util-net.lo slurm_auth.lo slurm_accounting_storage.lo \ - slurm_jobacct_gather.lo slurm_acct_gather_energy.lo \ - slurm_jobcomp.lo slurm_topology.lo switch.lo arg_desc.lo \ - malloc.lo getopt.lo getopt1.lo $(am__objects_1) \ - slurm_selecttype_info.lo slurm_resource_info.lo hostlist.lo \ - slurm_step_layout.lo checkpoint.lo job_resources.lo \ - parse_time.lo job_options.lo global_defaults.lo timers.lo \ - stepd_api.lo write_labelled_message.lo proc_args.lo \ - slurm_strcasestr.lo node_conf.lo gres.lo -am__EXTRA_libcommon_la_SOURCES_DIST = unsetenv.c unsetenv.h + util-net.lo slurm_auth.lo slurm_acct_gather.lo \ + slurm_accounting_storage.lo slurm_jobacct_gather.lo \ + slurm_acct_gather_energy.lo slurm_acct_gather_profile.lo \ + slurm_acct_gather_infiniband.lo \ + slurm_acct_gather_filesystem.lo slurm_jobcomp.lo \ + slurm_topology.lo switch.lo arg_desc.lo malloc.lo getopt.lo \ + getopt1.lo $(am__objects_1) slurm_selecttype_info.lo \ + slurm_resource_info.lo hostlist.lo slurm_step_layout.lo \ + checkpoint.lo job_resources.lo parse_time.lo job_options.lo \ + 
global_defaults.lo timers.lo stepd_api.lo \ + write_labelled_message.lo proc_args.lo slurm_strcasestr.lo \ + node_conf.lo gres.lo +am__EXTRA_libcommon_la_SOURCES_DIST = unsetenv.c unsetenv.h \ + uthash/LICENSE uthash/README uthash/uthash.h libcommon_la_OBJECTS = $(am_libcommon_la_OBJECTS) libcommon_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -252,6 +265,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -273,6 +288,9 @@ EGREP = @EGREP@ # This is needed if compiling on windows EXEEXT = FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -282,6 +300,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -289,6 +309,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -323,6 +352,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -350,6 +382,9 @@ READLINE_LIBS = @READLINE_LIBS@ 
REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -438,7 +473,6 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -AM_CPPFLAGS = -DGRES_CONFIG_FILE=\"$(sysconfdir)/gres.conf\" AUTOMAKE_OPTIONS = foreign @HAVE_UNSETENV_FALSE@build_unsetenv_src = unsetenv.c unsetenv.h @HAVE_UNSETENV_TRUE@build_unsetenv_src = @@ -465,6 +499,8 @@ libcommon_la_SOURCES = \ forward.c forward.h \ strlcpy.c strlcpy.h \ list.c list.h \ + xtree.c xtree.h \ + xhash.c xhash.h \ net.c net.h \ log.c log.h \ cbuf.c cbuf.h \ @@ -484,6 +520,7 @@ libcommon_la_SOURCES = \ slurm_cred.h \ slurm_cred.c \ slurm_errno.c \ + slurm_ext_sensors.c slurm_ext_sensors.h \ slurm_priority.c \ slurm_priority.h \ slurm_protocol_api.c \ @@ -507,9 +544,13 @@ libcommon_la_SOURCES = \ uid.c uid.h \ util-net.c util-net.h \ slurm_auth.c slurm_auth.h \ + slurm_acct_gather.c slurm_acct_gather.h \ slurm_accounting_storage.c slurm_accounting_storage.h \ slurm_jobacct_gather.c slurm_jobacct_gather.h \ slurm_acct_gather_energy.c slurm_acct_gather_energy.h \ + slurm_acct_gather_profile.c slurm_acct_gather_profile.h \ + slurm_acct_gather_infiniband.c slurm_acct_gather_infiniband.h \ + slurm_acct_gather_filesystem.c slurm_acct_gather_filesystem.h \ slurm_jobcomp.c slurm_jobcomp.h \ slurm_topology.c slurm_topology.h \ switch.c switch.h \ @@ -540,7 +581,10 @@ libcommon_la_SOURCES = \ gres.h gres.c EXTRA_libcommon_la_SOURCES = \ - $(extra_unsetenv_src) + $(extra_unsetenv_src) \ + uthash/LICENSE \ + uthash/README \ + uthash/uthash.h libdaemonize_la_SOURCES = \ daemonize.c \ @@ -674,10 +718,15 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/read_config.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/safeopen.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_accounting_storage.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_acct_gather.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_acct_gather_energy.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_acct_gather_filesystem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_acct_gather_infiniband.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_acct_gather_profile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_auth.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_cred.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_errno.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_ext_sensors.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_jobacct_gather.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_jobcomp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_priority.Plo@am__quote@ @@ -709,9 +758,11 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcgroup.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcgroup_read_config.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcpuinfo.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xhash.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmalloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xsignal.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xstring.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xtree.Plo@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< diff --git a/src/common/arg_desc.c b/src/common/arg_desc.c index 57d7a741efd953a2b58f0647b740db0217da7ced..50da98a69d563bf5fce3c2a0a9db69ff3a9e0b55 
100644 --- a/src/common/arg_desc.c +++ b/src/common/arg_desc.c @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/arg_desc.h b/src/common/arg_desc.h index 5aa72c86c0826856c12e9a66f61387992a85e7e5..d041b025f1b04f9599a0235f347344d9f3edd387 100644 --- a/src/common/arg_desc.h +++ b/src/common/arg_desc.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 8ded09a2bd0532d4923411a5252f1cd731580dfd..5850cc4dc897a5251a581597bd67a9356406e417 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -554,8 +554,6 @@ static int _post_association_list(List assoc_list) continue; itr2 = list_iterator_create( assoc->usage->childern_list); - if (itr2 == NULL) - fatal("list_iterator_create: malloc failure"); while ((assoc2 = list_next(itr2))) { if (assoc2->shares_raw != SLURMDB_FS_USE_PARENT) count += assoc2->shares_raw; diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h index bc906bfea49992cef32ff6fa39564aedc0f437bb..2c80a6367cba5904f554ddfd5572b446070f7a25 100644 --- a/src/common/assoc_mgr.h +++ b/src/common/assoc_mgr.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -68,10 +68,11 @@ typedef struct { } assoc_mgr_lock_t; /* Interval lock structure - * we actually use three semaphores for each data type, see macros below - * (assoc_mgr_lock_datatype_t * 3 + 0) = read_lock - * (assoc_mgr_lock_datatype_t * 3 + 1) = write_lock - * (assoc_mgr_lock_datatype_t * 3 + 2) = write_wait_lock + * we actually use the count for each data type, see macros below + * (assoc_mgr_lock_datatype_t * 4 + 0) = read_lock read locks in use + * (assoc_mgr_lock_datatype_t * 4 + 1) = write_lock write locks in use + * (assoc_mgr_lock_datatype_t * 4 + 2) = write_wait_lock write locks pending + * (assoc_mgr_lock_datatype_t * 4 + 3) = write_cnt_lock write lock count */ typedef enum { ASSOC_LOCK, @@ -83,7 +84,7 @@ typedef enum { } assoc_mgr_lock_datatype_t; typedef struct { - int entity[ASSOC_MGR_ENTITY_COUNT * 3]; + int entity[ASSOC_MGR_ENTITY_COUNT * 4]; } assoc_mgr_lock_flags_t; typedef struct { diff --git a/src/common/bitstring.c b/src/common/bitstring.c index 
291ec1c321b9c51c6855bf5eeaf6d00c127cd7eb..76aad4d6582991b909c761c359f65da44fddc073 100644 --- a/src/common/bitstring.c +++ b/src/common/bitstring.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -40,12 +40,13 @@ \*****************************************************************************/ #include <assert.h> -#include <stdlib.h> +#include <ctype.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> -#include <ctype.h> #include "src/common/bitstring.h" +#include "src/common/log.h" #include "src/common/macros.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -107,10 +108,13 @@ bit_alloc(bitoff_t nbits) bitstr_t *new; new = (bitstr_t *)calloc(_bitstr_words(nbits), sizeof(bitstr_t)); - if (new) { - _bitstr_magic(new) = BITSTR_MAGIC; - _bitstr_bits(new) = nbits; + if (!new) { + log_oom(__FILE__, __LINE__, __CURRENT_FUNC__); + abort(); } + + _bitstr_magic(new) = BITSTR_MAGIC; + _bitstr_bits(new) = nbits; return new; } @@ -129,12 +133,15 @@ bit_realloc(bitstr_t *b, bitoff_t nbits) _assert_bitstr_valid(b); obits = _bitstr_bits(b); new = realloc(b, _bitstr_words(nbits) * sizeof(bitstr_t)); - if (new) { - _assert_bitstr_valid(new); - _bitstr_bits(new) = nbits; - if (nbits > obits) - bit_nclear(new, obits, nbits - 1); + if (!new) { + log_oom(__FILE__, __LINE__, __CURRENT_FUNC__); + abort(); } + + _assert_bitstr_valid(new); + _bitstr_bits(new) = nbits; + if (nbits > obits) + bit_nclear(new, obits, nbits - 1); return new; } @@ -924,8 +931,8 @@ bit_pick_cnt(bitstr_t *b, bitoff_t nbits) { * types is architecture/compiler dependent, so this may have to be tweaked. 
*/ #ifdef USE_64BIT_BITSTR -#define BITSTR_RANGE_FMT "%llu-%llu," -#define BITSTR_SINGLE_FMT "%llu," +#define BITSTR_RANGE_FMT "%"PRIu64"-%"PRIu64"," +#define BITSTR_SINGLE_FMT "%"PRIu64"," #else #define BITSTR_RANGE_FMT "%u-%u," #define BITSTR_SINGLE_FMT "%u," @@ -1063,7 +1070,7 @@ inx2bitfmt (int *inx) return NULL; while (inx[j] >= 0) { - if(bit_char_ptr) + if (bit_char_ptr) xstrfmtcat(bit_char_ptr, ",%d-%d", inx[j], inx[j+1]); else xstrfmtcat(bit_char_ptr, "%d-%d", inx[j], inx[j+1]); @@ -1269,13 +1276,13 @@ bit_get_bit_num(bitstr_t *b, int pos) for (bit = 0; bit < bit_cnt; bit++) { if (bit_test(b, bit)) { /* we got one */ - if(cnt == pos) + if (cnt == pos) break; cnt++; } } - if(bit >= bit_cnt) + if (bit >= bit_cnt) bit = -1; return bit; @@ -1299,7 +1306,11 @@ bit_get_pos_num(bitstr_t *b, bitoff_t pos) assert(pos <= bit_cnt); if (!bit_test(b, pos)) { +#ifdef USE_64BIT_BITSTR + error("bit %"PRIu64" not set", pos); +#else error("bit %d not set", pos); +#endif return cnt; } for (bit = 0; bit <= pos; bit++) { diff --git a/src/common/bitstring.h b/src/common/bitstring.h index 9fc50645671b649e4948d149f47a39f931ed7d4d..7611ce3672978d3e75de784b34ce1527c87dd9fc 100644 --- a/src/common/bitstring.h +++ b/src/common/bitstring.h @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/cbuf.c b/src/common/cbuf.c index 1f2744c1049e9cb6fb4fbdc92dfc8b02973c4178..d3d4fe38dc5540b1a851a3cf18ccd8a01a59b008 100644 --- a/src/common/cbuf.c +++ b/src/common/cbuf.c @@ -51,7 +51,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include "cbuf.h" +#include "src/common/cbuf.h" +#include "src/common/log.h" /********************* @@ -66,11 +67,12 @@ # include <errno.h> # include <stdio.h> # include <string.h> -# define lsd_fatal_error(file, line, mesg) \ - do { \ - fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", \ - file, line, mesg, strerror(errno)); \ - } while (0) + static void lsd_fatal_error(char *file, int line, char *mesg) + { + fprintf(log_fp(), "ERROR: [%s:%d] %s: %s\n", + file, line, mesg, strerror(errno)); + fflush(log_fp()); + } # endif /* !lsd_fatal_error */ #endif /* !WITH_LSD_FATAL_ERROR_FUNC */ @@ -84,7 +86,12 @@ extern void * lsd_nomem_error(char *file, int line, char *mesg); #else /* !WITH_LSD_NOMEM_ERROR_FUNC */ # ifndef lsd_nomem_error -# define lsd_nomem_error(file, line, mesg) (NULL) + static void * lsd_nomem_error(char *file, int line, char *mesg) + { + log_oom(file, line, mesg); + abort(); + return NULL; + } # endif /* !lsd_nomem_error */ #endif /* !WITH_LSD_NOMEM_ERROR_FUNC */ diff --git a/src/common/checkpoint.c b/src/common/checkpoint.c index d9b99877c6800860a668305adc7df31991767644..82b4399df8ab8e30fb4d446952e091d9a40add22 100644 --- a/src/common/checkpoint.c +++ b/src/common/checkpoint.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -76,6 +76,7 @@ typedef struct slurm_checkpoint_ops { uint16_t protocol_version); int (*ckpt_unpack_jobinfo) (check_jobinfo_t jobinfo, Buf buffer, uint16_t protocol_version); + check_jobinfo_t (*ckpt_copy_jobinfo) (check_jobinfo_t jobinfo); int (*ckpt_stepd_prefork) (void *slurmd_job); int (*ckpt_signal_tasks) (void *slurmd_job, char *image_dir); int (*ckpt_restart_task) (void *slurmd_job, char *image_dir, @@ -94,6 +95,7 @@ static const char *syms[] = { "slurm_ckpt_free_job", "slurm_ckpt_pack_job", "slurm_ckpt_unpack_job", + "slurm_ckpt_copy_job", "slurm_ckpt_stepd_prefork", "slurm_ckpt_signal_tasks", "slurm_ckpt_restart_task" @@ -281,6 +283,20 @@ extern int checkpoint_unpack_jobinfo (check_jobinfo_t jobinfo, Buf buffer, return retval; } +extern check_jobinfo_t checkpoint_copy_jobinfo(check_jobinfo_t jobinfo) +{ + check_jobinfo_t retval = NULL; + + slurm_mutex_lock( &context_lock ); + if ( g_context ) { + retval = (*(ops.ckpt_copy_jobinfo))(jobinfo); + } else { + error ("slurm_checkpoint plugin context not initialized"); + } + slurm_mutex_unlock( &context_lock ); + return retval; +} + extern int checkpoint_stepd_prefork (void *job) { int retval = SLURM_SUCCESS; diff --git a/src/common/checkpoint.h b/src/common/checkpoint.h index 0eae38780889cb0ce39d5fea30e86afa6f9c073d..82299cea2f9545a8966236d4e03a1abc0db3a0e7 100644 --- a/src/common/checkpoint.h +++ b/src/common/checkpoint.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -103,6 +103,9 @@ extern int checkpoint_pack_jobinfo (check_jobinfo_t jobinfo, Buf buffer, extern int checkpoint_unpack_jobinfo (check_jobinfo_t jobinfo, Buf buffer, uint16_t protocol_version); +/* copy a job's checkpoint context */ +extern check_jobinfo_t checkpoint_copy_jobinfo(check_jobinfo_t jobinfo); + /* create the necessary threads before forking the tasks */ extern int checkpoint_stepd_prefork (void *slurmd_job); diff --git a/src/common/cpu_frequency.c b/src/common/cpu_frequency.c index 489851e24baca153a6b14a132811b996ef4c7e02..dd69a95bc37b80d40504df7802f21e536799f9d3 100644 --- a/src/common/cpu_frequency.c +++ b/src/common/cpu_frequency.c @@ -5,7 +5,7 @@ * Written by Don Albert, <don.albert@bull.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/cpu_frequency.h b/src/common/cpu_frequency.h index b2004e8bd2f635f1cd060ea93a6795119882ed47..b3c32b25c4fc0e367c9aebc03df64e0f5e92cf3c 100644 --- a/src/common/cpu_frequency.h +++ b/src/common/cpu_frequency.h @@ -5,7 +5,7 @@ * Written by Don Albert, <don.albert@bull.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/daemonize.c b/src/common/daemonize.c index 09ab391e2937cbd9c592b62fdbf53399408dcbd8..dd9cfc26a92e72fb303b8236646d2e2b8948d5d0 100644 --- a/src/common/daemonize.c +++ b/src/common/daemonize.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -44,6 +44,8 @@ #include <fcntl.h> #include <unistd.h> #include <sys/resource.h> +#include <sys/stat.h> +#include <sys/types.h> #include "src/common/daemonize.h" #include "src/common/fd.h" @@ -75,7 +77,7 @@ daemon(int nochdir, int noclose) default: _exit(0); /* exit parent */ } - if(!nochdir && chdir("/") < 0) { + if (!nochdir && chdir("/") < 0) { error("chdir(/): %m"); return -1; } @@ -159,17 +161,18 @@ int create_pidfile(const char *pidfile, uid_t uid) { FILE *fp; - int fd = -1; + int fd; xassert(pidfile != NULL); xassert(pidfile[0] == '/'); - if (!(fp = fopen(pidfile, "w"))) { + fd = creat_cloexec(pidfile, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { error("Unable to open pidfile `%s': %m", pidfile); return -1; } - fd = fileno(fp); + fp = fdopen(fd, "w"); if (fd_get_write_lock(fd) < 0) { error ("Unable to lock pidfile `%s': %m", pidfile); diff --git a/src/common/daemonize.h b/src/common/daemonize.h index 42e9fb99a2f2beadc00f8bb7322276fa762a785d..3e55f7634a9348697611cf59dd1e4bc9bdbf2364 100644 --- a/src/common/daemonize.h +++ b/src/common/daemonize.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/eio.c b/src/common/eio.c index 9b9faeaf47d22e0b41c9cd658f9c81c80f887687..c453cb7102fd3fa1ad59da2e11b2ea422bd417eb 100644 --- a/src/common/eio.c +++ b/src/common/eio.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -43,13 +43,14 @@ #include <unistd.h> #include <errno.h> -#include "src/common/xmalloc.h" -#include "src/common/xassert.h" -#include "src/common/log.h" -#include "src/common/list.h" #include "src/common/fd.h" #include "src/common/eio.h" +#include "src/common/log.h" +#include "src/common/list.h" +#include "src/common/net.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/xassert.h" +#include "src/common/xmalloc.h" /* How many seconds to wait after eio_signal_shutdown() is called before * terminating the job and abandoning any I/O remaining to be processed */ @@ -167,11 +168,12 @@ int eio_message_socket_accept(eio_obj_t *obj, List objs) return SLURM_SUCCESS; } + net_set_keep_alive(fd); fd_set_close_on_exec(fd); fd_set_blocking(fd); /* Should not call slurm_get_addr() because the IP may not be - in /etc/hosts. */ + * in /etc/hosts. */ uc = (unsigned char *)&addr.sin_addr.s_addr; port = addr.sin_port; debug2("got message connection from %u.%u.%u.%u:%hu %d", @@ -181,7 +183,7 @@ int eio_message_socket_accept(eio_obj_t *obj, List objs) msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(msg); again: - if(slurm_receive_msg(fd, msg, obj->ops->timeout) != 0) { + if (slurm_receive_msg(fd, msg, obj->ops->timeout) != 0) { if (errno == EINTR) { goto again; } diff --git a/src/common/eio.h b/src/common/eio.h index afee1fdc525ef4cda1c8f80068c187afdbddc83b..4e74d5de26afddbf2d45d83ae74cf3b3ebb835df 100644 --- a/src/common/eio.h +++ b/src/common/eio.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/env.c b/src/common/env.c index a818c90a1ffc3ef45af7c106c99509465baa8da7..0f7d237e379fdcaa7f6c3d5cd1046b2181297ed6 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -62,6 +62,7 @@ #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/common/node_select.h" +#include "src/common/proc_args.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_step_layout.h" #include "src/common/slurmdb_defs.h" @@ -225,48 +226,6 @@ static bool _discard_env(char *name, char *value) return false; } -static void _set_distribution(task_dist_states_t distribution, - char **dist, char **lllp_dist) -{ - if (((int)distribution >= 0) - && (distribution != SLURM_DIST_UNKNOWN)) { - switch(distribution) { - case SLURM_DIST_CYCLIC: - *dist = "cyclic"; - break; - case SLURM_DIST_BLOCK: - *dist = "block"; - break; - case SLURM_DIST_PLANE: - *dist = "plane"; - *lllp_dist = "plane"; - break; - case SLURM_DIST_ARBITRARY: - *dist = "arbitrary"; - break; - case SLURM_DIST_CYCLIC_CYCLIC: - *dist = "cyclic"; - *lllp_dist = "cyclic"; - break; - case SLURM_DIST_CYCLIC_BLOCK: - *dist = "cyclic"; - *lllp_dist = "block"; - break; - case SLURM_DIST_BLOCK_CYCLIC: - *dist = "block"; - *lllp_dist = "cyclic"; - break; - case SLURM_DIST_BLOCK_BLOCK: - *dist = "block"; - *lllp_dist = "block"; - break; - default: - error("unknown dist, type %d", distribution); - break; - } - } -} - /* * Return the number of elements in the environment `env' */ @@ -321,6 +280,8 @@ int setenvf(char 
***envp, const char *name, const char *fmt, ...) else rc = 1; } else { + /* XXX Space is allocated on the heap and will never + * be reclaimed. */ xstrfmtcat(str, "%s=%s", name, value); rc = putenv(str); } @@ -382,19 +343,13 @@ int setup_env(env_t *env, bool preserve_env) if (env == NULL) return SLURM_ERROR; - if (env->task_pid - && setenvf(&env->env, "SLURM_TASK_PID", "%d", (int)env->task_pid)) { - error("Unable to set SLURM_TASK_PID environment variable"); - rc = SLURM_FAILURE; - } - if (!preserve_env && env->ntasks) { - if(setenvf(&env->env, "SLURM_NTASKS", "%d", env->ntasks)) { + if (setenvf(&env->env, "SLURM_NTASKS", "%d", env->ntasks)) { error("Unable to set SLURM_NTASKS " "environment variable"); rc = SLURM_FAILURE; } - if(setenvf(&env->env, "SLURM_NPROCS", "%d", env->ntasks)) { + if (setenvf(&env->env, "SLURM_NPROCS", "%d", env->ntasks)) { error("Unable to set SLURM_NPROCS " "environment variable"); rc = SLURM_FAILURE; @@ -436,14 +391,14 @@ int setup_env(env_t *env, bool preserve_env) rc = SLURM_FAILURE; } - _set_distribution(env->distribution, &dist, &lllp_dist); - if(dist) + set_distribution(env->distribution, &dist, &lllp_dist); + if (dist) if (setenvf(&env->env, "SLURM_DISTRIBUTION", "%s", dist)) { error("Can't set SLURM_DISTRIBUTION env variable"); rc = SLURM_FAILURE; } - if(env->distribution == SLURM_DIST_PLANE) + if (env->distribution == SLURM_DIST_PLANE) if (setenvf(&env->env, "SLURM_DIST_PLANESIZE", "%u", env->plane_size)) { error("Can't set SLURM_DIST_PLANESIZE " @@ -451,7 +406,7 @@ int setup_env(env_t *env, bool preserve_env) rc = SLURM_FAILURE; } - if(lllp_dist) + if (lllp_dist) if (setenvf(&env->env, "SLURM_DIST_LLLP", "%s", lllp_dist)) { error("Can't set SLURM_DIST_LLLP env variable"); rc = SLURM_FAILURE; @@ -723,22 +678,37 @@ int setup_env(env_t *env, bool preserve_env) } } - if (env->nodeid >= 0 - && setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) { - error("Unable to set SLURM_NODEID environment"); - rc = SLURM_FAILURE; - } + if 
(!(cluster_flags & CLUSTER_FLAG_BG) + && !(cluster_flags & CLUSTER_FLAG_CRAYXT)) { + /* These aren't relavant to a system not using Slurm + as the launcher. Since there isn't a flag for that + we check for the flags we do have. + */ + if (env->task_pid + && setenvf(&env->env, "SLURM_TASK_PID", "%d", + (int)env->task_pid)) { + error("Unable to set SLURM_TASK_PID environment " + "variable"); + rc = SLURM_FAILURE; + } + if (env->nodeid >= 0 + && setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) { + error("Unable to set SLURM_NODEID environment"); + rc = SLURM_FAILURE; + } - if (env->procid >= 0 - && setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) { - error("Unable to set SLURM_PROCID environment"); - rc = SLURM_FAILURE; - } + if (env->procid >= 0 + && setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) { + error("Unable to set SLURM_PROCID environment"); + rc = SLURM_FAILURE; + } - if (env->localid >= 0 - && setenvf(&env->env, "SLURM_LOCALID", "%d", env->localid)) { - error("Unable to set SLURM_LOCALID environment"); - rc = SLURM_FAILURE; + if (env->localid >= 0 + && setenvf(&env->env, "SLURM_LOCALID", "%d", + env->localid)) { + error("Unable to set SLURM_LOCALID environment"); + rc = SLURM_FAILURE; + } } if (env->stepid >= 0) { @@ -759,6 +729,12 @@ int setup_env(env_t *env, bool preserve_env) rc = SLURM_FAILURE; } + if (env->nhosts + && setenvf(&env->env, "SLURM_JOB_NUM_NODES", "%d", env->nhosts)) { + error("Unable to set SLURM_JOB_NUM_NODES environment var"); + rc = SLURM_FAILURE; + } + if (env->nodelist && setenvf(&env->env, "SLURM_NODELIST", "%s", env->nodelist)) { error("Unable to set SLURM_NODELIST environment var."); @@ -799,7 +775,7 @@ int setup_env(env_t *env, bool preserve_env) rc = SLURM_FAILURE; } - if(cluster_flags & CLUSTER_FLAG_AIX) { + if (cluster_flags & CLUSTER_FLAG_AIX) { char res_env[128]; char *debug_env = (char *)getenv("SLURM_LL_API_DEBUG"); int debug_num = 0; @@ -875,7 +851,7 @@ static char *_uint16_array_to_str(int array_len, 
const uint16_t *array) char *sep = ","; /* seperator */ char *str = xstrdup(""); - if(array == NULL) + if (array == NULL) return str; for (i = 0; i < array_len; i++) { @@ -989,7 +965,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, env_array_overwrite_fmt(dest, "SLURM_NODE_ALIASES", "%s", alloc->alias_list); - _set_distribution(desc->task_dist, &dist, &lllp_dist); + set_distribution(desc->task_dist, &dist, &lllp_dist); if (dist) env_array_overwrite_fmt(dest, "SLURM_DISTRIBUTION", "%s", dist); @@ -1042,7 +1018,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list); - if(num_tasks == NO_VAL) { + if (num_tasks == NO_VAL) { /* If we know how many tasks we are going to do then we set SLURM_TASKS_PER_NODE */ int i=0; @@ -1055,20 +1031,20 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, num_tasks += alloc->cpu_count_reps[i] * alloc->cpus_per_node[i]; } - if((int)desc->cpus_per_task > 1 + if ((int)desc->cpus_per_task > 1 && desc->cpus_per_task != (uint16_t)NO_VAL) num_tasks /= desc->cpus_per_task; //num_tasks = desc->min_cpus; } - if(desc->task_dist == SLURM_DIST_ARBITRARY) { + if (desc->task_dist == SLURM_DIST_ARBITRARY) { tmp = desc->req_nodes; env_array_overwrite_fmt(dest, "SLURM_ARBITRARY_NODELIST", "%s", tmp); } else tmp = alloc->node_list; - if(!(step_layout = slurm_step_layout_create(tmp, + if (!(step_layout = slurm_step_layout_create(tmp, alloc->cpus_per_node, alloc->cpu_count_reps, node_cnt, @@ -1135,10 +1111,16 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes); - if(cluster_flags & CLUSTER_FLAG_BG) + if (cluster_flags & CLUSTER_FLAG_BG) { 
env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", num_nodes); - + } + if (batch->array_task_id != (uint16_t) NO_VAL) { + env_array_overwrite_fmt(dest, "SLURM_ARRAY_JOB_ID", "%u", + batch->array_job_id); + env_array_overwrite_fmt(dest, "SLURM_ARRAY_TASK_ID", "%u", + batch->array_task_id); + } env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes); env_array_overwrite_fmt(dest, "SLURM_NODE_ALIASES", "%s", batch->alias_list); @@ -1164,7 +1146,7 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, else cpus_per_task = 1; /* default value */ - if(num_tasks) { + if (num_tasks) { env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u", num_tasks); /* keep around for old scripts */ @@ -1174,21 +1156,21 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, num_tasks = num_cpus / cpus_per_task; } - if((tmp = getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) { + if ((tmp = getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) { task_dist = SLURM_DIST_ARBITRARY; } else { tmp = batch->nodes; task_dist = SLURM_DIST_BLOCK; } - if(!(step_layout = slurm_step_layout_create(tmp, - batch->cpus_per_node, - batch->cpu_count_reps, - num_nodes, - num_tasks, - cpus_per_task, - task_dist, - (uint16_t)NO_VAL))) + if (!(step_layout = slurm_step_layout_create(tmp, + batch->cpus_per_node, + batch->cpu_count_reps, + num_nodes, + num_tasks, + cpus_per_task, + task_dist, + (uint16_t)NO_VAL))) return SLURM_ERROR; tmp = _uint16_array_to_str(step_layout->node_cnt, @@ -1838,7 +1820,7 @@ char **env_array_user_default(const char *username, int timeout, int mode) struct stat buf; if (geteuid() != (uid_t)0) { - fatal("WARNING: you must be root to use --get-user-env"); + error("SlurmdUser must be root to use --get-user-env"); return NULL; } diff --git a/src/common/env.h b/src/common/env.h index 149cf62d412a76c177706208abb112ff9b272f52..b20ee9d247775144bd20d158a45b0f752e9ece69 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -7,7 +7,7 @@ * 
CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/fd.c b/src/common/fd.c index 0bcf353950b73392d6772b9036a4e22313def284..9c4a413ee4b52656c59a72221262d1a13187aca6 100644 --- a/src/common/fd.c +++ b/src/common/fd.c @@ -82,6 +82,33 @@ void fd_set_noclose_on_exec(int fd) return; } + +int open_cloexec(const char *pathname, int flags) +{ +#ifdef O_CLOEXEC + return open(pathname, flags | O_CLOEXEC); +#else + int fd = open(pathname, flags); + if (fd >= 0) + fd_set_close_on_exec(fd); + return fd; +#endif +} + + +int creat_cloexec(const char *pathname, mode_t mode) +{ +#ifdef O_CLOEXEC + return open(pathname, O_CREAT|O_WRONLY|O_TRUNC|O_CLOEXEC, mode); +#else + int fd = creat(pathname, mode); + if (fd >= 0) + fd_set_close_on_exec(fd); + return fd; +#endif +} + + int fd_is_blocking(int fd) { int val = 0; diff --git a/src/common/fd.h b/src/common/fd.h index 704c0e39b4fbd03252879e3b56f2f1e66135064e..2a1dcc2346fcc1fded794e4099a311f2940a68ed 100644 --- a/src/common/fd.h +++ b/src/common/fd.h @@ -58,6 +58,15 @@ static inline void closeall(int fd) close(fd++); } +/* Open a fd with close-on-exec (POSIX 2008, Linux 2.6.23+), emulating + * it on systems that lack it. */ +int open_cloexec(const char *pathname, int flags); + +/* Create a fd with close-on-exec (POSIX 2008, Linux 2.6.23+), + * emulating it on systems that lack it. */ +int creat_cloexec(const char *pathname, mode_t mode); + + void fd_set_close_on_exec(int fd); /* * Sets the file descriptor (fd) to be closed on exec(). 
diff --git a/src/common/forward.c b/src/common/forward.c index f93f6472025a8c4d6d09ea4d40e07df450d0b690..49046f0e810e1c84c11ea1d6ae996c1bb1461f2b 100644 --- a/src/common/forward.c +++ b/src/common/forward.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -64,6 +64,7 @@ typedef struct { pthread_cond_t *notify; + int *p_thr_count; slurm_msg_t *orig_msg; List ret_list; int timeout; @@ -73,9 +74,19 @@ typedef struct { void _destroy_tree_fwd(fwd_tree_t *fwd_tree) { - if(fwd_tree) { - if(fwd_tree->tree_hl) + if (fwd_tree) { + if (fwd_tree->tree_hl) hostlist_destroy(fwd_tree->tree_hl); + + /* + * Lock and decrease thread counter, start_msg_tree is waiting + * for a null thread count to exit its main loop + */ + slurm_mutex_lock(fwd_tree->tree_mutex); + (*(fwd_tree->p_thr_count))--; + pthread_cond_signal(fwd_tree->notify); + slurm_mutex_unlock(fwd_tree->tree_mutex); + xfree(fwd_tree); } } @@ -95,8 +106,8 @@ void *_forward_thread(void *arg) int start_timeout = fwd_msg->timeout; /* repeat until we are sure the message was sent */ - while((name = hostlist_shift(hl))) { - if(slurm_conf_get_addr(name, &addr) == SLURM_ERROR) { + while ((name = hostlist_shift(hl))) { + if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) { error("forward_thread: can't find address for host " "%s, check slurm.conf", name); slurm_mutex_lock(fwd_msg->forward_mutex); @@ -153,7 +164,7 @@ void *_forward_thread(void *arg) /* * forward message */ - if(_slurm_msg_sendto(fd, + if (_slurm_msg_sendto(fd, get_buf_data(buffer), get_buf_offset(buffer), SLURM_PROTOCOL_NO_SEND_RECV_FLAGS ) < 0) { @@ -163,7 +174,7 @@ void *_forward_thread(void *arg) mark_as_failed_forward(&fwd_msg->ret_list, name, errno); 
free(name); - if(hostlist_count(hl) > 0) { + if (hostlist_count(hl) > 0) { free_buf(buffer); buffer = init_buf(fwd_msg->buf_len); slurm_mutex_unlock(fwd_msg->forward_mutex); @@ -182,7 +193,7 @@ void *_forward_thread(void *arg) list_push(fwd_msg->ret_list, ret_data_info); ret_data_info->node_name = xstrdup(name); free(name); - while((name = hostlist_shift(hl))) { + while ((name = hostlist_shift(hl))) { ret_data_info = xmalloc(sizeof(ret_data_info_t)); list_push(fwd_msg->ret_list, ret_data_info); @@ -192,7 +203,7 @@ void *_forward_thread(void *arg) goto cleanup; } - if(fwd_msg->header.forward.cnt > 0) { + if (fwd_msg->header.forward.cnt > 0) { static int message_timeout = -1; if (message_timeout < 0) message_timeout = @@ -210,13 +221,13 @@ void *_forward_thread(void *arg) /* info("sent %d forwards got %d back", */ /* fwd_msg->header.forward.cnt, list_count(ret_list)); */ - if(!ret_list || (fwd_msg->header.forward.cnt != 0 + if (!ret_list || (fwd_msg->header.forward.cnt != 0 && list_count(ret_list) <= 1)) { slurm_mutex_lock(fwd_msg->forward_mutex); mark_as_failed_forward(&fwd_msg->ret_list, name, errno); free(name); - if(ret_list) + if (ret_list) list_destroy(ret_list); if (hostlist_count(hl) > 0) { free_buf(buffer); @@ -227,7 +238,7 @@ void *_forward_thread(void *arg) continue; } goto cleanup; - } else if((fwd_msg->header.forward.cnt+1) + } else if ((fwd_msg->header.forward.cnt+1) != list_count(ret_list)) { /* this should never be called since the above should catch the failed forwards and pipe @@ -243,23 +254,23 @@ void *_forward_thread(void *arg) "but only got %d back", (fwd_msg->header.forward.cnt+1), list_count(ret_list)); - while((tmp = hostlist_next(host_itr))) { + while ((tmp = hostlist_next(host_itr))) { int node_found = 0; itr = list_iterator_create(ret_list); - while((ret_data_info = list_next(itr))) { - if(!ret_data_info->node_name) { + while ((ret_data_info = list_next(itr))) { + if (!ret_data_info->node_name) { first_node_found = 1; 
ret_data_info->node_name = xstrdup(name); } - if(!strcmp(tmp, + if (!strcmp(tmp, ret_data_info->node_name)) { node_found = 1; break; } } list_iterator_destroy(itr); - if(!node_found) { + if (!node_found) { mark_as_failed_forward( &fwd_msg->ret_list, tmp, @@ -268,7 +279,7 @@ void *_forward_thread(void *arg) free(tmp); } hostlist_iterator_destroy(host_itr); - if(!first_node_found) { + if (!first_node_found) { mark_as_failed_forward(&fwd_msg->ret_list, name, SLURM_COMMUNICATIONS_CONNECTION_ERROR); @@ -277,9 +288,9 @@ void *_forward_thread(void *arg) break; } slurm_mutex_lock(fwd_msg->forward_mutex); - if(ret_list) { - while((ret_data_info = list_pop(ret_list)) != NULL) { - if(!ret_data_info->node_name) { + if (ret_list) { + while ((ret_data_info = list_pop(ret_list)) != NULL) { + if (!ret_data_info->node_name) { ret_data_info->node_name = xstrdup(name); } list_push(fwd_msg->ret_list, ret_data_info); @@ -409,7 +420,7 @@ void *_fwd_tree_thread(void *arg) free(name); /* check for error and try again */ - if(errno == SLURM_COMMUNICATIONS_CONNECTION_ERROR) + if (errno == SLURM_COMMUNICATIONS_CONNECTION_ERROR) continue; break; @@ -429,7 +440,7 @@ void *_fwd_tree_thread(void *arg) */ extern void forward_init(forward_t *forward, forward_t *from) { - if(from && from->init == FORWARD_INIT) { + if (from && from->init == FORWARD_INIT) { forward->cnt = from->cnt; forward->timeout = from->timeout; forward->nodelist = from->nodelist; @@ -466,7 +477,7 @@ extern int forward_msg(forward_struct_t *forward_struct, hostlist_t forward_hl = NULL; char *name = NULL; - if(!forward_struct->ret_list) { + if (!forward_struct->ret_list) { error("didn't get a ret_list from forward_struct"); xfree(span); return SLURM_ERROR; @@ -474,7 +485,7 @@ extern int forward_msg(forward_struct_t *forward_struct, hl = hostlist_create(header->forward.nodelist); hostlist_uniq(hl); - while((name = hostlist_shift(hl))) { + while ((name = hostlist_shift(hl))) { pthread_attr_t attr_agent; pthread_t thread_agent; char 
*buf = NULL; @@ -489,7 +500,7 @@ extern int forward_msg(forward_struct_t *forward_struct, forward_msg->timeout = forward_struct->timeout; - if(forward_msg->timeout <= 0) { + if (forward_msg->timeout <= 0) { /* convert secs to msec */ forward_msg->timeout = slurm_get_msg_timeout() * 1000; } @@ -514,7 +525,7 @@ extern int forward_msg(forward_struct_t *forward_struct, free(name); for(j = 0; j < span[thr_count]; j++) { name = hostlist_shift(hl); - if(!name) + if (!name) break; hostlist_push(forward_hl, name); free(name); @@ -524,7 +535,7 @@ extern int forward_msg(forward_struct_t *forward_struct, hostlist_destroy(forward_hl); forward_init(&forward_msg->header.forward, NULL); forward_msg->header.forward.nodelist = buf; - while(pthread_create(&thread_agent, &attr_agent, + while (pthread_create(&thread_agent, &attr_agent, _forward_thread, (void *)forward_msg)) { error("pthread_create error %m"); @@ -592,6 +603,7 @@ extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout) fwd_tree->ret_list = ret_list; fwd_tree->timeout = timeout; fwd_tree->notify = ¬ify; + fwd_tree->p_thr_count = &thr_count; fwd_tree->tree_mutex = &tree_mutex; if (fwd_tree->timeout <= 0) { @@ -609,6 +621,17 @@ extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout) free(name); } + /* + * Lock and increase thread counter, we need that to protect + * the start_msg_tree waiting loop that was originally designed + * around a "while ((count < host_count))" loop. In case where a + * fwd thread was not able to get all the return codes from + * children, the waiting loop was deadlocked. 
+ */ + slurm_mutex_lock(&tree_mutex); + thr_count++; + slurm_mutex_unlock(&tree_mutex); + while (pthread_create(&thread_agent, &attr_agent, _fwd_tree_thread, (void *)fwd_tree)) { error("pthread_create error %m"); @@ -617,7 +640,7 @@ extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout) sleep(1); /* sleep and try again */ } slurm_attr_destroy(&attr_agent); - thr_count++; + } xfree(span); @@ -625,12 +648,13 @@ extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout) count = list_count(ret_list); debug2("Tree head got back %d looking for %d", count, host_count); - while ((count < host_count)) { + while (thr_count > 0) { pthread_cond_wait(¬ify, &tree_mutex); count = list_count(ret_list); debug2("Tree head got back %d", count); } - debug2("Tree head got them all"); + xassert(count >= host_count); /* Tree head did not get all responses, + * but no more active fwd threads!*/ slurm_mutex_unlock(&tree_mutex); slurm_mutex_destroy(&tree_mutex); @@ -652,7 +676,7 @@ extern void mark_as_failed_forward(List *ret_list, char *node_name, int err) ret_data_info_t *ret_data_info = NULL; debug3("problems with %s", node_name); - if(!*ret_list) + if (!*ret_list) *ret_list = list_create(destroy_data_info); ret_data_info = xmalloc(sizeof(ret_data_info_t)); @@ -669,7 +693,7 @@ extern void forward_wait(slurm_msg_t * msg) int count = 0; /* wait for all the other messages on the tree under us */ - if(msg->forward_struct) { + if (msg->forward_struct) { debug2("looking for %d", msg->forward_struct->fwd_cnt); slurm_mutex_lock(&msg->forward_struct->forward_mutex); count = 0; @@ -677,7 +701,7 @@ extern void forward_wait(slurm_msg_t * msg) count = list_count(msg->ret_list); debug2("Got back %d", count); - while((count < msg->forward_struct->fwd_cnt)) { + while ((count < msg->forward_struct->fwd_cnt)) { pthread_cond_wait(&msg->forward_struct->notify, &msg->forward_struct->forward_mutex); @@ -697,7 +721,7 @@ extern void forward_wait(slurm_msg_t * msg) void 
destroy_data_info(void *object) { ret_data_info_t *ret_data_info = (ret_data_info_t *)object; - if(ret_data_info) { + if (ret_data_info) { slurm_free_msg_data(ret_data_info->type, ret_data_info->data); xfree(ret_data_info->node_name); @@ -707,7 +731,7 @@ void destroy_data_info(void *object) void destroy_forward(forward_t *forward) { - if(forward->init == FORWARD_INIT) { + if (forward->init == FORWARD_INIT) { xfree(forward->nodelist); forward->init = 0; } else { @@ -717,7 +741,7 @@ void destroy_forward(forward_t *forward) void destroy_forward_struct(forward_struct_t *forward_struct) { - if(forward_struct) { + if (forward_struct) { xfree(forward_struct->buf); xfree(forward_struct->forward_msg); slurm_mutex_destroy(&forward_struct->forward_mutex); diff --git a/src/common/forward.h b/src/common/forward.h index 987cab5bb861892458c608448bcc659fdcb4cb67..6d7689ed6c0514cb871b69d9a6f3b3e30c84b2b1 100644 --- a/src/common/forward.h +++ b/src/common/forward.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/getopt.c b/src/common/getopt.c index b0f9c2460dd1aa8dfe5ee1c0874913053c1496b6..6cc56cd383ecea4cad076a6c588f3494d66d7f18 100644 --- a/src/common/getopt.c +++ b/src/common/getopt.c @@ -40,6 +40,8 @@ #endif #include <stdio.h> +#include <stdlib.h> +#include "src/common/log.h" /* Comment out all this code if we are using the GNU C Library, and are not actually compiling the library itself. This code is part of the GNU C @@ -318,16 +320,17 @@ exchange (argv) /* We must extend the array. The user plays games with us and presents new arguments. 
*/ char *new_str = malloc (top + 1); - if (new_str == NULL) + if (new_str == NULL) { nonoption_flags_len = nonoption_flags_max_len = 0; - else - { + log_oom(__FILE__, __LINE__, __CURRENT_FUNC__); + abort(); + } else { memset (__mempcpy (new_str, __getopt_nonoption_flags, nonoption_flags_max_len), '\0', top + 1 - nonoption_flags_max_len); nonoption_flags_max_len = top + 1; __getopt_nonoption_flags = new_str; - } + } } #endif @@ -430,11 +433,14 @@ _getopt_initialize (argc, argv, optstring) nonoption_flags_max_len = argc; __getopt_nonoption_flags = (char *) malloc (nonoption_flags_max_len); - if (__getopt_nonoption_flags == NULL) + if (__getopt_nonoption_flags == NULL) { + log_oom(__FILE__, __LINE__, __CURRENT_FUNC__); + abort(); nonoption_flags_max_len = -1; - else + } else { memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), '\0', nonoption_flags_max_len - len); + } } } nonoption_flags_len = nonoption_flags_max_len; diff --git a/src/common/gres.c b/src/common/gres.c index dc0f9db975f66aee6ded068a9a5fe498c2447423..a9b0eaf0f791e6573d5a73827c029ea945fd439f 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -73,6 +73,7 @@ #include "slurm/slurm_errno.h" #include "src/common/gres.h" #include "src/common/list.h" +#include "src/common/log.h" #include "src/common/macros.h" #include "src/common/pack.h" #include "src/common/parse_config.h" @@ -81,6 +82,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" +#include "src/common/read_config.h" #define GRES_MAGIC 0x438a34d4 @@ -134,7 +136,6 @@ static bitstr_t *_cpu_bitmap_rebuild(bitstr_t *old_cpu_bitmap, int new_size); static void _destroy_gres_slurmd_conf(void *x); static uint32_t _get_gres_cnt(char *orig_config, char *gres_name, char *gres_name_colon, int gres_name_colon_len); -static char * _get_gres_conf(void); static uint32_t _get_tot_gres_cnt(uint32_t plugin_id, uint32_t *set_cnt); static int _gres_find_id(void *x, void *key); static void _gres_job_list_delete(void *list_element); @@ -144,6 +145,10 @@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, bitstr_t *core_bitmap); static int _job_config_validate(char *config, uint32_t *gres_cnt, slurm_gres_context_t *context_ptr); +static void _job_core_filter(void *job_gres_data, void *node_gres_data, + bool use_total_gres, bitstr_t *cpu_bitmap, + int cpu_start_bit, int cpu_end_bit, + char *gres_name, char *node_name); static int _job_dealloc(void *job_gres_data, void *node_gres_data, int node_offset, char *gres_name, uint32_t job_id, char *node_name); @@ -521,31 +526,6 @@ extern int gres_plugin_reconfig(bool *did_change) return rc; } -/* - * Return the pathname of the gres.conf file - */ -static char *_get_gres_conf(void) -{ - char *val = getenv("SLURM_CONF"); - char *rc = NULL; - int i; - - if (!val) - return xstrdup(GRES_CONFIG_FILE); - - /* Replace file name on end of path */ - i = strlen(val) - strlen("slurm.conf") + strlen("gres.conf") + 2; - rc = xmalloc(i); - strcpy(rc, val); - val = strrchr(rc, (int)'/'); - if (val) /* 
absolute path */ - val++; - else /* not absolute path */ - val = rc; - strcpy(val, "gres.conf"); - return rc; -} - /* * Destroy a gres_slurmd_conf_t record, free it's memory */ @@ -672,8 +652,6 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, if (s_p_get_string(&p->cpus, "CPUs", tbl)) { bitstr_t *cpu_bitmap; /* Just use to validate config */ cpu_bitmap = bit_alloc(gres_cpu_cnt); - if (cpu_bitmap == NULL) - fatal("bit_alloc: malloc failure"); i = bit_unfmt(cpu_bitmap, p->cpus); if (i != 0) { fatal("Invalid gres data for %s, CPUs=%s (only %u CPUs" @@ -740,8 +718,6 @@ static void _validate_config(slurm_gres_context_t *context_ptr) int has_file = -1, rec_count = 0; iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) { if (gres_slurmd_conf->plugin_id != context_ptr->plugin_id) continue; @@ -778,7 +754,7 @@ extern int gres_plugin_node_config_devices_path(char **dev_path, char *gres_conf_file; gres_plugin_init(); - gres_conf_file = _get_gres_conf(); + gres_conf_file = get_extra_conf_path("gres.conf"); if (stat(gres_conf_file, &config_stat) < 0) { error("can't stat gres.conf file %s: %m", gres_conf_file); xfree(gres_conf_file); @@ -791,8 +767,6 @@ extern int gres_plugin_node_config_devices_path(char **dev_path, fatal("error opening/reading %s", gres_conf_file); FREE_NULL_LIST(gres_conf_list); gres_conf_list = list_create(_destroy_gres_slurmd_conf); - if (gres_conf_list == NULL) - fatal("list_create: malloc failure"); if (s_p_get_array((void ***) &gres_array, &count, "Name", tbl)) { if (count > array_len) { error("GRES device count exceeds array size (%d > %d)", @@ -825,9 +799,6 @@ static int _no_gres_conf(uint32_t cpu_cnt) slurm_mutex_lock(&gres_context_lock); FREE_NULL_LIST(gres_conf_list); gres_conf_list = list_create(_destroy_gres_slurmd_conf); - if (gres_conf_list == NULL) - fatal("list_create: malloc failure"); - p = 
xmalloc(sizeof(gres_slurmd_conf_t *) * gres_context_cnt); for (i = 0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { p = xmalloc(sizeof(gres_slurmd_conf_t)); p->cpu_cnt = cpu_cnt; @@ -868,7 +839,7 @@ extern int gres_plugin_node_config_load(uint32_t cpu_cnt) if (gres_context_cnt == 0) return SLURM_SUCCESS; - gres_conf_file = _get_gres_conf(); + gres_conf_file = get_extra_conf_path("gres.conf"); if (stat(gres_conf_file, &config_stat) < 0) { error("can't stat gres.conf file %s, assuming zero resource " "counts", gres_conf_file); @@ -883,8 +854,6 @@ extern int gres_plugin_node_config_load(uint32_t cpu_cnt) fatal("error opening/reading %s", gres_conf_file); FREE_NULL_LIST(gres_conf_list); gres_conf_list = list_create(_destroy_gres_slurmd_conf); - if (gres_conf_list == NULL) - fatal("list_create: malloc failure"); if (s_p_get_array((void ***) &gres_array, &count, "Name", tbl)) { for (i = 0; i < count; i++) { list_append(gres_conf_list, gres_array[i]); @@ -927,8 +896,6 @@ extern int gres_plugin_node_config_pack(Buf buffer) pack16(rec_cnt, buffer); if (rec_cnt) { iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) { pack32(magic, buffer); @@ -964,12 +931,8 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) FREE_NULL_LIST(gres_conf_list); gres_conf_list = list_create(_destroy_gres_slurmd_conf); - if (gres_conf_list == NULL) - fatal("list_create: malloc failure"); safe_unpack16(&version, buffer); - if (version != SLURM_PROTOCOL_VERSION) - return SLURM_ERROR; safe_unpack16(&rec_cnt, buffer); if (rec_cnt == 0) @@ -1204,8 +1167,6 @@ static int _node_config_init(char *node_name, char *orig_config, gres_data->gres_bit_alloc = bit_realloc(gres_data->gres_bit_alloc, gres_data->gres_cnt_avail); - if (gres_data->gres_bit_alloc == NULL) - fatal("bit_alloc: malloc failure"); } return rc; @@ -1229,8 +1190,6 @@ extern int 
gres_plugin_init_node_config(char *node_name, char *orig_config, slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { *gres_list = list_create(_gres_node_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); } for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { /* Find or create gres_state entry on the list */ @@ -1274,8 +1233,6 @@ static uint32_t _get_tot_gres_cnt(uint32_t plugin_id, uint32_t *set_cnt) return gres_cnt; iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) { if (gres_slurmd_conf->plugin_id != plugin_id) continue; @@ -1315,8 +1272,6 @@ extern int gres_gresid_to_gresname(uint32_t gres_id, char* gres_name, } iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) { if (gres_slurmd_conf->plugin_id != gres_id) continue; @@ -1379,32 +1334,22 @@ extern int _node_config_validate(char *node_name, char *orig_config, gres_data->topo_gres_cnt_alloc = xrealloc(gres_data->topo_gres_cnt_alloc, set_cnt * sizeof(uint32_t)); - if (gres_data->topo_gres_cnt_alloc == NULL) - fatal("xrealloc: malloc failure"); gres_data->topo_gres_cnt_avail = xrealloc(gres_data->topo_gres_cnt_avail, set_cnt * sizeof(uint32_t)); - if (gres_data->topo_gres_cnt_avail == NULL) - fatal("xrealloc: malloc failure"); for (i=0; i<gres_data->topo_cnt; i++) FREE_NULL_BITMAP(gres_data->topo_gres_bitmap[i]); gres_data->topo_gres_bitmap = xrealloc(gres_data->topo_gres_bitmap, set_cnt * sizeof(bitstr_t *)); - if (gres_data->topo_gres_bitmap == NULL) - fatal("xrealloc: malloc failure"); for (i=0; i<gres_data->topo_cnt; i++) FREE_NULL_BITMAP(gres_data->topo_cpus_bitmap[i]); gres_data->topo_cpus_bitmap = xrealloc(gres_data->topo_cpus_bitmap, set_cnt * sizeof(bitstr_t *)); - if 
(gres_data->topo_cpus_bitmap == NULL) - fatal("xrealloc: malloc failure"); gres_data->topo_cnt = set_cnt; iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); gres_inx = i = 0; while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) { @@ -1415,8 +1360,6 @@ extern int _node_config_validate(char *node_name, char *orig_config, gres_slurmd_conf->count; gres_data->topo_cpus_bitmap[i] = bit_alloc(gres_slurmd_conf->cpu_cnt); - if (gres_data->topo_cpus_bitmap[i] == NULL) - fatal("bit_alloc: malloc failure"); if (gres_slurmd_conf->cpus) { bit_unfmt(gres_data->topo_cpus_bitmap[i], gres_slurmd_conf->cpus); @@ -1428,8 +1371,6 @@ extern int _node_config_validate(char *node_name, char *orig_config, (gres_slurmd_conf->cpu_cnt - 1)); } gres_data->topo_gres_bitmap[i] = bit_alloc(gres_cnt); - if (gres_data->topo_gres_bitmap[i] == NULL) - fatal("bit_alloc: malloc failure"); for (j=0; j<gres_slurmd_conf->count; j++) { bit_set(gres_data->topo_gres_bitmap[i], gres_inx++); @@ -1466,8 +1407,6 @@ extern int _node_config_validate(char *node_name, char *orig_config, bit_realloc(gres_data->gres_bit_alloc, gres_data->gres_cnt_avail); } - if (gres_data->gres_bit_alloc == NULL) - fatal("bit_alloc: malloc failure"); } if ((fast_schedule < 2) && @@ -1537,8 +1476,6 @@ extern int gres_plugin_node_config_validate(char *node_name, slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { *gres_list = list_create(_gres_node_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); } for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { /* Find or create gres_state entry on the list */ @@ -1597,8 +1534,6 @@ static int _node_reconfig(char *node_name, char *orig_config, char **new_config, bit_realloc(gres_data->gres_bit_alloc, gres_data->gres_cnt_avail); } - if (gres_data->gres_bit_alloc == NULL) - fatal("bit_alloc: malloc failure"); } if ((fast_schedule < 2) && @@ 
-1644,8 +1579,6 @@ extern int gres_plugin_node_reconfig(char *node_name, slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { *gres_list = list_create(_gres_node_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); } for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { /* Find gres_state entry on the list */ @@ -1750,11 +1683,8 @@ extern int gres_plugin_node_state_unpack(List *gres_list, Buf buffer, rc = gres_plugin_init(); slurm_mutex_lock(&gres_context_lock); - if ((gres_context_cnt > 0) && (*gres_list == NULL)) { + if ((gres_context_cnt > 0) && (*gres_list == NULL)) *gres_list = list_create(_gres_node_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); - } while ((rc == SLURM_SUCCESS) && (rec_cnt)) { if ((buffer == NULL) || (remaining_buf(buffer) == 0)) @@ -1789,8 +1719,6 @@ extern int gres_plugin_node_state_unpack(List *gres_list, Buf buffer, if (has_bitmap) { gres_node_ptr->gres_bit_alloc = bit_alloc(gres_cnt_avail); - if (gres_node_ptr->gres_bit_alloc == NULL) - fatal("bit_alloc: malloc failure"); } gres_ptr = xmalloc(sizeof(gres_state_t)); gres_ptr->plugin_id = gres_context[i].plugin_id; @@ -1840,9 +1768,6 @@ static void *_node_state_dup(void *gres_data) bit_copy(gres_ptr->topo_cpus_bitmap[i]); new_gres->topo_gres_bitmap[i] = bit_copy(gres_ptr->topo_gres_bitmap[i]); - if ((new_gres->topo_cpus_bitmap[i] == NULL) || - (new_gres->topo_gres_bitmap[i] == NULL)) - fatal("bit_copy: malloc failure"); new_gres->topo_gres_cnt_alloc[i] = gres_ptr->topo_gres_cnt_alloc[i]; new_gres->topo_gres_cnt_avail[i] = @@ -1872,8 +1797,6 @@ extern List gres_plugin_node_state_dup(List gres_list) slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0)) { new_list = list_create(_gres_node_list_delete); - if (new_list == NULL) - fatal("list_create malloc failure"); } gres_iter = list_iterator_create(gres_list); while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { 
@@ -2150,11 +2073,8 @@ extern int gres_plugin_job_state_validate(char *req_config, List *gres_list) &gres_context[i]); if ((rc2 != SLURM_SUCCESS) || (job_gres_data == NULL)) continue; - if (*gres_list == NULL) { + if (*gres_list == NULL) *gres_list = list_create(_gres_job_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); - } gres_ptr = xmalloc(sizeof(gres_state_t)); gres_ptr->plugin_id = gres_context[i].plugin_id; gres_ptr->gres_data = job_gres_data; @@ -2254,8 +2174,6 @@ List gres_plugin_job_state_extract(List gres_list, int node_index) slurm_mutex_lock(&gres_context_lock); gres_iter = list_iterator_create(gres_list); - if (gres_iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { if (node_index == -1) new_gres_data = _job_state_dup(gres_ptr->gres_data); @@ -2267,8 +2185,6 @@ List gres_plugin_job_state_extract(List gres_list, int node_index) break; if (new_gres_list == NULL) { new_gres_list = list_create(_gres_job_list_delete); - if (new_gres_list == NULL) - fatal("list_create: malloc failure"); } new_gres_state = xmalloc(sizeof(gres_state_t)); new_gres_state->plugin_id = gres_ptr->plugin_id; @@ -2396,8 +2312,6 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer, slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { *gres_list = list_create(_gres_job_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); } while ((rc == SLURM_SUCCESS) && (rec_cnt)) { @@ -2491,8 +2405,6 @@ static bitstr_t *_cpu_bitmap_rebuild(bitstr_t *old_cpu_bitmap, int new_size) bitstr_t *new_cpu_bitmap; new_cpu_bitmap = bit_alloc(new_size); - if (new_cpu_bitmap == NULL) - fatal("bit_alloc: malloc failure"); old_size = bit_size(old_cpu_bitmap); if (old_size > new_size) { ratio = old_size / new_size; @@ -2547,6 +2459,43 @@ static void _validate_gres_node_cpus(gres_node_state_t *node_gres_ptr, } } +static void 
_job_core_filter(void *job_gres_data, void *node_gres_data, + bool use_total_gres, bitstr_t *cpu_bitmap, + int cpu_start_bit, int cpu_end_bit, + char *gres_name, char *node_name) +{ + int i, j, cpus_ctld; + gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data; + gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; + bitstr_t *avail_cpu_bitmap = NULL; + + if (!node_gres_ptr->topo_cnt || !cpu_bitmap || /* No topology info */ + !job_gres_ptr->gres_cnt_alloc) /* No job GRES */ + return; + + /* Determine which specific CPUs can be used */ + avail_cpu_bitmap = bit_copy(cpu_bitmap); + bit_nclear(avail_cpu_bitmap, cpu_start_bit, cpu_end_bit); + for (i = 0; i < node_gres_ptr->topo_cnt; i++) { + if (node_gres_ptr->topo_gres_cnt_avail[i] == 0) + continue; + if (!use_total_gres && + (node_gres_ptr->topo_gres_cnt_alloc[i] >= + node_gres_ptr->topo_gres_cnt_avail[i])) + continue; + cpus_ctld = cpu_end_bit - cpu_start_bit + 1; + _validate_gres_node_cpus(node_gres_ptr, cpus_ctld, node_name); + cpus_ctld = bit_size(node_gres_ptr->topo_cpus_bitmap[i]); + for (j = 0; j < cpus_ctld; j++) { + if (bit_test(node_gres_ptr->topo_cpus_bitmap[i], j)) { + bit_set(avail_cpu_bitmap, cpu_start_bit + j); + } + } + } + bit_and(cpu_bitmap, avail_cpu_bitmap); + FREE_NULL_BITMAP(avail_cpu_bitmap); +} + extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, bool use_total_gres, bitstr_t *cpu_bitmap, int cpu_start_bit, int cpu_end_bit, bool *topo_set, @@ -2557,6 +2506,7 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; uint32_t *cpus_avail = NULL, cpu_cnt = 0; bitstr_t *alloc_cpu_bitmap = NULL; + bool test_cpu_map = true; if (job_gres_ptr->gres_cnt_alloc && node_gres_ptr->topo_cnt && *topo_set) { @@ -2651,12 +2601,21 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, while (gres_avail < job_gres_ptr->gres_cnt_alloc) { top_inx = -1; for (j=0; 
j<node_gres_ptr->topo_cnt; j++) { + if (gres_avail && test_cpu_map && + !bit_overlap(alloc_cpu_bitmap, + node_gres_ptr-> + topo_cpus_bitmap[j])) + continue; if (top_inx == -1) { if (cpus_avail[j]) top_inx = j; } else if (cpus_avail[j] > cpus_avail[top_inx]) top_inx = j; } + if ((top_inx < 0) && gres_avail && test_cpu_map) { + test_cpu_map = false; + continue; + } if ((top_inx < 0) || (cpus_avail[top_inx] == 0)) { cpu_cnt = 0; break; @@ -2673,9 +2632,16 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, continue; } /* update counts of allocated CPUs and GRES */ + if (gres_avail) { + bit_or(alloc_cpu_bitmap, + node_gres_ptr-> + topo_cpus_bitmap[top_inx]); + } else { + bit_and(alloc_cpu_bitmap, + node_gres_ptr-> + topo_cpus_bitmap[top_inx]); + } gres_avail += i; - bit_and(alloc_cpu_bitmap, - node_gres_ptr->topo_cpus_bitmap[top_inx]); cpu_cnt = bit_set_count(alloc_cpu_bitmap); } if (cpu_bitmap && (cpu_cnt > 0)) { @@ -2698,6 +2664,71 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, } } +/* + * Clear the cpu_bitmap for CPUs which are not usable by this job (i.e. 
for + * CPUs which are already bound to other jobs or lack GRES) + * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate() + * IN node_gres_list - node's gres_list built by + * gres_plugin_node_config_validate() + * IN use_total_gres - if set then consider all gres resources as available, + * and none are commited to running jobs + * IN/OUT cpu_bitmap - Identification of available CPUs (NULL if no restriction) + * IN cpu_start_bit - index into cpu_bitmap for this node's first CPU + * IN cpu_end_bit - index into cpu_bitmap for this node's last CPU + */ +extern void gres_plugin_job_core_filter(List job_gres_list, List node_gres_list, + bool use_total_gres, + bitstr_t *cpu_bitmap, + int cpu_start_bit, int cpu_end_bit, + char *node_name) +{ + int i; + ListIterator job_gres_iter, node_gres_iter; + gres_state_t *job_gres_ptr, *node_gres_ptr; + + if ((job_gres_list == NULL) || (cpu_bitmap == NULL)) + return; + if (node_gres_list == NULL) { + bit_nclear(cpu_bitmap, cpu_start_bit, cpu_end_bit); + return; + } + + (void) gres_plugin_init(); + + slurm_mutex_lock(&gres_context_lock); + job_gres_iter = list_iterator_create(job_gres_list); + while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) { + node_gres_iter = list_iterator_create(node_gres_list); + while ((node_gres_ptr = (gres_state_t *) + list_next(node_gres_iter))) { + if (job_gres_ptr->plugin_id == node_gres_ptr->plugin_id) + break; + } + list_iterator_destroy(node_gres_iter); + if (node_gres_ptr == NULL) { + /* node lack resources required by the job */ + bit_nclear(cpu_bitmap, cpu_start_bit, cpu_end_bit); + break; + } + + for (i = 0; i < gres_context_cnt; i++) { + if (job_gres_ptr->plugin_id != + gres_context[i].plugin_id) + continue; + _job_core_filter(job_gres_ptr->gres_data, + node_gres_ptr->gres_data, + use_total_gres, cpu_bitmap, + cpu_start_bit, cpu_end_bit, + gres_context[i].gres_name, node_name); + break; + } + } + list_iterator_destroy(job_gres_iter); + 
slurm_mutex_unlock(&gres_context_lock); + + return; +} + /* * Determine how many CPUs on the node can be used by this job * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate() @@ -2726,7 +2757,7 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, if (job_gres_list == NULL) return NO_VAL; if (node_gres_list == NULL) - return NO_VAL; + return 0; cpu_cnt = NO_VAL; (void) gres_plugin_init(); @@ -2801,7 +2832,7 @@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, char *gres_name, uint32_t job_id, char *node_name, bitstr_t *core_bitmap) { - int i; + int i, sz1, sz2; uint32_t gres_cnt; gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data; gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; @@ -2845,6 +2876,8 @@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, /* proceed with request, give job what's available */ } + if (node_offset == 0) /* Avoids memory leak on requeue */ + xfree(job_gres_ptr->gres_cnt_step_alloc); if (job_gres_ptr->gres_cnt_step_alloc == NULL) { job_gres_ptr->gres_cnt_step_alloc = xmalloc(sizeof(uint32_t) * node_cnt); @@ -2859,8 +2892,6 @@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, node_gres_ptr->gres_bit_alloc = bit_copy(job_gres_ptr-> gres_bit_alloc[node_offset]); - if (node_gres_ptr->gres_bit_alloc == NULL) - fatal("bit_copy: malloc failure"); node_gres_ptr->gres_cnt_alloc += bit_set_count(node_gres_ptr->gres_bit_alloc); } else if (node_gres_ptr->gres_bit_alloc) { @@ -2879,8 +2910,6 @@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, } else if (node_gres_ptr->gres_bit_alloc) { job_gres_ptr->gres_bit_alloc[node_offset] = bit_alloc(node_gres_ptr->gres_cnt_avail); - if (job_gres_ptr->gres_bit_alloc[node_offset] == NULL) - fatal("bit_copy: malloc failure"); for (i=0; i<node_gres_ptr->gres_cnt_avail && gres_cnt>0; i++) { if (bit_test(node_gres_ptr->gres_bit_alloc, i)) continue; @@ -2919,6 +2948,14 
@@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, !bit_overlap(core_bitmap, node_gres_ptr->topo_cpus_bitmap[i])) continue; + sz1 = bit_size(job_gres_ptr->gres_bit_alloc[node_offset]); + sz2 = bit_size(node_gres_ptr->topo_gres_bitmap[i]); + if (sz1 != sz2) { + /* Avoid abort on bit_overlap below */ + error("Gres count mismatch for node %s " + "(%d != %d)", node_name, sz1, sz2); + continue; + } gres_cnt = bit_overlap(job_gres_ptr-> gres_bit_alloc[node_offset], node_gres_ptr-> @@ -2950,7 +2987,7 @@ extern int _job_alloc(void *job_gres_data, void *node_gres_data, * Allocate resource to a job and update node and job gres information * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate() * IN node_gres_list - node's gres_list built by - * gres_plugin_node_config_validate() + * gres_plugin_node_config_validate() * IN node_cnt - total number of nodes originally allocated to the job * IN node_offset - zero-origin index to the node of interest * IN cpu_cnt - number of CPUs allocated to this job on this node @@ -3027,7 +3064,7 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, int node_offset, char *gres_name, uint32_t job_id, char *node_name) { - int i, len, gres_cnt; + int i, len, gres_cnt, sz1, sz2; gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data; gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; @@ -3087,6 +3124,10 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, node_gres_ptr->topo_gres_bitmap && node_gres_ptr->topo_gres_cnt_alloc) { for (i=0; i<node_gres_ptr->topo_cnt; i++) { + sz1 = bit_size(job_gres_ptr->gres_bit_alloc[node_offset]); + sz2 = bit_size(node_gres_ptr->topo_gres_bitmap[i]); + if (sz1 != sz2) + continue; gres_cnt = bit_overlap(job_gres_ptr-> gres_bit_alloc[node_offset], node_gres_ptr-> @@ -3112,7 +3153,6 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, } } - xfree(job_gres_ptr->gres_cnt_step_alloc); return 
SLURM_SUCCESS; } @@ -3231,8 +3271,6 @@ extern void gres_plugin_job_merge(List from_job_gres_list, if (!to_job_gres_list) goto step2; gres_iter = list_iterator_create(to_job_gres_list); - if (!gres_iter) - fatal("list_iterator_create: malloc failure"); while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { gres_job_ptr = (gres_job_state_t *) gres_ptr->gres_data; new_gres_bit_alloc = xmalloc(sizeof(bitstr_t *) * @@ -3241,9 +3279,6 @@ extern void gres_plugin_job_merge(List from_job_gres_list, new_node_cnt); new_gres_cnt_step_alloc = xmalloc(sizeof(uint32_t) * new_node_cnt); - if (!new_gres_bit_alloc || !new_gres_bit_step_alloc || - !new_gres_cnt_step_alloc) - fatal("malloc failure"); from_inx = to_inx = new_inx = -1; for (i = i_first; i <= i_last; i++) { @@ -3292,12 +3327,8 @@ step2: if (!from_job_gres_list) goto step3; if (!to_job_gres_list) { to_job_gres_list = list_create(_gres_job_list_delete); - if (!to_job_gres_list) - fatal("list_create: malloc failure"); } gres_iter = list_iterator_create(from_job_gres_list); - if (!gres_iter) - fatal("list_iterator_create: malloc failure"); while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { gres_job_ptr = (gres_job_state_t *) gres_ptr->gres_data; gres_ptr2 = list_find_first(to_job_gres_list, _gres_find_id, @@ -3312,11 +3343,11 @@ step2: if (!from_job_gres_list) gres_job_ptr2->gres_cnt_alloc = gres_job_ptr-> gres_cnt_alloc; gres_job_ptr2->node_cnt = new_node_cnt; - gres_job_ptr2->gres_bit_alloc = + gres_job_ptr2->gres_bit_alloc = xmalloc(sizeof(bitstr_t *) * new_node_cnt); - gres_job_ptr2->gres_bit_step_alloc = + gres_job_ptr2->gres_bit_step_alloc = xmalloc(sizeof(bitstr_t *) * new_node_cnt); - gres_job_ptr2->gres_cnt_step_alloc = + gres_job_ptr2->gres_cnt_step_alloc = xmalloc(sizeof(uint32_t) * new_node_cnt); list_append(to_job_gres_list, gres_ptr2); } @@ -3531,9 +3562,7 @@ extern void gres_plugin_job_state_file(List gres_list, int *gres_bit_alloc, slurm_mutex_lock(&gres_context_lock); gres_iter = 
list_iterator_create(gres_list); - if (!gres_iter) - fatal("list_iterator_create: malloc failure"); - + for (j=0; j<gres_context_cnt; j++) { found = 0; list_iterator_reset(gres_iter); @@ -3646,7 +3675,7 @@ static uint32_t _step_test(void *step_gres_data, void *job_gres_data, job_gres_ptr->gres_cnt_step_alloc[node_offset])) return 0; } else { - error("gres/%s: step_test %u.%u gres_bit_alloc is NULL", + error("gres/%s: step_test %u.%u gres_cnt_step_alloc is NULL", gres_name, job_id, step_id); return 0; } @@ -3729,8 +3758,6 @@ extern int gres_plugin_step_state_validate(char *req_config, /* Now make sure the step's request isn't too big for * the job's gres allocation */ job_gres_iter = list_iterator_create(job_gres_list); - if (job_gres_iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) { if (job_gres_ptr->plugin_id == @@ -3760,8 +3787,6 @@ extern int gres_plugin_step_state_validate(char *req_config, if (*step_gres_list == NULL) { *step_gres_list = list_create( _gres_step_list_delete); - if (*step_gres_list == NULL) - fatal("list_create malloc failure"); } step_gres_ptr = xmalloc(sizeof(gres_state_t)); step_gres_ptr->plugin_id = gres_context[i].plugin_id; @@ -3864,8 +3889,6 @@ List gres_plugin_step_state_extract(List gres_list, int node_index) slurm_mutex_lock(&gres_context_lock); gres_iter = list_iterator_create(gres_list); - if (!gres_iter) - fatal("list_iterator_create: malloc failure"); while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { if (node_index == -1) new_gres_data = _step_state_dup(gres_ptr->gres_data); @@ -3875,8 +3898,6 @@ List gres_plugin_step_state_extract(List gres_list, int node_index) } if (new_gres_list == NULL) { new_gres_list = list_create(_gres_step_list_delete); - if (new_gres_list == NULL) - fatal("list_create: malloc failure"); } new_gres_state = xmalloc(sizeof(gres_state_t)); new_gres_state->plugin_id = gres_ptr->plugin_id; @@ -3916,8 +3937,6 @@ void 
gres_plugin_step_state_rebase(List gres_list, slurm_mutex_lock(&gres_context_lock); gres_iter = list_iterator_create(gres_list); - if (!gres_iter) - fatal("list_iterator_create: malloc failure"); while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { gres_step_ptr = (gres_step_state_t *) gres_ptr->gres_data; if (!gres_step_ptr) @@ -3939,8 +3958,6 @@ void gres_plugin_step_state_rebase(List gres_list, continue; } new_node_in_use = bit_alloc(new_node_cnt); - if (!new_node_in_use) - fatal("bit_alloc: malloc failure"); old_inx = new_inx = -1; for (i = i_first; i <= i_last; i++) { @@ -4075,8 +4092,6 @@ extern int gres_plugin_step_state_unpack(List *gres_list, Buf buffer, slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { *gres_list = list_create(_gres_step_list_delete); - if (*gres_list == NULL) - fatal("list_create malloc failure"); } while ((rc == SLURM_SUCCESS) && (rec_cnt)) { @@ -4362,8 +4377,6 @@ static int _step_alloc(void *step_gres_data, void *job_gres_data, step_gres_ptr->node_cnt = job_gres_ptr->node_cnt; if (step_gres_ptr->node_in_use == NULL) { step_gres_ptr->node_in_use = bit_alloc(job_gres_ptr->node_cnt); - if (step_gres_ptr->node_in_use == NULL) - fatal("bit_alloc malloc failure"); } bit_set(step_gres_ptr->node_in_use, node_offset); job_gres_ptr->gres_cnt_step_alloc[node_offset] += @@ -4377,8 +4390,6 @@ static int _step_alloc(void *step_gres_data, void *job_gres_data, } gres_bit_alloc = bit_copy(job_gres_ptr->gres_bit_alloc[node_offset]); - if (gres_bit_alloc == NULL) - fatal("bit_copy malloc failure"); if (job_gres_ptr->gres_bit_step_alloc && job_gres_ptr->gres_bit_step_alloc[node_offset]) { bit_not(job_gres_ptr->gres_bit_step_alloc[node_offset]); @@ -4647,8 +4658,6 @@ extern uint32_t gres_get_value_by_type(List job_gres_list, char* gres_name) slurm_mutex_lock(&gres_context_lock); job_gres_iter = list_iterator_create(job_gres_list); - if (!job_gres_iter) - fatal("list_iterator_create: malloc failure"); while 
((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) { for (i=0; i<gres_context_cnt; i++) { if (job_gres_ptr->plugin_id != plugin_id) @@ -4692,8 +4701,6 @@ extern int gres_num_gres_alloced_all(List gres_list, int arrlen, slurm_mutex_lock(&gres_context_lock); node_gres_iter = list_iterator_create(gres_list); - if (!node_gres_iter) - fatal("list_iterator_create: malloc failure"); while ((node_gres_ptr = (gres_state_t*) list_next(node_gres_iter))) { gres_node_state_t *node_gres_state_ptr; val = 0; @@ -4727,7 +4734,7 @@ extern int gres_num_gres_alloced_all(List gres_list, int arrlen, return rc; } -extern void gres_plugin_step_state_file(List gres_list, int *gres_bit_alloc, +extern void gres_plugin_step_state_file(List gres_list, int *gres_bit_alloc, int *gres_count) { int i, j, p, gres_cnt = 0, len, found; @@ -4741,8 +4748,6 @@ extern void gres_plugin_step_state_file(List gres_list, int *gres_bit_alloc, slurm_mutex_lock(&gres_context_lock); gres_iter = list_iterator_create(gres_list); - if (!gres_iter) - fatal("list_iterator_create: malloc failure"); for (j=0; j<gres_context_cnt; j++) { found = 0; diff --git a/src/common/gres.h b/src/common/gres.h index 65d2470c8ac84e82c2b869d3a396417e75eacaec..93fd7c5ae6404796de618b7230fb35fff1a3debb 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -362,6 +362,25 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer, uint32_t job_id, uint16_t protocol_version); +/* + * Clear the cpu_bitmap for CPUs which are not usable by this job (i.e. 
for + * CPUs which are already bound to other jobs or lack GRES) + * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate() + * IN node_gres_list - node's gres_list built by + * gres_plugin_node_config_validate() + * IN use_total_gres - if set then consider all gres resources as available, + * and none are commited to running jobs + * IN/OUT cpu_bitmap - Identification of available CPUs (NULL if no restriction) + * IN cpu_start_bit - index into cpu_bitmap for this node's first CPU + * IN cpu_end_bit - index into cpu_bitmap for this node's last CPU + * IN node_name - name of the node (for logging) + */ +extern void gres_plugin_job_core_filter(List job_gres_list, List node_gres_list, + bool use_total_gres, + bitstr_t *cpu_bitmap, + int cpu_start_bit, int cpu_end_bit, + char *node_name); + /* * Determine how many CPUs on the node can be used by this job * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate() @@ -386,7 +405,7 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, * Allocate resource to a job and update node and job gres information * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate() * IN node_gres_list - node's gres_list built by - * gres_plugin_node_config_validate() + * gres_plugin_node_config_validate() * IN node_cnt - total number of nodes originally allocated to the job * IN node_offset - zero-origin index to the node of interest * IN cpu_cnt - number of CPUs allocated to this job on this node diff --git a/src/common/hostlist.c b/src/common/hostlist.c index f2c0f74ce98eb66a466ffb3f3b51a54991045814..0416c55f2cec2f71684b0793d0a43b90ca1961a4 100644 --- a/src/common/hostlist.c +++ b/src/common/hostlist.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -77,6 +77,7 @@ #include "src/common/working_cluster.h" #include "src/common/xassert.h" #include "src/common/xmalloc.h" +#include "src/common/bitstring.h" /* * Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -146,11 +147,10 @@ strong_alias(hostset_nth, slurm_hostset_nth); extern void lsd_fatal_error(char *file, int line, char *mesg); #else /* !WITH_LSD_FATAL_ERROR_FUNC */ # ifndef lsd_fatal_error -# define lsd_fatal_error(file, line, mesg) \ - do { \ - fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", \ - file, line, mesg, strerror(errno)); \ - } while (0) + static void lsd_fatal_error(char *file, int line, char *mesg) + { + log_fatal(file, line, mesg, strerror(errno)); + } # endif /* !lsd_fatal_error */ #endif /* !WITH_LSD_FATAL_ERROR_FUNC */ @@ -162,7 +162,12 @@ extern void lsd_fatal_error(char *file, int line, char *mesg); extern void * lsd_nomem_error(char *file, int line, char *mesg); #else /* !WITH_LSD_NOMEM_ERROR_FUNC */ # ifndef lsd_nomem_error -# define lsd_nomem_error(file, line, mesg) (NULL) + static void * lsd_nomem_error(char *file, int line, char *mesg) + { + log_oom(file, line, mesg); + abort(); + return NULL; + } # endif /* !lsd_nomem_error */ #endif /* !WITH_LSD_NOMEM_ERROR_FUNC */ @@ -173,7 +178,6 @@ extern void * lsd_nomem_error(char *file, int line, char *mesg); */ #define out_of_memory(mesg) \ do { \ - fatal("malloc failure"); \ errno = ENOMEM; \ return(lsd_nomem_error(__FILE__, __LINE__, mesg)); \ } while (0) @@ -189,7 +193,7 @@ extern void * lsd_nomem_error(char *file, int line, char *mesg); #define MAX_RANGE (64*1024) /* 64K Hosts */ /* max number of ranges that will be processed between brackets */ -#define MAX_RANGES (12*1024) /* 12K Ranges */ +#define MAX_RANGES (64*1024) /* 64K Hosts */ /* size of internal hostname buffer (+ some slop), hostnames will probably * be truncated if longer than 
MAXHOSTNAMELEN */ @@ -293,18 +297,13 @@ struct _range { char *alpha_num = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; /* logic for block node description */ - -/* to speed things up we will do some calculations once to avoid - * having to do it multiple times. We need to calculate the size of - * the maximum sized array for each dimension. This way we can be - * prepared for any size coming in. - */ -static bool *grid = NULL; +static bitstr_t *bit_grid = NULL; static int grid_start[HIGHEST_DIMENSIONS]; static int grid_end[HIGHEST_DIMENSIONS]; static int offset[HIGHEST_DIMENSIONS]; static int dim_grid_size = -1; +static uint64_t grid_size = 1; /* used to protect the above grid, grid_start, and grid_end. */ static pthread_mutex_t multi_dim_lock = PTHREAD_MUTEX_INITIALIZER; @@ -664,7 +663,7 @@ static hostname_t hostname_create_dims(const char *hostname, int dims) hn->suffix = hn->hostname + idx + 1; - if((dims > 1) && (strlen(hn->suffix) != dims)) + if ((dims > 1) && (strlen(hn->suffix) != dims)) hostlist_base = 10; hn->num = strtoul(hn->suffix, &p, hostlist_base); @@ -1091,8 +1090,8 @@ static hostrange_t hostrange_intersect(hostrange_t h1, hostrange_t h2) assert(hostrange_cmp(h1, h2) <= 0); - if ((hostrange_prefix_cmp(h1, h2) == 0) - && (h1->hi > h2->lo) + if ((h1->hi > h2->lo) + && (hostrange_prefix_cmp(h1, h2) == 0) && (hostrange_width_combine(h1, h2))) { if (!(new = hostrange_copy(h1))) @@ -1409,8 +1408,8 @@ static int hostlist_push_range(hostlist_t hl, hostrange_t hr) goto error; if (hl->nranges > 0 - && hostrange_prefix_cmp(tail, hr) == 0 && tail->hi == hr->lo - 1 + && hostrange_prefix_cmp(tail, hr) == 0 && hostrange_width_combine(tail, hr)) { tail->hi = hr->hi; } else { @@ -1653,7 +1652,7 @@ hostlist_t _hostlist_create(const char *hostlist, char *sep, char *r_op, } done: - if(orig) + if (orig) free(orig); return new; @@ -1680,7 +1679,7 @@ static int _parse_box_range(char *str, struct _range *ranges, char coord2[dims+1]; int i, a; - if(dims <= 1) + if (dims <= 1) 
fatal("Unsupported dimensions count %d", dims); if ((str[dims] != 'x') || @@ -1740,12 +1739,12 @@ static int _parse_single_range(const char *str, struct _range *range, int dims) range->width = strlen(str); - if(dims > 1) { + if (dims > 1) { /* If we get something here where the width is not SYSTEM_DIMENSIONS we need to treat it as a regular number since that is how it will be treated in the future. */ - if(range->width != dims) + if (range->width != dims) hostlist_base = 10; } range->lo = strtoul(str, &q, hostlist_base); @@ -1829,21 +1828,25 @@ _push_range_list(hostlist_t hl, char *prefix, struct _range *range, strncpy(tmp_prefix, prefix, sizeof(tmp_prefix)); if (((p = strrchr(tmp_prefix, '[')) != NULL) && ((q = strrchr(p, ']')) != NULL)) { - struct _range prefix_range[MAX_RANGES]; + struct _range *prefix_range; struct _range *saved_range = range, *pre_range = prefix_range; unsigned long j, prefix_cnt = 0; *p++ = '\0'; *q++ = '\0'; if (strrchr(tmp_prefix, '[') != NULL) return -1; /* third range is illegal */ + prefix_range = xmalloc(sizeof(struct _range) * MAX_RANGES); nr = _parse_range_list(p, prefix_range, MAX_RANGES, dims); - if (nr < 0) + if (nr < 0) { + xfree(prefix_range); return -1; /* bad numeric expression */ + } for (i = 0; i < nr; i++) { prefix_cnt += pre_range->hi - pre_range->lo + 1; if (prefix_cnt > MAX_PREFIX_CNT) { /* Prevent overflow of memory with user input * of something like "a[0-999999999].b[0-9]" */ + xfree(prefix_range); return -1; } for (j = pre_range->lo; j <= pre_range->hi; j++) { @@ -1860,6 +1863,7 @@ _push_range_list(hostlist_t hl, char *prefix, struct _range *range, } pre_range++; } + xfree(prefix_range); return 0; } @@ -1880,7 +1884,7 @@ _hostlist_create_bracketed(const char *hostlist, char *sep, char *r_op, int dims) { hostlist_t new = hostlist_new(); - struct _range ranges[MAX_RANGES]; + struct _range *ranges; int nr, err; char *p, *tok, *str, *orig; char cur_tok[1024]; @@ -1893,6 +1897,7 @@ _hostlist_create_bracketed(const char 
*hostlist, char *sep, return NULL; } + ranges = xmalloc(sizeof(struct _range) * MAX_RANGES); while ((tok = _next_tok(sep, &str)) != NULL) { strncpy(cur_tok, tok, 1024); if ((p = strrchr(tok, '[')) != NULL) { @@ -1917,7 +1922,7 @@ _hostlist_create_bracketed(const char *hostlist, char *sep, * wanted. We will just tack one on * the end. */ strcat(cur_tok, "]"); - if(prefix && prefix[0]) + if (prefix && prefix[0]) hostlist_push_host_dims( new, cur_tok, dims); else @@ -1928,6 +1933,7 @@ _hostlist_create_bracketed(const char *hostlist, char *sep, } else hostlist_push_host_dims(new, cur_tok, dims); } + xfree(ranges); free(orig); return new; @@ -2098,7 +2104,7 @@ static void hostlist_shift_iterators(hostlist_t hl, int idx, int depth, int n) { hostlist_iterator_t i; - if(!hl) { + if (!hl) { error("hostlist_shift_iterators: no hostlist given"); return; } @@ -2121,7 +2127,7 @@ char *hostlist_shift(hostlist_t hl) { char *host = NULL; - if(!hl){ + if (!hl){ error("hostlist_shift: no hostlist given"); return NULL; } @@ -2153,7 +2159,7 @@ char *hostlist_pop_range(hostlist_t hl) hostlist_t hltmp; hostrange_t tail; - if(!hl) + if (!hl) return NULL; LOCK_HOSTLIST(hl); if (hl->nranges < 1 || !(hltmp = hostlist_new())) { @@ -2228,7 +2234,7 @@ int hostlist_delete(hostlist_t hl, const char *hosts) int n = 0; char *hostname = NULL; hostlist_t hltmp; - if(!hl) + if (!hl) return -1; if (!(hltmp = hostlist_create(hosts))) @@ -2249,7 +2255,7 @@ int hostlist_delete_host(hostlist_t hl, const char *hostname) { int n; - if(!hl) + if (!hl) return -1; n = hostlist_find(hl, hostname); @@ -2296,7 +2302,7 @@ char * hostlist_nth(hostlist_t hl, int n) char *host = NULL; int i, count; - if(!hl) + if (!hl) return NULL; LOCK_HOSTLIST(hl); count = 0; @@ -2320,7 +2326,7 @@ int hostlist_delete_nth(hostlist_t hl, int n) { int i, count; - if(!hl) + if (!hl) return -1; LOCK_HOSTLIST(hl); assert(n >= 0 && n <= hl->nhosts); @@ -2358,7 +2364,7 @@ done: int hostlist_count(hostlist_t hl) { int retval; - if(!hl) + if 
(!hl) return -1; LOCK_HOSTLIST(hl); @@ -2442,8 +2448,8 @@ static void hostlist_collapse(hostlist_t hl) hostrange_t hprev = hl->hr[i - 1]; hostrange_t hnext = hl->hr[i]; - if (hostrange_prefix_cmp(hprev, hnext) == 0 && - hprev->hi == hnext->lo - 1 && + if (hprev->hi == hnext->lo - 1 && + hostrange_prefix_cmp(hprev, hnext) == 0 && hostrange_width_combine(hprev, hnext)) { hprev->hi = hnext->hi; hostlist_delete_range(hl, i); @@ -2555,6 +2561,8 @@ char *hostlist_deranged_string_malloc(hostlist_t hl) buf_size *= 2; buf = realloc(buf, buf_size); } + if (buf == NULL) + out_of_memory("hostlist_deranged_string_malloc"); return buf; } @@ -2705,15 +2713,15 @@ static int _tell_if_used(int dim, int curr, for (last[dim]=start[dim]; last[dim]<=grid_end[dim]; last[dim]++) { curr = start_curr + (last[dim] * offset[dim]); - if(dim == (dims-1)) { - if (!grid[curr]) { + if (dim == (dims-1)) { + if (!bit_test(bit_grid, curr)) { /* for(i = 0; i<dims; i++) { */ /* coord[i] = alpha_num[last[i]]; */ /* } */ /* info("%s not used", coord); */ - if((*found) == -1) + if ((*found) == -1) continue; - else if(end[dim] < grid_end[dim]) { + else if (end[dim] < grid_end[dim]) { /* try to get a box out of this slice. 
*/ grid_end[dim] = end[dim]; @@ -2725,7 +2733,7 @@ static int _tell_if_used(int dim, int curr, /* coord[i] = alpha_num[last[i]]; */ /* } */ /* info("%s used", coord); */ - if((*found) == -1) { + if ((*found) == -1) { /* for(i = 0; i<dims; i++) { */ /* coord[i] = alpha_num[last[i]]; */ /* } */ @@ -2733,7 +2741,7 @@ static int _tell_if_used(int dim, int curr, memcpy(start, last, dim_grid_size); memcpy(end, last, dim_grid_size); (*found) = dims; - } else if((*found) >= dim) { + } else if ((*found) >= dim) { /* for(i = 0; i<dims; i++) { */ /* coord[i] = alpha_num[last[i]]; */ /* } */ @@ -2742,19 +2750,19 @@ static int _tell_if_used(int dim, int curr, (*found) = dim; } } else { - if((rc = _tell_if_used(dim+1, curr, + if ((rc = _tell_if_used(dim+1, curr, start, end, last, found, dims)) != 1) { return rc; } - if((*found) >= dim) { + if ((*found) >= dim) { /* for(i = 0; i<dims; i++) { */ /* coord[i] = alpha_num[last[i]]; */ /* } */ /* info("%d here %s", dim, coord); */ memcpy(end, last, dim_grid_size); (*found) = dim; - } else if((*found) == -1) + } else if ((*found) == -1) start[dim] = grid_start[dim]; } } @@ -2782,7 +2790,7 @@ static int _get_next_box(int *start, int *end, int dims) /* memset(coord2, 0, sizeof(coord2)); */ again: - if(start[0] == -1) { + if (start[0] == -1) { memcpy(start, grid_start, dim_grid_size); /* We need to keep track of this to make sure we get all the nodes marked since this could change based @@ -2819,7 +2827,7 @@ again: _set_min_max_of_grid(0, 0, grid_start, orig_grid_end, new_min, new_max, pos, dims); - if(new_max[0] != -1) { + if (new_max[0] != -1) { /* for(i = 0; i<dims; i++) { */ /* coord[i] = alpha_num[new_min[i]]; */ /* coord2[i] = alpha_num[new_max[i]]; */ @@ -2832,7 +2840,7 @@ again: /* for(i = 0; i<dims; i++) */ /* coord[i] = alpha_num[last[i]]; */ /* info("next start %s", coord); */ - if(found == -1) { + if (found == -1) { /* There are still nodes set in the grid, so we need to go through them again to make sure we got all the nodes 
that weren't included in the boxes of @@ -2841,7 +2849,7 @@ again: } } - if(found != -1) + if (found != -1) rc = 1; return rc; @@ -2870,7 +2878,7 @@ _get_boxes(char *buf, int max_len, int dims, int brackets) curr_min[0] = -1; /* for(i=0; i<HOSTLIST_BASE*HOSTLIST_BASE*HOSTLIST_BASE*HOSTLIST_BASE; i++) { */ -/* if(grid[i]) */ +/* if (grid[i]) */ /* info("got one at %d", i); */ /* } */ @@ -2880,30 +2888,30 @@ _get_boxes(char *buf, int max_len, int dims, int brackets) /* coord2[i] = alpha_num[curr_max[i]]; */ /* } */ /* info("%sx%s is a box", coord, coord2); */ - if(!memcmp(curr_min, curr_max, dim_grid_size)) { + if (!memcmp(curr_min, curr_max, dim_grid_size)) { for(i = 0; i<dims; i++) { - if(len >= max_len) + if (len >= max_len) goto end_it; buf[len++] = alpha_num[curr_min[i]]; } - if(len >= max_len) + if (len >= max_len) goto end_it; buf[len++] = ','; } else { for(i = 0; i<dims; i++) { - if(len >= max_len) + if (len >= max_len) goto end_it; buf[len++] = alpha_num[curr_min[i]]; } - if(len >= max_len) + if (len >= max_len) goto end_it; buf[len++] = 'x'; for(i = 0; i<dims; i++) { - if(len >= max_len) + if (len >= max_len) goto end_it; buf[len++] = alpha_num[curr_max[i]]; } - if(len >= max_len) + if (len >= max_len) goto end_it; buf[len++] = ','; } @@ -2929,9 +2937,12 @@ _set_box_in_grid(int dim, int curr, int *start, for (i=start[dim]; i<=end[dim]; i++) { curr = start_curr + (i * offset[dim]); - if(dim == (dims-1)) - grid[curr] = value; - else + if (dim == (dims-1)) { + if (value) + bit_set(bit_grid, curr); + else + bit_clear(bit_grid, curr); + } else _set_box_in_grid(dim+1, curr, start, end, value, dims); } @@ -2949,7 +2960,7 @@ static int _add_box_ranges(int dim, int curr, for (pos[dim]=start[dim]; pos[dim]<=end[dim]; pos[dim]++) { curr = start_curr + (pos[dim] * offset[dim]); - if(dim == (dims-2)) { + if (dim == (dims-2)) { char new_str[(dims*2)+2]; memset(new_str, 0, sizeof(new_str)); @@ -2975,7 +2986,7 @@ static int _add_box_ranges(int dim, int curr, return 0; 
(*count)++; } else - if(!_add_box_ranges(dim+1, curr, start, end, pos, + if (!_add_box_ranges(dim+1, curr, start, end, pos, ranges, len, count, dims)) return 0; } @@ -2995,8 +3006,8 @@ static void _set_min_max_of_grid(int dim, int curr, for (pos[dim]=start[dim]; pos[dim]<=end[dim]; pos[dim]++) { curr = start_curr + (pos[dim] * offset[dim]); - if(dim == (dims-1)) { - if(!grid[curr]) + if (dim == (dims-1)) { + if (!bit_test(bit_grid, curr)) continue; for(i = 0; i<dims; i++) { min[i] = MIN(min[i], pos[i]); @@ -3041,11 +3052,11 @@ _test_box_in_grid(int dim, int curr, for (i=start[dim]; i<=end[dim]; i++) { curr = start_curr + (i * offset[dim]); - if(dim == (dims-1)) { - if(!grid[curr]) + if (dim == (dims-1)) { + if (!bit_test(bit_grid, curr)) return false; } else { - if(!_test_box_in_grid(dim+1, curr, start, end, dims)) + if (!_test_box_in_grid(dim+1, curr, start, end, dims)) return false; } } @@ -3076,6 +3087,8 @@ char *hostlist_ranged_string_malloc(hostlist_t hl) buf_size *= 2; buf = realloc(buf, buf_size); } + if (buf == NULL) + out_of_memory("hostlist_ranged_string_malloc"); return buf; } @@ -3108,17 +3121,16 @@ ssize_t hostlist_ranged_string_dims(hostlist_t hl, size_t n, int hostlist_base; static int last_dims = -1; static int max_dims = 1; - DEF_TIMERS; +// DEF_TIMERS; if (!dims) dims = slurmdb_setup_cluster_name_dims(); hostlist_base = hostlist_get_base(dims); - START_TIMER; +// START_TIMER; LOCK_HOSTLIST(hl); if (dims > 1 && hl->nranges) { /* logic for block node description */ - static uint64_t grid_size = 1; slurm_mutex_lock(&multi_dim_lock); /* compute things that only need to be calculated once @@ -3136,24 +3148,20 @@ ssize_t hostlist_ranged_string_dims(hostlist_t hl, size_t n, offset[i] = offset[i+1] * hostlist_base; } - /* This will leave an allocation when ending but it - isn't overwriting and this makes it so we don't - have to allocate it over and over again we fill - this isn't too bad of an alternative. 
We were - defining this on the stack at first (we wanted to - avoid that). + /* Set this bitmap up once and clear it every time + instead of reallocating. Turns out to be about 5 + times faster doing it this way. It does leak the + last alloc, but that shouldn't be a big deal. */ - if (!grid || (grid && (max_dims < dims))) { + if (max_dims < dims) { grid_size = 1; max_dims = dims; - xfree(grid); - for (i=0; i<dims; i++) grid_size *= HIGHEST_BASE; - grid_size *= sizeof(bool); - grid = xmalloc(grid_size); + FREE_NULL_BITMAP(bit_grid); + bit_grid = bit_alloc(grid_size); } else - memset(grid, 0, grid_size); + bit_nclear(bit_grid, 0, grid_size - 1); memset(grid_start, hostlist_base, dim_grid_size); memset(grid_end, -1, dim_grid_size); @@ -3241,7 +3249,7 @@ notbox: } else buf[len] = '\0'; - END_TIMER; +// END_TIMER; // info("time was %s", TIME_STR); return truncated ? -1 : len; @@ -3260,7 +3268,7 @@ static hostlist_iterator_t hostlist_iterator_new(void) { hostlist_iterator_t i = (hostlist_iterator_t) malloc(sizeof(*i)); if (!i) - return NULL; + out_of_memory("hostlist_iterator_new"); i->hl = NULL; i->hr = NULL; i->idx = 0; @@ -3433,7 +3441,8 @@ char *hostlist_next_range(hostlist_iterator_t i) buf_size *= 2; buf = realloc(buf, buf_size); } - + if (!buf) + out_of_memory("hostlist_iterator_create"); UNLOCK_HOSTLIST(i->hl); return buf; @@ -3470,19 +3479,18 @@ hostset_t hostset_create(const char *hostlist) { hostset_t new; - if (!(new = (hostset_t) malloc(sizeof(*new)))) - goto error1; + if (!(new = (hostset_t) malloc(sizeof(*new)))) { + out_of_memory("hostset_create"); + return NULL; + } - if (!(new->hl = hostlist_create(hostlist))) - goto error2; + if (!(new->hl = hostlist_create(hostlist))) { + free(new); + return NULL; + } hostlist_uniq(new->hl); return new; - -error2: - free(new); -error1: - return NULL; } hostset_t hostset_copy(const hostset_t set) @@ -3498,6 +3506,7 @@ hostset_t hostset_copy(const hostset_t set) error2: free(new); error1: +
out_of_memory("hostset_copy"); return NULL; } @@ -3612,9 +3621,6 @@ int hostset_intersects(hostset_t set, const char *hosts) assert(set->hl->magic == HOSTLIST_MAGIC); hl = hostlist_create(hosts); - if (!hl) /* malloc failure */ - return retval; - while ((hostname = hostlist_pop(hl)) != NULL) { retval += hostset_find_host(set, hostname); free(hostname); diff --git a/src/common/hostlist.h b/src/common/hostlist.h index be6539f4af0b12a9407b3b8caad63c2602c0acb1..296f5b93b6114e12d79e9b813539e3bc33e5d35a 100644 --- a/src/common/hostlist.h +++ b/src/common/hostlist.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/io_hdr.c b/src/common/io_hdr.c index bbf14b1cff789b6ae98d6cf84fdc53b8d0976f1f..763da806bd00052dc2a467c9a29b66b5732c1691 100644 --- a/src/common/io_hdr.c +++ b/src/common/io_hdr.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/io_hdr.h b/src/common/io_hdr.h index 253109994c53750b1f321706daa75c294118c0e6..b8a0f61ab06704d8b3b822e248a58f898d781041 100644 --- a/src/common/io_hdr.h +++ b/src/common/io_hdr.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/job_options.c b/src/common/job_options.c index 85df63121962f196e4148eac0cf06be1b9ad51f0..8cde7b5882b900e3a5a46682b62ab05d33e4f5b0 100644 --- a/src/common/job_options.c +++ b/src/common/job_options.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/job_options.h b/src/common/job_options.h index 8fcfe754b670f26f0c255c405bc2fe5a1ec8451a..0441d1de47b1a3255adb4d7809b1294182c53547 100644 --- a/src/common/job_options.h +++ b/src/common/job_options.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/job_resources.c b/src/common/job_resources.c index e61130084067c75e15c4bfc4c98c69c589b12260..a0906d163fb4d5410377010fc9001f995909b3c9 100644 --- a/src/common/job_resources.c +++ b/src/common/job_resources.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -120,9 +120,6 @@ extern int build_job_resources(job_resources_t *job_resrcs, #ifndef HAVE_BG job_resrcs->core_bitmap = bit_alloc(core_cnt); job_resrcs->core_bitmap_used = bit_alloc(core_cnt); - if ((job_resrcs->core_bitmap == NULL) || - (job_resrcs->core_bitmap_used == NULL)) - fatal("bit_alloc malloc failure"); #endif return SLURM_SUCCESS; } @@ -1180,8 +1177,6 @@ extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr, } core_bitmap = bit_alloc(core_cnt); - if (!core_bitmap) - fatal("copy_job_resources_node: bit_alloc(%d): %m", core_cnt); for (i = 0; i < core_cnt; i++) { if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++)) bit_set(core_bitmap, i); @@ -1230,13 +1225,16 @@ extern int job_fits_into_cores(job_resources_t *job_resrcs_ptr, const uint16_t *bits_per_node) { int full_node_inx = 0, full_bit_inx = 0, job_bit_inx = 0, i; + int job_node_cnt; if (!full_bitmap) return 1; - for (full_node_inx = 0; full_node_inx < node_record_count; - full_node_inx++) { + job_node_cnt = bit_set_count(job_resrcs_ptr->node_bitmap); + for (full_node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); + job_node_cnt > 0; full_node_inx++) { if (bit_test(job_resrcs_ptr->node_bitmap, full_node_inx)) { + full_bit_inx = cr_node_cores_offset[full_node_inx]; for (i = 0; i < bits_per_node[full_node_inx]; i++) { if (bit_test(full_bitmap, full_bit_inx + i) && bit_test(job_resrcs_ptr->core_bitmap, @@ -1245,8 +1243,8 @@ extern int job_fits_into_cores(job_resources_t *job_resrcs_ptr, } } job_bit_inx += bits_per_node[full_node_inx]; + job_node_cnt --; } - full_bit_inx += bits_per_node[full_node_inx]; } return 1; } @@ -1262,7 +1260,7 @@ extern void add_job_to_cores(job_resources_t *job_resrcs_ptr, bitstr_t **full_core_bitmap, const uint16_t *bits_per_node) { - int full_node_inx = 0; + int full_node_inx = 0, job_node_cnt; int job_bit_inx = 0, full_bit_inx = 0, i; if (!job_resrcs_ptr->core_bitmap) @@ -1274,13 
+1272,13 @@ extern void add_job_to_cores(job_resources_t *job_resrcs_ptr, for (i = 0; i < node_record_count; i++) size += bits_per_node[i]; *full_core_bitmap = bit_alloc(size); - if (!*full_core_bitmap) - fatal("add_job_to_cores: bitmap memory error"); } - for (full_node_inx = 0; full_node_inx < node_record_count; - full_node_inx++) { + job_node_cnt = bit_set_count(job_resrcs_ptr->node_bitmap); + for (full_node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); + job_node_cnt > 0; full_node_inx++) { if (bit_test(job_resrcs_ptr->node_bitmap, full_node_inx)) { + full_bit_inx = cr_node_cores_offset[full_node_inx]; for (i = 0; i < bits_per_node[full_node_inx]; i++) { if (!bit_test(job_resrcs_ptr->core_bitmap, job_bit_inx + i)) @@ -1288,8 +1286,8 @@ extern void add_job_to_cores(job_resources_t *job_resrcs_ptr, bit_set(*full_core_bitmap, full_bit_inx + i); } job_bit_inx += bits_per_node[full_node_inx]; + job_node_cnt --; } - full_bit_inx += bits_per_node[full_node_inx]; } } @@ -1304,7 +1302,7 @@ extern void remove_job_from_cores(job_resources_t *job_resrcs_ptr, bitstr_t **full_core_bitmap, const uint16_t *bits_per_node) { - int full_node_inx = 0; + int full_node_inx = 0, job_node_cnt; int job_bit_inx = 0, full_bit_inx = 0, i; if (!job_resrcs_ptr->core_bitmap) @@ -1316,13 +1314,13 @@ extern void remove_job_from_cores(job_resources_t *job_resrcs_ptr, for (i = 0; i < node_record_count; i++) size += bits_per_node[i]; *full_core_bitmap = bit_alloc(size); - if (!*full_core_bitmap) - fatal("add_job_to_cores: bitmap memory error"); } - for (full_node_inx = 0; full_node_inx < node_record_count; - full_node_inx++) { + job_node_cnt = bit_set_count(job_resrcs_ptr->node_bitmap); + for (full_node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); + job_node_cnt > 0; full_node_inx++) { if (bit_test(job_resrcs_ptr->node_bitmap, full_node_inx)) { + full_bit_inx = cr_node_cores_offset[full_node_inx]; for (i = 0; i < bits_per_node[full_node_inx]; i++) { if (!bit_test(job_resrcs_ptr->core_bitmap, 
job_bit_inx + i)) @@ -1330,8 +1328,8 @@ extern void remove_job_from_cores(job_resources_t *job_resrcs_ptr, bit_clear(*full_core_bitmap, full_bit_inx + i); } job_bit_inx += bits_per_node[full_node_inx]; + job_node_cnt --; } - full_bit_inx += bits_per_node[full_node_inx]; } } diff --git a/src/common/job_resources.h b/src/common/job_resources.h index 7530f6c543c75a0ae686c962961bf4ff150cb6df..e2f44e82bc0fd77f3f67be954c19e3a1bccabb9d 100644 --- a/src/common/job_resources.h +++ b/src/common/job_resources.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -70,7 +70,7 @@ * is duplicated. See NOTES below. * memory_allocated - MB per node reserved for the job or step * memory_used - MB per node of memory consumed by job steps - * nhosts - Number of nodes in the allocation on a + * nhosts - Number of nodes in the allocation. On a * bluegene machine this represents the number * of midplanes used. This should always be * the number of bits set in node_bitmap. 
diff --git a/src/common/list.c b/src/common/list.c index 589bbbe5862add64cd2225e8d059f9775cbc789a..65f98f132f0d2d9d8687f9891bf0a2f8c4cbb8da 100644 --- a/src/common/list.c +++ b/src/common/list.c @@ -48,11 +48,13 @@ #include <assert.h> #include <errno.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include "list.h" #include "macros.h" #include "xmalloc.h" +#include "src/common/log.h" /* ** Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -92,21 +94,16 @@ strong_alias(list_install_fork_handlers, slurm_list_install_fork_handlers); #include <unistd.h> #ifdef WITH_LSD_FATAL_ERROR_FUNC # undef lsd_fatal_error - extern void lsd_fatal_error(char *file, int line, char *mesg); + extern void lsd_fatal_error(char *file, int line, char *mesg); #else /* !WITH_LSD_FATAL_ERROR_FUNC */ # ifndef lsd_fatal_error -# include <errno.h> -# include <stdio.h> -# include <string.h> -# define lsd_fatal_error(file, line, mesg) \ - do { \ - fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", \ - file, line, mesg, strerror(errno)); \ - } while (0) + static void lsd_fatal_error(char *file, int line, char *mesg) + { + log_fatal(file, line, mesg, strerror(errno)); + } # endif /* !lsd_fatal_error */ #endif /* !WITH_LSD_FATAL_ERROR_FUNC */ - /********************* * lsd_nomem_error * *********************/ @@ -116,7 +113,13 @@ strong_alias(list_install_fork_handlers, slurm_list_install_fork_handlers); extern void * lsd_nomem_error(char *file, int line, char *mesg); #else /* !WITH_LSD_NOMEM_ERROR_FUNC */ # ifndef lsd_nomem_error -# define lsd_nomem_error(file, line, mesg) (NULL) + static void * lsd_nomem_error(char *file, int line, char *mesg) + { + + log_oom(file, line, mesg); + abort(); + return NULL; + } # endif /* !lsd_nomem_error */ #endif /* !WITH_LSD_NOMEM_ERROR_FUNC */ @@ -384,7 +387,7 @@ list_append_list (List l, List sub) assert(sub != NULL); itr = list_iterator_create(sub); while((v = list_next(itr))) { - if(list_append(l, v)) + if (list_append(l, v)) n++; 
else break; @@ -404,10 +407,10 @@ list_transfer (List l, List sub) assert(sub != NULL); assert(l->fDel == sub->fDel); while((v = list_pop(sub))) { - if(list_append(l, v)) + if (list_append(l, v)) n++; else { - if(l->fDel) + if (l->fDel) l->fDel(v); break; } @@ -526,6 +529,88 @@ list_flush (List l) return(n); } +void +list_sort2 (List l, ListCmpF f) +{ + ListIterator it; + + ListNode p, q, e, tail, head; + int insize, nmerges, psize, qsize, i; + + assert(l != NULL); + assert(f != NULL); + list_mutex_lock(&l->mutex); + assert(l->magic == LIST_MAGIC); + head = l->head; + if (l->count > 1) { + insize=1; + while(1) { + p = head; + head = NULL; + tail = NULL; + + + nmerges = 0; + while(p) { + nmerges++; + q = p; + + psize=0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next; + + if (!q) break; + } + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (psize == 0) { + e = q; + q = q->next; + qsize--; + } else if (qsize == 0 || !q ) { + e = p; + p = p->next; + psize--; + } else if (f(p->data,q->data) < 0) { + e = p; + p = p->next; + psize--; + } else { + e = q; + q = q->next; + qsize--; + } + if (tail) { + tail->next = e; + } else { + head = e; + } + tail = e; + } + p = q; + + } + tail->next = NULL; + if(nmerges <= 1) { + l->head = head; + l->tail = &tail->next; + for (it=l->iNext; it; it=it->iNext) { + assert(it->magic == LIST_MAGIC); + it->pos = it->list->head; + it->prev = &it->list->head; + } + + list_mutex_unlock(&l->mutex); + return; + } + insize *=2; + } + } + + list_mutex_unlock(&l->mutex); + return; +} void list_sort (List l, ListCmpF f) @@ -560,7 +645,6 @@ list_sort (List l, ListCmpF f) } } l->tail = pp; - for (i=l->iNext; i; i=i->iNext) { assert(i->magic == LIST_MAGIC); i->pos = i->list->head; @@ -1009,8 +1093,10 @@ list_reinit_mutexes (void) void list_install_fork_handlers (void) { int err; - if ((err = pthread_atfork(NULL, NULL, &list_reinit_mutexes))) + if ((err = pthread_atfork(NULL, NULL, &list_reinit_mutexes))) { 
lsd_fatal_error(__FILE__, __LINE__, "list atfork install"); + abort(); + } return; } #else diff --git a/src/common/list.h b/src/common/list.h index cf3d1c9de6f7fa0c5caa904e44202ce6ba073bca..d24dcdd38390dd59419d5945b4c374d84d9486c3 100644 --- a/src/common/list.h +++ b/src/common/list.h @@ -64,6 +64,9 @@ * This macro may be redefined to invoke another routine instead. * * If WITH_PTHREADS is defined, these routines will be thread-safe. + * + * SLURM's versions of these functions write directly to the log file, using + * fprintf to avoid consuming more memory. */ diff --git a/src/common/log.c b/src/common/log.c index 5a433f80a1234c8cc25c2821160858fcca45fbe6..4c3250d58d1b2dac6e291bf7052ff59a28611632 100644 --- a/src/common/log.c +++ b/src/common/log.c @@ -85,6 +85,7 @@ #include "src/common/xassert.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" +#include "src/common/slurm_protocol_api.h" #ifndef LINEBUFSIZE # define LINEBUFSIZE 256 @@ -101,6 +102,8 @@ strong_alias(log_alter, slurm_log_alter); strong_alias(log_alter_with_fp, slurm_log_alter_with_fp); strong_alias(log_set_fpfx, slurm_log_set_fpfx); strong_alias(log_fp, slurm_log_fp); +strong_alias(log_fatal, slurm_log_fatal); +strong_alias(log_oom, slurm_log_oom); strong_alias(log_has_data, slurm_log_has_data); strong_alias(log_flush, slurm_log_flush); strong_alias(dump_cleanup_list, slurm_dump_cleanup_list); @@ -131,8 +134,11 @@ typedef struct { log_facility_t facility; log_options_t opt; unsigned initialized:1; + uint32_t debug_flags; } log_t; +char *slurm_prog_name = NULL; + /* static variables */ #ifdef WITH_PTHREADS static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER; @@ -176,7 +182,7 @@ static void _log_flush(log_t *log); /* Write the current local time into the provided buffer. Returns the * number of characters written into the buffer. 
*/ -static size_t _make_timestamp(char *timestamp_buf, size_t max, +static size_t _make_timestamp(char *timestamp_buf, size_t max, const char *timestamp_fmt) { time_t timestamp_t = time(NULL); @@ -296,6 +302,10 @@ _log_init(char *prog, log_options_t opt, log_facility_t fac, char *logfile ) log->argv0 = xstrdup(short_name); } + /* Only take the first one here. In some situations it can change. */ + if (!slurm_prog_name) + slurm_prog_name = xstrdup(log->argv0); + if (!log->fpfx) log->fpfx = xstrdup(""); @@ -361,7 +371,7 @@ _log_init(char *prog, log_options_t opt, log_facility_t fac, char *logfile ) * logfile = logfile name if logfile level > LOG_QUIET */ static int -_sched_log_init(char *prog, log_options_t opt, log_facility_t fac, +_sched_log_init(char *prog, log_options_t opt, log_facility_t fac, char *logfile) { int rc = 0; @@ -377,7 +387,7 @@ _sched_log_init(char *prog, log_options_t opt, log_facility_t fac, } else if (!sched_log->argv0) { const char *short_name; short_name = strrchr((const char *) default_name, '/'); - if (short_name) + if (short_name) short_name++; else short_name = default_name; @@ -477,6 +487,7 @@ void log_fini(void) if (log->logfp) fclose(log->logfp); xfree(log); + xfree(slurm_prog_name); slurm_mutex_unlock(&log_lock); } @@ -538,9 +549,23 @@ int log_alter(log_options_t opt, log_facility_t fac, char *logfile) slurm_mutex_lock(&log_lock); rc = _log_init(NULL, opt, fac, logfile); slurm_mutex_unlock(&log_lock); + log_set_debug_flags(); return rc; } +/* log_set_debug_flags() + * Set or reset the debug flags based on the configuration + * file or the scontrol command. + */ +void log_set_debug_flags(void) +{ + uint32_t debug_flags = slurm_get_debug_flags(); + + slurm_mutex_lock(&log_lock); + log->debug_flags = debug_flags; + slurm_mutex_unlock(&log_lock); +} + /* reinitialize log data structures. 
Like log_init, but do not init * the log mutex */ @@ -577,20 +602,48 @@ int sched_log_alter(log_options_t opt, log_facility_t fac, char *logfile) return rc; } -/* return the FILE * of the current logfile (stderr if logging to stderr) - */ +/* Return the FILE * of the current logfile (or stderr if not logging to + * a file, but NOT both). Also see log_fatal() and log_oom() below. */ FILE *log_fp(void) { FILE *fp; slurm_mutex_lock(&log_lock); - if (log && log->logfp) + if (log && log->logfp) { fp = log->logfp; - else + } else fp = stderr; slurm_mutex_unlock(&log_lock); return fp; } +/* Log fatal error without message buffering */ +void log_fatal(const char *file, int line, const char *msg, const char *err_str) +{ + if (log && log->logfp) { + fprintf(log->logfp, "ERROR: [%s:%d] %s: %s\n", + file, line, msg, err_str); + fflush(log->logfp); + } + if (!log || log->opt.stderr_level) { + fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", + file, line, msg, err_str); + fflush(stderr); + } +} + +/* Log out of memory without message buffering */ +void log_oom(const char *file, int line, const char *func) +{ + if (log && log->logfp) { + fprintf(log->logfp, "%s:%d: %s: malloc failed\n", + file, line, func); + } + if (!log || log->opt.stderr_level) { + fprintf(stderr, "%s:%d: %s: malloc failed\n", + file, line, func); + } +} + /* return a heap allocated string formed from fmt and ap arglist * returned string is allocated with xmalloc, so must free with xfree. 
* @@ -651,12 +704,12 @@ static char *vxstrfmt(const char *fmt, va_list ap) xstrcat(buf, tmp); break; #elif defined USE_RFC5424_TIME - case 'M': /* "%M" => "yyyy-mm-ddThh:mm:ss(+/-)hh:mm" */ + case 'M': /* "%M" => "yyyy-mm-ddThh:mm:ss.fff(+/-)hh:mm" */ xrfc5424timecat(buf); break; #elif defined USE_ISO_8601 - case 'M': /* "%M" => "yyyy-mm-ddThh:mm:ss" */ - xstrftimecat(buf, "%Y-%m-%dT%T"); + case 'M': /* "%M" => "yyyy-mm-ddThh:mm:ss.fff" */ + xiso8601timecat(buf); break; #else case 'M': /* "%M" => "Mon DD hh:mm:ss" */ @@ -696,13 +749,13 @@ static char *vxstrfmt(const char *fmt, va_list ap) xstrcat(buf, "%u"); break; case 'l': - if((unprocessed == 0) && (*(p+1) == 'l')) { + if ((unprocessed == 0) && (*(p+1) == 'l')) { long_long = 1; p++; } if ((unprocessed == 0) && (*(p+1) == 'u')) { - if(long_long) { + if (long_long) { snprintf(tmp, sizeof(tmp), "%llu", va_arg(ap, @@ -716,7 +769,7 @@ static char *vxstrfmt(const char *fmt, va_list ap) xstrcat(buf, tmp); p++; } else if ((unprocessed==0) && (*(p+1)=='d')) { - if(long_long) { + if (long_long) { snprintf(tmp, sizeof(tmp), "%lld", va_arg(ap, @@ -729,7 +782,7 @@ static char *vxstrfmt(const char *fmt, va_list ap) xstrcat(buf, tmp); p++; } else if ((unprocessed==0) && (*(p+1)=='f')) { - if(long_long) { + if (long_long) { xstrcat(buf, "%llf"); long_long = 0; } else @@ -739,7 +792,7 @@ static char *vxstrfmt(const char *fmt, va_list ap) xstrcat(buf, tmp); p++; } else if ((unprocessed==0) && (*(p+1)=='x')) { - if(long_long) { + if (long_long) { snprintf(tmp, sizeof(tmp), "%llx", va_arg(ap, @@ -751,7 +804,7 @@ static char *vxstrfmt(const char *fmt, va_list ap) va_arg(ap, long int)); xstrcat(buf, tmp); p++; - } else if(long_long) { + } else if (long_long) { xstrcat(buf, "%ll"); long_long = 0; } else @@ -839,6 +892,25 @@ _log_printf(log_t *log, cbuf_t cb, FILE *stream, const char *fmt, ...) 
} +#if defined(EXT_DEBUG) +/* set_idbuf() + * Write into the supplied buffer the current time with microseconds, + * the process id and the current thread id. + */ +static void +set_idbuf(char *idbuf) +{ + struct timeval now; + + gettimeofday(&now, NULL); + + sprintf(idbuf, "\ +%.15s.%-6d %5d %p", ctime(&now.tv_sec) + 4, + (int)now.tv_usec, (int)getpid(), (void *)pthread_self()); + +} +#endif + /* * log a message at the specified level to facilities that have been * configured to receive messages at that level @@ -849,8 +921,16 @@ static void log_msg(log_level_t level, const char *fmt, va_list args) char *buf = NULL; char *msgbuf = NULL; int priority = LOG_INFO; +#if defined(EXT_DEBUG) + char idbuf[128]; +#endif slurm_mutex_lock(&log_lock); + +#if defined(EXT_DEBUG) + set_idbuf(idbuf); +#endif + if (!LOG_INITIALIZED) { log_options_t opts = LOG_OPTS_STDERR_ONLY; _log_init(NULL, opts, 0, NULL); @@ -861,7 +941,7 @@ static void log_msg(log_level_t level, const char *fmt, va_list args) (strncmp(fmt, "sched: ", 7) == 0)) { buf = vxstrfmt(fmt, args); xlogfmtcat(&msgbuf, "[%M] %s%s%s", sched_log->fpfx, pfx, buf); - _log_printf(sched_log, sched_log->fbuf, sched_log->logfp, + _log_printf(sched_log, sched_log->fbuf, sched_log->logfp, "%s\n", msgbuf); fflush(sched_log->logfp); xfree(msgbuf); @@ -933,13 +1013,29 @@ static void log_msg(log_level_t level, const char *fmt, va_list args) if (level <= log->opt.stderr_level) { fflush(stdout); - _log_printf(log, log->buf, stderr, "%s: %s%s\n", - log->argv0, pfx, buf); +#if defined(EXT_DEBUG) + _log_printf(log, log->buf, stderr, "%s ", idbuf); +#endif + if (log->debug_flags & DEBUG_FLAG_THREADID) + _log_printf(log, log->buf, stderr, "%s: %p %s%s\n", + log->argv0, (void *)pthread_self(), + pfx, buf); + else + _log_printf(log, log->buf, stderr, "%s: %s%s\n", + log->argv0, pfx, buf); fflush(stderr); } if ((level <= log->opt.logfile_level) && (log->logfp != NULL)) { - xlogfmtcat(&msgbuf, "[%M] %s%s%s", log->fpfx, pfx, buf); + +#if
defined(EXT_DEBUG) + _log_printf(log, log->buf, log->logfp, "%s ", idbuf); +#endif + if (log->debug_flags & DEBUG_FLAG_THREADID) + xlogfmtcat(&msgbuf, "[%M] %p %s%s%s", log->fpfx, + (void *)pthread_self(), pfx, buf); + else + xlogfmtcat(&msgbuf, "[%M] %s%s%s", log->fpfx, pfx, buf); _log_printf(log, log->fbuf, log->logfp, "%s\n", msgbuf); fflush(log->logfp); diff --git a/src/common/log.h b/src/common/log.h index 2d36f7c46ff0df8f5b6d5278c215ce57b11e0c7d..753dd23bd0af5b4d58a6b691a97ed1852c1d2b94 100644 --- a/src/common/log.h +++ b/src/common/log.h @@ -109,6 +109,8 @@ typedef struct { unsigned buffered:1; /* Use internal buffer to never block */ } log_options_t; +extern char *slurm_prog_name; + /* some useful initializers for log_options_t */ #define LOG_OPTS_INITIALIZER \ @@ -205,11 +207,16 @@ void log_set_fpfx(char *pfx); */ void log_set_argv0(char *pfx); -/* grab the FILE * of the current logfile (or stderr if not logging to - * a file) - */ +/* Return the FILE * of the current logfile (or stderr if not logging to + * a file, but NOT both). Also see log_fatal() and log_oom() below. */ FILE *log_fp(void); +/* Log fatal error without message buffering */ +void log_fatal(const char *file, int line, const char *msg, const char *err_str); + +/* Log out of memory without message buffering */ +void log_oom(const char *file, int line, const char *func); + /* * Buffered log functions: * @@ -224,6 +231,12 @@ bool log_has_data(void); */ void log_flush(void); +/* log_set_debug_flags() + * Set or reset the debug flags based on the configuration + * file or the scontrol command. + */ +extern void log_set_debug_flags(void); + /* * the following log a message to the log facility at the appropriate level: * diff --git a/src/common/macros.h b/src/common/macros.h index 04e0300dcc487ffa5daadc57eb96b83e504d8732..f8c1e90c6f6d1acd1b9afbe960fdaac26dd73ab6 100644 --- a/src/common/macros.h +++ b/src/common/macros.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -275,6 +275,19 @@ typedef enum {false, true} bool; # define strndup(src,size) strdup(src) #endif +/* Results strftime() are undefined if buffer too small + * This variant returns a string of "####"... instead */ +#define slurm_strftime(s, max, format, tm) \ +_STMT_START { \ + if (max > 0) { \ + char tmp_string[(max<256?256:max+1)]; \ + if (strftime(tmp_string, sizeof(tmp_string), format, tm) == 0) \ + memset(tmp_string, '#', max); \ + tmp_string[max-1] = 0; \ + strncpy(s, tmp_string, max); \ + } \ +} _STMT_END + /* There are places where we put NO_VAL or INFINITE into a float or double * Use fuzzy_equal below to test for those values rather than an comparision * which could fail due to rounding errors. */ diff --git a/src/common/mpi.c b/src/common/mpi.c index 66a1e1e8a6c5b2fa61048c93eddf931aa786df90..abbc97dc815250ede59818b5d6f59daa80e95fa2 100644 --- a/src/common/mpi.c +++ b/src/common/mpi.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/mpi.h b/src/common/mpi.h index 0be96edbeb6c15837f48d3250017d272088eae94..92e67be8282005125ab3d576059d9f621d703e44 100644 --- a/src/common/mpi.h +++ b/src/common/mpi.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/net.c b/src/common/net.c index c4fb8c41f10f0344c8e64a33d38cbaaf8bab694f..edda32e3d8f0fca113b82e16f61830f15b755eb0 100644 --- a/src/common/net.c +++ b/src/common/net.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -41,6 +41,7 @@ #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <arpa/inet.h> #include <sys/select.h> #include <sys/time.h> @@ -51,9 +52,14 @@ #include <errno.h> #include <stdint.h> -#include "src/common/macros.h" +#if defined(__FreeBSD__) +#define SOL_TCP IPPROTO_TCP +#endif + #include "src/common/log.h" +#include "src/common/macros.h" #include "src/common/net.h" +#include "src/common/slurm_protocol_api.h" /* * Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -165,10 +171,72 @@ int readn(int fd, void *buf, size_t nbytes) int net_set_low_water(int sock, size_t size) { if (setsockopt(sock, SOL_SOCKET, SO_RCVLOWAT, - (const void *) &size, sizeof(size)) < 0) { + (const void *) &size, sizeof(size)) < 0) { error("Unable to set low water socket option: %m"); return -1; } return 0; } + +/* set keep alive time on socket */ +extern int net_set_keep_alive(int sock) +{ + int opt_int; + socklen_t opt_len; + struct linger opt_linger; + static bool keep_alive_set = false; + static int keep_alive_time = (uint16_t) NO_VAL; + + if (!keep_alive_set) { + keep_alive_time = slurm_get_keep_alive_time(); + keep_alive_set = true; + } + + if (keep_alive_time == (uint16_t) NO_VAL) + return 0; + + opt_len = sizeof(struct linger); + opt_linger.l_onoff = 
1; + opt_linger.l_linger = keep_alive_time; + if (setsockopt(sock, SOL_SOCKET, SO_LINGER, &opt_linger, opt_len) < 0) + error("Unable to set linger socket option: %m"); + + opt_len = sizeof(int); + opt_int = 1; + if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &opt_int, opt_len) < 0) { + error("Unable to set keep alive socket option: %m"); + return -1; + } + +/* + * TCP_KEEPIDLE used to be defined in FreeBSD, then went away, then came + * back in 9.0. + * + * Removing this call might decrease the robustness of communications, + * but will probably have no noticable effect. + */ +#if ! defined(__FreeBSD__) || (__FreeBSD_version > 900000) + opt_int = keep_alive_time; + if (setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &opt_int, opt_len) < 0) { + error("Unable to set keep alive socket time: %m"); + return -1; + } +#endif + +#if 0 + /* Used to validate above operations for testing purposes */ + opt_linger.l_onoff = 0; + opt_linger.l_linger = 0; + opt_len = sizeof(struct linger); + getsockopt(sock, SOL_SOCKET, SO_LINGER, &opt_linger, &opt_len); + info("got linger time of %d:%d on fd %d", opt_linger.l_onoff, + opt_linger.l_linger, sock); + + opt_len = sizeof(int); + getsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &opt_int, &opt_len); + info("got keep_alive time is %d on fd %d", opt_int, sock); +#endif + + return 0; +} diff --git a/src/common/net.h b/src/common/net.h index 7b7ee2f757d5ccc1cc09fc48087619816b1d5f87..de2a63360717b3db0ea40a3a11751f6ac3f1fe2c 100644 --- a/src/common/net.h +++ b/src/common/net.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -61,5 +61,7 @@ int net_accept_stream(int fd); */ int net_set_low_water(int sock, size_t size); +/* set keep alive time on socket */ +extern int net_set_keep_alive(int sock); #endif /* !_NET_H */ diff --git a/src/common/node_conf.c b/src/common/node_conf.c index 70e88f19511c16325b3635c293ca9f767a696cdb..529293122640fd98520c96206cb6a8115deb6d70 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -13,7 +13,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -71,6 +71,7 @@ #include "src/common/read_config.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_ext_sensors.h" #include "src/common/slurm_topology.h" #include "src/common/xassert.h" #include "src/common/xmalloc.h" @@ -98,6 +99,7 @@ static int _delete_config_record (void); static void _dump_hash (void); #endif static struct node_record *_find_alias_node_record (char *name); +static struct node_record *_find_node_record (char *name, bool test_alias); static int _hash_index (char *name); static void _list_delete_config (void *config_entry); static void _list_delete_feature (void *feature_entry); @@ -113,8 +115,6 @@ static void _add_config_feature(char *feature, bitstr_t *node_bitmap) /* If feature already exists in feature_list, just update the bitmap */ feature_iter = list_iterator_create(feature_list); - if (feature_iter == NULL) - fatal("list_iterator_create malloc failure"); while ((feature_ptr = (struct features_record *) list_next(feature_iter))) { if (strcmp(feature, feature_ptr->name)) @@ -328,7 +328,7 @@ static int _delete_config_record (void) * 
_dump_hash - print the node_hash_table contents, used for debugging * or analysis of hash technique * global: node_record_table_ptr - pointer to global node table - * node_hash_table - table of hash indecies + * node_hash_table - table of hash indexes */ static void _dump_hash (void) { @@ -355,7 +355,7 @@ static void _dump_hash (void) * input: name - name to be aliased of the desired node * output: return pointer to node record or NULL if not found * global: node_record_table_ptr - pointer to global node table - * node_hash_table - table of hash indecies + * node_hash_table - table of hash indexes */ static struct node_record *_find_alias_node_record (char *name) { @@ -502,8 +502,6 @@ char * bitmap2node_name_sortable (bitstr_t *bitmap, bool sort) last = bit_fls(bitmap); hl = hostlist_create(""); - if (hl == NULL) - fatal("hostlist_create: malloc error"); for (i = first; i <= last; i++) { if (bit_test(bitmap, i) == 0) continue; @@ -554,9 +552,13 @@ static int _list_find_feature (void *feature_entry, void *key) /* Log the contents of a frontend record */ static void _dump_front_end(slurm_conf_frontend_t *fe_ptr) { - info("fe name:%s addr:%s port:%u state:%u reason:%s", + info("fe name:%s addr:%s port:%u state:%u reason:%s " + "allow_groups:%s allow_users:%s " + "deny_groups:%s deny_users:%s", fe_ptr->frontends, fe_ptr->addresses, - fe_ptr->port, fe_ptr->node_state, fe_ptr->reason); + fe_ptr->port, fe_ptr->node_state, fe_ptr->reason, + fe_ptr->allow_groups, fe_ptr->allow_users, + fe_ptr->deny_groups, fe_ptr->deny_users); } #endif @@ -601,12 +603,27 @@ extern int build_all_frontend_info (bool is_slurmd_context) while ((fe_name = hostlist_shift(hl_name))) { fe_addr = hostlist_shift(hl_addr); fe_single = xmalloc(sizeof(slurm_conf_frontend_t)); - if (list_append(front_end_list, fe_single) == NULL) - fatal("list_append: malloc failure"); + list_append(front_end_list, fe_single); fe_single->frontends = xstrdup(fe_name); fe_single->addresses = xstrdup(fe_addr); 
free(fe_name); free(fe_addr); + if (fe_line->allow_groups && fe_line->allow_groups[0]) { + fe_single->allow_groups = + xstrdup(fe_line->allow_groups); + } + if (fe_line->allow_users && fe_line->allow_users[0]) { + fe_single->allow_users = + xstrdup(fe_line->allow_users); + } + if (fe_line->deny_groups && fe_line->deny_groups[0]) { + fe_single->deny_groups = + xstrdup(fe_line->deny_groups); + } + if (fe_line->deny_users && fe_line->deny_users[0]) { + fe_single->deny_users = + xstrdup(fe_line->deny_users); + } fe_single->port = fe_line->port; if (fe_line->reason && fe_line->reason[0]) fe_single->reason = xstrdup(fe_line->reason); @@ -667,8 +684,6 @@ extern int build_all_nodeline_info (bool set_bitmap) if (set_bitmap) { ListIterator config_iterator; config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal ("memory allocation failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { node_name2bitmap(config_ptr->nodes, true, @@ -691,8 +706,6 @@ extern void build_config_feature_list(struct config_record *config_ptr) /* Clear these nodes from the feature_list record, * then restore as needed */ feature_iter = list_iterator_create(feature_list); - if (feature_iter == NULL) - fatal("list_iterator_create malloc failure"); bit_not(config_ptr->node_bitmap); while ((feature_ptr = (struct features_record *) list_next(feature_iter))) { @@ -792,17 +805,28 @@ extern struct node_record *create_node_record ( node_ptr->tmp_disk = config_ptr->tmp_disk; node_ptr->select_nodeinfo = select_g_select_nodeinfo_alloc(); node_ptr->energy = acct_gather_energy_alloc(); + node_ptr->ext_sensors = ext_sensors_alloc(); xassert (node_ptr->magic = NODE_MAGIC) /* set value */; return node_ptr; } - /* * find_node_record - find a record for node with specified name - * input: name - name of the desired node - * output: return pointer to node record or NULL if not found + * IN: name - name of the desired node + * RET: pointer to node record 
or NULL if not found */ extern struct node_record *find_node_record (char *name) +{ + return _find_node_record(name, true); +} + +/* + * _find_node_record - find a record for node with specified name + * IN: name - name of the desired node + * IN: test_alias - if set, also test NodeHostName value + * RET: pointer to node record or NULL if not found + */ +static struct node_record *_find_node_record (char *name, bool test_alias) { int i; @@ -841,9 +865,12 @@ extern struct node_record *find_node_record (char *name) } } - /* look for the alias node record if the user put this in - instead of what slurm sees the node name as */ - return _find_alias_node_record (name); + if (test_alias) { + /* look for the alias node record if the user put this in + * instead of what slurm sees the node name as */ + return _find_alias_node_record (name); + } + return NULL; } @@ -873,9 +900,6 @@ extern int init_node_conf (void) config_list = list_create (_list_delete_config); feature_list = list_create (_list_delete_feature); front_end_list = list_create (destroy_frontend); - if ((config_list == NULL) || (feature_list == NULL) || - (front_end_list == NULL)) - fatal("list_create malloc failure"); } return SLURM_SUCCESS; @@ -925,8 +949,6 @@ extern int node_name2bitmap (char *node_names, bool best_effort, hostlist_t host_list; my_bitmap = (bitstr_t *) bit_alloc (node_record_count); - if (my_bitmap == NULL) - fatal("bit_alloc malloc failure"); *bitmap = my_bitmap; if (node_names == NULL) { @@ -944,7 +966,7 @@ extern int node_name2bitmap (char *node_names, bool best_effort, while ( (this_node_name = hostlist_shift (host_list)) ) { struct node_record *node_ptr; - node_ptr = find_node_record (this_node_name); + node_ptr = _find_node_record(this_node_name, best_effort); if (node_ptr) { bit_set (my_bitmap, (bitoff_t) (node_ptr - node_record_table_ptr)); @@ -977,6 +999,7 @@ extern void purge_node_rec (struct node_record *node_ptr) xfree(node_ptr->part_pptr); xfree(node_ptr->reason); 
acct_gather_energy_destroy(node_ptr->energy); + ext_sensors_destroy(node_ptr->ext_sensors); select_g_select_nodeinfo_free(node_ptr->select_nodeinfo); } @@ -1088,3 +1111,19 @@ extern uint32_t cr_get_coremap_offset(uint32_t node_index) xassert(cr_node_cores_offset); return cr_node_cores_offset[node_index]; } + +/* Given the number of tasks per core and the actual number of hw threads, + * compute how many CPUs are "visible" and, hence, usable on the node. + */ +extern int adjust_cpus_nppcu(uint16_t ntasks_per_core, uint16_t threads, + int cpus) +{ + if ((ntasks_per_core != 0) && (ntasks_per_core != 0xffff) && + (threads != 0)) { + /* Adjust the number of CPUs according to the percentage of the + * hwthreads/core being used. */ + cpus = cpus * ntasks_per_core / threads; + } + + return cpus; +} diff --git a/src/common/node_conf.h b/src/common/node_conf.h index 8cfad11a5d015556f76bdcb1a172f8bdf3581610..e560c9e61117fb1e4f4098ca869ba73ba8346651 100644 --- a/src/common/node_conf.h +++ b/src/common/node_conf.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -75,7 +75,6 @@ struct config_record { uint32_t weight; /* arbitrary priority of node for * scheduling work on */ char *feature; /* arbitrary list of node's features */ - char **feature_array; /* array of feature names */ char *gres; /* arbitrary list of node's generic resources */ char *nodes; /* name of nodes with this configuration */ bitstr_t *node_bitmap; /* bitmap of nodes with this configuration */ @@ -154,6 +153,7 @@ struct node_record { time_t down_time; /* When first set to DOWN state */ #endif /* HAVE_CRAY */ acct_gather_energy_t *energy; + ext_sensors_data_t *ext_sensors; /* external sensor data */ dynamic_plugin_data_t *select_nodeinfo; /* opaque data structure, * use select_g_get_nodeinfo() * to access contents */ @@ -234,7 +234,7 @@ extern struct node_record *create_node_record ( * find_node_record - find a record for node with specified name * input: name - name of the desired node * output: return pointer to node record or NULL if not found - * node_hash_table - table of hash indecies + * node_hash_table - table of hash indexes */ extern struct node_record *find_node_record (char *name); @@ -282,4 +282,10 @@ extern void cr_fini_global_core_data(void); /*return the coremap index to the first core of the given node */ extern uint32_t cr_get_coremap_offset(uint32_t node_index); +/* Given the number of tasks per core and the actual number of hw threads, + * compute how many CPUs are "visible" and, hence, usable on the node. + */ +extern int adjust_cpus_nppcu(uint16_t ntasks_per_core, uint16_t threads, + int cpus); + #endif /* !_HAVE_NODE_CONF_H */ diff --git a/src/common/node_select.c b/src/common/node_select.c index bb90d7d5c79a55721b4928377a729d8ea5fa40a0..1ffe8c61d7ade9b55d032bef7c073079fffd0be1 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -16,7 +16,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -276,11 +276,11 @@ extern int slurm_select_init(bool only_default) if (dir_array[i] == ':') { dir_array[i] = '\0'; got_colon = 1; - } else if(dir_array[i] != '\0') + } else if (dir_array[i] != '\0') continue; /* Open the directory. */ - if(!(dirp = opendir(head))) { + if (!(dirp = opendir(head))) { error("cannot open plugin directory %s", head); goto done; } @@ -288,7 +288,7 @@ extern int slurm_select_init(bool only_default) while (1) { char full_name[128]; - if(!(e = readdir( dirp ))) + if (!(e = readdir( dirp ))) break; /* Check only files with select_ in them. */ if (strncmp(e->d_name, "select_", 7)) @@ -832,8 +832,8 @@ extern int select_g_select_nodeinfo_free(dynamic_plugin_data_t *nodeinfo) if (slurm_select_init(0) < 0) return SLURM_ERROR; - if(nodeinfo) { - if(nodeinfo->data) + if (nodeinfo) { + if (nodeinfo->data) rc = (*(ops[nodeinfo->plugin_id]. 
nodeinfo_free))(nodeinfo->data); xfree(nodeinfo); @@ -870,7 +870,7 @@ extern int select_g_select_nodeinfo_get(dynamic_plugin_data_t *nodeinfo, if (slurm_select_init(0) < 0) return SLURM_ERROR; - if(nodeinfo) { + if (nodeinfo) { nodedata = nodeinfo->data; plugin_id = nodeinfo->plugin_id; } else @@ -927,7 +927,7 @@ extern int select_g_select_jobinfo_set(dynamic_plugin_data_t *jobinfo, if (slurm_select_init(0) < 0) return SLURM_ERROR; - if(jobinfo) { + if (jobinfo) { jobdata = jobinfo->data; plugin_id = jobinfo->plugin_id; } else @@ -952,7 +952,7 @@ extern int select_g_select_jobinfo_get(dynamic_plugin_data_t *jobinfo, if (slurm_select_init(0) < 0) return SLURM_ERROR; - if(jobinfo) { + if (jobinfo) { jobdata = jobinfo->data; plugin_id = jobinfo->plugin_id; } else @@ -975,7 +975,7 @@ extern dynamic_plugin_data_t *select_g_select_jobinfo_copy( return NULL; jobinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t)); - if(jobinfo) { + if (jobinfo) { jobinfo_ptr->plugin_id = jobinfo->plugin_id; jobinfo_ptr->data = (*(ops[jobinfo->plugin_id]. 
jobinfo_copy))(jobinfo->data); @@ -1083,7 +1083,7 @@ extern char *select_g_select_jobinfo_sprint(dynamic_plugin_data_t *jobinfo, if (slurm_select_init(0) < 0) return NULL; - if(jobinfo) { + if (jobinfo) { data = jobinfo->data; plugin_id = jobinfo->plugin_id; } else @@ -1107,7 +1107,7 @@ extern char *select_g_select_jobinfo_xstrdup( if (slurm_select_init(0) < 0) return NULL; - if(jobinfo) { + if (jobinfo) { data = jobinfo->data; plugin_id = jobinfo->plugin_id; } else @@ -1244,7 +1244,7 @@ extern int select_g_reconfigure (void) * RET - nodes selected for use by the reservation */ extern bitstr_t * select_g_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, bitstr_t **core_bitmap) + uint32_t *core_cnt, bitstr_t **core_bitmap) { if (slurm_select_init(0) < 0) return NULL; diff --git a/src/common/node_select.h b/src/common/node_select.h index 608410b3dde861317c1176607447a746481ead2b..19da81134670978480ca27dd3f184116a7ab9f53 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -223,7 +223,7 @@ typedef struct slurm_select_ops { int (*reconfigure) (void); bitstr_t * (*resv_test) (bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, + uint32_t *core_cnt, bitstr_t **core_bitmap); void (*ba_init) (node_info_msg_t *node_info_ptr, bool sanity_check); @@ -675,7 +675,7 @@ extern int select_g_step_finish(struct step_record *step_ptr); * RET - nodes selected for use by the reservation */ extern bitstr_t * select_g_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, + uint32_t *core_cnt, bitstr_t **core_bitmap); /*****************************\ diff --git a/src/common/optz.c b/src/common/optz.c index 99ddb5654d1b365a53809e1c57b76f2545a7ba49..cc39b68f890df40efd1f1971c4f33822aa11f464 100644 --- a/src/common/optz.c +++ b/src/common/optz.c @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/optz.h b/src/common/optz.h index 1b537950241c1309a3c61f86766b2320bffa4a12..377769088db57f417cf88c320df6908342b62af5 100644 --- a/src/common/optz.h +++ b/src/common/optz.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/pack.c b/src/common/pack.c index 0571adf0b24ab199b979d8f2dfa611cf3b52b254..f2648074d73d452c001db91719dd25fd021fddd5 100644 --- a/src/common/pack.c +++ b/src/common/pack.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -52,8 +52,9 @@ #include "slurm/slurm_errno.h" -#include "src/common/pack.h" +#include "src/common/log.h" #include "src/common/macros.h" +#include "src/common/pack.h" #include "src/common/xmalloc.h" /* If we unpack a buffer that contains bad data, we want to avoid @@ -149,7 +150,7 @@ Buf init_buf(int size) error("init_buf: buffer size too large"); return NULL; } - if(size <= 0) + if (size <= 0) size = BUF_SIZE; my_buf = xmalloc(sizeof(struct slurm_buf)); my_buf->magic = BUF_MAGIC; @@ -614,6 +615,10 @@ int unpackmem_malloc(char **valp, uint32_t * size_valp, Buf buffer) if (remaining_buf(buffer) < *size_valp) return SLURM_ERROR; *valp = malloc(*size_valp); + if (*valp == NULL) { + log_oom(__FILE__, __LINE__, __CURRENT_FUNC__); + abort(); + } memcpy(*valp, &buffer->head[buffer->processed], *size_valp); buffer->processed += *size_valp; diff --git a/src/common/pack.h b/src/common/pack.h index 80b9483ae7e53dc7f482fa44957b431e50b3d132..aa83ed8732acbdd63832da828b219ab4014bcf1c 100644 --- a/src/common/pack.h +++ b/src/common/pack.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/parse_config.c b/src/common/parse_config.c index 8ab3b6eda893de45519970948b93a2a1cb7b2c15..e3fb9142c9450c21a372492b507c0fe99607b592 100644 --- a/src/common/parse_config.c +++ b/src/common/parse_config.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -71,7 +71,7 @@ strong_alias(s_p_parse_file, slurm_s_p_parse_file); #define BUFFER_SIZE 4096 -#define CONF_HASH_LEN 26 +#define CONF_HASH_LEN 173 static regex_t keyvalue_re; static char *keyvalue_pattern = @@ -100,16 +100,12 @@ struct s_p_values { */ static int _conf_hashtbl_index(const char *key) { - int i; - int idx = 0; + unsigned int hashval; xassert(key); - for (i = 0; i < 10; i++) { - if (key[i] == '\0') - break; - idx += tolower((int)key[i]); - } - return idx % CONF_HASH_LEN; + for (hashval = 0; *key != 0; key++) + hashval = tolower(*key) + 31 * hashval; + return hashval % CONF_HASH_LEN; } static void _conf_hashtbl_insert(s_p_hashtbl_t *hashtbl, @@ -222,6 +218,9 @@ void s_p_hashtbl_destroy(s_p_hashtbl_t *hashtbl) { int i; s_p_values_t *p, *next; + if (!hashtbl) + return; + for (i = 0; i < CONF_HASH_LEN; i++) { for (p = hashtbl[i]; p != NULL; p = next) { next = p->next; @@ -290,17 +289,20 @@ static int _strip_continuation(char *buf, int len) char *ptr; int bs = 0; + if (len == 0) + return len; /* Empty line */ + for (ptr = buf+len-1; ptr >= buf; ptr--) { if (*ptr == '\\') bs++; - else if (isspace((int)*ptr) && bs == 0) + else if (isspace((int)*ptr) && (bs == 0)) continue; else break; } /* Check for an odd number of contiguous backslashes at - the end of the line */ - if (bs % 2 == 1) { + * the end of the line */ 
+ if ((bs % 2) == 1) { ptr = ptr + bs; *ptr = '\0'; return (ptr - buf); @@ -372,7 +374,7 @@ static void _compute_hash_val(uint32_t *hash_val, char *line) { int idx, i, len; - if(!hash_val) + if (!hash_val) return; len = strlen(line); @@ -899,10 +901,11 @@ int s_p_parse_file(s_p_hashtbl_t *hashtbl, uint32_t *hash_val, char *filename, return SLURM_ERROR; } - line = xmalloc(sizeof(char) * stat_buf.st_size); + /* Buffer needs one extra byte for trailing '\0' */ + line = xmalloc(sizeof(char) * stat_buf.st_size + 1); line_number = 1; while ((merged_lines = _get_next_line( - line, stat_buf.st_size, hash_val, f)) > 0) { + line, stat_buf.st_size + 1, hash_val, f)) > 0) { /* skip empty lines */ if (line[0] == '\0') { line_number += merged_lines; @@ -945,7 +948,7 @@ int s_p_parse_file(s_p_hashtbl_t *hashtbl, uint32_t *hash_val, char *filename, /* * s_p_hashtbl_merge - * + * * Merge the contents of two s_p_hashtbl_t data structures. Anything in * from_hashtbl that does not also appear in to_hashtbl is transfered to it. * This is intended primary to support multiple lines of DEFAULT configuration @@ -1384,3 +1387,22 @@ void s_p_dump_values(const s_p_hashtbl_t *hashtbl, } } } + +extern void transfer_s_p_options(s_p_options_t **full_options, + s_p_options_t *options, + int *full_options_cnt) +{ + s_p_options_t *op = NULL; + s_p_options_t *full_options_ptr; + int cnt = *full_options_cnt; + + xassert(full_options); + + for (op = options; op->key != NULL; op++, cnt++) { + xrealloc(*full_options, ((cnt + 1) * sizeof(s_p_options_t))); + full_options_ptr = &(*full_options)[cnt]; + full_options_ptr->key = xstrdup(op->key); + full_options_ptr->type = op->type; + } + *full_options_cnt = cnt; +} diff --git a/src/common/parse_config.h b/src/common/parse_config.h index 78d0768b9d18c420bc307589f163f9ee1cccb0d6..2d25e1e3b3a4a186b195759e3fd098114c268b60 100644 --- a/src/common/parse_config.h +++ b/src/common/parse_config.h @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -343,4 +343,16 @@ void s_p_dump_values(const s_p_hashtbl_t *hashtbl, const s_p_options_t options[]); +/* + * copy options onto the end of full_options + * IN/OUT full_options + * IN options + * IN/OUT full_options_cnt + * + * Used if the full set of options are not available from one location. + */ +extern void transfer_s_p_options(s_p_options_t **full_options, + s_p_options_t *options, + int *full_options_cnt); + #endif /* !_PARSE_CONFIG_H */ diff --git a/src/common/parse_spec.c b/src/common/parse_spec.c index 21a4254aa1e2facea14af7c89f8319ce5b71deb2..8e69551716b883ea60804d06fbb451ff5273f56e 100644 --- a/src/common/parse_spec.c +++ b/src/common/parse_spec.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/parse_spec.h b/src/common/parse_spec.h index f8a4273f72cfdb8f261877af45c1db9e1d469864..05518354f75dd016ce0027a9337b4527effb0e58 100644 --- a/src/common/parse_spec.h +++ b/src/common/parse_spec.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/parse_time.c b/src/common/parse_time.c index 77c39476b6852b1c846d99a4456c183b62382de6..2e028aa8be94c0f950c1fae5d087042328c84bc0 100644 --- a/src/common/parse_time.c +++ b/src/common/parse_time.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -223,9 +223,15 @@ static int _get_date(char *time_str, int *pos, int *month, int *mday, int *year) { int mon, day, yr; int offset = *pos; + int len; - if(time_str[offset+4] && (time_str[offset+4] == '-') - && time_str[offset+7] && (time_str[offset+7] == '-')) { + if (!time_str) + goto prob; + + len = strlen(time_str); + + if ((len >= (offset+7)) && (time_str[offset+4] == '-') + && (time_str[offset+7] == '-')) { /* get year */ if ((time_str[offset] < '0') || (time_str[offset] > '9')) goto prob; @@ -626,7 +632,7 @@ slurm_make_time_str (time_t *time, char *string, int size) if (use_relative_format) display_fmt = _relative_date_fmt(&time_tm); - strftime(string, size, display_fmt, &time_tm); + slurm_strftime(string, size, display_fmt, &time_tm); } } @@ -683,7 +689,7 @@ extern int time_str2secs(const char *string) break; } - if ((days != -1) && (hr == -1) && (min != 0)) { + if ((days != -1) && (hr == -1) && (min != -1)) { /* format was "days-hr" or "days-hr:min" */ hr = min; min = sec; diff --git a/src/common/parse_time.h b/src/common/parse_time.h index e915d00ac023d1d1a9d67693fb40aee787ddf700..e98ba1068a5791636f8c4a8918c99fb5486c231a 100644 --- a/src/common/parse_time.h +++ b/src/common/parse_time.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/plugin.c b/src/common/plugin.c index 6bb5faf0d050e98cebc6fef490140395d430b499..40b78dea497d7ddc7f3e394c6ce1af8e19fe1f9f 100644 --- a/src/common/plugin.c +++ b/src/common/plugin.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -228,11 +228,11 @@ plugin_load_and_link(const char *type_name, int n_syms, so_name = xstrdup_printf("%s.so", type_name); #endif while(so_name[i]) { - if(so_name[i] == '/') + if (so_name[i] == '/') so_name[i] = '_'; i++; } - if(!(dir_array = slurm_get_plugin_dir())) { + if (!(dir_array = slurm_get_plugin_dir())) { error("plugin_load_and_link: No plugin dir given"); xfree(so_name); return plug; @@ -244,7 +244,7 @@ plugin_load_and_link(const char *type_name, int n_syms, if (dir_array[i] == ':') { dir_array[i] = '\0'; got_colon = 1; - } else if(dir_array[i] != '\0') + } else if (dir_array[i] != '\0') continue; file_name = xstrdup_printf("%s/%s", head, so_name); @@ -255,7 +255,7 @@ plugin_load_and_link(const char *type_name, int n_syms, xfree(file_name); err = EPLUGIN_NOTFOUND; } else { - if((err = plugin_load_from_file(&plug, file_name)) + if ((err = plugin_load_from_file(&plug, file_name)) == EPLUGIN_SUCCESS) { if (plugin_get_syms(plug, n_syms, names, ptrs) >= diff --git a/src/common/plugin.h b/src/common/plugin.h index 2773863310381894f18f5e0e67e42d74417d4297..8b9d3a36384871fa5129bc90347a5a7d7db384a1 100644 --- a/src/common/plugin.h +++ b/src/common/plugin.h @@ -7,7 +7,7 @@ * 
CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/plugrack.c b/src/common/plugrack.c index 1d489eccdb9395ce9390a5681f8bef4727d57a00..c11b5a75300a1d731c1fb65ff95f237ccbcfe551 100644 --- a/src/common/plugrack.c +++ b/src/common/plugrack.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -653,8 +653,6 @@ plugrack_print_all_plugin(plugrack_t rack) xassert(rack->entries); itr = list_iterator_create(rack->entries); - if (!itr) - fatal("list_iterator_create: malloc failure"); info("MPI types are..."); while ((e = list_next(itr)) != NULL ) { info("%s", e->full_type); diff --git a/src/common/plugrack.h b/src/common/plugrack.h index d6c29e1dce9b26235cd33e2e0c477070dbb0cec5..51dd5c3bac3157d7f9abfc95765f5d5aba1d00c6 100644 --- a/src/common/plugrack.h +++ b/src/common/plugrack.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/plugstack.c b/src/common/plugstack.c index c3ca324d31ccf5b95b7634bfaa1bf70dd0c67bff..23a890e156022e1abf106eba3bcd7a8adfc76f7d 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. 
All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -1683,11 +1683,11 @@ static int tasks_execd (spank_t spank) } static spank_err_t -global_to_local_id (slurmd_job_t *job, uint32_t gid, uint32_t *p2uint32) +_global_to_local_id(slurmd_job_t *job, uint32_t gid, uint32_t *p2uint32) { int i; *p2uint32 = (uint32_t) -1; - if (gid >= job->ntasks) + if ((job == NULL) || (gid >= job->ntasks)) return (ESPANK_BAD_ARG); for (i = 0; i < job->node_tasks; i++) { if (job->task[i]->gtid == gid) { @@ -1926,8 +1926,13 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) case S_JOB_SUPPLEMENTARY_GIDS: p2gids = va_arg(vargs, gid_t **); p2int = va_arg(vargs, int *); - *p2gids = slurmd_job->gids; - *p2int = slurmd_job->ngids; + if (slurmd_job) { + *p2gids = slurmd_job->gids; + *p2int = slurmd_job->ngids; + } else { + *p2gids = NULL; + *p2int = 0; + } break; case S_JOB_ID: p2uint32 = va_arg(vargs, uint32_t *); @@ -1942,8 +1947,10 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) p2uint32 = va_arg(vargs, uint32_t *); if (spank->stack->type == S_TYPE_LOCAL) *p2uint32 = launcher_job->stepid; - else + else if (slurmd_job) *p2uint32 = slurmd_job->stepid; + else + *p2uint32 = 0; break; case S_JOB_NNODES: p2uint32 = va_arg(vargs, uint32_t *); @@ -1955,16 +1962,24 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) 
*p2uint32 = 0; rc = ESPANK_ENV_NOEXIST; } - } else + } else if (slurmd_job) *p2uint32 = slurmd_job->nnodes; + else + *p2uint32 = 0; break; case S_JOB_NODEID: p2uint32 = va_arg(vargs, uint32_t *); - *p2uint32 = slurmd_job->nodeid; + if (slurmd_job) + *p2uint32 = slurmd_job->nodeid; + else + *p2uint32 = 0; break; case S_JOB_LOCAL_TASK_COUNT: p2uint32 = va_arg(vargs, uint32_t *); - *p2uint32 = slurmd_job->node_tasks; + if (slurmd_job) + *p2uint32 = slurmd_job->node_tasks; + else + *p2uint32 = 0; break; case S_JOB_TOTAL_TASK_COUNT: p2uint32 = va_arg(vargs, uint32_t *); @@ -1976,16 +1991,24 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) *p2uint32 = 0; rc = ESPANK_ENV_NOEXIST; } - } else + } else if (slurmd_job) *p2uint32 = slurmd_job->ntasks; + else + *p2uint32 = 0; break; case S_JOB_NCPUS: p2uint16 = va_arg(vargs, uint16_t *); - *p2uint16 = slurmd_job->cpus; + if (slurmd_job) + *p2uint16 = slurmd_job->cpus; + else + *p2uint16 = 0; break; case S_STEP_CPUS_PER_TASK: p2uint32 = va_arg(vargs, uint32_t *); - *p2uint32 = slurmd_job->cpus_per_task; + if (slurmd_job) + *p2uint32 = slurmd_job->cpus_per_task; + else + *p2uint32 = 0; break; case S_JOB_ARGV: p2int = va_arg(vargs, int *); @@ -1993,14 +2016,20 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) if (spank->stack->type == S_TYPE_LOCAL) { *p2int = launcher_job->argc; *p2argv = launcher_job->argv; - } else { + } else if (slurmd_job) { *p2int = slurmd_job->argc; *p2argv = slurmd_job->argv; + } else { + *p2int = 0; + *p2argv = NULL; } break; case S_JOB_ENV: p2argv = va_arg(vargs, char ***); - *p2argv = slurmd_job->env; + if (slurmd_job) + *p2argv = slurmd_job->env; + else + *p2argv = NULL; break; case S_TASK_ID: p2int = va_arg(vargs, int *); @@ -2065,7 +2094,7 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) 
p2uint32 = va_arg(vargs, uint32_t *); *p2uint32 = (uint32_t) -1; - if ((uint32 <= slurmd_job->node_tasks) && + if (slurmd_job && (uint32 <= slurmd_job->node_tasks) && slurmd_job->task && slurmd_job->task[uint32]) { *p2uint32 = slurmd_job->task[uint32]->gtid; } else @@ -2074,23 +2103,35 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) case S_JOB_GLOBAL_TO_LOCAL_ID: uint32 = va_arg(vargs, uint32_t); p2uint32 = va_arg(vargs, uint32_t *); - rc = global_to_local_id (slurmd_job, uint32, p2uint32); + rc = _global_to_local_id (slurmd_job, uint32, p2uint32); break; case S_JOB_ALLOC_CORES: p2str = va_arg(vargs, char **); - *p2str = slurmd_job->job_alloc_cores; + if (slurmd_job) + *p2str = slurmd_job->job_alloc_cores; + else + *p2str = NULL; break; case S_JOB_ALLOC_MEM: p2uint32 = va_arg(vargs, uint32_t *); - *p2uint32 = slurmd_job->job_mem; + if (slurmd_job) + *p2uint32 = slurmd_job->job_mem; + else + *p2uint32 = 0; break; case S_STEP_ALLOC_CORES: p2str = va_arg(vargs, char **); - *p2str = slurmd_job->step_alloc_cores; + if (slurmd_job) + *p2str = slurmd_job->step_alloc_cores; + else + *p2str = NULL; break; case S_STEP_ALLOC_MEM: p2uint32 = va_arg(vargs, uint32_t *); - *p2uint32 = slurmd_job->step_mem; + if (slurmd_job) + *p2uint32 = slurmd_job->step_mem; + else + *p2uint32 = 0; break; case S_SLURM_VERSION: p2vers = va_arg(vargs, char **); diff --git a/src/common/plugstack.h b/src/common/plugstack.h index 2e480ab3f4209a6a5386b83a349d85e3e254a459..58e68623da6d9597552cbcb33fb1731f26e1c4ec 100644 --- a/src/common/plugstack.h +++ b/src/common/plugstack.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/print_fields.c b/src/common/print_fields.c index 74b55ed212c528be95d3e69cda9f7d6d50efa3da..8914be2e09230fe97d7ff78ab7047dd7038f9f74 100644 --- a/src/common/print_fields.c +++ b/src/common/print_fields.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -58,7 +58,7 @@ extern void destroy_print_field(void *object) { print_field_t *field = (print_field_t *)object; - if(field) { + if (field) { xfree(field->name); xfree(field); } @@ -71,18 +71,18 @@ extern void print_fields_header(List print_fields_list) int curr_inx = 1; int field_count = 0; - if(!print_fields_list || !print_fields_have_header) + if (!print_fields_list || !print_fields_have_header) return; field_count = list_count(print_fields_list); itr = list_iterator_create(print_fields_list); - while((field = list_next(itr))) { - if(print_fields_parsable_print + while ((field = list_next(itr))) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && (curr_inx == field_count)) printf("%s", field->name); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", field->name); else { int abs_len = abs(field->len); @@ -92,9 +92,9 @@ extern void print_fields_header(List print_fields_list) } list_iterator_reset(itr); printf("\n"); - if(print_fields_parsable_print) + if (print_fields_parsable_print) return; - while((field = list_next(itr))) { + while ((field = list_next(itr))) { int abs_len = abs(field->len); printf("%*.*s ", abs_len, abs_len, "-----------------------------------------------------"); @@ -107,17 +107,14 @@ extern void print_fields_date(print_field_t *field, 
time_t value, int last) { int abs_len = abs(field->len); char temp_char[abs_len+1]; - time_t now = value; - if(!now) - now = time(NULL); slurm_make_time_str(&value, (char *)temp_char, sizeof(temp_char)); - if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%s", temp_char); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", temp_char); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*.*s ", abs_len, abs_len, temp_char); else printf("%-*.*s ", abs_len, abs_len, temp_char); @@ -128,21 +125,21 @@ extern void print_fields_str(print_field_t *field, char *value, int last) int abs_len = abs(field->len); char temp_char[abs_len+1]; char *print_this = NULL; - if(!value) { - if(print_fields_parsable_print) + if (!value) { + if (print_fields_parsable_print) print_this = ""; else print_this = " "; } else print_this = value; - if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%s", print_this); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", print_this); else { - if(value) { + if (value) { int len = strlen(value); memcpy(&temp_char, value, MIN(len, abs_len) + 1); @@ -151,7 +148,7 @@ extern void print_fields_str(print_field_t *field, char *value, int last) print_this = temp_char; } - if(field->len == abs_len) + if (field->len == abs_len) printf("%*.*s ", abs_len, abs_len, print_this); else printf("%-*.*s ", abs_len, abs_len, print_this); @@ -162,23 +159,23 @@ extern void print_fields_int(print_field_t *field, int value, int last) { int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print + if ((value == NO_VAL) || (value == INFINITE)) { + if (print_fields_parsable_print 
== PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", abs_len, " "); } else { - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%d", value); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%d|", value); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*d ", abs_len, value); else printf("%-*d ", abs_len, value); @@ -189,23 +186,23 @@ extern void print_fields_uint32(print_field_t *field, uint32_t value, int last) { int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print + if ((value == NO_VAL) || (value == INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%u", value); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%u|", value); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*u ", abs_len, value); else printf("%-*u ", abs_len, value); @@ -217,23 +214,23 @@ extern void print_fields_uint64(print_field_t *field, uint64_t value, int last) int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == (uint64_t)NO_VAL) || (value == (uint64_t)INFINITE)) { - if(print_fields_parsable_print + if ((value == (uint64_t)NO_VAL) || (value == (uint64_t)INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", 
field->len, " "); } else { - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%llu", (long long unsigned) value); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%llu|", (long long unsigned) value); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*llu ", abs_len, (long long unsigned) value); else printf("%-*llu ", abs_len, (long long unsigned) value); @@ -244,23 +241,23 @@ extern void print_fields_double(print_field_t *field, double value, int last) { int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print + if ((value == NO_VAL) || (value == INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%f", value); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%f|", value); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*f ", abs_len, value); else printf("%-*f ", abs_len, value); @@ -272,23 +269,23 @@ extern void print_fields_long_double( { int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print + if ((value == NO_VAL) || (value == INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) 
printf("%Lf", value); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%Lf|", value); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*Lf ", abs_len, value); else printf("%-*Lf ", abs_len, value); @@ -300,25 +297,25 @@ extern void print_fields_time(print_field_t *field, uint64_t value, int last) { int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == (uint64_t)NO_VAL) || (value == (uint64_t)INFINITE)) { - if(print_fields_parsable_print + if ((value == (uint64_t)NO_VAL) || (value == (uint64_t)INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { char time_buf[32]; mins2time_str((time_t) value, time_buf, sizeof(time_buf)); - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%s", time_buf); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", time_buf); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*s ", abs_len, time_buf); else printf("%-*s ", abs_len, time_buf); @@ -330,25 +327,25 @@ extern void print_fields_time_from_secs(print_field_t *field, { int abs_len = abs(field->len); /* (value == unset) || (value == cleared) */ - if((value == (uint64_t)NO_VAL) || (value == (uint64_t)INFINITE)) { - if(print_fields_parsable_print + if ((value == (uint64_t)NO_VAL) || (value == (uint64_t)INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { char time_buf[32]; secs2time_str((time_t) value, time_buf, sizeof(time_buf)); - if(print_fields_parsable_print + if (print_fields_parsable_print == 
PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%s", time_buf); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", time_buf); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*s ", abs_len, time_buf); else printf("%-*s ", abs_len, time_buf); @@ -362,16 +359,16 @@ extern void print_fields_char_list(print_field_t *field, List value, int last) char *print_this = NULL; char *object = NULL; - if(!value || !list_count(value)) { - if(print_fields_parsable_print) + if (!value || !list_count(value)) { + if (print_fields_parsable_print) print_this = xstrdup(""); else print_this = xstrdup(" "); } else { list_sort(value, (ListCmpF)_sort_char_list); itr = list_iterator_create(value); - while((object = list_next(itr))) { - if(print_this) + while ((object = list_next(itr))) { + if (print_this) xstrfmtcat(print_this, ",%s", object); else print_this = xstrdup(object); @@ -379,16 +376,16 @@ extern void print_fields_char_list(print_field_t *field, List value, int last) list_iterator_destroy(itr); } - if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%s", print_this); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", print_this); - else { - if(strlen(print_this) > abs_len) + else if (print_this) { + if (strlen(print_this) > abs_len) print_this[abs_len-1] = '+'; - if(field->len == abs_len) + if (field->len == abs_len) printf("%*.*s ", abs_len, abs_len, print_this); else printf("%-*.*s ", abs_len, abs_len, print_this); diff --git a/src/common/print_fields.h b/src/common/print_fields.h index 786af35d42f789af9d96b97220ee811645c631c9..b6ee424429cc4e04938ab399579f6d1c5e8b0921 100644 --- a/src/common/print_fields.h +++ b/src/common/print_fields.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/proc_args.c b/src/common/proc_args.c index a6b5ac3b31c2207b702b7fbb9aa3b43eccaa62d5..b3c447e77014eb98e0f9add6df60096c35e3b75a 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -7,7 +7,7 @@ * from existing SLURM source code, particularly src/srun/opt.c * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -100,6 +100,48 @@ void print_gres_help(void) printf("No gres help is available\n"); } +void set_distribution(task_dist_states_t distribution, + char **dist, char **lllp_dist) +{ + if (((int)distribution >= 0) + && (distribution != SLURM_DIST_UNKNOWN)) { + switch (distribution) { + case SLURM_DIST_CYCLIC: + *dist = "cyclic"; + break; + case SLURM_DIST_BLOCK: + *dist = "block"; + break; + case SLURM_DIST_PLANE: + *dist = "plane"; + *lllp_dist = "plane"; + break; + case SLURM_DIST_ARBITRARY: + *dist = "arbitrary"; + break; + case SLURM_DIST_CYCLIC_CYCLIC: + *dist = "cyclic:cyclic"; + *lllp_dist = "cyclic"; + break; + case SLURM_DIST_CYCLIC_BLOCK: + *dist = "cyclic:block"; + *lllp_dist = "block"; + break; + case SLURM_DIST_BLOCK_CYCLIC: + *dist = "block:cyclic"; + *lllp_dist = "cyclic"; + break; + case SLURM_DIST_BLOCK_BLOCK: + *dist = "block:block"; + *lllp_dist = "block"; + break; + default: + error("unknown dist, type %d", distribution); + break; + } + } +} + /* * verify that a distribution type in arg is of a known form * returns the task_dist_states, or -1 if state is unknown @@ -118,7 +160,7 @@ task_dist_states_t verify_dist_type(const char *arg, 
uint32_t *plane_size) } else { /* -m plane=<plane_size> */ dist_str = strchr(arg,'='); - if(dist_str != NULL) { + if (dist_str != NULL) { *plane_size=atoi(dist_str+1); len = dist_str-arg; plane_dist = true; @@ -426,7 +468,7 @@ bool verify_node_list(char **node_list_pptr, enum task_dist_states dist, /* If we are using Arbitrary grab count out of the hostfile using them exactly the way we read it in since we are saying, lay it out this way! */ - if(dist == SLURM_DIST_ARBITRARY) + if (dist == SLURM_DIST_ARBITRARY) nodelist = slurm_read_hostfile(*node_list_pptr, task_count); else nodelist = slurm_read_hostfile(*node_list_pptr, NO_VAL); @@ -470,7 +512,7 @@ bool get_resource_arg_range(const char *arg, const char *what, int* min, if (*p == 'k' || *p == 'K') { result *= 1024; p++; - } else if(*p == 'm' || *p == 'M') { + } else if (*p == 'm' || *p == 'M') { result *= 1048576; p++; } @@ -498,7 +540,7 @@ bool get_resource_arg_range(const char *arg, const char *what, int* min, if ((*p == 'k') || (*p == 'K')) { result *= 1024; p++; - } else if(*p == 'm' || *p == 'M') { + } else if (*p == 'm' || *p == 'M') { result *= 1048576; p++; } @@ -532,7 +574,7 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets, { bool tmp_val,ret_val; int i,j; - int max_sockets, max_cores, max_threads; + int max_sockets = 0, max_cores, max_threads; const char *cur_ptr = arg; char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */ buf[0][0] = '\0'; @@ -792,8 +834,8 @@ char *print_geometry(const uint16_t *geometry) char buf[32], *rc = NULL; int dims = slurmdb_setup_cluster_dims(); - if ((dims == 0) - || (geometry[0] == (uint16_t)NO_VAL)) + if ((dims == 0) || !geometry[0] + || (geometry[0] == (uint16_t)NO_VAL)) return NULL; for (i=0; i<dims; i++) { diff --git a/src/common/proc_args.h b/src/common/proc_args.h index 2a49da5789e2b92ad57e849dce1f87f9f9b490f8..d7063ed07fa8e91a929b60f550eb219557de0d50 100644 --- a/src/common/proc_args.h +++ b/src/common/proc_args.h @@ -7,7 +7,7 @@ * from 
existing SLURM source code, particularly src/srun/opt.c * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -68,6 +68,10 @@ void print_slurm_version(void); /* print the available gres options */ void print_gres_help(void); +/* set distribution type strings from distribution type const */ +void set_distribution(task_dist_states_t distribution, + char **dist, char **lllp_dist); + /* verify the requested distribution type */ task_dist_states_t verify_dist_type(const char *arg, uint32_t *plane_size); diff --git a/src/common/read_config.c b/src/common/read_config.c index 5bac9258c8ac3902929396e9063c6225db5d6669..d3f59fce8826cdd7b4a4bcc01e0cf908d9e6b6de 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -4,14 +4,15 @@ * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2010 Lawrence Livermore National Security. * Portions Copyright (C) 2008 Vijay Ramasubramanian. - * Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>. + * Portions Copyright (C) 2010-2013 SchedMD <http://www.schedmd.com>. * Portions (boards) copyright (C) 2012 Bull, <rod.schultz@bull.com> + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>. * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -77,6 +78,7 @@ #include "src/common/slurm_protocol_defs.h" #include "src/common/slurm_rlimits_info.h" #include "src/common/slurm_selecttype_info.h" +#include "src/common/slurm_strcasestr.h" #include "src/common/strlcpy.h" #include "src/common/uid.h" #include "src/common/util-net.h" @@ -89,6 +91,7 @@ ** for details. */ strong_alias(destroy_config_key_pair, slurm_destroy_config_key_pair); +strong_alias(get_extra_conf_path, slurm_get_extra_conf_path); strong_alias(sort_key_pairs, slurm_sort_key_pairs); /* Instantiation of the "extern slurm_ctl_conf_t slurmcltd_conf" @@ -161,6 +164,9 @@ s_p_options_t slurm_conf_options[] = { {"AccountingStoreJobComment", S_P_BOOLEAN}, {"AcctGatherEnergyType", S_P_STRING}, {"AcctGatherNodeFreq", S_P_UINT16}, + {"AcctGatherProfileType", S_P_STRING}, + {"AcctGatherInfinibandType", S_P_STRING}, + {"AcctGatherFilesystemType", S_P_STRING}, {"AuthType", S_P_STRING}, {"BackupAddr", S_P_STRING}, {"BackupController", S_P_STRING}, @@ -182,10 +188,13 @@ s_p_options_t slurm_conf_options[] = { {"DefMemPerCPU", S_P_UINT32}, {"DefMemPerNode", S_P_UINT32}, {"DisableRootJobs", S_P_BOOLEAN}, + {"DynAllocPort", S_P_UINT16}, {"EnforcePartLimits", S_P_BOOLEAN}, {"Epilog", S_P_STRING}, {"EpilogMsgTime", S_P_UINT32}, {"EpilogSlurmctld", S_P_STRING}, + {"ExtSensorsType", S_P_STRING}, + {"ExtSensorsFreq", S_P_UINT16}, {"FastSchedule", S_P_UINT16}, {"FirstJobId", S_P_UINT32}, {"GetEnvTimeout", S_P_UINT16}, @@ -193,10 +202,11 @@ s_p_options_t slurm_conf_options[] = { {"GroupUpdateForce", S_P_UINT16}, {"GroupUpdateTime", S_P_UINT16}, {"HealthCheckInterval", S_P_UINT16}, + {"HealthCheckNodeState", S_P_STRING}, {"HealthCheckProgram", S_P_STRING}, {"InactiveLimit", S_P_UINT16}, {"JobAcctGatherType", S_P_STRING}, - {"JobAcctGatherFrequency", S_P_UINT16}, + {"JobAcctGatherFrequency", S_P_STRING}, {"JobCheckpointDir", S_P_STRING}, {"JobCompHost", S_P_STRING}, {"JobCompLoc", S_P_STRING}, @@ 
-209,11 +219,13 @@ s_p_options_t slurm_conf_options[] = { {"JobFileAppend", S_P_UINT16}, {"JobRequeue", S_P_UINT16}, {"JobSubmitPlugins", S_P_STRING}, + {"KeepAliveTime", S_P_UINT16}, {"KillOnBadExit", S_P_UINT16}, {"KillWait", S_P_UINT16}, {"LaunchType", S_P_STRING}, {"Licenses", S_P_STRING}, {"MailProg", S_P_STRING}, + {"MaxArraySize", S_P_UINT16}, {"MaxJobCount", S_P_UINT32}, {"MaxJobId", S_P_UINT32}, {"MaxMemPerCPU", S_P_UINT32}, @@ -253,7 +265,9 @@ s_p_options_t slurm_conf_options[] = { {"ResumeProgram", S_P_STRING}, {"ResumeRate", S_P_UINT16}, {"ResumeTimeout", S_P_UINT16}, + {"ResvEpilog", S_P_STRING}, {"ResvOverRun", S_P_UINT16}, + {"ResvProlog", S_P_STRING}, {"ReturnToService", S_P_UINT16}, {"SallocDefaultCommand", S_P_STRING}, {"SchedulerAuth", S_P_STRING, _defunct_option}, @@ -269,6 +283,7 @@ s_p_options_t slurm_conf_options[] = { {"SlurmctldDebug", S_P_STRING}, {"SlurmctldLogFile", S_P_STRING}, {"SlurmctldPidFile", S_P_STRING}, + {"SlurmctldPlugstack", S_P_STRING}, {"SlurmctldPort", S_P_STRING}, {"SlurmctldTimeout", S_P_UINT16}, {"SlurmdDebug", S_P_STRING}, @@ -389,20 +404,20 @@ static void _set_node_prefix(const char *nodenames) xassert(nodenames != NULL); for (i = 1; nodenames[i] != '\0'; i++) { - if((nodenames[i-1] == '[') + if ((nodenames[i-1] == '[') || (nodenames[i-1] <= '9' && nodenames[i-1] >= '0')) break; } - if(i == 1) { + if (i == 1) { error("In your Node definition in your slurm.conf you " "gave a nodelist '%s' without a prefix. 
" "Please try something like bg%s.", nodenames, nodenames); } xfree(conf_ptr->node_prefix); - if(nodenames[i] == '\0') + if (nodenames[i] == '\0') conf_ptr->node_prefix = xstrdup(nodenames); else { tmp = xmalloc(sizeof(char)*i+1); @@ -423,6 +438,10 @@ static int _parse_frontend(void **dest, slurm_parser_enum_t type, slurm_conf_frontend_t *n; char *node_state = NULL; static s_p_options_t _frontend_options[] = { + {"AllowGroups", S_P_STRING}, + {"AllowUsers", S_P_STRING}, + {"DenyGroups", S_P_STRING}, + {"DenyUsers", S_P_STRING}, {"FrontendAddr", S_P_STRING}, {"Port", S_P_UINT16}, {"Reason", S_P_STRING}, @@ -462,6 +481,17 @@ static int _parse_frontend(void **dest, slurm_parser_enum_t type, n->frontends = xstrdup(value); + (void) s_p_get_string(&n->allow_groups, "AllowGroups", tbl); + (void) s_p_get_string(&n->allow_users, "AllowUsers", tbl); + (void) s_p_get_string(&n->deny_groups, "DenyGroups", tbl); + (void) s_p_get_string(&n->deny_users, "DenyUsers", tbl); + if (n->allow_groups && n->deny_groups) + fatal("FrontEnd options AllowGroups and DenyGroups " + "are incompatible"); + if (n->allow_users && n->deny_users) + fatal("FrontEnd options AllowUsers and DenyUsers " + "are incompatible"); + if (!s_p_get_string(&n->addresses, "FrontendAddr", tbl)) n->addresses = xstrdup(n->frontends); @@ -767,6 +797,10 @@ static int _parse_nodename(void **dest, slurm_parser_enum_t type, extern void destroy_frontend(void *ptr) { slurm_conf_frontend_t *n = (slurm_conf_frontend_t *) ptr; + xfree(n->allow_groups); + xfree(n->allow_users); + xfree(n->deny_groups); + xfree(n->deny_users); xfree(n->frontends); xfree(n->addresses); xfree(n->reason); @@ -891,6 +925,7 @@ static int _parse_partitionname(void **dest, slurm_parser_enum_t type, {"DisableRootJobs", S_P_BOOLEAN}, /* YES or NO */ {"GraceTime", S_P_UINT32}, {"Hidden", S_P_BOOLEAN}, /* YES or NO */ + {"MaxCPUsPerNode", S_P_UINT32}, {"MaxMemPerCPU", S_P_UINT32}, {"MaxMemPerNode", S_P_UINT32}, {"MaxTime", S_P_STRING}, @@ -901,6 +936,7 @@ 
static int _parse_partitionname(void **dest, slurm_parser_enum_t type, {"Priority", S_P_UINT16}, {"RootOnly", S_P_BOOLEAN}, /* YES or NO */ {"ReqResv", S_P_BOOLEAN}, /* YES or NO */ + {"SelectTypeParameters", S_P_STRING}, /* CR_Socket, CR_Core */ {"Shared", S_P_STRING}, /* YES, NO, or FORCE */ {"State", S_P_STRING}, /* UP, DOWN, INACTIVE or DRAIN */ {NULL} @@ -949,9 +985,15 @@ static int _parse_partitionname(void **dest, slurm_parser_enum_t type, && !s_p_get_boolean(&p->default_flag, "Default", dflt)) p->default_flag = false; + if (!s_p_get_uint32(&p->max_cpus_per_node, "MaxCPUsPerNode", + tbl) && + !s_p_get_uint32(&p->max_cpus_per_node, "MaxCPUsPerNode", + dflt)) + p->max_cpus_per_node = INFINITE; + if (!s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode", tbl) && - !s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode", + !s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode", dflt)) { if (s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerCPU", tbl) || @@ -965,7 +1007,7 @@ static int _parse_partitionname(void **dest, slurm_parser_enum_t type, if (!s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode", tbl) && - !s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode", + !s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode", dflt)) { if (s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerCPU", tbl) || @@ -981,8 +1023,8 @@ static int _parse_partitionname(void **dest, slurm_parser_enum_t type, "DisableRootJobs", tbl)) p->disable_root_jobs = (uint16_t)NO_VAL; - if (!s_p_get_boolean(&p->hidden_flag, "Hidden", tbl) - && !s_p_get_boolean(&p->hidden_flag, "Hidden", dflt)) + if (!s_p_get_boolean(&p->hidden_flag, "Hidden", tbl) && + !s_p_get_boolean(&p->hidden_flag, "Hidden", dflt)) p->hidden_flag = false; if (!s_p_get_string(&tmp, "MaxTime", tbl) && @@ -1069,6 +1111,23 @@ static int _parse_partitionname(void **dest, slurm_parser_enum_t type, !s_p_get_uint16(&p->priority, "Priority", dflt)) p->priority = 1; + if (s_p_get_string(&tmp, "SelectTypeParameters", tbl)) { + if (strncasecmp(tmp, 
"CR_Socket", 9) == 0) + p->cr_type = CR_SOCKET; + else if (strncasecmp(tmp, "CR_Core", 7) == 0) + p->cr_type = CR_CORE; + else { + error("Bad value for SelectTypeParameters: %s", + tmp); + _destroy_partitionname(p); + s_p_hashtbl_destroy(tbl); + xfree(tmp); + return -1; + } + xfree(tmp); + } else + p->cr_type = 0; + if (s_p_get_string(&tmp, "Shared", tbl) || s_p_get_string(&tmp, "Shared", dflt)) { if (strcasecmp(tmp, "NO") == 0) @@ -1621,7 +1680,7 @@ extern char *slurm_conf_get_nodename(const char *node_hostname) names_ll_t *p; #ifdef HAVE_FRONT_END slurm_conf_frontend_t *front_end_ptr = NULL; - + slurm_conf_lock(); if (!front_end_list) { debug("front_end_list is NULL"); @@ -2008,10 +2067,15 @@ free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash) xfree (ctl_conf_ptr->control_machine); xfree (ctl_conf_ptr->crypto_type); xfree (ctl_conf_ptr->acct_gather_energy_type); + xfree (ctl_conf_ptr->acct_gather_profile_type); + xfree (ctl_conf_ptr->acct_gather_infiniband_type); + xfree (ctl_conf_ptr->acct_gather_filesystem_type); xfree (ctl_conf_ptr->epilog); xfree (ctl_conf_ptr->epilog_slurmctld); + xfree (ctl_conf_ptr->ext_sensors_type); xfree (ctl_conf_ptr->gres_plugins); xfree (ctl_conf_ptr->health_check_program); + xfree (ctl_conf_ptr->job_acct_gather_freq); xfree (ctl_conf_ptr->job_acct_gather_type); xfree (ctl_conf_ptr->job_ckpt_dir); xfree (ctl_conf_ptr->job_comp_host); @@ -2040,6 +2104,8 @@ free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash) xfree (ctl_conf_ptr->propagate_rlimits_except); xfree (ctl_conf_ptr->reboot_program); xfree (ctl_conf_ptr->resume_program); + xfree (ctl_conf_ptr->resv_epilog); + xfree (ctl_conf_ptr->resv_prolog); xfree (ctl_conf_ptr->salloc_default_command); xfree (ctl_conf_ptr->sched_logfile); xfree (ctl_conf_ptr->sched_params); @@ -2051,6 +2117,7 @@ free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash) xfree (ctl_conf_ptr->slurm_user_name); xfree (ctl_conf_ptr->slurmctld_logfile); xfree 
(ctl_conf_ptr->slurmctld_pidfile); + xfree (ctl_conf_ptr->slurmctld_plugstack); xfree (ctl_conf_ptr->slurmd_logfile); xfree (ctl_conf_ptr->slurmd_pidfile); xfree (ctl_conf_ptr->slurmd_spooldir); @@ -2108,6 +2175,12 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->disable_root_jobs = 0; ctl_conf_ptr->acct_gather_node_freq = 0; xfree (ctl_conf_ptr->acct_gather_energy_type); + xfree (ctl_conf_ptr->acct_gather_profile_type); + xfree (ctl_conf_ptr->acct_gather_infiniband_type); + xfree (ctl_conf_ptr->acct_gather_filesystem_type); + ctl_conf_ptr->ext_sensors_freq = 0; + xfree (ctl_conf_ptr->ext_sensors_type); + ctl_conf_ptr->dynalloc_port = (uint16_t) NO_VAL; ctl_conf_ptr->enforce_part_limits = 0; xfree (ctl_conf_ptr->epilog); ctl_conf_ptr->epilog_msg_time = (uint32_t) NO_VAL; @@ -2120,8 +2193,8 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->health_check_interval = 0; xfree(ctl_conf_ptr->health_check_program); ctl_conf_ptr->inactive_limit = (uint16_t) NO_VAL; + xfree (ctl_conf_ptr->job_acct_gather_freq); xfree (ctl_conf_ptr->job_acct_gather_type); - ctl_conf_ptr->job_acct_gather_freq = 0; xfree (ctl_conf_ptr->job_ckpt_dir); xfree (ctl_conf_ptr->job_comp_loc); xfree (ctl_conf_ptr->job_comp_pass); @@ -2133,10 +2206,12 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->job_file_append = (uint16_t) NO_VAL; ctl_conf_ptr->job_requeue = (uint16_t) NO_VAL; xfree(ctl_conf_ptr->job_submit_plugins); + ctl_conf_ptr->keep_alive_time = (uint16_t) NO_VAL; ctl_conf_ptr->kill_wait = (uint16_t) NO_VAL; xfree (ctl_conf_ptr->launch_type); xfree (ctl_conf_ptr->licenses); xfree (ctl_conf_ptr->mail_prog); + ctl_conf_ptr->max_array_sz = (uint16_t) NO_VAL; ctl_conf_ptr->max_job_cnt = (uint32_t) NO_VAL; ctl_conf_ptr->max_job_id = NO_VAL; ctl_conf_ptr->max_mem_per_cpu = 0; @@ -2163,7 +2238,9 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->resume_timeout = 0; xfree (ctl_conf_ptr->resume_program); ctl_conf_ptr->resume_rate = 
(uint16_t) NO_VAL; + xfree (ctl_conf_ptr->resv_epilog); ctl_conf_ptr->resv_over_run = 0; + xfree (ctl_conf_ptr->resv_prolog); ctl_conf_ptr->ret2service = (uint16_t) NO_VAL; xfree( ctl_conf_ptr->salloc_default_command); xfree( ctl_conf_ptr->sched_params ); @@ -2182,6 +2259,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) xfree (ctl_conf_ptr->sched_logfile); ctl_conf_ptr->sched_log_level = (uint16_t) NO_VAL; xfree (ctl_conf_ptr->slurmctld_pidfile); + xfree (ctl_conf_ptr->slurmctld_plugstack); ctl_conf_ptr->slurmctld_port = (uint32_t) NO_VAL; ctl_conf_ptr->slurmctld_port_count = 1; ctl_conf_ptr->slurmctld_timeout = (uint16_t) NO_VAL; @@ -2477,6 +2555,34 @@ static void _normalize_debug_level(uint16_t *level) /* level is uint16, always > LOG_LEVEL_QUIET(0), can't underflow */ } +/* Convert HealthCheckNodeState string to numeric value */ +static uint16_t _health_node_state(char *state_str) +{ + uint16_t state_num = 0; + char *tmp_str = xstrdup(state_str); + char *token, *last = NULL; + + token = strtok_r(tmp_str, ",", &last); + while (token) { + if (!strcasecmp(token, "ANY")) + state_num |= HEALTH_CHECK_NODE_ANY; + else if (!strcasecmp(token, "ALLOC")) + state_num |= HEALTH_CHECK_NODE_ALLOC; + else if (!strcasecmp(token, "IDLE")) + state_num |= HEALTH_CHECK_NODE_IDLE; + else if (!strcasecmp(token, "MIXED")) + state_num |= HEALTH_CHECK_NODE_MIXED; + else { + error("Invalid HealthCheckNodeState value %s ignored", + token); + } + token = strtok_r(NULL, ",", &last); + } + xfree(tmp_str); + + return state_num; +} + /* * * IN/OUT ctl_conf_ptr - a configuration as loaded by read_slurm_conf_ctl @@ -2565,6 +2671,21 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->acct_gather_energy_type = xstrdup(DEFAULT_ACCT_GATHER_ENERGY_TYPE); + if (!s_p_get_string(&conf->acct_gather_profile_type, + "AcctGatherProfileType", hashtbl)) + conf->acct_gather_profile_type = + xstrdup(DEFAULT_ACCT_GATHER_PROFILE_TYPE); + + if 
(!s_p_get_string(&conf->acct_gather_infiniband_type, + "AcctGatherInfinibandType", hashtbl)) + conf->acct_gather_infiniband_type = + xstrdup(DEFAULT_ACCT_GATHER_INFINIBAND_TYPE); + + if (!s_p_get_string(&conf->acct_gather_filesystem_type, + "AcctGatherFilesystemType", hashtbl)) + conf->acct_gather_filesystem_type = + xstrdup(DEFAULT_ACCT_GATHER_FILESYSTEM_TYPE); + if (!s_p_get_uint16(&conf->acct_gather_node_freq, "AcctGatherNodeFreq", hashtbl)) conf->acct_gather_node_freq = 0; @@ -2580,15 +2701,6 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_string(&conf->job_credential_public_certificate, "JobCredentialPublicCertificate", hashtbl); - if (s_p_get_uint32(&conf->max_job_cnt, "MaxJobCount", hashtbl) && - (conf->max_job_cnt < 1)) - fatal("MaxJobCount=%u, No jobs permitted", conf->max_job_cnt); - if (s_p_get_uint32(&conf->max_step_cnt, "MaxStepCount", hashtbl) && - (conf->max_step_cnt < 1)) { - fatal("MaxStepCount=%u, No steps permitted", - conf->max_step_cnt); - } - if (!s_p_get_string(&conf->authtype, "AuthType", hashtbl)) conf->authtype = xstrdup(DEFAULT_AUTH_TYPE); @@ -2637,6 +2749,14 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) "DisableRootJobs", hashtbl)) conf->disable_root_jobs = DEFAULT_DISABLE_ROOT_JOBS; + if (s_p_get_uint16(&conf->dynalloc_port, "DynAllocPort", hashtbl)) { + if (conf->dynalloc_port == 0) { + error("DynAllocPort=0 is invalid"); + } + } else { + conf->dynalloc_port = 0; + } + if (!s_p_get_boolean((bool *) &conf->enforce_part_limits, "EnforcePartLimits", hashtbl)) conf->enforce_part_limits = DEFAULT_ENFORCE_PART_LIMITS; @@ -2648,13 +2768,20 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_string(&conf->epilog_slurmctld, "EpilogSlurmctld", hashtbl); + if (!s_p_get_string(&conf->ext_sensors_type, + "ExtSensorsType", hashtbl)) + conf->ext_sensors_type = + xstrdup(DEFAULT_EXT_SENSORS_TYPE); + + if (!s_p_get_uint16(&conf->ext_sensors_freq, + 
"ExtSensorsFreq", hashtbl)) + conf->ext_sensors_freq = 0; + if (!s_p_get_uint16(&conf->fast_schedule, "FastSchedule", hashtbl)) conf->fast_schedule = DEFAULT_FAST_SCHEDULE; if (!s_p_get_uint32(&conf->first_job_id, "FirstJobId", hashtbl)) conf->first_job_id = DEFAULT_FIRST_JOB_ID; - if (!s_p_get_uint32(&conf->max_job_id, "MaxJobId", hashtbl)) - conf->max_job_id = DEFAULT_MAX_JOB_ID; s_p_get_string(&conf->gres_plugins, "GresTypes", hashtbl); @@ -2674,11 +2801,12 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->inactive_limit = DEFAULT_INACTIVE_LIMIT; } - if (!s_p_get_uint16(&conf->job_acct_gather_freq, + if (!s_p_get_string(&conf->job_acct_gather_freq, "JobAcctGatherFrequency", hashtbl)) - conf->job_acct_gather_freq = DEFAULT_JOB_ACCT_GATHER_FREQ; + conf->job_acct_gather_freq = + xstrdup(DEFAULT_JOB_ACCT_GATHER_FREQ); - if(!s_p_get_string(&conf->job_acct_gather_type, + if (!s_p_get_string(&conf->job_acct_gather_type, "JobAcctGatherType", hashtbl)) conf->job_acct_gather_type = xstrdup(DEFAULT_JOB_ACCT_GATHER_TYPE); @@ -2687,8 +2815,8 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->job_ckpt_dir = xstrdup(DEFAULT_JOB_CKPT_DIR); if (!s_p_get_string(&conf->job_comp_type, "JobCompType", hashtbl)) { - if(default_storage_type) { - if(!strcasecmp("slurmdbd", default_storage_type)) { + if (default_storage_type) { + if (!strcasecmp("slurmdbd", default_storage_type)) { error("Can not use the default storage type " "specified for jobcomp since there is " "not slurmdbd type. 
We are using %s " @@ -2705,9 +2833,9 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE); } if (!s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl)) { - if(default_storage_loc) + if (default_storage_loc) conf->job_comp_loc = xstrdup(default_storage_loc); - else if(!strcmp(conf->job_comp_type, "job_comp/mysql") + else if (!strcmp(conf->job_comp_type, "job_comp/mysql") || !strcmp(conf->job_comp_type, "job_comp/pgsql")) conf->job_comp_loc = xstrdup(DEFAULT_JOB_COMP_DB); else @@ -2716,30 +2844,30 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->job_comp_host, "JobCompHost", hashtbl)) { - if(default_storage_host) + if (default_storage_host) conf->job_comp_host = xstrdup(default_storage_host); else conf->job_comp_host = xstrdup(DEFAULT_STORAGE_HOST); } if (!s_p_get_string(&conf->job_comp_user, "JobCompUser", hashtbl)) { - if(default_storage_user) + if (default_storage_user) conf->job_comp_user = xstrdup(default_storage_user); else conf->job_comp_user = xstrdup(DEFAULT_STORAGE_USER); } if (!s_p_get_string(&conf->job_comp_pass, "JobCompPass", hashtbl)) { - if(default_storage_pass) + if (default_storage_pass) conf->job_comp_pass = xstrdup(default_storage_pass); } if (!s_p_get_uint32(&conf->job_comp_port, "JobCompPort", hashtbl)) { - if(default_storage_port) + if (default_storage_port) conf->job_comp_port = default_storage_port; - else if(!strcmp(conf->job_comp_type, "job_comp/mysql")) + else if (!strcmp(conf->job_comp_type, "job_comp/mysql")) conf->job_comp_port = DEFAULT_MYSQL_PORT; - else if(!strcmp(conf->job_comp_type, "job_comp/pgsql")) + else if (!strcmp(conf->job_comp_type, "job_comp/pgsql")) conf->job_comp_port = DEFAULT_PGSQL_PORT; else conf->job_comp_port = DEFAULT_STORAGE_PORT; @@ -2761,9 +2889,18 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_uint16(&conf->health_check_interval, 
"HealthCheckInterval", hashtbl); + if (s_p_get_string(&temp_str, "HealthCheckNodeState", hashtbl)) { + conf->health_check_node_state = _health_node_state(temp_str); + xfree(temp_str); + } else + conf->health_check_node_state = HEALTH_CHECK_NODE_ANY; + s_p_get_string(&conf->health_check_program, "HealthCheckProgram", hashtbl); + if (!s_p_get_uint16(&conf->keep_alive_time, "KeepAliveTime", hashtbl)) + conf->keep_alive_time = DEFAULT_KEEP_ALIVE_TIME; + if (!s_p_get_uint16(&conf->kill_on_bad_exit, "KillOnBadExit", hashtbl)) conf->kill_on_bad_exit = DEFAULT_KILL_ON_BAD_EXIT; @@ -2778,12 +2915,30 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->mail_prog, "MailProg", hashtbl)) conf->mail_prog = xstrdup(DEFAULT_MAIL_PROG); + + if (!s_p_get_uint16(&conf->max_array_sz, "MaxArraySize", hashtbl)) + conf->max_array_sz = DEFAULT_MAX_ARRAY_SIZE; + if (!s_p_get_uint32(&conf->max_job_cnt, "MaxJobCount", hashtbl)) conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT; + else if (conf->max_job_cnt < 1) + fatal("MaxJobCount=%u, No jobs permitted", conf->max_job_cnt); if (!s_p_get_uint32(&conf->max_job_id, "MaxJobId", hashtbl)) conf->max_job_id = DEFAULT_MAX_JOB_ID; + if (conf->first_job_id > conf->max_job_id) + fatal("FirstJobId > MaxJobId"); + else { + uint32_t tmp32 = conf->max_job_id - conf->first_job_id + 1; + if (conf->max_job_cnt > tmp32) { + /* Needed for job array support */ + info("Resetting MaxJobCnt to %u " + "(MaxJobId - FirstJobId + 1)", tmp32); + conf->max_job_cnt = tmp32; + } + } + if (s_p_get_uint32(&conf->max_mem_per_cpu, "MaxMemPerCPU", hashtbl)) { conf->max_mem_per_cpu |= MEM_PER_CPU; @@ -2794,6 +2949,10 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_uint32(&conf->max_step_cnt, "MaxStepCount", hashtbl)) conf->max_step_cnt = DEFAULT_MAX_STEP_COUNT; + else if (conf->max_step_cnt < 1) { + fatal("MaxStepCount=%u, No steps permitted", + conf->max_step_cnt); + } if 
(!s_p_get_uint16(&conf->max_tasks_per_node, "MaxTasksPerNode", hashtbl)) { @@ -2819,13 +2978,13 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_string(&conf->mpi_params, "MpiParams", hashtbl); - if(!s_p_get_boolean((bool *)&conf->track_wckey, + if (!s_p_get_boolean((bool *)&conf->track_wckey, "TrackWCKey", hashtbl)) conf->track_wckey = false; if (!s_p_get_string(&conf->accounting_storage_type, "AccountingStorageType", hashtbl)) { - if(default_storage_type) + if (default_storage_type) conf->accounting_storage_type = xstrdup_printf("accounting_storage/%s", default_storage_type); @@ -2835,18 +2994,20 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) } if (s_p_get_string(&temp_str, "AccountingStorageEnforce", hashtbl)) { - if (strstr(temp_str, "1") || strstr(temp_str, "associations")) + if (slurm_strcasestr(temp_str, "1") + || slurm_strcasestr(temp_str, "associations")) conf->accounting_storage_enforce |= ACCOUNTING_ENFORCE_ASSOCS; - if (strstr(temp_str, "2") || strstr(temp_str, "limits")) { + if (slurm_strcasestr(temp_str, "2") + || slurm_strcasestr(temp_str, "limits")) { conf->accounting_storage_enforce |= ACCOUNTING_ENFORCE_ASSOCS; conf->accounting_storage_enforce |= ACCOUNTING_ENFORCE_LIMITS; } - if (strstr(temp_str, "safe")) { + if (slurm_strcasestr(temp_str, "safe")) { conf->accounting_storage_enforce |= ACCOUNTING_ENFORCE_ASSOCS; conf->accounting_storage_enforce @@ -2855,7 +3016,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) |= ACCOUNTING_ENFORCE_SAFE; } - if (strstr(temp_str, "wckeys")) { + if (slurm_strcasestr(temp_str, "wckeys")) { conf->accounting_storage_enforce |= ACCOUNTING_ENFORCE_ASSOCS; conf->accounting_storage_enforce @@ -2863,14 +3024,26 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->track_wckey = true; } - if (strstr(temp_str, "qos")) { + if (slurm_strcasestr(temp_str, "qos")) { conf->accounting_storage_enforce |= 
ACCOUNTING_ENFORCE_ASSOCS; conf->accounting_storage_enforce |= ACCOUNTING_ENFORCE_QOS; } - if (strstr(temp_str, "all")) { + if (slurm_strcasestr(temp_str, "nojobs")) { + conf->accounting_storage_enforce + |= ACCOUNTING_ENFORCE_NO_JOBS; + conf->accounting_storage_enforce + |= ACCOUNTING_ENFORCE_NO_STEPS; + } + + if (slurm_strcasestr(temp_str, "nosteps")) { + conf->accounting_storage_enforce + |= ACCOUNTING_ENFORCE_NO_STEPS; + } + + if (slurm_strcasestr(temp_str, "all")) { conf->accounting_storage_enforce = 0xffff; conf->track_wckey = true; } @@ -2885,7 +3058,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->accounting_storage_host, "AccountingStorageHost", hashtbl)) { - if(default_storage_host) + if (default_storage_host) conf->accounting_storage_host = xstrdup(default_storage_host); else @@ -2910,7 +3083,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) } if (!s_p_get_string(&conf->accounting_storage_user, "AccountingStorageUser", hashtbl)) { - if(default_storage_user) + if (default_storage_user) conf->accounting_storage_user = xstrdup(default_storage_user); else @@ -2919,7 +3092,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) } if (!s_p_get_string(&conf->accounting_storage_pass, "AccountingStoragePass", hashtbl)) { - if(default_storage_pass) + if (default_storage_pass) conf->accounting_storage_pass = xstrdup(default_storage_pass); } @@ -2931,15 +3104,15 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_uint32(&conf->accounting_storage_port, "AccountingStoragePort", hashtbl)) { - if(default_storage_port) + if (default_storage_port) conf->accounting_storage_port = default_storage_port; - else if(!strcmp(conf->accounting_storage_type, + else if (!strcmp(conf->accounting_storage_type, "accounting_storage/slurmdbd")) conf->accounting_storage_port = SLURMDBD_PORT; - else if(!strcmp(conf->accounting_storage_type, 
+ else if (!strcmp(conf->accounting_storage_type, "accounting_storage/mysql")) conf->accounting_storage_port = DEFAULT_MYSQL_PORT; - else if(!strcmp(conf->accounting_storage_type, + else if (!strcmp(conf->accounting_storage_type, "accounting_storage/pgsql")) conf->accounting_storage_port = DEFAULT_PGSQL_PORT; else @@ -2947,7 +3120,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) } /* remove the user and loc if using slurmdbd */ - if(!strcmp(conf->accounting_storage_type, + if (!strcmp(conf->accounting_storage_type, "accounting_storage/slurmdbd")) { xfree(conf->accounting_storage_loc); conf->accounting_storage_loc = xstrdup("N/A"); @@ -3032,8 +3205,12 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->priority_flags = 0; if (s_p_get_string(&temp_str, "PriorityFlags", hashtbl)) { - if (strstr(temp_str, "ACCRUE_ALWAYS")) + if (slurm_strcasestr(temp_str, "ACCRUE_ALWAYS")) conf->priority_flags |= PRIORITY_FLAGS_ACCRUE_ALWAYS; + if (slurm_strcasestr(temp_str, "SMALL_RELATIVE_TO_TIME")) + conf->priority_flags |= PRIORITY_FLAGS_SIZE_RELATIVE; + if (slurm_strcasestr(temp_str, "TICKET_BASED")) + conf->priority_flags |= PRIORITY_FLAGS_TICKET_BASED; xfree(temp_str); } if (s_p_get_string(&temp_str, "PriorityMaxAge", hashtbl)) { @@ -3069,7 +3246,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) xfree(temp_str); } else { conf->priority_reset_period = PRIORITY_RESET_NONE; - if(!conf->priority_decay_hl) { + if (!conf->priority_decay_hl) { fatal("You have to either have " "PriorityDecayHalfLife != 0 or " "PriorityUsageResetPeriod set to something " @@ -3080,6 +3257,14 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->priority_type, "PriorityType", hashtbl)) conf->priority_type = xstrdup(DEFAULT_PRIORITY_TYPE); + if (!strcasecmp(conf->priority_type, "priority/multifactor2")) { + error("PriorityType=priority/multifactor2 is deprecated. 
" + "In the future use\nPriorityType=priority/multifactor\n" + "PriortyFlags=Ticket_Based\nThis is what is loaded now."); + xfree(conf->priority_type); + conf->priority_type = xstrdup("priority/multifactor"); + conf->priority_flags |= PRIORITY_FLAGS_TICKET_BASED; + } if (!s_p_get_uint32(&conf->priority_weight_age, "PriorityWeightAge", hashtbl)) @@ -3119,21 +3304,21 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->private_data = 0; /* Set to default before parsing PrivateData */ if (s_p_get_string(&temp_str, "PrivateData", hashtbl)) { - if (strstr(temp_str, "account")) + if (slurm_strcasestr(temp_str, "account")) conf->private_data |= PRIVATE_DATA_ACCOUNTS; - if (strstr(temp_str, "job")) + if (slurm_strcasestr(temp_str, "job")) conf->private_data |= PRIVATE_DATA_JOBS; - if (strstr(temp_str, "node")) + if (slurm_strcasestr(temp_str, "node")) conf->private_data |= PRIVATE_DATA_NODES; - if (strstr(temp_str, "partition")) + if (slurm_strcasestr(temp_str, "partition")) conf->private_data |= PRIVATE_DATA_PARTITIONS; - if (strstr(temp_str, "reservation")) + if (slurm_strcasestr(temp_str, "reservation")) conf->private_data |= PRIVATE_DATA_RESERVATIONS; - if (strstr(temp_str, "usage")) + if (slurm_strcasestr(temp_str, "usage")) conf->private_data |= PRIVATE_DATA_USAGE; - if (strstr(temp_str, "user")) + if (slurm_strcasestr(temp_str, "user")) conf->private_data |= PRIVATE_DATA_USERS; - if (strstr(temp_str, "all")) + if (slurm_strcasestr(temp_str, "all")) conf->private_data = 0xffff; xfree(temp_str); } @@ -3180,7 +3365,9 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) fatal("ReturnToService > 1 is not supported on Cray"); #endif + s_p_get_string(&conf->resv_epilog, "ResvEpilog", hashtbl); s_p_get_uint16(&conf->resv_over_run, "ResvOverRun", hashtbl); + s_p_get_string(&conf->resv_prolog, "ResvProlog", hashtbl); s_p_get_string(&conf->resume_program, "ResumeProgram", hashtbl); if (!s_p_get_uint16(&conf->resume_rate, 
"ResumeRate", hashtbl)) @@ -3307,6 +3494,9 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) "SlurmctldPidFile", hashtbl)) conf->slurmctld_pidfile = xstrdup(DEFAULT_SLURMCTLD_PIDFILE); + s_p_get_string(&conf->slurmctld_plugstack, "SlurmctldPlugstack", + hashtbl); + s_p_get_string(&conf->slurmctld_logfile, "SlurmctldLogFile", hashtbl); if (s_p_get_string(&temp_str, "SlurmctldPort", hashtbl)) { @@ -3465,6 +3655,12 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->topology_plugin, "TopologyPlugin", hashtbl)) conf->topology_plugin = xstrdup(DEFAULT_TOPOLOGY_PLUGIN); +#ifdef HAVE_BG + if (strcmp(conf->topology_plugin, "topology/none")) { + fatal("On IBM BlueGene systems TopologyPlugin=topology/none " + "is required"); + } +#endif if (s_p_get_uint16(&conf->tree_width, "TreeWidth", hashtbl)) { if (conf->tree_width == 0) { @@ -3532,6 +3728,15 @@ extern char * debug_flags2str(uint32_t debug_flags) { char *rc = NULL; + /* When adding to this please attempt to keep flags in + * alphabetical order. 
+ */ + + if (debug_flags & DEBUG_FLAG_BACKFILL) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Backfill"); + } if (debug_flags & DEBUG_FLAG_BG_ALGO) { if (rc) xstrcat(rc, ","); @@ -3542,11 +3747,6 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "BGBlockAlgoDeep"); } - if (debug_flags & DEBUG_FLAG_BACKFILL) { - if (rc) - xstrcat(rc, ","); - xstrcat(rc, "Backfill"); - } if (debug_flags & DEBUG_FLAG_BG_PICK) { if (rc) xstrcat(rc, ","); @@ -3567,6 +3767,16 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Energy"); } + if (debug_flags & DEBUG_FLAG_EXT_SENSORS) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "ExtSensors"); + } + if (debug_flags & DEBUG_FLAG_FILESYSTEM) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Filesystem"); + } if (debug_flags & DEBUG_FLAG_FRONT_END) { if (rc) xstrcat(rc, ","); @@ -3582,6 +3792,11 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Gres"); } + if (debug_flags & DEBUG_FLAG_INFINIBAND) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Infiniband"); + } if (debug_flags & DEBUG_FLAG_NO_CONF_HASH) { if (rc) xstrcat(rc, ","); @@ -3597,6 +3812,11 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Priority"); } + if (debug_flags & DEBUG_FLAG_PROFILE) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Profile"); + } if (debug_flags & DEBUG_FLAG_RESERVATION) { if (rc) xstrcat(rc, ","); @@ -3617,6 +3837,11 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Switch"); } + if (debug_flags & DEBUG_FLAG_THREADID) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "ThreadID"); + } if (debug_flags & DEBUG_FLAG_TRIGGERS) { if (rc) xstrcat(rc, ","); @@ -3627,7 +3852,6 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Wiki"); } - return rc; } @@ -3661,18 +3885,26 @@ extern uint32_t debug_str2flags(char *debug_flags) rc |= DEBUG_FLAG_CPU_BIND; else if 
(strcasecmp(tok, "Energy") == 0) rc |= DEBUG_FLAG_ENERGY; + else if (strcasecmp(tok, "ExtSensors") == 0) + rc |= DEBUG_FLAG_EXT_SENSORS; else if (strcasecmp(tok, "FrontEnd") == 0) rc |= DEBUG_FLAG_FRONT_END; else if (strcasecmp(tok, "Gang") == 0) rc |= DEBUG_FLAG_GANG; else if (strcasecmp(tok, "Gres") == 0) rc |= DEBUG_FLAG_GRES; + else if (strcasecmp(tok, "Infiniband") == 0) + rc |= DEBUG_FLAG_INFINIBAND; + else if (strcasecmp(tok, "Filesystem") == 0) + rc |= DEBUG_FLAG_FILESYSTEM; else if (strcasecmp(tok, "NO_CONF_HASH") == 0) rc |= DEBUG_FLAG_NO_CONF_HASH; else if (strcasecmp(tok, "NoRealTime") == 0) rc |= DEBUG_FLAG_NO_REALTIME; else if (strcasecmp(tok, "Priority") == 0) rc |= DEBUG_FLAG_PRIO; + else if (strcasecmp(tok, "Profile") == 0) + rc |= DEBUG_FLAG_PROFILE; else if (strcasecmp(tok, "Reservation") == 0) rc |= DEBUG_FLAG_RESERVATION; else if (strcasecmp(tok, "SelectType") == 0) @@ -3687,6 +3919,8 @@ extern uint32_t debug_str2flags(char *debug_flags) rc |= DEBUG_FLAG_TRIGGERS; else if (strcasecmp(tok, "Wiki") == 0) rc |= DEBUG_FLAG_WIKI; + else if (strcasecmp(tok, "ThreadID") == 0) + rc |= DEBUG_FLAG_THREADID; else { error("Invalid DebugFlag: %s", tok); rc = NO_VAL; @@ -3757,7 +3991,7 @@ extern void destroy_config_key_pair(void *object) { config_key_pair_t *key_pair_ptr = (config_key_pair_t *)object; - if(key_pair_ptr) { + if (key_pair_ptr) { xfree(key_pair_ptr->name); xfree(key_pair_ptr->value); xfree(key_pair_ptr); @@ -3800,3 +4034,52 @@ extern int sort_key_pairs(config_key_pair_t *key_a, config_key_pair_t *key_b) return 0; } +/* + * Return the pathname of the extra .conf file + */ +extern char *get_extra_conf_path(char *conf_name) +{ + char *val = getenv("SLURM_CONF"); + char *rc = NULL; + + if (!val) + val = default_slurm_config_file; + + /* Replace file name on end of path */ + rc = xstrdup(val); + xstrsubstitute(rc, "slurm.conf", conf_name); + + return rc; +} + +extern bool run_in_daemon(char *daemons) +{ + char *full, *start_char, *end_char; + + 
xassert(slurm_prog_name); + + if (!strcmp(daemons, slurm_prog_name)) + return true; + + full = xstrdup(daemons); + start_char = full; + + while (start_char && (end_char = strstr(start_char, ","))) { + *end_char = 0; + if (!strcmp(start_char, slurm_prog_name)) { + xfree(full); + return true; + } + + start_char = end_char + 1; + } + + if (start_char && !strcmp(start_char, slurm_prog_name)) { + xfree(full); + return true; + } + + xfree(full); + + return false; +} diff --git a/src/common/read_config.h b/src/common/read_config.h index 7d017dcf7c45390fd04e65fe7d33e173a9da733f..7bfef568a6ff71fbc7c1a07f36690dd989ae0fdb 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -58,6 +58,8 @@ extern char *default_plugstack; #define ACCOUNTING_ENFORCE_WCKEYS 0x0004 #define ACCOUNTING_ENFORCE_QOS 0x0008 #define ACCOUNTING_ENFORCE_SAFE 0x0010 +#define ACCOUNTING_ENFORCE_NO_JOBS 0x0020 +#define ACCOUNTING_ENFORCE_NO_STEPS 0x0040 #define DEFAULT_ACCOUNTING_DB "slurm_acct_db" #define DEFAULT_ACCOUNTING_ENFORCE 0 @@ -67,6 +69,7 @@ extern char *default_plugstack; #define DEFAULT_COMPLETE_WAIT 0 #define DEFAULT_CRYPTO_TYPE "crypto/munge" #define DEFAULT_EPILOG_MSG_TIME 2000 +#define DEFAULT_EXT_SENSORS_TYPE "ext_sensors/none" #define DEFAULT_FAST_SCHEDULE 1 #define DEFAULT_FIRST_JOB_ID 1 #define DEFAULT_GET_ENV_TIMEOUT 2 @@ -75,8 +78,11 @@ extern char *default_plugstack; #define DEFAULT_INACTIVE_LIMIT 0 #define DEFAULT_JOB_ACCT_GATHER_TYPE "jobacct_gather/none" #define JOB_ACCT_GATHER_TYPE_NONE "jobacct_gather/none" -#define DEFAULT_JOB_ACCT_GATHER_FREQ 30 +#define DEFAULT_JOB_ACCT_GATHER_FREQ "30" #define 
DEFAULT_ACCT_GATHER_ENERGY_TYPE "acct_gather_energy/none" +#define DEFAULT_ACCT_GATHER_PROFILE_TYPE "acct_gather_profile/none" +#define DEFAULT_ACCT_GATHER_INFINIBAND_TYPE "acct_gather_infiniband/none" +#define DEFAULT_ACCT_GATHER_FILESYSTEM_TYPE "acct_gather_filesystem/none" #define ACCOUNTING_STORAGE_TYPE_NONE "accounting_storage/none" #define DEFAULT_DISABLE_ROOT_JOBS 0 #define DEFAULT_ENFORCE_PART_LIMITS 0 @@ -84,6 +90,7 @@ extern char *default_plugstack; #define DEFAULT_JOB_COMP_TYPE "jobcomp/none" #define DEFAULT_JOB_COMP_LOC "/var/log/slurm_jobcomp.log" #define DEFAULT_JOB_COMP_DB "slurm_jobcomp_db" +#define DEFAULT_KEEP_ALIVE_TIME ((uint16_t) NO_VAL) #define DEFAULT_KILL_ON_BAD_EXIT 0 #define DEFAULT_KILL_TREE 0 #define DEFAULT_KILL_WAIT 30 @@ -99,6 +106,7 @@ extern char *default_plugstack; #endif #define DEFAULT_MAIL_PROG "/bin/mail" +#define DEFAULT_MAX_ARRAY_SIZE 1001 #define DEFAULT_MAX_JOB_COUNT 10000 #define DEFAULT_MAX_JOB_ID 0xffff0000 #define DEFAULT_MAX_STEP_COUNT 40000 @@ -131,7 +139,7 @@ extern char *default_plugstack; #define DEFAULT_SCHEDULER_PORT 7321 #define DEFAULT_SCHED_LOG_LEVEL 0 #define DEFAULT_SCHED_TIME_SLICE 30 -#define DEFAULT_SCHEDTYPE "sched/builtin" +#define DEFAULT_SCHEDTYPE "sched/backfill" #ifdef HAVE_BG /* Blue Gene specific default configuration parameters */ # define DEFAULT_SELECT_TYPE "select/bluegene" #elif defined HAVE_CRAY @@ -167,6 +175,10 @@ extern char *default_plugstack; #define DEFAULT_MAX_TASKS_PER_NODE 128 typedef struct slurm_conf_frontend { + char *allow_groups; /* allowed group string */ + char *allow_users; /* allowed user string */ + char *deny_groups; /* denied group string */ + char *deny_users; /* denied user string */ char *frontends; /* frontend node name */ char *addresses; /* frontend node address */ uint16_t port; /* frontend specific port */ @@ -203,6 +215,8 @@ typedef struct slurm_conf_partition { char *allow_groups; /* comma delimited list of groups, * NULL indicates all */ char *alternate; /* 
name of alternate partition */ + uint16_t cr_type; /* Custom CR values for partition (supported + * by select/cons_res plugin only) */ uint32_t def_mem_per_cpu; /* default MB memory per allocated CPU */ bool default_flag; /* Set if default partition */ uint32_t default_time; /* minutes or INFINITE */ @@ -211,6 +225,7 @@ typedef struct slurm_conf_partition { * default */ uint32_t grace_time; /* default grace time for partition */ bool hidden_flag; /* 1 if hidden by default */ + uint32_t max_cpus_per_node; /* maximum allocated CPUs per node */ uint16_t max_share; /* number of jobs to gang schedule */ uint32_t max_time; /* minutes or INFINITE */ uint32_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ @@ -487,6 +502,17 @@ extern void pack_config_key_pair(void *in, uint16_t rpc_version, Buf buffer); extern int unpack_config_key_pair(void **object, uint16_t rpc_version, Buf buffer); extern int sort_key_pairs(config_key_pair_t *key_a, config_key_pair_t *key_b); +/* + * Return the pathname of the extra .conf file + * return value must be xfreed + */ +extern char *get_extra_conf_path(char *conf_name); +/* Determine if slurm_prog_name (calling process) is in list of daemons + * + * in - daemons (comma separated list of daemons, e.g. slurmd,slurmstepd) + * returns true if slurm_prog_name (set in log.c) is in list, false otherwise. + */ +extern bool run_in_daemon(char *daemons); #endif /* !_READ_CONFIG_H */ diff --git a/src/common/safeopen.c b/src/common/safeopen.c index 8a18c3504857189f451c18beff0f674a6d26010f..2a52d759321d3e01645e8833aed9fc4e4dfaecad 100644 --- a/src/common/safeopen.c +++ b/src/common/safeopen.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -62,7 +62,7 @@ FILE * safeopen(const char *path, const char *mode, int flags) int oflags; struct stat fb1, fb2; - if(mode[0] == 'w') { + if (mode[0] == 'w') { oflags = O_WRONLY; } else if (mode[0] == 'a') { oflags = O_CREAT | O_WRONLY | O_APPEND; diff --git a/src/common/safeopen.h b/src/common/safeopen.h index 5c93cd1b245bbd11e26b578a6c5af0e965df7ff6..0b67f15ed67b64c14ed048354d8e157a0349d51f 100644 --- a/src/common/safeopen.h +++ b/src/common/safeopen.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index be905428e8300c2f5564727e3362fd84c2ae3be7..ce6186a75271fea549d9ec5be449db4e3396e1e8 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -257,6 +257,7 @@ static slurm_acct_storage_ops_t ops; static plugin_context_t *plugin_context = NULL; static pthread_mutex_t plugin_context_lock = PTHREAD_MUTEX_INITIALIZER; static bool init_run = false; +static uint16_t enforce = 0; /* * Initialize context for acct_storage plugin @@ -289,7 +290,7 @@ extern int slurm_acct_storage_init(char *loc) goto done; } init_run = true; - + enforce = slurm_get_accounting_storage_enforce(); done: slurm_mutex_unlock(&plugin_context_lock); xfree(type); @@ -443,6 +444,7 @@ extern List acct_storage_g_modify_job(void *db_conn, uint32_t uid, { if (slurm_acct_storage_init(NULL) < 0) return NULL; + return (*(ops.modify_job))(db_conn, uid, job_cond, job); } @@ -768,6 +770,8 @@ extern int jobacct_storage_g_job_start(void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; + if (enforce & ACCOUNTING_ENFORCE_NO_JOBS) + return SLURM_SUCCESS; /* A pending job's start_time is it's expected initiation time * (changed in slurm v2.1). 
Rather than changing a bunch of code @@ -793,6 +797,8 @@ extern int jobacct_storage_g_job_complete(void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; + if (enforce & ACCOUNTING_ENFORCE_NO_JOBS) + return SLURM_SUCCESS; return (*(ops.job_complete))(db_conn, job_ptr); } @@ -804,6 +810,8 @@ extern int jobacct_storage_g_step_start(void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; + if (enforce & ACCOUNTING_ENFORCE_NO_STEPS) + return SLURM_SUCCESS; return (*(ops.step_start))(db_conn, step_ptr); } @@ -815,6 +823,8 @@ extern int jobacct_storage_g_step_complete(void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; + if (enforce & ACCOUNTING_ENFORCE_NO_STEPS) + return SLURM_SUCCESS; return (*(ops.step_complete))(db_conn, step_ptr); } @@ -826,6 +836,8 @@ extern int jobacct_storage_g_job_suspend(void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; + if (enforce & ACCOUNTING_ENFORCE_NO_JOBS) + return SLURM_SUCCESS; return (*(ops.job_suspend))(db_conn, job_ptr); } diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index 881fcdc74997a11ec5003a5678a8049128952ca8..4448f2d260f16028cc1bf113af27f200daf31cad 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_acct_gather.c b/src/common/slurm_acct_gather.c new file mode 100644 index 0000000000000000000000000000000000000000..dcc1eda7318d280b0f07b02551e25f9c74ede224 --- /dev/null +++ b/src/common/slurm_acct_gather.c @@ -0,0 +1,191 @@ +/*****************************************************************************\ + * slurm_acct_gather.c - generic interface needed for some + * acct_gather plugins. + ***************************************************************************** + * Copyright (C) 2013 SchedMD LLC. + * Written by Danny Auble <da@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include <sys/stat.h> +#include <stdlib.h> + +#include "src/common/slurm_acct_gather.h" +#include "src/common/slurm_strcasestr.h" +#include "src/common/xstring.h" + +bool acct_gather_suspended = false; + +static bool inited = 0; + +static int _get_int(const char *my_str) +{ + char *end = NULL; + int value; + + if (!my_str) + return -1; + value = strtol(my_str, &end, 10); + //info("from %s I get %d and %s: %m", my_str, value, end); + /* means no numbers */ + if (my_str == end) + return -1; + + return value; +} + +extern int acct_gather_conf_init(void) +{ + s_p_hashtbl_t *tbl = NULL; + char *conf_path = NULL; + s_p_options_t *full_options = NULL; + int full_options_cnt = 0, i; + struct stat buf; + + if (inited) + return SLURM_SUCCESS; + inited = 1; + + /* get options from plugins using acct_gather.conf */ + + acct_gather_energy_g_conf_options(&full_options, &full_options_cnt); + acct_gather_profile_g_conf_options(&full_options, &full_options_cnt); + acct_gather_infiniband_g_conf_options(&full_options, &full_options_cnt); + acct_gather_filesystem_g_conf_options(&full_options, &full_options_cnt); + /* ADD MORE HERE */ + + /* for the NULL at the end */ + xrealloc(full_options, + ((full_options_cnt + 1) * sizeof(s_p_options_t))); + + /**************************************************/ + + /* Get the acct_gather.conf path and validate the file */ + conf_path = get_extra_conf_path("acct_gather.conf"); + if ((conf_path == NULL) || 
(stat(conf_path, &buf) == -1)) { + debug2("No acct_gather.conf file (%s)", conf_path); + } else { + debug2("Reading acct_gather.conf file %s", conf_path); + + tbl = s_p_hashtbl_create(full_options); + if (s_p_parse_file(tbl, NULL, conf_path, false) == + SLURM_ERROR) { + fatal("Could not open/read/parse acct_gather.conf file " + "%s. Many times this is because you have " + "defined options for plugins that are not " + "loaded. Please check your slurm.conf file " + "and make sure the plugins for the options " + "listed are loaded.", + conf_path); + } + } + + for (i=0; i<full_options_cnt; i++) + xfree(full_options[i].key); + xfree(full_options); + xfree(conf_path); + + /* handle acct_gather.conf in each plugin */ + acct_gather_energy_g_conf_set(tbl); + acct_gather_profile_g_conf_set(tbl); + acct_gather_infiniband_g_conf_set(tbl); + acct_gather_filesystem_g_conf_set(tbl); + /*********************************************************************/ + /* ADD MORE HERE AND FREE MEMORY IN acct_gather_conf_destroy() BELOW */ + /*********************************************************************/ + + s_p_hashtbl_destroy(tbl); + + return SLURM_SUCCESS; +} + +/* Unload all acct_gather plugins; returns the largest fini() return code */ +extern int acct_gather_conf_destroy(void) +{ + int rc; + if (!inited) + return SLURM_SUCCESS; + inited = false; /* let a later acct_gather_conf_init() run again */ + rc = acct_gather_energy_fini(); + rc = MAX(rc, acct_gather_filesystem_fini()); + rc = MAX(rc, acct_gather_infiniband_fini()); + rc = MAX(rc, acct_gather_profile_fini()); + return rc; +} + +extern int acct_gather_parse_freq(int type, char *freq) +{ + int freq_int = -1; + char *sub_str = NULL; + + if (!freq) + return freq_int; + + switch (type) { + case PROFILE_ENERGY: + if ((sub_str = slurm_strcasestr(freq, "energy="))) + freq_int = _get_int(sub_str + 7); + break; + case PROFILE_TASK: + /* backwards compatibility for when the freq was only + for task. 
+ */ + freq_int = _get_int(freq); + if ((freq_int == -1) + && (sub_str = slurm_strcasestr(freq, "task="))) + freq_int = _get_int(sub_str + 5); + break; + case PROFILE_FILESYSTEM: + if ((sub_str = slurm_strcasestr(freq, "filesystem="))) + freq_int = _get_int(sub_str + 11); + break; + case PROFILE_NETWORK: + if ((sub_str = slurm_strcasestr(freq, "network="))) + freq_int = _get_int(sub_str + 8); + break; + default: + fatal("Unhandled profile option %d please update " + "slurm_acct_gather.c " + "(acct_gather_parse_freq)", type); + } + + return freq_int; +} + +extern void acct_gather_suspend_poll(void) +{ + acct_gather_suspended = true; +} + +extern void acct_gather_resume_poll(void) +{ + acct_gather_suspended = false; +} diff --git a/src/common/slurm_acct_gather.h b/src/common/slurm_acct_gather.h new file mode 100644 index 0000000000000000000000000000000000000000..b4774d0b7728b066a88ae48b01c8b26d6a9f6c96 --- /dev/null +++ b/src/common/slurm_acct_gather.h @@ -0,0 +1,68 @@ +/*****************************************************************************\ + * slurm_acct_gather.h - generic interface needed for some + * acct_gather plugins. + ***************************************************************************** + * Copyright (C) 2013 SchedMD LLC. + * Written by Danny Auble <da@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#ifndef __SLURM_ACCT_GATHER_H__ +#define __SLURM_ACCT_GATHER_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include "read_config.h" +#include "slurm_acct_gather_energy.h" +#include "slurm_acct_gather_profile.h" +#include "slurm_acct_gather_infiniband.h" +#include "slurm_acct_gather_filesystem.h" + +extern bool acct_gather_suspended; + +extern int acct_gather_conf_init(void); +extern int acct_gather_conf_destroy(void); +extern int acct_gather_parse_freq(int type, char *freq); +extern void acct_gather_suspend_poll(void); +extern void acct_gather_resume_poll(void); + +#endif diff --git a/src/common/slurm_acct_gather_energy.c b/src/common/slurm_acct_gather_energy.c index 2f74ba8f2e6e01e5c6b597abe46ba03acdfcc419..53c8584f476b38484f69e66cc5da80483d003fa6 100644 --- a/src/common/slurm_acct_gather_energy.c +++ b/src/common/slurm_acct_gather_energy.c @@ -6,7 +6,7 @@ * Written by Bull-HN-PHX/d.rusak, * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -48,12 +48,19 @@ #include "src/common/slurm_acct_gather_energy.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" +/* +** Define slurm-specific aliases for use by plugins, see slurm_xlator.h +** for details. 
+ */ +strong_alias(acct_gather_energy_destroy, slurm_acct_gather_energy_destroy); + typedef struct slurm_acct_gather_energy_ops { int (*update_node_energy) (void); - int (*get_data) (enum acct_energy_type data_type, - acct_gather_energy_t *energy); - int (*set_data) (enum acct_energy_type data_type, - acct_gather_energy_t *energy); + int (*get_data) (enum acct_energy_type data_type, void *data); + int (*set_data) (enum acct_energy_type data_type, void *data); + void (*conf_options) (s_p_options_t **full_options, + int *full_options_cnt); + void (*conf_set) (s_p_hashtbl_t *tbl); } slurm_acct_gather_energy_ops_t; /* * These strings must be kept in the same order as the fields @@ -63,12 +70,36 @@ static const char *syms[] = { "acct_gather_energy_p_update_node_energy", "acct_gather_energy_p_get_data", "acct_gather_energy_p_set_data", + "acct_gather_energy_p_conf_options", + "acct_gather_energy_p_conf_set" }; static slurm_acct_gather_energy_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; static bool init_run = false; +static bool acct_shutdown = true; +static int freq = 0; + + +static void *_watch_node(void *arg) +{ + int type = PROFILE_ENERGY; + int delta = acct_gather_profile_timer[type].freq - 1; + while (init_run && acct_gather_profile_running) { + /* Do this until shutdown is requested */ + (*(ops.set_data))(ENERGY_DATA_PROFILE, &delta); + slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex); + pthread_cond_wait( + &acct_gather_profile_timer[type].notify, + &acct_gather_profile_timer[type].notify_mutex); + slurm_mutex_unlock(&acct_gather_profile_timer[type]. 
+ notify_mutex); + } + + return NULL; +} + extern int slurm_acct_gather_energy_init(void) { @@ -99,6 +130,8 @@ extern int slurm_acct_gather_energy_init(void) done: slurm_mutex_unlock(&g_context_lock); xfree(type); + if (retval == SLURM_SUCCESS) + retval = acct_gather_conf_init(); return retval; } @@ -133,17 +166,34 @@ extern void acct_gather_energy_destroy(acct_gather_energy_t *energy) extern void acct_gather_energy_pack(acct_gather_energy_t *energy, Buf buffer, uint16_t protocol_version) { - if (!energy) { - int i; - for (i=0; i<4; i++) - pack32(0, buffer); - return; + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + if (!energy) { + int i; + for (i=0; i<5; i++) + pack32(0, buffer); + pack_time(0, buffer); + return; + } + + pack32(energy->base_consumed_energy, buffer); + pack32(energy->base_watts, buffer); + pack32(energy->consumed_energy, buffer); + pack32(energy->current_watts, buffer); + pack32(energy->previous_consumed_energy, buffer); + pack_time(energy->poll_time, buffer); + } else { + if (!energy) { + int i; + for (i=0; i<4; i++) + pack32(0, buffer); + return; + } + + pack32(energy->base_consumed_energy, buffer); + pack32(energy->base_watts, buffer); + pack32(energy->consumed_energy, buffer); + pack32(energy->current_watts, buffer); } - - pack32(energy->base_consumed_energy, buffer); - pack32(energy->base_watts, buffer); - pack32(energy->consumed_energy, buffer); - pack32(energy->current_watts, buffer); } extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer, @@ -152,10 +202,19 @@ extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer, acct_gather_energy_t *energy_ptr = acct_gather_energy_alloc(); *energy = energy_ptr; - safe_unpack32(&energy_ptr->base_consumed_energy, buffer); - safe_unpack32(&energy_ptr->base_watts, buffer); - safe_unpack32(&energy_ptr->consumed_energy, buffer); - safe_unpack32(&energy_ptr->current_watts, buffer); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + 
safe_unpack32(&energy_ptr->base_consumed_energy, buffer); + safe_unpack32(&energy_ptr->base_watts, buffer); + safe_unpack32(&energy_ptr->consumed_energy, buffer); + safe_unpack32(&energy_ptr->current_watts, buffer); + safe_unpack32(&energy_ptr->previous_consumed_energy, buffer); + safe_unpack_time(&energy_ptr->poll_time, buffer); + } else { + safe_unpack32(&energy_ptr->base_consumed_energy, buffer); + safe_unpack32(&energy_ptr->base_watts, buffer); + safe_unpack32(&energy_ptr->consumed_energy, buffer); + safe_unpack32(&energy_ptr->current_watts, buffer); + } return SLURM_SUCCESS; @@ -178,27 +237,84 @@ extern int acct_gather_energy_g_update_node_energy(void) } extern int acct_gather_energy_g_get_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy) + void *data) { int retval = SLURM_ERROR; if (slurm_acct_gather_energy_init() < 0) return retval; - retval = (*(ops.get_data))(data_type, energy); + retval = (*(ops.get_data))(data_type, data); return retval; } extern int acct_gather_energy_g_set_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy) + void *data) { int retval = SLURM_ERROR; if (slurm_acct_gather_energy_init() < 0) return retval; - retval = (*(ops.set_data))(data_type, energy); + retval = (*(ops.set_data))(data_type, data); return retval; } + +extern int acct_gather_energy_startpoll(uint32_t frequency) +{ + int retval = SLURM_SUCCESS; + pthread_attr_t attr; + pthread_t _watch_node_thread_id; + + if (slurm_acct_gather_energy_init() < 0) + return SLURM_ERROR; + + if (!acct_shutdown) { + error("acct_gather_energy_startpoll: " + "poll already started!"); + return retval; + } + + acct_shutdown = false; + + freq = frequency; + + if (frequency == 0) { /* don't want dynamic monitoring? 
*/ + debug2("acct_gather_energy dynamic logging disabled"); + return retval; + } + + /* create polling thread */ + slurm_attr_init(&attr); + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + + if (pthread_create(&_watch_node_thread_id, &attr, &_watch_node, NULL)) { + debug("acct_gather_energy failed to create _watch_node " + "thread: %m"); + frequency = 0; + } else + debug3("acct_gather_energy dynamic logging enabled"); + slurm_attr_destroy(&attr); + + return retval; +} + +extern void acct_gather_energy_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + if (slurm_acct_gather_energy_init() < 0) + return; + + (*(ops.conf_options))(full_options, full_options_cnt); +} + +extern void acct_gather_energy_g_conf_set(s_p_hashtbl_t *tbl) +{ + if (slurm_acct_gather_energy_init() < 0) + return; + + (*(ops.conf_set))(tbl); +} diff --git a/src/common/slurm_acct_gather_energy.h b/src/common/slurm_acct_gather_energy.h index f2c7cd97f5d19b73cee8f216c912ee02cb16e2fb..e0dc25423439bc802620c3931751a06e24e3b13b 100644 --- a/src/common/slurm_acct_gather_energy.h +++ b/src/common/slurm_acct_gather_energy.h @@ -6,7 +6,7 @@ * Copyright (C) 2012 Bull-HN-PHX * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -63,8 +63,16 @@ #include "src/common/pack.h" #include "src/common/list.h" #include "src/common/xmalloc.h" +#include "src/common/slurm_acct_gather.h" #include "src/common/slurm_jobacct_gather.h" +typedef struct acct_energy_data { + time_t time; + uint64_t power; + uint64_t cpu_freq; +} acct_energy_data_t; + + extern int acct_gather_energy_init(void); /* load the plugin */ extern int acct_gather_energy_fini(void); /* unload the plugin */ extern acct_gather_energy_t *acct_gather_energy_alloc(void); @@ -76,7 +84,12 @@ extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer, extern int acct_gather_energy_g_update_node_energy(void); extern int acct_gather_energy_g_get_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy); + void *data); extern int acct_gather_energy_g_set_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy); + void *data); +extern int acct_gather_energy_startpoll(uint32_t frequency); +extern void acct_gather_energy_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +extern void acct_gather_energy_g_conf_set(s_p_hashtbl_t *tbl); + #endif /*__SLURM_ACCT_GATHER_ENERGY_H__*/ diff --git a/src/common/slurm_acct_gather_filesystem.c b/src/common/slurm_acct_gather_filesystem.c new file mode 100644 index 0000000000000000000000000000000000000000..57ffd98b1193368e5310e80acc682f2a03870c2a --- /dev/null +++ b/src/common/slurm_acct_gather_filesystem.c @@ -0,0 +1,195 @@ +/*****************************************************************************\ + * slurm_acct_gather_filesystem.c - implementation-independent job filesystem + * accounting plugin definitions + ***************************************************************************** + * Copyright (C) 2013 Bull. + * Written by Yiannis Georgiou <yiannis.georgiou@bull.net> + * + * This file is part of SLURM, a resource management program. 
+ * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include <pthread.h> +#include <stdlib.h> +#include <string.h> + +#include "src/common/macros.h" +#include "src/common/plugin.h" +#include "src/common/plugrack.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/common/slurm_acct_gather_filesystem.h" +#include "src/slurmd/slurmstepd/slurmstepd_job.h" + +typedef struct slurm_acct_gather_filesystem_ops { + int (*node_update) (void); + void (*conf_options) (s_p_options_t **full_options, + int *full_options_cnt); + void (*conf_set) (s_p_hashtbl_t *tbl); +} slurm_acct_gather_filesystem_ops_t; +/* + * These strings must be kept in the same order as the fields + * declared for slurm_acct_gather_filesystem_ops_t. + */ +static const char *syms[] = { + "acct_gather_filesystem_p_node_update", + "acct_gather_filesystem_p_conf_options", + "acct_gather_filesystem_p_conf_set", +}; + +static slurm_acct_gather_filesystem_ops_t ops; +static plugin_context_t *g_context = NULL; +static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; +static bool acct_shutdown = true; +static int freq = 0; + +static void *_watch_node(void *arg) +{ + int type = PROFILE_FILESYSTEM; + while (!acct_shutdown && acct_gather_profile_running) { + /* Do this until shutdown is requested */ + (*(ops.node_update))(); + slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex); + pthread_cond_wait( + &acct_gather_profile_timer[type].notify, + &acct_gather_profile_timer[type].notify_mutex); + slurm_mutex_unlock(&acct_gather_profile_timer[type]. 
+ notify_mutex); + } + return NULL; +} + +extern int acct_gather_filesystem_init(void) +{ + int retval = SLURM_SUCCESS; + char *plugin_type = "acct_gather_filesystem"; + char *type = NULL; + + if (init_run && g_context) + return retval; + + slurm_mutex_lock(&g_context_lock); + + if (g_context) + goto done; + + type = slurm_get_acct_gather_filesystem_type(); + + g_context = plugin_context_create( + plugin_type, type, (void **)&ops, syms, sizeof(syms)); + + if (!g_context) { + error("cannot create %s context for %s", plugin_type, type); + retval = SLURM_ERROR; + goto done; + } + init_run = true; + +done: + slurm_mutex_unlock(&g_context_lock); + xfree(type); + if (retval == SLURM_SUCCESS) + retval = acct_gather_conf_init(); + + + return retval; +} + +extern int acct_gather_filesystem_fini(void) +{ + int rc; + + if (!g_context) + return SLURM_SUCCESS; + + init_run = false; + rc = plugin_context_destroy(g_context); + g_context = NULL; + + return rc; +} + +extern int acct_gather_filesystem_startpoll(uint32_t frequency) +{ + int retval = SLURM_SUCCESS; + pthread_attr_t attr; + pthread_t _watch_node_thread_id; + + if (acct_gather_filesystem_init() < 0) + return SLURM_ERROR; + + if (!acct_shutdown) { + error("acct_gather_filesystem_startpoll: " + "poll already started!"); + return retval; + } + + acct_shutdown = false; + + freq = frequency; + + if (frequency == 0) { /* don't want dynamic monitoring? 
*/ + debug2("acct_gather_filesystem dynamic logging disabled"); + return retval; + } + + /* create polling thread */ + slurm_attr_init(&attr); + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + + if (pthread_create(&_watch_node_thread_id, &attr, &_watch_node, NULL)) { + debug("acct_gather_filesystem failed to create _watch_node " + "thread: %m"); + frequency = 0; + } else + debug3("acct_gather_filesystem dynamic logging enabled"); + slurm_attr_destroy(&attr); + + return retval; +} + + +extern void acct_gather_filesystem_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + if (acct_gather_filesystem_init() < 0) + return; + (*(ops.conf_options))(full_options, full_options_cnt); +} + +extern void acct_gather_filesystem_g_conf_set(s_p_hashtbl_t *tbl) +{ + if (acct_gather_filesystem_init() < 0) + return; + + (*(ops.conf_set))(tbl); +} diff --git a/src/common/slurm_acct_gather_filesystem.h b/src/common/slurm_acct_gather_filesystem.h new file mode 100644 index 0000000000000000000000000000000000000000..d0acd499f50281bb6ff83a62ad5e655976698904 --- /dev/null +++ b/src/common/slurm_acct_gather_filesystem.h @@ -0,0 +1,98 @@ +/*****************************************************************************\ + * slurm_acct_gather_filesystem.h - implementation-independent job filesystem + * accounting plugin definitions + ***************************************************************************** + * Copyright (C) 2013 Bull + * Written by Yiannis Georgiou <yiannis.georgiou@bull.net> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#ifndef __SLURM_ACCT_GATHER_FILESYSTEM_H__ +#define __SLURM_ACCT_GATHER_FILESYSTEM_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <sys/resource.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "slurm/slurm.h" +#include "slurm/slurmdb.h" + +#include "src/common/macros.h" +#include "src/common/pack.h" +#include "src/common/list.h" +#include "src/common/xmalloc.h" +#include "src/common/slurm_acct_gather.h" + +typedef struct acct_filesystem_data { + uint64_t reads; + uint64_t writes; + double read_size; // currently in megabytes + double write_size; // currently in megabytes +} acct_filesystem_data_t; + +extern int acct_gather_filesystem_init(void); /* load the plugin */ +extern int acct_gather_filesystem_fini(void); /* unload the plugin */ +extern int acct_gather_filesystem_startpoll(uint32_t); +extern int acct_gather_filesystem_g_node_update(void); +/* + * Define plugin local conf for acct_gather.conf + * + * Parameters + * full_options -- pointer that will receive list of plugin local + * definitions + * full_options_cnt -- count of plugin local definitions + */ +extern void acct_gather_filesystem_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +/* + * set plugin local conf from acct_gather.conf into its structure + * + * Parameters + * tbl - hash table of acct_gather.conf key-values. 
+ */ +extern void acct_gather_filesystem_g_conf_set(s_p_hashtbl_t *tbl); + +#endif /*__SLURM_ACCT_GATHER_FILESYSTEM_H__*/ + diff --git a/src/common/slurm_acct_gather_infiniband.c b/src/common/slurm_acct_gather_infiniband.c new file mode 100644 index 0000000000000000000000000000000000000000..341240bea44068faf8b9b31fe018c470d83831bf --- /dev/null +++ b/src/common/slurm_acct_gather_infiniband.c @@ -0,0 +1,197 @@ +/*****************************************************************************\ + * slurm_acct_gather_infiniband.c - implementation-independent job infiniband + * accounting plugin definitions + ***************************************************************************** + * Copyright (C) 2013 Bull. + * Written by Yiannis Georgiou <yiannis.georgiou@bull.net> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include <pthread.h> +#include <stdlib.h> +#include <string.h> + +#include "src/common/macros.h" +#include "src/common/plugin.h" +#include "src/common/plugrack.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/common/slurm_acct_gather_infiniband.h" +#include "src/common/slurm_acct_gather_profile.h" +#include "src/slurmd/slurmstepd/slurmstepd_job.h" + +typedef struct slurm_acct_gather_infiniband_ops { + int (*node_update) (void); + void (*conf_options) (s_p_options_t **full_options, + int *full_options_cnt); + void (*conf_set) (s_p_hashtbl_t *tbl); +} slurm_acct_gather_infiniband_ops_t; +/* + * These strings must be kept in the same order as the fields + * declared for slurm_acct_gather_infiniband_ops_t. 
+ */ +static const char *syms[] = { + "acct_gather_infiniband_p_node_update", + "acct_gather_infiniband_p_conf_options", + "acct_gather_infiniband_p_conf_set", +}; + +static slurm_acct_gather_infiniband_ops_t ops; +static plugin_context_t *g_context = NULL; +static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; +static bool acct_shutdown = true; +static int freq = 0; + +static void *_watch_node(void *arg) +{ + int type = PROFILE_NETWORK; + while (init_run && acct_gather_profile_running) { + /* Do this until shutdown is requested */ + (*(ops.node_update))(); + slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex); + pthread_cond_wait( + &acct_gather_profile_timer[type].notify, + &acct_gather_profile_timer[type].notify_mutex); + slurm_mutex_unlock(&acct_gather_profile_timer[type]. + notify_mutex); + } + + return NULL; +} + +extern int acct_gather_infiniband_init(void) +{ + int retval = SLURM_SUCCESS; + char *plugin_type = "acct_gather_infiniband"; + char *type = NULL; + + if (init_run && g_context) + return retval; + + slurm_mutex_lock(&g_context_lock); + + if (g_context) + goto done; + + type = slurm_get_acct_gather_infiniband_type(); + + g_context = plugin_context_create( + plugin_type, type, (void **)&ops, syms, sizeof(syms)); + + if (!g_context) { + error("cannot create %s context for %s", plugin_type, type); + retval = SLURM_ERROR; + goto done; + } + init_run = true; + +done: + slurm_mutex_unlock(&g_context_lock); + xfree(type); + if (retval == SLURM_SUCCESS) + retval = acct_gather_conf_init(); + + + return retval; +} + +extern int acct_gather_infiniband_fini(void) +{ + int rc; + + if (!g_context) + return SLURM_SUCCESS; + + init_run = false; + rc = plugin_context_destroy(g_context); + g_context = NULL; + + return rc; +} + +extern int acct_gather_infiniband_startpoll(uint32_t frequency) +{ + int retval = SLURM_SUCCESS; + pthread_attr_t attr; + pthread_t _watch_node_thread_id; + + if 
(acct_gather_infiniband_init() < 0) + return SLURM_ERROR; + + if (!acct_shutdown) { + error("acct_gather_infiniband_startpoll: " + "poll already started!"); + return retval; + } + + acct_shutdown = false; + + freq = frequency; + + if (frequency == 0) { /* don't want dynamic monitoring? */ + debug2("acct_gather_infiniband dynamic logging disabled"); + return retval; + } + + /* create polling thread */ + slurm_attr_init(&attr); + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + + if (pthread_create(&_watch_node_thread_id, &attr, &_watch_node, NULL)) { + debug("acct_gather_infiniband failed to create _watch_node " + "thread: %m"); + frequency = 0; + } else + debug3("acct_gather_infiniband dynamic logging enabled"); + slurm_attr_destroy(&attr); + + return retval; +} + + +extern void acct_gather_infiniband_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + if (acct_gather_infiniband_init() < 0) + return; + (*(ops.conf_options))(full_options, full_options_cnt); +} + +extern void acct_gather_infiniband_g_conf_set(s_p_hashtbl_t *tbl) +{ + if (acct_gather_infiniband_init() < 0) + return; + + (*(ops.conf_set))(tbl); +} diff --git a/src/common/slurm_acct_gather_infiniband.h b/src/common/slurm_acct_gather_infiniband.h new file mode 100644 index 0000000000000000000000000000000000000000..5116b00abc2e56dfa9139a53f9f179d0bf9591eb --- /dev/null +++ b/src/common/slurm_acct_gather_infiniband.h @@ -0,0 +1,99 @@ +/*****************************************************************************\ + * slurm_acct_gather_infiniband.h - implementation-independent job infiniband + * accounting plugin definitions + ***************************************************************************** + * Copyright (C) 2013 Bull + * Written by Yiannis Georgiou <yiannis.georgiou@bull.net> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#ifndef __SLURM_ACCT_GATHER_INFINIBAND_H__ +#define __SLURM_ACCT_GATHER_INFINIBAND_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <sys/resource.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "slurm/slurm.h" +#include "slurm/slurmdb.h" + +#include "src/common/macros.h" +#include "src/common/pack.h" +#include "src/common/list.h" +#include "src/common/xmalloc.h" +#include "src/common/slurm_acct_gather.h" + +typedef struct acct_network_data { + uint64_t packets_in; + uint64_t packets_out; + double size_in; // currently in megabytes + double size_out; // currently in megabytes +} acct_network_data_t; + +extern int acct_gather_infiniband_init(void); /* load the plugin */ +extern int acct_gather_infiniband_fini(void); /* unload the plugin */ +extern int acct_gather_infiniband_startpoll(uint32_t frequency); + +extern int acct_gather_infiniband_g_node_update(void); +/* + * Define plugin local conf for acct_gather.conf + * + * Parameters + * full_options -- pointer that will receive list of plugin local + * definitions + * full_options_cnt -- count of plugin local definitions + */ +extern void acct_gather_infiniband_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +/* + * set plugin local conf from acct_gather.conf into its structure + * + * Parameters + * tbl - hash table of acct_gather.conf key-values. 
+ */ +extern void acct_gather_infiniband_g_conf_set(s_p_hashtbl_t *tbl); + +#endif /*__SLURM_ACCT_GATHER_INFINIBAND_H__*/ + diff --git a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c new file mode 100644 index 0000000000000000000000000000000000000000..cc24d6ff81afe0787a64ec2b05ed6f8e3a62cbd3 --- /dev/null +++ b/src/common/slurm_acct_gather_profile.c @@ -0,0 +1,550 @@ +/*****************************************************************************\ + * slurm_acct_gather_profile.c - implementation-independent job profile + * accounting plugin definitions + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include <pthread.h> +#include <stdlib.h> +#include <string.h> + +#include "src/common/macros.h" +#include "src/common/plugin.h" +#include "src/common/plugrack.h" +#include "src/common/read_config.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/common/slurm_acct_gather_profile.h" +#include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather_infiniband.h" +#include "src/common/slurm_strcasestr.h" +#include "src/common/timers.h" + +/* These 2 should remain the same. */ +#define SLEEP_TIME 1 +#define USLEEP_TIME 1000000 + +typedef struct slurm_acct_gather_profile_ops { + void (*conf_options) (s_p_options_t **full_options, + int *full_options_cnt); + void (*conf_set) (s_p_hashtbl_t *tbl); + void* (*get) (enum acct_gather_profile_info info_type, + void *data); + int (*node_step_start) (slurmd_job_t*); + int (*node_step_end) (void); + int (*task_start) (uint32_t); + int (*task_end) (pid_t); + int (*add_sample_data) (uint32_t, void*); +} slurm_acct_gather_profile_ops_t; + +/* + * These strings must be kept in the same order as the fields + * declared for slurm_acct_gather_profile_ops_t. 
+ */ +static const char *syms[] = { + "acct_gather_profile_p_conf_options", + "acct_gather_profile_p_conf_set", + "acct_gather_profile_p_get", + "acct_gather_profile_p_node_step_start", + "acct_gather_profile_p_node_step_end", + "acct_gather_profile_p_task_start", + "acct_gather_profile_p_task_end", + "acct_gather_profile_p_add_sample_data", +}; + +acct_gather_profile_timer_t acct_gather_profile_timer[PROFILE_CNT]; +bool acct_gather_profile_running = false; + +static slurm_acct_gather_profile_ops_t ops; +static plugin_context_t *g_context = NULL; +static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t profile_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_t timer_thread_id = 0; +static bool init_run = false; + +static void _set_freq(int type, char *freq, char *freq_def) +{ + if ((acct_gather_profile_timer[type].freq = + acct_gather_parse_freq(type, freq)) == -1) + if ((acct_gather_profile_timer[type].freq = + acct_gather_parse_freq(type, freq_def)) == -1) + acct_gather_profile_timer[type].freq = 0; +} + +static void *_timer_thread(void *args) +{ + int i, now, diff; + DEF_TIMERS; + while (acct_gather_profile_running) { + START_TIMER; + now = time(NULL); + + for (i=0; i<PROFILE_CNT; i++) { + if (acct_gather_suspended) { + /* Handle suspended time as if it + * didn't happen */ + if (!acct_gather_profile_timer[i].freq) + continue; + if (acct_gather_profile_timer[i].last_notify) + acct_gather_profile_timer[i]. + last_notify += SLEEP_TIME; + else + acct_gather_profile_timer[i]. + last_notify = now; + continue; + } + + diff = now - acct_gather_profile_timer[i].last_notify; + /* info ("%d is %d and %d", i, */ + /* acct_gather_profile_timer[i].freq, */ + /* diff); */ + if (!acct_gather_profile_timer[i].freq + || (diff < acct_gather_profile_timer[i].freq)) + continue; + debug2("profile signalling type %s", + acct_gather_profile_type_t_name(i)); + + /* signal poller to start */ + slurm_mutex_lock(&acct_gather_profile_timer[i]. 
+ notify_mutex); + pthread_cond_signal( + &acct_gather_profile_timer[i].notify); + slurm_mutex_unlock(&acct_gather_profile_timer[i]. + notify_mutex); + acct_gather_profile_timer[i].last_notify = now; + } + END_TIMER; + usleep(USLEEP_TIME - DELTA_TIMER); + } + + return NULL; +} + +extern int acct_gather_profile_init(void) +{ + int retval = SLURM_SUCCESS; + char *plugin_type = "acct_gather_profile"; + char *type = NULL; + + if (init_run && g_context) + return retval; + + slurm_mutex_lock(&g_context_lock); + + if (g_context) + goto done; + + type = slurm_get_acct_gather_profile_type(); + + g_context = plugin_context_create( + plugin_type, type, (void **)&ops, syms, sizeof(syms)); + + if (!g_context) { + error("cannot create %s context for %s", plugin_type, type); + retval = SLURM_ERROR; + goto done; + } + init_run = true; + +done: + slurm_mutex_unlock(&g_context_lock); + xfree(type); + if (retval == SLURM_SUCCESS) + retval = acct_gather_conf_init(); + + return retval; +} + +extern int acct_gather_profile_fini(void) +{ + int rc = SLURM_SUCCESS, i; + + if (!g_context) + return SLURM_SUCCESS; + + slurm_mutex_lock(&g_context_lock); + + if (!g_context) + goto done; + + init_run = false; + + for (i=0; i < PROFILE_CNT; i++) { + switch (i) { + case PROFILE_ENERGY: + acct_gather_energy_fini(); + break; + case PROFILE_TASK: + jobacct_gather_fini(); + break; + case PROFILE_FILESYSTEM: + acct_gather_filesystem_fini(); + break; + case PROFILE_NETWORK: + acct_gather_infiniband_fini(); + break; + default: + fatal("Unhandled profile option %d please update " + "slurm_acct_gather_profile.c " + "(acct_gather_profile_fini)", i); + } + } + + rc = plugin_context_destroy(g_context); + g_context = NULL; +done: + slurm_mutex_unlock(&g_context_lock); + + return rc; +} + +extern char *acct_gather_profile_to_string(uint32_t profile) +{ + static char profile_str[128]; + + profile_str[0] = '\0'; + if (profile == ACCT_GATHER_PROFILE_NOT_SET) + strcat(profile_str, "NotSet"); + else if (profile == 
ACCT_GATHER_PROFILE_NONE) + strcat(profile_str, "None"); + else { + if (profile & ACCT_GATHER_PROFILE_ENERGY) + strcat(profile_str, "Energy"); + if (profile & ACCT_GATHER_PROFILE_LUSTRE) { + if (profile_str[0]) + strcat(profile_str, ","); + strcat(profile_str, "Lustre"); + } + if (profile & ACCT_GATHER_PROFILE_NETWORK) { + if (profile_str[0]) + strcat(profile_str, ","); + strcat(profile_str, "Network"); + } + if (profile & ACCT_GATHER_PROFILE_TASK) { + if (profile_str[0]) + strcat(profile_str, ","); + strcat(profile_str, "Task"); + } + } + return profile_str; +} + +extern uint32_t acct_gather_profile_from_string(char *profile_str) +{ + uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET; + + if (!profile_str) { + } else if (slurm_strcasestr(profile_str, "none")) + profile = ACCT_GATHER_PROFILE_NONE; + else if (slurm_strcasestr(profile_str, "all")) + profile = ACCT_GATHER_PROFILE_ALL; + else { + if (slurm_strcasestr(profile_str, "energy")) + profile |= ACCT_GATHER_PROFILE_ENERGY; + if (slurm_strcasestr(profile_str, "task")) + profile |= ACCT_GATHER_PROFILE_TASK; + + if (slurm_strcasestr(profile_str, "lustre")) + profile |= ACCT_GATHER_PROFILE_LUSTRE; + + if (slurm_strcasestr(profile_str, "network")) + profile |= ACCT_GATHER_PROFILE_NETWORK; + } + + return profile; +} + +extern char *acct_gather_profile_type_to_string(uint32_t series) +{ + if (series == ACCT_GATHER_PROFILE_ENERGY) + return "Energy"; + else if (series == ACCT_GATHER_PROFILE_TASK) + return "Task"; + else if (series == ACCT_GATHER_PROFILE_LUSTRE) + return "Lustre"; + else if (series == ACCT_GATHER_PROFILE_NETWORK) + return "Network"; + + return "Unknown"; +} + +extern uint32_t acct_gather_profile_type_from_string(char *series_str) +{ + if (!strcasecmp(series_str, "energy")) + return ACCT_GATHER_PROFILE_ENERGY; + else if (!strcasecmp(series_str, "task")) + return ACCT_GATHER_PROFILE_TASK; + else if (!strcasecmp(series_str, "lustre")) + return ACCT_GATHER_PROFILE_LUSTRE; + else if (!strcasecmp(series_str, 
"network")) + return ACCT_GATHER_PROFILE_NETWORK; + + return ACCT_GATHER_PROFILE_NOT_SET; +} + +extern char *acct_gather_profile_type_t_name(acct_gather_profile_type_t type) +{ + switch (type) { + case PROFILE_ENERGY: + return "Energy"; + break; + case PROFILE_TASK: + return "Task"; + break; + case PROFILE_FILESYSTEM: + return "Lustre"; + break; + case PROFILE_NETWORK: + return "Network"; + break; + case PROFILE_CNT: + return "CNT?"; + break; + default: + fatal("Unhandled profile option %d please update " + "slurm_acct_gather_profile.c " + "(acct_gather_profile_type_t_name)", type); + } + + return "Unknown"; +} + +extern int acct_gather_profile_startpoll(char *freq, char *freq_def) +{ + int retval = SLURM_SUCCESS; + pthread_attr_t attr; + int i; + uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET; + + if (acct_gather_profile_init() < 0) + return SLURM_ERROR; + + if (acct_gather_profile_running) { + error("acct_gather_profile_startpoll: poll already started!"); + return retval; + } + acct_gather_profile_running = true; + + (*(ops.get))(ACCT_GATHER_PROFILE_RUNNING, &profile); + xassert(profile != ACCT_GATHER_PROFILE_NOT_SET); + + for (i=0; i < PROFILE_CNT; i++) { + memset(&acct_gather_profile_timer[i], 0, + sizeof(acct_gather_profile_timer_t)); + pthread_cond_init(&acct_gather_profile_timer[i].notify, NULL); + slurm_mutex_init(&acct_gather_profile_timer[i].notify_mutex); + + switch (i) { + case PROFILE_ENERGY: + if (!(profile & ACCT_GATHER_PROFILE_ENERGY)) + break; + _set_freq(i, freq, freq_def); + + acct_gather_energy_startpoll( + acct_gather_profile_timer[i].freq); + break; + case PROFILE_TASK: + /* Always set up the task (always first) to be + done since it is used to control memory + consumption and such. It will check + profile inside it's plugin. 
+ */ + _set_freq(i, freq, freq_def); + + jobacct_gather_startpoll( + acct_gather_profile_timer[i].freq); + + break; + case PROFILE_FILESYSTEM: + if (!(profile & ACCT_GATHER_PROFILE_LUSTRE)) + break; + _set_freq(i, freq, freq_def); + + acct_gather_filesystem_startpoll( + acct_gather_profile_timer[i].freq); + break; + case PROFILE_NETWORK: + if (!(profile & ACCT_GATHER_PROFILE_NETWORK)) + break; + _set_freq(i, freq, freq_def); + + acct_gather_infiniband_startpoll( + acct_gather_profile_timer[i].freq); + break; + default: + fatal("Unhandled profile option %d please update " + "slurm_acct_gather_profile.c " + "(acct_gather_profile_startpoll)", i); + } + } + + /* create polling thread */ + slurm_attr_init(&attr); + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + + if (pthread_create(&timer_thread_id, &attr, + &_timer_thread, NULL)) { + debug("acct_gather_profile_startpoll failed to create " + "_timer_thread: %m"); + } else + debug3("acct_gather_profile_startpoll dynamic logging enabled"); + slurm_attr_destroy(&attr); + + return retval; +} + +extern void acct_gather_profile_endpoll(void) +{ + int i; + + if (!acct_gather_profile_running) { + debug2("acct_gather_profile_startpoll: poll already ended!"); + return; + } + + acct_gather_profile_running = false; + + for (i=0; i < PROFILE_CNT; i++) { + /* end remote threads */ + slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex); + pthread_cond_signal(&acct_gather_profile_timer[i].notify); + slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex); + pthread_cond_destroy(&acct_gather_profile_timer[i].notify); + acct_gather_profile_timer[i].freq = 0; + switch (i) { + case PROFILE_ENERGY: + break; + case PROFILE_TASK: + jobacct_gather_endpoll(); + break; + case PROFILE_FILESYSTEM: + break; + case PROFILE_NETWORK: + break; + default: + fatal("Unhandled profile option %d please update " + "slurm_acct_gather_profile.c " + 
"(acct_gather_profile_endpoll)", i); + } + } +} + +extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + if (acct_gather_profile_init() < 0) + return; + (*(ops.conf_options))(full_options, full_options_cnt); + return; +} + +extern void acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl) +{ + if (acct_gather_profile_init() < 0) + return; + + (*(ops.conf_set))(tbl); + return; +} + +extern void acct_gather_profile_g_get(enum acct_gather_profile_info info_type, + void *data) +{ + if (acct_gather_profile_init() < 0) + return; + + (*(ops.get))(info_type, data); + return; +} + +extern int acct_gather_profile_g_node_step_start(slurmd_job_t* job) +{ + if (acct_gather_profile_init() < 0) + return SLURM_ERROR; + + return (*(ops.node_step_start))(job); +} + +extern int acct_gather_profile_g_node_step_end(void) +{ + int retval = SLURM_ERROR; + + + retval = (*(ops.node_step_end))(); + return retval; +} + +extern int acct_gather_profile_g_task_start(uint32_t taskid) +{ + int retval = SLURM_ERROR; + + if (acct_gather_profile_init() < 0) + return retval; + + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.task_start))(taskid); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_task_end(pid_t taskpid) +{ + int retval = SLURM_ERROR; + + if (acct_gather_profile_init() < 0) + return retval; + + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.task_end))(taskpid); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_add_sample_data(uint32_t type, void* data) +{ + int retval = SLURM_ERROR; + + if (acct_gather_profile_init() < 0) + return retval; + + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.add_sample_data))(type, data); + slurm_mutex_unlock(&profile_mutex); + return retval; +} diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h new file mode 100644 index 
0000000000000000000000000000000000000000..d49d9e1470f360c8c296e4b82c48f0a1d62b1f97 --- /dev/null +++ b/src/common/slurm_acct_gather_profile.h @@ -0,0 +1,197 @@ +/*****************************************************************************\ + * slurm_acct_gather_profile.h - implementation-independent job profile + * accounting plugin definitions + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef __SLURM_ACCT_GATHER_PROFILE_H__ +#define __SLURM_ACCT_GATHER_PROFILE_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <sys/resource.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "slurm/slurm.h" +#include "slurm/slurmdb.h" +#include "src/common/macros.h" +#include "src/common/pack.h" +#include "src/common/list.h" +#include "src/common/xmalloc.h" +#include "src/common/slurm_acct_gather.h" +#include "src/slurmd/slurmstepd/slurmstepd_job.h" + +typedef enum { + PROFILE_ENERGY, + PROFILE_TASK, + PROFILE_FILESYSTEM, + PROFILE_NETWORK, + PROFILE_CNT +} acct_gather_profile_type_t; + +typedef struct { + int freq; + time_t last_notify; + pthread_cond_t notify; + pthread_mutex_t notify_mutex; +} acct_gather_profile_timer_t; + +extern acct_gather_profile_timer_t acct_gather_profile_timer[PROFILE_CNT]; +extern bool acct_gather_profile_running; + +/* + * Load the plugin + */ +extern int acct_gather_profile_init(void); + +/* + * Unload the plugin + */ +extern int acct_gather_profile_fini(void); + +/* translate uint32_t profile to string (DO NOT free) */ +extern char *acct_gather_profile_to_string(uint32_t profile); + +/* translate string of words to uint32_t filled in with bits set to profile */ +extern uint32_t acct_gather_profile_from_string(char *profile_str); + +extern char *acct_gather_profile_type_to_string(uint32_t series); +extern uint32_t acct_gather_profile_type_from_string(char *series_str); + +extern char 
*acct_gather_profile_type_t_name(acct_gather_profile_type_t type); + +extern int acct_gather_profile_startpoll(char *freq, char *freq_def); +extern void acct_gather_profile_endpoll(void); + +/* + * Define plugin local conf for acct_gather.conf + * + * Parameters + * full_options -- pointer that will receive list of plugin local + * definitions + * full_options_cnt -- count of plugin local definitions + */ +extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +/* + * set plugin local conf from acct_gather.conf into its structure + * + * Parameters + * tbl - hash table of acct_gather.conf key-values. + */ +extern void acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl); + +/* + * get info from the profile plugin + * + */ +extern void acct_gather_profile_g_get(enum acct_gather_profile_info info_type, + void *data); + +/* + * Called once per step on each node from slurmstepd, before launching tasks. + * Provides an opportunity to create files and other node-step level + * initialization. + * + * Parameters + * job -- structure defining a slurm job + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_node_step_start(slurmd_job_t* job); + +/* + * Called once per step on each node from slurmstepd, after all tasks end. + * Provides an opportunity to close files, etc. + * + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_node_step_end(void); + +/* + * Called once per task from slurmstepd, BEFORE node step start is called. + * Provides an opportunity to gather beginning values from node counters + * (bytes_read ...) + * At this point in the life cycle, the value of the --profile option isn't + * known and files are not open so calls to the 'add_*_data' + * functions cannot be made.
+ * + * Parameters + * taskid -- slurm taskid + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_task_start(uint32_t taskid); + +/* + * Called once per task from slurmstepd. + * Provides an opportunity to put final data for a task. + * + * Parameters + * taskpid -- linux process id of task + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_task_end(pid_t taskpid); + +/* + * Put data at the Node Samples level. Typically called from something called + * at either job_acct_gather interval or acct_gather_energy interval. + * All samples in the same group will eventually be consolidated in one + * dataset + * + * Parameters + * type -- identifies the type of data. + * data -- data structure to be put to the file. + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_add_sample_data(uint32_t type, void *data); + +#endif /*__SLURM_ACCT_GATHER_PROFILE_H__*/ diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c index 3867b59ad5329e44aa099a34b696f5c15de222cd..6a24615ac0ff8e58658a555d35926788ca6ebb47 100644 --- a/src/common/slurm_auth.c +++ b/src/common/slurm_auth.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_auth.h b/src/common/slurm_auth.h index 2f5fea3d5562760f5eed23a53a60fee47cf5d685..c918b2c28c094e7d89ea720995c26cae9b192e78 100644 --- a/src/common/slurm_auth.h +++ b/src/common/slurm_auth.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 8df1da115e4f7035ad01f506eea7e07bd75ef951..e3ed2945a2f96b73f5d7d28717cdf63359b8e86c 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -633,7 +633,7 @@ slurm_cred_copy(slurm_cred_t *cred) slurm_cred_t * slurm_cred_faker(slurm_cred_arg_t *arg) { - int fd; + int fd, i; slurm_cred_t *cred = NULL; xassert(arg != NULL); @@ -649,7 +649,7 @@ slurm_cred_faker(slurm_cred_arg_t *arg) cred->step_hostlist = xstrdup(arg->step_hostlist); #ifndef HAVE_BG { - int i, sock_recs = 0; + int sock_recs = 0; for (i=0; i<arg->job_nhosts; i++) { sock_recs += arg->sock_core_rep_count[i]; if (sock_recs >= arg->job_nhosts) @@ -678,18 +678,19 @@ slurm_cred_faker(slurm_cred_arg_t *arg) cred->signature = xmalloc(cred->siglen * sizeof(char)); if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) { - if (read(fd, cred->signature, cred->siglen) == -1) + if (read(fd, cred->signature, cred->siglen-1) == -1) error("reading fake signature from /dev/urandom: %m"); if (close(fd) < 0) error("close(/dev/urandom): %m"); + for (i=0; i<cred->siglen-1; i++) + cred->signature[i] = 'a' + (cred->signature[i] & 0xf); } else { /* Note: some systems lack this file */ - unsigned int i; struct timeval tv; gettimeofday(&tv, NULL); i = (unsigned int) (tv.tv_sec + tv.tv_usec); srand((unsigned int) i); - for (i=0; i<cred->siglen; i++) - cred->signature[i] = (rand() & 0xff); + for (i=0; i<cred->siglen-1; i++) + cred->signature[i] = 'a' + (rand() & 0xf); } 
slurm_mutex_unlock(&cred->mutex); @@ -1073,7 +1074,7 @@ static char *_core_format(bitstr_t *core_bitmap) * * NOTE: caller must xfree the returned strings. */ -void format_core_allocs(slurm_cred_t *cred, char *node_name, +void format_core_allocs(slurm_cred_t *cred, char *node_name, uint16_t cpus, char **job_alloc_cores, char **step_alloc_cores, uint32_t *job_mem_limit, uint32_t *step_mem_limit) { @@ -1093,7 +1094,7 @@ void format_core_allocs(slurm_cred_t *cred, char *node_name, hostset_t hset = NULL; int host_index = -1; uint32_t i, j, i_first_bit=0, i_last_bit=0; - uint32_t job_core_cnt=0, step_core_cnt=0; + uint32_t job_cpu_cnt = 0, step_cpu_cnt = 0; xassert(cred); xassert(job_alloc_cores); @@ -1134,38 +1135,39 @@ void format_core_allocs(slurm_cred_t *cred, char *node_name, } } - job_core_bitmap = bit_alloc(i_last_bit - i_first_bit); - if (job_core_bitmap == NULL) { - error("bit_alloc malloc failure"); - hostset_destroy(hset); - return; - } + job_core_bitmap = bit_alloc(i_last_bit - i_first_bit); step_core_bitmap = bit_alloc(i_last_bit - i_first_bit); - if (step_core_bitmap == NULL) { - error("bit_alloc malloc failure"); - FREE_NULL_BITMAP(job_core_bitmap); - hostset_destroy(hset); - return; - } for (i = i_first_bit, j = 0; i < i_last_bit; i++, j++) { if (bit_test(cred->job_core_bitmap, i)) { bit_set(job_core_bitmap, j); - job_core_cnt++; + job_cpu_cnt++; } if (bit_test(cred->step_core_bitmap, i)) { bit_set(step_core_bitmap, j); - step_core_cnt++; + step_cpu_cnt++; + } + } + + /* Scale CPU count, same as slurmd/req.c:_check_job_credential() */ + if (i_last_bit <= i_first_bit) + error("step credential has no CPUs selected"); + else { + uint32_t i = cpus / (i_last_bit - i_first_bit); + if (i > 1) { + info("scaling CPU count by factor of %d", i); + step_cpu_cnt *= i; + job_cpu_cnt *= i; } } if (cred->job_mem_limit & MEM_PER_CPU) { *job_mem_limit = (cred->job_mem_limit & (~MEM_PER_CPU)) * - job_core_cnt; + job_cpu_cnt; } else *job_mem_limit = cred->job_mem_limit; if 
(cred->step_mem_limit & MEM_PER_CPU) { *step_mem_limit = (cred->step_mem_limit & (~MEM_PER_CPU)) * - step_core_cnt; + step_cpu_cnt; } else if (cred->step_mem_limit) *step_mem_limit = cred->step_mem_limit; else @@ -1694,7 +1696,7 @@ extern char * timestr (const time_t *tp, char *buf, size_t n) #endif if (!localtime_r (tp, &tmval)) error ("localtime_r: %m"); - strftime (buf, n, fmt, &tmval); + slurm_strftime (buf, n, fmt, &tmval); return (buf); } @@ -1824,8 +1826,6 @@ _clear_expired_job_states(slurm_cred_ctx_t ctx) last_scan = now; i = list_iterator_create(ctx->job_list); - if (!i) - fatal("list_iterator_create: malloc failure"); while ((j = list_next(i))) { #if DEBUG_TIME char t1[64], t2[64], t3[64]; @@ -2015,6 +2015,8 @@ _cred_state_unpack(slurm_cred_ctx_t ctx, Buf buffer) if (now < s->expiration) list_append(ctx->state_list, s); + else + _cred_state_destroy(s); } return; @@ -2059,7 +2061,8 @@ _job_state_unpack(slurm_cred_ctx_t ctx, Buf buffer) list_append(ctx->job_list, j); else { debug3 ("not appending expired job %u state", - j->jobid); + j->jobid); + _job_state_destroy(j); } } @@ -2122,22 +2125,6 @@ sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx, return sbcast_cred; } -/* Copy an sbcast credential created using create_sbcast_cred() or - * unpack_sbcast_cred() */ -sbcast_cred_t *copy_sbcast_cred(sbcast_cred_t *sbcast_cred) -{ - sbcast_cred_t *rcred = NULL; - - xassert(sbcast_cred); - rcred->ctime = sbcast_cred->ctime; - rcred->expiration = sbcast_cred->expiration; - rcred->jobid = sbcast_cred->jobid; - rcred->nodes = xstrdup(sbcast_cred->nodes); - rcred->siglen = sbcast_cred->siglen; - rcred->signature = xstrdup(sbcast_cred->signature); - return rcred; -} - /* Delete an sbcast credential created using create_sbcast_cred() or * unpack_sbcast_cred() */ void delete_sbcast_cred(sbcast_cred_t *sbcast_cred) @@ -2227,8 +2214,6 @@ int extract_sbcast_cred(slurm_cred_ctx_t ctx, } sbcast_iter = list_iterator_create(sbcast_cache_list); - if (!sbcast_iter) - 
fatal("list_iterator_create: malloc failure"); while ((next_cache_rec = (struct sbcast_cache *) list_next(sbcast_iter))) { if ((next_cache_rec->expire == sbcast_cred->expiration) && diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h index 98312da3ec1cf7e9e839337f80fdb4584aed22df..caa6c78d1e003a66ee42908a50af9cf9a6991f73 100644 --- a/src/common/slurm_cred.h +++ b/src/common/slurm_cred.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -314,7 +314,7 @@ int slurm_cred_get_signature(slurm_cred_t *cred, char **datap, * * NOTE: caller must xfree the returned strings. */ -void format_core_allocs(slurm_cred_t *cred, char *node_name, +void format_core_allocs(slurm_cred_t *cred, char *node_name, uint16_t cpus, char **job_alloc_cores, char **step_alloc_cores, uint32_t *job_mem_limit, uint32_t *step_mem_limit); @@ -339,7 +339,6 @@ void slurm_cred_print(slurm_cred_t *cred); sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx, uint32_t job_id, char *nodes, time_t expiration); -sbcast_cred_t *copy_sbcast_cred(sbcast_cred_t *sbcast_cred); void delete_sbcast_cred(sbcast_cred_t *sbcast_cred); int extract_sbcast_cred(slurm_cred_ctx_t ctx, sbcast_cred_t *sbcast_cred, uint16_t block_no, diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c index 24f50181e701e7fe4d47f4e386569277389d7083..3439668cc8a27d7a90069e5d5d5f16e51302265b 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -231,7 +231,7 @@ static slurm_errtab_t slurm_errtab[] = { "Job violates accounting/QOS policy (job submit limit, user's " "size and/or time limits)"}, { ESLURM_INVALID_TIME_LIMIT, - "Requested time limit is invalid (exceeds some limit)"}, + "Requested time limit is invalid (missing or exceeds some limit)"}, { ESLURM_RESERVATION_ACCESS, "Access denied to requested reservation" }, { ESLURM_RESERVATION_INVALID, @@ -286,6 +286,8 @@ static slurm_errtab_t slurm_errtab[] = { "Switch resources currently not available" }, { ESLURM_RESERVATION_EMPTY, "Reservation request lacks users or accounts" }, + { ESLURM_INVALID_ARRAY, + "Invalid job array specification" }, /* slurmd error codes */ @@ -391,7 +393,11 @@ static slurm_errtab_t slurm_errtab[] = { { ESLURM_ONE_CHANGE, "Can only change one at a time" }, { ESLURM_BAD_NAME, - "Unacceptable name given. (No '.' in name allowed)" } + "Unacceptable name given. (No '.' in name allowed)" }, + + /* plugin and custom errors */ + { ESLURM_MISSING_TIME_LIMIT, + "Missing time limit" } }; /* diff --git a/src/common/slurm_ext_sensors.c b/src/common/slurm_ext_sensors.c new file mode 100644 index 0000000000000000000000000000000000000000..cac220e62159e0812a92f94f45e74c9cc8567e95 --- /dev/null +++ b/src/common/slurm_ext_sensors.c @@ -0,0 +1,214 @@ +/*****************************************************************************\ + * slurm_ext_sensors.c - implementation-independent external sensors plugin + * definitions + ***************************************************************************** + * Copyright (C) 2013 Bull-HN-PHX. + * Written by Bull-HN-PHX/Martin Perry, + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include <pwd.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "src/common/macros.h" +#include "src/common/parse_config.h" +#include "src/common/plugin.h" +#include "src/common/plugrack.h" +#include "src/common/read_config.h" +#include "src/slurmctld/slurmctld.h" +#include "src/common/slurm_ext_sensors.h" +#include "src/common/slurm_protocol_api.h" +#include "src/slurmd/slurmstepd/slurmstepd_job.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" + + +typedef struct slurm_ext_sensors_ops { + int (*update_component_data) (void); + int (*get_stepstartdata) (struct step_record *step_rec); + int (*get_stependdata) (struct step_record *step_rec); +} slurm_ext_sensors_ops_t; +/* + * These strings must be kept in the same order as the fields + * declared for slurm_ext_sensors_ops_t. + */ +static const char *syms[] = { + "ext_sensors_p_update_component_data", + "ext_sensors_p_get_stepstartdata", + "ext_sensors_p_get_stependdata", +}; + +static slurm_ext_sensors_ops_t ops; +static plugin_context_t *g_context = NULL; +static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; + +extern int ext_sensors_init(void) +{ + int retval = SLURM_SUCCESS; + char *plugin_type = "ext_sensors"; + char *type = NULL; + + if (init_run && g_context) + return retval; + + slurm_mutex_lock(&g_context_lock); + + if (g_context) + goto done; + + type = slurm_get_ext_sensors_type(); + + g_context = plugin_context_create( + plugin_type, type, (void **)&ops, syms, sizeof(syms)); + + if (!g_context) { + error("cannot create %s context for %s", plugin_type, type); + retval = SLURM_ERROR; + goto done; + } + init_run = true; + +done: + slurm_mutex_unlock(&g_context_lock); + xfree(type); + + return retval; +} + +extern int ext_sensors_fini(void) +{ + int rc; + + if 
(!g_context) + return SLURM_SUCCESS; + + init_run = false; + rc = plugin_context_destroy(g_context); + g_context = NULL; + + return rc; +} + +extern ext_sensors_data_t *ext_sensors_alloc(void) +{ + ext_sensors_data_t *ext_sensors = + xmalloc(sizeof(struct ext_sensors_data)); + + ext_sensors->consumed_energy = NO_VAL; + ext_sensors->temperature = NO_VAL; + + return ext_sensors; +} + +extern void ext_sensors_destroy(ext_sensors_data_t *ext_sensors) +{ + xfree(ext_sensors); +} + +extern void ext_sensors_data_pack(ext_sensors_data_t *ext_sensors, Buf buffer, + uint16_t protocol_version) +{ + if (!ext_sensors) { + pack32(0, buffer); + pack32(0, buffer); + pack_time((time_t)0, buffer); + pack32(0, buffer); + return; + } + + pack32(ext_sensors->consumed_energy, buffer); + pack32(ext_sensors->temperature, buffer); + pack_time(ext_sensors->energy_update_time, buffer); + pack32(ext_sensors->current_watts, buffer); +} + +extern int ext_sensors_data_unpack(ext_sensors_data_t **ext_sensors, Buf buffer, + uint16_t protocol_version) +{ + ext_sensors_data_t *ext_sensors_ptr = ext_sensors_alloc(); + *ext_sensors = ext_sensors_ptr; + if (ext_sensors_ptr == NULL) + return SLURM_ERROR; + + safe_unpack32(&ext_sensors_ptr->consumed_energy, buffer); + safe_unpack32(&ext_sensors_ptr->temperature, buffer); + safe_unpack_time(&ext_sensors_ptr->energy_update_time, buffer); + safe_unpack32(&ext_sensors_ptr->current_watts, buffer); + + return SLURM_SUCCESS; + +unpack_error: + ext_sensors_destroy(ext_sensors_ptr); + *ext_sensors = NULL; + return SLURM_ERROR; +} + +extern int ext_sensors_g_update_component_data(void) +{ + int retval = SLURM_ERROR; + + if (ext_sensors_init() < 0) + return retval; + + retval = (*(ops.update_component_data))(); + + return retval; +} + +extern int ext_sensors_g_get_stepstartdata(struct step_record *step_rec) +{ + int retval = SLURM_ERROR; + + if (ext_sensors_init() < 0) + return retval; + + retval = (*(ops.get_stepstartdata))(step_rec); + + return retval; +} + 
+extern int ext_sensors_g_get_stependdata(struct step_record *step_rec) +{ + int retval = SLURM_ERROR; + + if (ext_sensors_init() < 0) + return retval; + + retval = (*(ops.get_stependdata))(step_rec); + + return retval; +} diff --git a/src/common/slurm_ext_sensors.h b/src/common/slurm_ext_sensors.h new file mode 100644 index 0000000000000000000000000000000000000000..7468e457683a77ee876c7ed4a8872b933ed44343 --- /dev/null +++ b/src/common/slurm_ext_sensors.h @@ -0,0 +1,80 @@ +/*****************************************************************************\ + * slurm_ext_sensors.h - implementation-independent external sensors plugin + * definitions + ***************************************************************************** + * Written by Bull-HN-PHX/Martin Perry, + * Copyright (C) 2013 Bull-HN-PHX + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef __SLURM_EXT_SENSORS_H__ +#define __SLURM_EXT_SENSORS_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <sys/resource.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "slurm/slurm.h" +#include "slurm/slurmdb.h" + +#include "src/slurmctld/slurmctld.h" +#include "src/common/macros.h" +#include "src/common/pack.h" +#include "src/common/list.h" +#include "src/common/xmalloc.h" + +extern int ext_sensors_init(void); /* load the plugin */ +extern int ext_sensors_fini(void); /* unload the plugin */ +extern ext_sensors_data_t *ext_sensors_alloc(void); +extern void ext_sensors_destroy(ext_sensors_data_t *ext_sensors); +extern void ext_sensors_data_pack(ext_sensors_data_t *ext_sensors, Buf buffer, + uint16_t protocol_version); +extern int ext_sensors_data_unpack(ext_sensors_data_t **ext_sensors, Buf buffer, + uint16_t protocol_version); + +extern int ext_sensors_g_update_component_data(void); +extern int ext_sensors_g_get_stepstartdata(struct step_record *step_rec); +extern int ext_sensors_g_get_stependdata(struct step_record *step_rec); +#endif /*__SLURM_EXT_SENSORS_H__*/ diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c index 
61664113d72ec889636a65bd420909a161e93b1e..19e3c1fc26dd47235c7bc3355b11f8b3dd0ee1c0 100644 --- a/src/common/slurm_jobacct_gather.c +++ b/src/common/slurm_jobacct_gather.c @@ -10,7 +10,7 @@ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -58,6 +58,7 @@ #include "src/common/plugin.h" #include "src/common/plugrack.h" #include "src/common/read_config.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/slurmdbd_defs.h" #include "src/common/xmalloc.h" @@ -108,7 +109,6 @@ static uint64_t cont_id = (uint64_t)NO_VAL; static pthread_mutex_t task_list_lock = PTHREAD_MUTEX_INITIALIZER; static bool jobacct_shutdown = true; -static bool jobacct_suspended = 0; static bool plugin_polling = true; static uint32_t jobacct_job_id = 0; @@ -137,7 +137,7 @@ static void _acct_kill_step(void) req.job_id = jobacct_job_id; req.job_step_id = jobacct_step_id; req.signal = SIGKILL; - req.batch_flag = 0; + req.flags = 0; msg.msg_type = REQUEST_CANCEL_JOB_STEP; msg.data = &req; @@ -167,11 +167,35 @@ unpack_error: return SLURM_ERROR; } -static void _poll_data(void) +static void _write_jobacct_id(int fd, jobacct_id_t *jobacct_id, + uint16_t rpc_version) { - if (jobacct_suspended) - return; + if (jobacct_id) { + safe_write(fd, &jobacct_id->nodeid, sizeof(uint32_t)); + safe_write(fd, &jobacct_id->taskid, sizeof(uint16_t)); + } else { + uint32_t no32 = NO_VAL; + uint16_t no16 = (uint16_t)NO_VAL; + safe_write(fd, &no32, sizeof(uint32_t)); + safe_write(fd, &no16, sizeof(uint16_t)); + } +rwfail: + return; +} + +static int _read_jobacct_id(int fd, jobacct_id_t *jobacct_id, + uint16_t rpc_version) +{ + safe_read(fd, 
&jobacct_id->nodeid, sizeof(uint32_t)); + safe_read(fd, &jobacct_id->taskid, sizeof(uint16_t)); + + return SLURM_SUCCESS; +rwfail: + return SLURM_ERROR; +} +static void _poll_data(void) +{ /* Update the data */ slurm_mutex_lock(&task_list_lock); (*(ops.poll_data))(task_list, pgid_plugin, cont_id); @@ -192,16 +216,22 @@ static void _task_sleep(int rem) static void *_watch_tasks(void *arg) { + int type = PROFILE_TASK; /* Give chance for processes to spawn before starting * the polling. This should largely eliminate the * the chance of having /proc open when the tasks are * spawned, which would prevent a valid checkpoint/restart * with some systems */ _task_sleep(1); - - while (!jobacct_shutdown) { /* Do this until shutdown is requested */ + while (!jobacct_shutdown && acct_gather_profile_running) { + /* Do this until shutdown is requested */ _poll_data(); - _task_sleep(freq); + slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex); + pthread_cond_wait( + &acct_gather_profile_timer[type].notify, + &acct_gather_profile_timer[type].notify_mutex); + slurm_mutex_unlock(&acct_gather_profile_timer[type]. 
+ notify_mutex); } return NULL; } @@ -330,7 +360,7 @@ extern int jobacct_gather_endpoll(void) jobacct_shutdown = true; slurm_mutex_lock(&task_list_lock); - if(task_list) + if (task_list) list_destroy(task_list); task_list = NULL; slurm_mutex_unlock(&task_list_lock); @@ -340,49 +370,6 @@ extern int jobacct_gather_endpoll(void) return retval; } -extern void jobacct_gather_change_poll(uint16_t frequency) -{ - if (jobacct_gather_init() < 0) - return; - - if (plugin_polling && freq == 0 && frequency != 0) { - pthread_attr_t attr; - pthread_t _watch_tasks_thread_id; - /* create polling thread */ - slurm_attr_init(&attr); - if (pthread_attr_setdetachstate(&attr, - PTHREAD_CREATE_DETACHED)) - error("pthread_attr_setdetachstate error %m"); - - if (pthread_create(&_watch_tasks_thread_id, &attr, - &_watch_tasks, NULL)) { - debug("jobacct-gather failed to create _watch_tasks " - "thread: %m"); - frequency = 0; - } - else - debug3("jobacct-gather LINUX dynamic logging enabled"); - slurm_attr_destroy(&attr); - jobacct_shutdown = false; - } - - freq = frequency; - debug("jobacct-gather: frequency changed = %d", frequency); - if (freq == 0) - jobacct_shutdown = true; - return; -} - -extern void jobacct_gather_suspend_poll(void) -{ - jobacct_suspended = true; -} - -extern void jobacct_gather_resume_poll(void) -{ - jobacct_suspended = false; -} - extern int jobacct_gather_add_task(pid_t pid, jobacct_id_t *jobacct_id, int poll) { @@ -446,7 +433,7 @@ extern jobacctinfo_t *jobacct_gather_stat_task(pid_t pid) itr = list_iterator_create(task_list); while ((jobacct = list_next(itr))) { - if(jobacct->pid == pid) + if (jobacct->pid == pid) break; } list_iterator_destroy(itr); @@ -494,13 +481,13 @@ extern jobacctinfo_t *jobacct_gather_remove_task(pid_t pid) itr = list_iterator_create(task_list); while((jobacct = list_next(itr))) { - if(jobacct->pid == pid) { + if (jobacct->pid == pid) { list_remove(itr); break; } } list_iterator_destroy(itr); - if(jobacct) { + if (jobacct) { debug2("removing 
task %u pid %d from jobacct", jobacct->max_vsize_id.taskid, jobacct->pid); } else { @@ -630,6 +617,12 @@ extern jobacctinfo_t *jobacctinfo_create(jobacct_id_t *jobacct_id) jobacct->tot_cpu = 0; jobacct->act_cpufreq = 0; memset(&jobacct->energy, 0, sizeof(acct_gather_energy_t)); + jobacct->max_disk_read = 0; + memcpy(&jobacct->max_disk_read_id, jobacct_id, sizeof(jobacct_id_t)); + jobacct->tot_disk_read = 0; + jobacct->max_disk_write = 0; + memcpy(&jobacct->max_disk_write_id, jobacct_id, sizeof(jobacct_id_t)); + jobacct->tot_disk_write = 0; return jobacct; } @@ -641,12 +634,14 @@ extern void jobacctinfo_destroy(void *object) } extern int jobacctinfo_setinfo(jobacctinfo_t *jobacct, - enum jobacct_data_type type, void *data) + enum jobacct_data_type type, void *data, + uint16_t protocol_version) { int rc = SLURM_SUCCESS; int *fd = (int *)data; struct rusage *rusage = (struct rusage *)data; uint32_t *uint32 = (uint32_t *) data; + double *dub = (double *) data; jobacct_id_t *jobacct_id = (jobacct_id_t *) data; struct jobacctinfo *send = (struct jobacctinfo *) data; @@ -658,7 +653,81 @@ extern int jobacctinfo_setinfo(jobacctinfo_t *jobacct, memcpy(jobacct, send, sizeof(struct jobacctinfo)); break; case JOBACCT_DATA_PIPE: - safe_write(*fd, jobacct, sizeof(struct jobacctinfo)); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_write(*fd, &jobacct->user_cpu_sec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->user_cpu_usec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->sys_cpu_sec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->sys_cpu_usec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->max_vsize, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_vsize, sizeof(uint32_t)); + safe_write(*fd, &jobacct->max_rss, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_rss, sizeof(uint32_t)); + safe_write(*fd, &jobacct->max_pages, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_pages, sizeof(uint32_t)); + safe_write(*fd, &jobacct->min_cpu, sizeof(uint32_t)); 
+ safe_write(*fd, &jobacct->tot_cpu, sizeof(uint32_t)); + safe_write(*fd, &jobacct->act_cpufreq, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->energy.consumed_energy, + sizeof(uint32_t)); + + safe_write(*fd, &jobacct->max_disk_read, + sizeof(double)); + safe_write(*fd, &jobacct->tot_disk_read, + sizeof(double)); + safe_write(*fd, &jobacct->max_disk_write, + sizeof(double)); + safe_write(*fd, &jobacct->tot_disk_write, + sizeof(double)); + + _write_jobacct_id(*fd, &jobacct->max_vsize_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->max_rss_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->max_pages_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->min_cpu_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->max_disk_read_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->max_disk_write_id, + protocol_version); + } else { + safe_write(*fd, &jobacct->user_cpu_sec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->user_cpu_usec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->sys_cpu_sec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->sys_cpu_usec, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->max_vsize, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_vsize, sizeof(uint32_t)); + safe_write(*fd, &jobacct->max_rss, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_rss, sizeof(uint32_t)); + safe_write(*fd, &jobacct->max_pages, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_pages, sizeof(uint32_t)); + safe_write(*fd, &jobacct->min_cpu, sizeof(uint32_t)); + safe_write(*fd, &jobacct->tot_cpu, sizeof(uint32_t)); + safe_write(*fd, &jobacct->act_cpufreq, + sizeof(uint32_t)); + safe_write(*fd, &jobacct->energy.consumed_energy, + sizeof(uint32_t)); + + _write_jobacct_id(*fd, &jobacct->max_vsize_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->max_rss_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->max_pages_id, + protocol_version); + _write_jobacct_id(*fd, &jobacct->min_cpu_id, + 
protocol_version); + } + break; case JOBACCT_DATA_RUSAGE: jobacct->user_cpu_sec = rusage->ru_utime.tv_sec; @@ -708,6 +777,24 @@ extern int jobacctinfo_setinfo(jobacctinfo_t *jobacct, case JOBACCT_DATA_CONSUMED_ENERGY: jobacct->energy.consumed_energy = *uint32; break; + case JOBACCT_DATA_MAX_DISK_READ: + jobacct->max_disk_read = *dub; + break; + case JOBACCT_DATA_MAX_DISK_READ_ID: + jobacct->max_disk_read_id = *jobacct_id; + break; + case JOBACCT_DATA_TOT_DISK_READ: + jobacct->tot_disk_read = *dub; + break; + case JOBACCT_DATA_MAX_DISK_WRITE: + jobacct->max_disk_write = *dub; + break; + case JOBACCT_DATA_MAX_DISK_WRITE_ID: + jobacct->max_disk_write_id = *jobacct_id; + break; + case JOBACCT_DATA_TOT_DISK_WRITE: + jobacct->tot_disk_write = *dub; + break; default: debug("jobacct_g_set_setinfo data_type %d invalid", type); } @@ -718,11 +805,13 @@ rwfail: } extern int jobacctinfo_getinfo( - jobacctinfo_t *jobacct, enum jobacct_data_type type, void *data) + jobacctinfo_t *jobacct, enum jobacct_data_type type, void *data, + uint16_t protocol_version) { int rc = SLURM_SUCCESS; int *fd = (int *)data; uint32_t *uint32 = (uint32_t *) data; + double *dub = (double *) data; jobacct_id_t *jobacct_id = (jobacct_id_t *) data; struct rusage *rusage = (struct rusage *)data; struct jobacctinfo *send = (struct jobacctinfo *) data; @@ -735,7 +824,75 @@ extern int jobacctinfo_getinfo( memcpy(send, jobacct, sizeof(struct jobacctinfo)); break; case JOBACCT_DATA_PIPE: - safe_read(*fd, jobacct, sizeof(struct jobacctinfo)); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_read(*fd, &jobacct->user_cpu_sec, + sizeof(uint32_t)); + safe_read(*fd, &jobacct->user_cpu_usec, + sizeof(uint32_t)); + safe_read(*fd, &jobacct->sys_cpu_sec, sizeof(uint32_t)); + safe_read(*fd, &jobacct->sys_cpu_usec, + sizeof(uint32_t)); + safe_read(*fd, &jobacct->max_vsize, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_vsize, sizeof(uint32_t)); + safe_read(*fd, &jobacct->max_rss, sizeof(uint32_t)); + 
safe_read(*fd, &jobacct->tot_rss, sizeof(uint32_t)); + safe_read(*fd, &jobacct->max_pages, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_pages, sizeof(uint32_t)); + safe_read(*fd, &jobacct->min_cpu, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_cpu, sizeof(uint32_t)); + safe_read(*fd, &jobacct->act_cpufreq, sizeof(uint32_t)); + safe_read(*fd, &jobacct->energy.consumed_energy, + sizeof(uint32_t)); + + safe_read(*fd, &jobacct->max_disk_read, sizeof(double)); + safe_read(*fd, &jobacct->tot_disk_read, sizeof(double)); + safe_read(*fd, &jobacct->max_disk_write, + sizeof(double)); + safe_read(*fd, &jobacct->tot_disk_write, + sizeof(double)); + + _read_jobacct_id(*fd, &jobacct->max_vsize_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->max_rss_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->max_pages_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->min_cpu_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->max_disk_read_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->max_disk_write_id, + protocol_version); + } else { + safe_read(*fd, &jobacct->user_cpu_sec, + sizeof(uint32_t)); + safe_read(*fd, &jobacct->user_cpu_usec, + sizeof(uint32_t)); + safe_read(*fd, &jobacct->sys_cpu_sec, sizeof(uint32_t)); + safe_read(*fd, &jobacct->sys_cpu_usec, + sizeof(uint32_t)); + safe_read(*fd, &jobacct->max_vsize, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_vsize, sizeof(uint32_t)); + safe_read(*fd, &jobacct->max_rss, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_rss, sizeof(uint32_t)); + safe_read(*fd, &jobacct->max_pages, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_pages, sizeof(uint32_t)); + safe_read(*fd, &jobacct->min_cpu, sizeof(uint32_t)); + safe_read(*fd, &jobacct->tot_cpu, sizeof(uint32_t)); + safe_read(*fd, &jobacct->act_cpufreq, sizeof(uint32_t)); + safe_read(*fd, &jobacct->energy.consumed_energy, + sizeof(uint32_t)); + + _read_jobacct_id(*fd, &jobacct->max_vsize_id, + protocol_version); + 
_read_jobacct_id(*fd, &jobacct->max_rss_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->max_pages_id, + protocol_version); + _read_jobacct_id(*fd, &jobacct->min_cpu_id, + protocol_version); + } + break; case JOBACCT_DATA_RUSAGE: memset(rusage, 0, sizeof(struct rusage)); @@ -786,6 +943,24 @@ extern int jobacctinfo_getinfo( case JOBACCT_DATA_CONSUMED_ENERGY: *uint32 = jobacct->energy.consumed_energy; break; + case JOBACCT_DATA_MAX_DISK_READ: + *dub = jobacct->max_disk_read; + break; + case JOBACCT_DATA_MAX_DISK_READ_ID: + *jobacct_id = jobacct->max_disk_read_id; + break; + case JOBACCT_DATA_TOT_DISK_READ: + *dub = jobacct->tot_disk_read; + break; + case JOBACCT_DATA_MAX_DISK_WRITE: + *dub = jobacct->max_disk_write; + break; + case JOBACCT_DATA_MAX_DISK_WRITE_ID: + *jobacct_id = jobacct->max_disk_write_id; + break; + case JOBACCT_DATA_TOT_DISK_WRITE: + *dub = jobacct->tot_disk_write; + break; default: debug("jobacct_g_set_getinfo data_type %d invalid", type); } @@ -816,7 +991,46 @@ extern void jobacctinfo_pack(jobacctinfo_t *jobacct, if (protocol_type == PROTOCOL_TYPE_DBD) rpc_version = slurmdbd_translate_rpc(rpc_version); - if (rpc_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (rpc_version >= SLURM_2_6_PROTOCOL_VERSION) { + if (!jobacct) { + for (i = 0; i < 14; i++) + pack32((uint32_t) 0, buffer); + for (i = 0; i < 4; i++) + packdouble((double) 0, buffer); + for (i = 0; i < 6; i++) + _pack_jobacct_id(NULL, rpc_version, buffer); + return; + } + + pack32((uint32_t)jobacct->user_cpu_sec, buffer); + pack32((uint32_t)jobacct->user_cpu_usec, buffer); + pack32((uint32_t)jobacct->sys_cpu_sec, buffer); + pack32((uint32_t)jobacct->sys_cpu_usec, buffer); + pack32((uint32_t)jobacct->max_vsize, buffer); + pack32((uint32_t)jobacct->tot_vsize, buffer); + pack32((uint32_t)jobacct->max_rss, buffer); + pack32((uint32_t)jobacct->tot_rss, buffer); + pack32((uint32_t)jobacct->max_pages, buffer); + pack32((uint32_t)jobacct->tot_pages, buffer); + 
pack32((uint32_t)jobacct->min_cpu, buffer); + pack32((uint32_t)jobacct->tot_cpu, buffer); + pack32((uint32_t)jobacct->act_cpufreq, buffer); + pack32((uint32_t)jobacct->energy.consumed_energy, buffer); + + packdouble((double)jobacct->max_disk_read, buffer); + packdouble((double)jobacct->tot_disk_read, buffer); + packdouble((double)jobacct->max_disk_write, buffer); + packdouble((double)jobacct->tot_disk_write, buffer); + + _pack_jobacct_id(&jobacct->max_vsize_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->max_rss_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->max_pages_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->min_cpu_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->max_disk_read_id, rpc_version, + buffer); + _pack_jobacct_id(&jobacct->max_disk_write_id, rpc_version, + buffer); + } else if (rpc_version >= SLURM_2_5_PROTOCOL_VERSION) { if (!jobacct) { for (i = 0; i < 14; i++) pack32((uint32_t) 0, buffer); @@ -896,7 +1110,51 @@ extern int jobacctinfo_unpack(jobacctinfo_t **jobacct, if (protocol_type == PROTOCOL_TYPE_DBD) rpc_version = slurmdbd_translate_rpc(rpc_version); - if (rpc_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (rpc_version >= SLURM_2_6_PROTOCOL_VERSION) { + *jobacct = xmalloc(sizeof(struct jobacctinfo)); + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->user_cpu_sec = uint32_tmp; + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->user_cpu_usec = uint32_tmp; + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->sys_cpu_sec = uint32_tmp; + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->sys_cpu_usec = uint32_tmp; + safe_unpack32(&(*jobacct)->max_vsize, buffer); + safe_unpack32(&(*jobacct)->tot_vsize, buffer); + safe_unpack32(&(*jobacct)->max_rss, buffer); + safe_unpack32(&(*jobacct)->tot_rss, buffer); + safe_unpack32(&(*jobacct)->max_pages, buffer); + safe_unpack32(&(*jobacct)->tot_pages, buffer); + safe_unpack32(&(*jobacct)->min_cpu, buffer); + safe_unpack32(&(*jobacct)->tot_cpu, buffer); + 
safe_unpack32(&(*jobacct)->act_cpufreq, buffer); + safe_unpack32(&(*jobacct)->energy.consumed_energy, buffer); + + safe_unpackdouble(&(*jobacct)->max_disk_read, buffer); + safe_unpackdouble(&(*jobacct)->tot_disk_read, buffer); + safe_unpackdouble(&(*jobacct)->max_disk_write, buffer); + safe_unpackdouble(&(*jobacct)->tot_disk_write, buffer); + + if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version, + buffer) != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version, + buffer) != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version, + buffer) != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version, + buffer) != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->max_disk_read_id, + rpc_version, buffer) != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->max_disk_write_id, + rpc_version, buffer) != SLURM_SUCCESS) + goto unpack_error; + } else if (rpc_version >= SLURM_2_5_PROTOCOL_VERSION) { *jobacct = xmalloc(sizeof(struct jobacctinfo)); safe_unpack32(&uint32_tmp, buffer); (*jobacct)->user_cpu_sec = uint32_tmp; @@ -1038,6 +1296,18 @@ extern void jobacctinfo_aggregate(jobacctinfo_t *dest, jobacctinfo_t *from) dest->energy.consumed_energy = NO_VAL; else dest->energy.consumed_energy += from->energy.consumed_energy; + + if (dest->max_disk_read < from->max_disk_read) { + dest->max_disk_read = from->max_disk_read; + dest->max_disk_read_id = from->max_disk_read_id; + } + dest->tot_disk_read += from->tot_disk_read; + + if (dest->max_disk_write < from->max_disk_write) { + dest->max_disk_write = from->max_disk_write; + dest->max_disk_write_id = from->max_disk_write_id; + } + dest->tot_disk_write += from->tot_disk_write; } extern void jobacctinfo_2_stats(slurmdb_stats_t *stats, jobacctinfo_t *jobacct) @@ -1063,7 +1333,16 @@ extern void jobacctinfo_2_stats(slurmdb_stats_t *stats, 
jobacctinfo_t *jobacct) stats->cpu_ave = (double)jobacct->tot_cpu; stats->act_cpufreq = (double)jobacct->act_cpufreq; if (jobacct->energy.consumed_energy == NO_VAL) - stats->consumed_energy = NO_VAL; + stats->consumed_energy = (double)NO_VAL; else - stats->consumed_energy = (double)jobacct->energy.consumed_energy; + stats->consumed_energy = + (double)jobacct->energy.consumed_energy; + stats->disk_read_max = jobacct->max_disk_read; + stats->disk_read_max_nodeid = jobacct->max_disk_read_id.nodeid; + stats->disk_read_max_taskid = jobacct->max_disk_read_id.taskid; + stats->disk_read_ave = jobacct->tot_disk_read; + stats->disk_write_max = jobacct->max_disk_write; + stats->disk_write_max_nodeid = jobacct->max_disk_write_id.nodeid; + stats->disk_write_max_taskid = jobacct->max_disk_write_id.taskid; + stats->disk_write_ave = jobacct->tot_disk_write; } diff --git a/src/common/slurm_jobacct_gather.h b/src/common/slurm_jobacct_gather.h index 9fb2da877afc44acbf5c046488dda7297e91ab4a..104a20fc18346370956e23078e2a73d6adda112e 100644 --- a/src/common/slurm_jobacct_gather.h +++ b/src/common/slurm_jobacct_gather.h @@ -10,7 +10,7 @@ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -79,9 +79,16 @@ #include "src/slurmd/slurmstepd/slurmstepd_job.h" -#define FDUMP_FLAG 0x04 #define PROTOCOL_TYPE_SLURM 0 #define PROTOCOL_TYPE_DBD 1 + +struct lustre_data { + uint64_t reads; + double read_size; // currently in megabytes + uint64_t writes; + double write_size; // currently in megabytes +}; + typedef struct { uint16_t taskid; /* contains which task number it was on */ uint32_t nodeid; /* contains which node number it was on */ @@ -115,6 +122,12 @@ struct jobacctinfo { uint32_t this_sampled_cputime; uint32_t current_weighted_freq; uint32_t current_weighted_power; + double max_disk_read; /* max disk read data */ + jobacct_id_t max_disk_read_id; /* max disk read data task id */ + double tot_disk_read; /* total local disk read in megabytes */ + double max_disk_write; /* max disk write data */ + jobacct_id_t max_disk_write_id; /* max disk write data task id */ + double tot_disk_write; /* total local disk writes in megabytes */ }; /* Define jobacctinfo_t below to avoid including extraneous slurm headers */ @@ -128,7 +141,6 @@ extern int jobacct_gather_fini(void); /* unload the plugin */ extern int jobacct_gather_startpoll(uint16_t frequency); extern int jobacct_gather_endpoll(void); -extern void jobacct_gather_change_poll(uint16_t frequency); extern void jobacct_gather_suspend_poll(void); extern void jobacct_gather_resume_poll(void); @@ -148,9 +160,11 @@ extern void jobacct_gather_handle_mem_limit( extern jobacctinfo_t *jobacctinfo_create(jobacct_id_t *jobacct_id); extern void jobacctinfo_destroy(void *object); extern int jobacctinfo_setinfo(jobacctinfo_t *jobacct, - enum jobacct_data_type type, void *data); + enum jobacct_data_type type, void *data, + uint16_t protocol_version); extern int jobacctinfo_getinfo(jobacctinfo_t *jobacct, - enum jobacct_data_type type, void *data); + enum jobacct_data_type type, void *data, + uint16_t protocol_version); extern void 
jobacctinfo_pack(jobacctinfo_t *jobacct, uint16_t rpc_version, uint16_t protocol_type, Buf buffer); diff --git a/src/common/slurm_jobcomp.c b/src/common/slurm_jobcomp.c index bacac57f701dc6766574c3f02741dcb9a7b31c0c..203692912a1c7c056edc6dff542b4b8f556ce9b4 100644 --- a/src/common/slurm_jobcomp.c +++ b/src/common/slurm_jobcomp.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_jobcomp.h b/src/common/slurm_jobcomp.h index 4e667be14d5c8bb1ad5eca148d9fab7b88a8bbfd..41824ce979d32637b51667cde77407d5a329ede5 100644 --- a/src/common/slurm_jobcomp.h +++ b/src/common/slurm_jobcomp.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_priority.c b/src/common/slurm_priority.c index 9f8b52c522ebc6bb37391d87efb8c0e71793c30a..1ce4e8bec6e90ab588de979c055a44e6c6230162 100644 --- a/src/common/slurm_priority.c +++ b/src/common/slurm_priority.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -44,12 +44,13 @@ typedef struct slurm_priority_ops { uint32_t (*set) (uint32_t last_prio, struct job_record *job_ptr); - void (*reconfig) (void); + void (*reconfig) (bool assoc_clear); void (*set_assoc_usage)(slurmdb_association_rec_t *assoc); double (*calc_fs_factor) (long double usage_efctv, long double shares_norm); List (*get_priority_factors) (priority_factors_request_msg_t *req_msg, uid_t uid); + void (*job_end) (struct job_record *job_ptr); } slurm_priority_ops_t; /* @@ -61,6 +62,7 @@ static const char *syms[] = { "priority_p_set_assoc_usage", "priority_p_calc_fs_factor", "priority_p_get_priority_factors_list", + "priority_p_job_end", }; static slurm_priority_ops_t ops; @@ -124,12 +126,12 @@ extern uint32_t priority_g_set(uint32_t last_prio, struct job_record *job_ptr) return (*(ops.set))(last_prio, job_ptr); } -extern void priority_g_reconfig(void) +extern void priority_g_reconfig(bool assoc_clear) { if (slurm_priority_init() < 0) return; - (*(ops.reconfig))(); + (*(ops.reconfig))(assoc_clear); return; } @@ -162,3 +164,11 @@ extern List priority_g_get_priority_factors_list( return (*(ops.get_priority_factors))(req_msg, uid); } +extern void priority_g_job_end(struct job_record *job_ptr) +{ + if (slurm_priority_init() < 0) + return; + + (*(ops.job_end))(job_ptr); +} + diff --git a/src/common/slurm_priority.h b/src/common/slurm_priority.h index 86c087f4209ac50bf92f5240f0265409b319d299..0c8978cd88863b4fb1eeb52dc0cff297f76da271 100644 --- a/src/common/slurm_priority.h +++ b/src/common/slurm_priority.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -55,7 +55,7 @@ extern int slurm_priority_init(void); extern int slurm_priority_fini(void); extern uint32_t priority_g_set(uint32_t last_prio, struct job_record *job_ptr); -extern void priority_g_reconfig(void); +extern void priority_g_reconfig(bool assoc_clear); /* sets up the normalized usage and the effective usage of an * association. @@ -67,4 +67,10 @@ extern double priority_g_calc_fs_factor(long double usage_efctv, extern List priority_g_get_priority_factors_list( priority_factors_request_msg_t *req_msg, uid_t uid); +/* Call at the end of a job to remove decayable limits. At least + * slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, + * READ_LOCK, READ_LOCK }; should be locked before calling this. + */ +extern void priority_g_job_end(struct job_record *job_ptr); + #endif /*_SLURM_PRIORIY_H */ diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 7acc33605b9ab1dce27583fccac5e12fe2f32953..dba327858f6ca8d76d5c062a2bdaec09575e7a45 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -58,6 +58,7 @@ #include <ctype.h> /* PROJECT INCLUDES */ +#include "src/common/fd.h" #include "src/common/macros.h" #include "src/common/pack.h" #include "src/common/parse_spec.h" @@ -1017,6 +1018,25 @@ char *slurm_get_job_submit_plugins(void) return job_submit_plugins; } +/* slurm_get_slurmctld_plugstack + * get slurmctld_plugstack from the + * slurmctld_conf object + * RET char * - slurmctld_plugstack, MUST be xfreed by caller + */ +char *slurm_get_slurmctld_plugstack(void) +{ + char *slurmctld_plugstack = NULL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + slurmctld_plugstack = xstrdup(conf->slurmctld_plugstack); + slurm_conf_unlock(); + } + return slurmctld_plugstack; +} + /* slurm_get_accounting_storage_type * returns the accounting storage type from slurmctld_conf object * RET char * - accounting storage type, MUST be xfreed by caller @@ -1176,9 +1196,9 @@ int slurm_set_accounting_storage_loc(char *loc) /* slurm_get_accounting_storage_enforce * returns what level to enforce associations at */ -int slurm_get_accounting_storage_enforce(void) +uint16_t slurm_get_accounting_storage_enforce(void) { - int enforce = 0; + uint16_t enforce = 0; slurm_ctl_conf_t *conf; if (slurmdbd_conf) { @@ -1358,15 +1378,15 @@ char *slurm_get_jobacct_gather_type(void) * returns the job accounting poll frequency from the slurmctld_conf object * RET int - job accounting frequency */ -uint16_t slurm_get_jobacct_gather_freq(void) +char *slurm_get_jobacct_gather_freq(void) { - uint16_t freq = 0; + char *freq = NULL; slurm_ctl_conf_t *conf; if (slurmdbd_conf) { } else { conf = slurm_conf_lock(); - freq = conf->job_acct_gather_freq; + freq = xstrdup(conf->job_acct_gather_freq); slurm_conf_unlock(); } return freq; @@ -1391,6 +1411,64 @@ char *slurm_get_acct_gather_energy_type(void) return acct_gather_energy_type; } +/* 
slurm_get_acct_gather_profile_type + * get ProfileAccountingType from slurmctld_conf object + * RET char * - acct_gather_profile_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_profile_type(void) +{ + char *acct_gather_profile_type = NULL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + acct_gather_profile_type = + xstrdup(conf->acct_gather_profile_type); + slurm_conf_unlock(); + } + return acct_gather_profile_type; +} + +/* slurm_get_acct_gather_infiniband_type + * get InfinibandAccountingType from slurmctld_conf object + * RET char * - acct_gather_infiniband_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_infiniband_type(void) +{ + char *acct_gather_infiniband_type = NULL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + acct_gather_infiniband_type = + xstrdup(conf->acct_gather_infiniband_type); + slurm_conf_unlock(); + } + return acct_gather_infiniband_type; +} + +/* slurm_get_acct_gather_filesystem_type + * get FilesystemAccountingType from slurmctld_conf object + * RET char * - acct_gather_filesystem_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_filesystem_type(void) +{ + char *acct_gather_filesystem_type = NULL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + acct_gather_filesystem_type = + xstrdup(conf->acct_gather_filesystem_type); + slurm_conf_unlock(); + } + return acct_gather_filesystem_type; +} + + extern uint16_t slurm_get_acct_gather_node_freq(void) { uint16_t freq = 0; @@ -1405,6 +1483,39 @@ extern uint16_t slurm_get_acct_gather_node_freq(void) return freq; } +/* slurm_get_ext_sensors_type + * get ExtSensorsType from slurmctld_conf object + * RET char * - ext_sensors type, MUST be xfreed by caller + */ +char *slurm_get_ext_sensors_type(void) +{ + char *ext_sensors_type = NULL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + 
ext_sensors_type = + xstrdup(conf->ext_sensors_type); + slurm_conf_unlock(); + } + return ext_sensors_type; +} + +extern uint16_t slurm_get_ext_sensors_freq(void) +{ + uint16_t freq = 0; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + freq = conf->ext_sensors_freq; + slurm_conf_unlock(); + } + return freq; +} + /* slurm_get_jobcomp_type * returns the job completion logger type from slurmctld_conf object * RET char * - job completion type, MUST be xfreed by caller @@ -1536,6 +1647,25 @@ int slurm_set_jobcomp_port(uint32_t port) return 0; } +/* slurm_get_keep_alive_time + * returns keep_alive_time slurmctld_conf object + * RET uint16_t - keep_alive_time + */ +uint16_t slurm_get_keep_alive_time(void) +{ + uint16_t keep_alive_time = (uint16_t) NO_VAL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + keep_alive_time = conf->keep_alive_time; + slurm_conf_unlock(); + } + return keep_alive_time; +} + + /* slurm_get_kill_wait * returns kill_wait from slurmctld_conf object * RET uint16_t - kill_wait @@ -2034,7 +2164,10 @@ int slurm_shutdown_msg_conn(slurm_fd_t fd) */ slurm_fd_t slurm_open_msg_conn(slurm_addr_t * slurm_address) { - return _slurm_open_msg_conn(slurm_address); + slurm_fd_t fd = _slurm_open_msg_conn(slurm_address); + if (fd >= 0) + fd_set_close_on_exec(fd); + return fd; } /* Calls connect to make a connection-less datagram connection to the @@ -3236,6 +3369,7 @@ _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout) int retry = 0; List ret_list = NULL; int steps = 0; + int width; if (!req->forward.timeout) { if (!timeout) @@ -3251,8 +3385,11 @@ _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout) * to let the child timeout */ if (message_timeout < 0) message_timeout = slurm_get_msg_timeout() * 1000; - steps = (req->forward.cnt+1)/slurm_get_tree_width(); - timeout = (message_timeout*steps); + steps = req->forward.cnt + 1; + width = 
slurm_get_tree_width(); + if (width) + steps /= width; + timeout = (message_timeout * steps); steps++; timeout += (req->forward.timeout*steps); @@ -3533,8 +3670,6 @@ List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout) return ret_list; } else { itr = list_iterator_create(ret_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); while ((ret_data_info = list_next(itr))) if (!ret_data_info->node_name) { ret_data_info->node_name = xstrdup(name); @@ -3571,10 +3706,8 @@ int slurm_send_recv_rc_msg_only_one(slurm_msg_t *req, int *rc, int timeout) req->ret_list = NULL; req->forward_struct = NULL; - if ((fd = slurm_open_msg_conn(&req->address)) < 0) { + if ((fd = slurm_open_msg_conn(&req->address)) < 0) return -1; - } - if (!_send_and_recv_msg(fd, req, &resp, timeout)) { if (resp.auth_cred) g_slurm_auth_destroy(resp.auth_cred); @@ -3686,21 +3819,27 @@ extern int nodelist_find(const char *nodelist, const char *name) return id; } -extern void convert_num_unit(float num, char *buf, int buf_size, int orig_type) +extern void convert_num_unit2(float num, char *buf, int buf_size, int orig_type, + int divisor, bool exact) { char *unit = "\0KMGTP?"; - int i = (int)num % 512; + int i; if ((int)num == 0) { - snprintf(buf, buf_size, "%d", (int)num); - return; - } else if (i > 0) { - snprintf(buf, buf_size, "%d%c", (int)num, unit[orig_type]); - return; + snprintf(buf, buf_size, "%d", (int)num); + return; + } else if (exact) { + i = (int)num % (divisor / 2); + + if (i > 0) { + snprintf(buf, buf_size, "%d%c", + (int)num, unit[orig_type]); + return; + } } - while (num > 1024) { - num /= 1024; + while (num > divisor) { + num /= divisor; orig_type++; } @@ -3717,6 +3856,11 @@ extern void convert_num_unit(float num, char *buf, int buf_size, int orig_type) snprintf(buf, buf_size, "%.2f%c", num, unit[orig_type]); } +extern void convert_num_unit(float num, char *buf, int buf_size, int orig_type) +{ + convert_num_unit2(num, buf, buf_size, orig_type, 1024, true); 
+} + extern int revert_num_unit(const char *buf) { char *unit = "\0KMGTP\0"; diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index dd6b67f5dba122ec8699544dcd8ed23a5f12e584..140bb65feeae6ad184e1fbb7a626cbae7e9f8221 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -220,6 +220,13 @@ char *slurm_get_gres_plugins(void); */ char *slurm_get_job_submit_plugins(void); +/* slurm_get_slurmctld_plugstack + * get slurmctld_plugstack from slurmctld_conf object from + * slurmctld_conf object + * RET char * - slurmctld_plugstack, MUST be xfreed by caller + */ +char *slurm_get_slurmctld_plugstack(void); + /* slurm_get_plugin_dir * get plugin directory from slurmctld_conf object from slurmctld_conf object * RET char * - plugin directory, MUST be xfreed by caller @@ -406,7 +413,7 @@ int slurm_set_accounting_storage_host(char *host); /* slurm_get_accounting_storage_enforce * returns what level to enforce associations at */ -int slurm_get_accounting_storage_enforce(void); +uint16_t slurm_get_accounting_storage_enforce(void); /* slurm_get_is_association_based_accounting * returns if we are doing accounting by associations @@ -471,7 +478,7 @@ char *slurm_get_jobacct_gather_type(void); * returns the job accounting poll frequency from the slurmctld_conf object * RET int - job accounting frequency */ -uint16_t slurm_get_jobacct_gather_freq(void); +char *slurm_get_jobacct_gather_freq(void); /* slurm_get_jobcomp_type * returns the job completion logger type from slurmctld_conf object @@ -515,6 +522,12 @@ uint32_t slurm_get_jobcomp_port(void); */ int 
slurm_set_jobcomp_port(uint32_t port); +/* slurm_get_keep_alive_time + * returns keep_alive_time from slurmctld_conf object + * RET uint16_t - keep_alive_time + */ +uint16_t slurm_get_keep_alive_time(void); + /* slurm_get_kill_wait * returns kill_wait from slurmctld_conf object * RET uint16_t - kill_wait @@ -544,6 +557,25 @@ char *slurm_get_proctrack_type(void); */ char *slurm_get_acct_gather_energy_type(void); +/* slurm_get_acct_gather_profile_type + * get ProfileAccountingType from slurmctld_conf object + * RET char * - acct_gather_profile_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_profile_type(void); + +/* slurm_get_acct_gather_infiniband_type + * get InfinibandAccountingType from slurmctld_conf object + * RET char * - acct_gather_infiniband_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_infiniband_type(void); + +/* slurm_get_acct_gather_filesystem_type + * get FilesystemAccountingType from slurmctld_conf object + * RET char * - acct_gather_filesystem_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_filesystem_type(void); + + /* slurm_get_acct_gather_node_freq * returns the accounting poll frequency for requesting info from a * node from the slurmctld_conf object @@ -551,6 +583,19 @@ char *slurm_get_acct_gather_energy_type(void); */ extern uint16_t slurm_get_acct_gather_node_freq(void); +/* slurm_get_ext_sensors_type + * get ExtSensorsType from slurmctld_conf object + * RET char * - ext_sensors type, MUST be xfreed by caller + */ +char *slurm_get_ext_sensors_type(void); + +/* slurm_get_ext_sensors_freq + * returns the external sensors sampling frequency from the slurmctld_conf + * object for requesting info from a hardware component (node, switch, etc.) 
+ * RET int - external sensors sampling frequency + */ +extern uint16_t slurm_get_ext_sensors_freq(void); + /* slurm_get_root_filter * RET uint16_t - Value of SchedulerRootFilter */ extern uint16_t slurm_get_root_filter(void); @@ -1098,6 +1143,8 @@ extern void slurm_free_msg(slurm_msg_t * msg); /* must free this memory with free not xfree */ extern char *nodelist_nth_host(const char *nodelist, int inx); extern int nodelist_find(const char *nodelist, const char *name); +extern void convert_num_unit2(float num, char *buf, int buf_size, int orig_type, + int divisor, bool exact); extern void convert_num_unit(float num, char *buf, int buf_size, int orig_type); extern int revert_num_unit(const char *buf); extern void parse_int_to_array(int in, int *out); diff --git a/src/common/slurm_protocol_common.h b/src/common/slurm_protocol_common.h index bc1f2f7329f69e984a72138e0807ab27471af46c..14248e07bd33e2386c1ca2ec6bf27c74ee643eff 100644 --- a/src/common/slurm_protocol_common.h +++ b/src/common/slurm_protocol_common.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -70,7 +70,8 @@ * In slurm_protocol_util.c check_header_version(), and init_header() * need to be updated also when changes are added */ #define SLURM_PROTOCOL_VERSION ((SLURM_API_MAJOR << 8) | SLURM_API_AGE) -#define SLURM_2_5_PROTOCOL_VERSION SLURM_PROTOCOL_VERSION +#define SLURM_2_6_PROTOCOL_VERSION SLURM_PROTOCOL_VERSION +#define SLURM_2_5_PROTOCOL_VERSION ((25 << 8) | 0) #define SLURM_2_4_PROTOCOL_VERSION ((24 << 8) | 0) #define SLURM_2_3_PROTOCOL_VERSION ((23 << 8) | 0) #if 0 diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 8ff753dba40c86356b9c0cd5fbc5848e3de4a714..b8d25eac1d84b4665d0c10407939c0ccdd1bf694 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -149,7 +149,7 @@ extern char *slurm_add_slash_to_quotes(char *str) { char *dup, *copy = NULL; int len = 0; - if(!str || !(len = strlen(str))) + if (!str || !(len = strlen(str))) return NULL; /* make a buffer 2 times the size just to be safe */ @@ -311,6 +311,11 @@ extern void slurm_free_job_id_msg(job_id_msg_t * msg) xfree(msg); } +extern void slurm_free_job_user_id_msg(job_user_id_msg_t * msg) +{ + xfree(msg); +} + extern void slurm_free_job_step_id_msg(job_step_id_msg_t * msg) { xfree(msg); @@ -323,7 +328,11 @@ extern void slurm_free_job_id_request_msg(job_id_request_msg_t * msg) extern void slurm_free_update_step_msg(step_update_request_msg_t * msg) { - xfree(msg); + if (msg) { + jobacctinfo_destroy(msg->jobacct); + xfree(msg->name); + xfree(msg); + } } extern void slurm_free_job_id_response_msg(job_id_response_msg_t * msg) @@ -357,6 +366,14 @@ extern void slurm_free_node_info_request_msg(node_info_request_msg_t *msg) xfree(msg); } +extern void slurm_free_node_info_single_msg(node_info_single_msg_t *msg) +{ + if (msg) { + xfree(msg->node_name); + xfree(msg); + } +} + extern void slurm_free_part_info_request_msg(part_info_request_msg_t *msg) { xfree(msg); @@ -368,12 +385,15 @@ extern void slurm_free_job_desc_msg(job_desc_msg_t * msg) if (msg) { xfree(msg->account); + xfree(msg->acctg_freq); xfree(msg->alloc_node); if (msg->argv) { for (i = 0; i < msg->argc; i++) xfree(msg->argv[i]); } xfree(msg->argv); + FREE_NULL_BITMAP(msg->array_bitmap); + xfree(msg->array_inx); xfree(msg->blrtsimage); xfree(msg->ckpt_dir); xfree(msg->comment); @@ -418,6 +438,7 @@ extern void slurm_free_job_launch_msg(batch_job_launch_msg_t * msg) int i; if (msg) { + xfree(msg->acctg_freq); xfree(msg->alias_list); xfree(msg->nodes); xfree(msg->cpu_bind); @@ -505,6 +526,14 @@ extern void slurm_free_acct_gather_node_resp_msg( } } +extern void slurm_free_acct_gather_energy_req_msg( + acct_gather_energy_req_msg_t *msg) 
+{ + if (msg) { + xfree(msg); + } +} + extern void slurm_free_node_registration_status_msg( slurm_node_registration_status_msg_t * msg) { @@ -566,6 +595,7 @@ extern void slurm_free_resv_desc_msg(resv_desc_msg_t * msg) { if (msg) { xfree(msg->accounts); + xfree(msg->core_cnt); xfree(msg->features); xfree(msg->licenses); xfree(msg->name); @@ -681,6 +711,7 @@ extern void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg } xfree(msg->env); } + xfree(msg->acctg_freq); xfree(msg->alias_list); xfree(msg->cwd); xfree(msg->cpu_bind); @@ -695,7 +726,7 @@ extern void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg xfree(msg->spank_job_env[i]); } xfree(msg->spank_job_env); - if(msg->nnodes && msg->global_task_ids) + if (msg->nnodes && msg->global_task_ids) for(i=0; i<msg->nnodes; i++) { xfree(msg->global_task_ids[i]); } @@ -1148,7 +1179,7 @@ extern uint16_t log_string2num(char *name) * NOTE: Not reentrant */ extern char *sched_param_type_string(uint16_t select_type_param) { - static char select_str[64]; + static char select_str[128]; select_str[0] = '\0'; if ((select_type_param & CR_CPU) && @@ -1179,6 +1210,11 @@ extern char *sched_param_type_string(uint16_t select_type_param) strcat(select_str, ","); strcat(select_str, "CR_CORE_DEFAULT_DIST_BLOCK"); } + if (select_type_param & CR_ALLOCATE_FULL_SOCKET) { + if (select_str[0]) + strcat(select_str, ","); + strcat(select_str, "CR_ALLOCATE_FULL_SOCKET"); + } if (select_str[0] == '\0') strcat(select_str, "NONE"); @@ -1302,6 +1338,34 @@ extern char *trigger_res_type(uint16_t res_type) return "unknown"; } +/* Convert HealthCheckNodeState numeric value to a string. 
+ * Caller must xfree() the return value */ +extern char *health_check_node_state_str(uint16_t node_state) +{ + char *state_str = NULL; + + if (node_state == HEALTH_CHECK_NODE_ANY) { + state_str = xstrdup("ANY"); + return state_str; + } + + state_str = xstrdup(""); + if (node_state & HEALTH_CHECK_NODE_IDLE) + xstrcat(state_str, "IDLE"); + if (node_state & HEALTH_CHECK_NODE_ALLOC) { + if (state_str[0]) + xstrcat(state_str, ","); + xstrcat(state_str, "ALLOC"); + } + if (node_state & HEALTH_CHECK_NODE_MIXED) { + if (state_str[0]) + xstrcat(state_str, ","); + xstrcat(state_str, "MIXED"); + } + + return state_str; +} + extern char *trigger_type(uint32_t trig_type) { if (trig_type == TRIGGER_TYPE_UP) @@ -1731,6 +1795,16 @@ extern void accounting_enforce_string(uint16_t enforce, char *str, int str_len) strcat(str, ","); strcat(str, "limits"); //7 len } + if (enforce & ACCOUNTING_ENFORCE_NO_JOBS) { + if (str[0]) + strcat(str, ","); + strcat(str, "nojobs"); //7 len + } + if (enforce & ACCOUNTING_ENFORCE_NO_STEPS) { + if (str[0]) + strcat(str, ","); + strcat(str, "nosteps"); //8 len + } if (enforce & ACCOUNTING_ENFORCE_QOS) { if (str[0]) strcat(str, ","); @@ -1746,7 +1820,7 @@ extern void accounting_enforce_string(uint16_t enforce, char *str, int str_len) strcat(str, ","); strcat(str, "wckeys"); //7 len } - // total len 35 + // total len 50 if (str[0] == '\0') strcat(str, "none"); @@ -2090,6 +2164,10 @@ static void _free_all_front_end_info(front_end_info_msg_t *msg) extern void slurm_free_front_end_info_members(front_end_info_t * front_end) { if (front_end) { + xfree(front_end->allow_groups); + xfree(front_end->allow_users); + xfree(front_end->deny_groups); + xfree(front_end->deny_users); xfree(front_end->name); xfree(front_end->reason); } @@ -2279,7 +2357,7 @@ extern void slurm_free_job_step_stat(void *object) extern void slurm_free_job_step_pids(void *object) { job_step_pids_t *msg = (job_step_pids_t *)object; - if(msg) { + if (msg) { xfree(msg->node_name); xfree(msg->pid);
xfree(msg); @@ -2300,7 +2378,7 @@ extern void slurm_free_block_job_info(void *object) extern void slurm_free_block_info_members(block_info_t *block_info) { - if(block_info) { + if (block_info) { xfree(block_info->bg_block_id); xfree(block_info->blrtsimage); xfree(block_info->ionode_inx); @@ -2316,7 +2394,7 @@ extern void slurm_free_block_info_members(block_info_t *block_info) extern void slurm_free_block_info(block_info_t *block_info) { - if(block_info) { + if (block_info) { slurm_free_block_info_members(block_info); xfree(block_info); } @@ -2324,7 +2402,7 @@ extern void slurm_free_block_info(block_info_t *block_info) extern void slurm_free_block_info_msg(block_info_msg_t *block_info_msg) { - if(block_info_msg) { + if (block_info_msg) { if (block_info_msg->block_array) { int i; for(i=0; i<block_info_msg->record_count; i++) @@ -2369,7 +2447,7 @@ extern void slurm_destroy_association_shares_object(void *object) association_shares_object_t *obj_ptr = (association_shares_object_t *)object; - if(obj_ptr) { + if (obj_ptr) { xfree(obj_ptr->cluster); xfree(obj_ptr->name); xfree(obj_ptr->parent); @@ -2379,10 +2457,10 @@ extern void slurm_destroy_association_shares_object(void *object) extern void slurm_free_shares_request_msg(shares_request_msg_t *msg) { - if(msg) { - if(msg->acct_list) + if (msg) { + if (msg->acct_list) list_destroy(msg->acct_list); - if(msg->user_list) + if (msg->user_list) list_destroy(msg->user_list); xfree(msg); } @@ -2390,8 +2468,8 @@ extern void slurm_free_shares_request_msg(shares_request_msg_t *msg) extern void slurm_free_shares_response_msg(shares_response_msg_t *msg) { - if(msg) { - if(msg->assoc_shares_list) + if (msg) { + if (msg->assoc_shares_list) list_destroy(msg->assoc_shares_list); xfree(msg); } @@ -2414,10 +2492,10 @@ extern void slurm_destroy_priority_factors_object(void *object) extern void slurm_free_priority_factors_request_msg( priority_factors_request_msg_t *msg) { - if(msg) { - if(msg->job_id_list) + if (msg) { + if 
(msg->job_id_list) list_destroy(msg->job_id_list); - if(msg->uid_list) + if (msg->uid_list) list_destroy(msg->uid_list); xfree(msg); } @@ -2426,8 +2504,8 @@ extern void slurm_free_priority_factors_request_msg( extern void slurm_free_priority_factors_response_msg( priority_factors_response_msg_t *msg) { - if(msg) { - if(msg->priority_factors_list) + if (msg) { + if (msg->priority_factors_list) list_destroy(msg->priority_factors_list); xfree(msg); } @@ -2436,8 +2514,8 @@ extern void slurm_free_priority_factors_response_msg( extern void slurm_free_accounting_update_msg(accounting_update_msg_t *msg) { - if(msg) { - if(msg->update_list) + if (msg) { + if (msg->update_list) list_destroy(msg->update_list); xfree(msg); } @@ -2455,6 +2533,9 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data) case REQUEST_NODE_INFO: slurm_free_node_info_request_msg(data); break; + case REQUEST_NODE_INFO_SINGLE: + slurm_free_node_info_single_msg(data); + break; case REQUEST_PARTITION_INFO: slurm_free_part_info_request_msg(data); break; @@ -2550,6 +2631,9 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data) case REQUEST_JOB_INFO_SINGLE: slurm_free_job_id_msg(data); break; + case REQUEST_JOB_USER_INFO: + slurm_free_job_user_id_msg(data); + break; case REQUEST_SHARE_INFO: slurm_free_shares_request_msg(data); break; diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index a4b4e737f819ee1b93edfd07be3b8df9dad84443..544ec49c515884af8652a6da1b2245653f9f3e37 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -183,6 +183,8 @@ typedef enum { RESPONSE_PING_SLURMD, REQUEST_ACCT_GATHER_UPDATE, RESPONSE_ACCT_GATHER_UPDATE, + REQUEST_ACCT_GATHER_ENERGY, + RESPONSE_ACCT_GATHER_ENERGY, REQUEST_BUILD_INFO = 2001, RESPONSE_BUILD_INFO, @@ -222,6 +224,8 @@ typedef enum { RESPONSE_STATS_INFO, REQUEST_STATS_RESET, RESPONSE_STATS_RESET, + REQUEST_JOB_USER_INFO, + REQUEST_NODE_INFO_SINGLE, REQUEST_UPDATE_JOB = 3001, REQUEST_UPDATE_NODE, @@ -318,7 +322,8 @@ typedef enum { SRUN_EXEC, SRUN_STEP_MISSING, SRUN_REQUEST_SUSPEND, - SRUN_STEP_SIGNAL, /* BluegeneQ: srun forwards signal to runjob */ + SRUN_STEP_SIGNAL, /* for launch plugins aprun, poe and runjob, + * srun forwards signal to the launch command */ PMI_KVS_PUT_REQ = 7201, PMI_KVS_PUT_RESP, @@ -479,7 +484,7 @@ typedef struct job_step_kill_msg { uint32_t job_id; uint32_t job_step_id; uint16_t signal; - uint16_t batch_flag; + uint16_t flags; } job_step_kill_msg_t; typedef struct job_notify_msg { @@ -493,6 +498,11 @@ typedef struct job_id_msg { uint16_t show_flags; } job_id_msg_t; +typedef struct job_user_id_msg { + uint32_t user_id; + uint16_t show_flags; +} job_user_id_msg_t; + typedef struct job_step_id_msg { uint32_t job_id; uint32_t step_id; @@ -515,6 +525,11 @@ typedef struct node_info_request_msg { uint16_t show_flags; } node_info_request_msg_t; +typedef struct node_info_single_msg { + char *node_name; + uint16_t show_flags; +} node_info_single_msg_t; + typedef struct front_end_info_request_msg { time_t last_update; } front_end_info_request_msg_t; @@ -609,7 +624,9 @@ typedef struct job_step_specs { uint16_t immediate; /* 1 if allocate to run or fail immediately, * 0 if to be queued awaiting resources */ uint32_t job_id; /* job ID */ - uint32_t mem_per_cpu; /* MB memory required per CPU, 0=no limit */ + uint32_t pn_min_memory; /* minimum real memory per node OR + * real memory per CPU | MEM_PER_CPU, + * default=0 (use job limit) */ char *name; /* name of 
the job step, default "" */ char *network; /* network use spec */ uint32_t min_nodes; /* minimum number of nodes required by job, @@ -680,7 +697,7 @@ typedef struct launch_tasks_request_msg { 1 for "user manged" IO */ uint8_t open_mode; /* stdout/err append or truncate */ uint8_t pty; /* use pseudo tty */ - uint16_t acctg_freq; /* accounting polling interval */ + char *acctg_freq; /* accounting polling intervals */ uint32_t cpu_freq; /* requested cpu frequency */ /********** START "normal" IO only options **********/ @@ -694,6 +711,7 @@ typedef struct launch_tasks_request_msg { uint16_t *io_port; /* array of available client IO listen ports */ /********** END "normal" IO only options **********/ + uint32_t profile; char *task_prolog; char *task_epilog; @@ -781,6 +799,8 @@ typedef struct reattach_tasks_response_msg { } reattach_tasks_response_msg_t; typedef struct batch_job_launch_msg { + uint32_t array_job_id; /* job array master job ID */ + uint16_t array_task_id; /* job array ID or NO_VAL */ uint32_t job_id; uint32_t step_id; uint32_t uid; @@ -813,7 +833,7 @@ typedef struct batch_job_launch_msg { uint32_t pn_min_memory; /* minimum real memory per node OR * real memory per CPU | MEM_PER_CPU, * default=0 (no limit) */ - uint16_t acctg_freq; /* accounting polling interval */ + char *acctg_freq; /* accounting polling intervals */ uint32_t cpu_freq; /* requested cpu frequency */ uint32_t job_mem; /* memory limit for job */ uint16_t restart_cnt; /* batch job restart count */ @@ -1024,6 +1044,7 @@ extern void slurm_free_job_step_info_request_msg( extern void slurm_free_front_end_info_request_msg( front_end_info_request_msg_t *msg); extern void slurm_free_node_info_request_msg(node_info_request_msg_t *msg); +extern void slurm_free_node_info_single_msg(node_info_single_msg_t *msg); extern void slurm_free_part_info_request_msg(part_info_request_msg_t *msg); extern void slurm_free_stats_info_request_msg(stats_info_request_msg_t *msg); extern void 
slurm_free_stats_response_msg(stats_info_response_msg_t *msg); @@ -1058,6 +1079,7 @@ extern void slurm_free_job_info(job_info_t * job); extern void slurm_free_job_info_members(job_info_t * job); extern void slurm_free_job_id_msg(job_id_msg_t * msg); +extern void slurm_free_job_user_id_msg(job_user_id_msg_t * msg); extern void slurm_free_job_id_request_msg(job_id_request_msg_t * msg); extern void slurm_free_job_id_response_msg(job_id_response_msg_t * msg); @@ -1147,6 +1169,8 @@ extern void slurm_free_block_info_request_msg( block_info_request_msg_t *msg); extern void slurm_free_acct_gather_node_resp_msg( acct_gather_node_resp_msg_t *msg); +extern void slurm_free_acct_gather_energy_req_msg( + acct_gather_energy_req_msg_t *msg); extern void slurm_free_job_notify_msg(job_notify_msg_t * msg); extern void slurm_free_accounting_update_msg(accounting_update_msg_t *msg); @@ -1162,6 +1186,10 @@ extern uint16_t preempt_mode_num(const char *preempt_mode); extern char *log_num2string(uint16_t inx); extern uint16_t log_string2num(char *name); +/* Convert HealthCheckNodeState numeric value to a string. + * Caller must xfree() the return value */ +extern char *health_check_node_state_str(uint16_t node_state); + extern char *sched_param_type_string(uint16_t select_type_param); extern char *job_reason_string(enum job_state_reason inx); extern char *job_state_string(uint16_t inx); diff --git a/src/common/slurm_protocol_interface.h b/src/common/slurm_protocol_interface.h index 683d2965882e95baa4c588d66784cffe69e09bab..bf295e788813250dfe724ddb40d5cce7316b0940 100644 --- a/src/common/slurm_protocol_interface.h +++ b/src/common/slurm_protocol_interface.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 5db053f67ab81e146b4a7e337c6159c3ed45a3b5..590a04ef9e5ce46a97976021a4dd1031bd39b708 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -54,6 +54,7 @@ #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_ext_sensors.h" #include "src/common/pack.h" #include "src/common/read_config.h" #include "src/common/slurm_auth.h" @@ -127,6 +128,11 @@ static void _pack_job_ready_msg(job_id_msg_t * msg, Buf buffer, static int _unpack_job_ready_msg(job_id_msg_t ** msg_ptr, Buf buffer, uint16_t protocol_version); +static void _pack_job_user_msg(job_user_id_msg_t * msg, Buf buffer, + uint16_t protocol_version); +static int _unpack_job_user_msg(job_user_id_msg_t ** msg_ptr, Buf buffer, + uint16_t protocol_version); + static void _pack_resource_allocation_response_msg(resource_allocation_response_msg_t * msg, Buf buffer, @@ -160,6 +166,12 @@ static int _unpack_node_info_request_msg( node_info_request_msg_t ** msg, Buf bufer, uint16_t protocol_version); +static void _pack_node_info_single_msg(node_info_single_msg_t * msg, + Buf buffer, uint16_t protocol_version); + +static int _unpack_node_info_single_msg(node_info_single_msg_t ** msg, + Buf buffer, uint16_t protocol_version); + static int _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer, uint16_t protocol_version); static int _unpack_node_info_members(node_info_t * node, 
Buf buffer, @@ -218,6 +230,12 @@ static int _unpack_acct_gather_node_resp_msg(acct_gather_node_resp_msg_t ** msg, Buf buffer, uint16_t protocol_version); +static void _pack_acct_gather_energy_req(acct_gather_energy_req_msg_t *msg, + Buf buffer, uint16_t protocol_version); +static int _unpack_acct_gather_energy_req(acct_gather_energy_req_msg_t **msg, + Buf buffer, + uint16_t protocol_version); + static void _pack_part_info_request_msg(part_info_request_msg_t * msg, Buf buffer, uint16_t protocol_version); static int _unpack_part_info_request_msg(part_info_request_msg_t ** msg, @@ -712,6 +730,11 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) msg->data, buffer, msg->protocol_version); break; + case REQUEST_NODE_INFO_SINGLE: + _pack_node_info_single_msg((node_info_single_msg_t *) + msg->data, buffer, + msg->protocol_version); + break; case REQUEST_PARTITION_INFO: _pack_part_info_request_msg((part_info_request_msg_t *) msg->data, buffer, @@ -749,6 +772,7 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) msg->protocol_version); break; case RESPONSE_ACCT_GATHER_UPDATE: + case RESPONSE_ACCT_GATHER_ENERGY: _pack_acct_gather_node_resp_msg( (acct_gather_node_resp_msg_t *) msg->data, buffer, msg->protocol_version); @@ -787,6 +811,11 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) case REQUEST_TOPO_INFO: /* Message contains no body/information */ break; + case REQUEST_ACCT_GATHER_ENERGY: + _pack_acct_gather_energy_req( + (acct_gather_energy_req_msg_t *)msg->data, + buffer, msg->protocol_version); + break; case REQUEST_REBOOT_NODES: _pack_reboot_msg((reboot_msg_t *)msg->data, buffer, msg->protocol_version); @@ -1091,6 +1120,11 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) msg->protocol_version); break; + case REQUEST_JOB_USER_INFO: + _pack_job_user_msg((job_user_id_msg_t *)msg->data, buffer, + msg->protocol_version); + break; + case REQUEST_SHARE_INFO: _pack_shares_request_msg((shares_request_msg_t *)msg->data, buffer, @@ -1258,6 +1292,11 @@ unpack_msg(slurm_msg_t * msg, Buf 
buffer) & (msg->data), buffer, msg->protocol_version); break; + case REQUEST_NODE_INFO_SINGLE: + rc = _unpack_node_info_single_msg((node_info_single_msg_t **) + & (msg->data), buffer, + msg->protocol_version); + break; case REQUEST_PARTITION_INFO: rc = _unpack_part_info_request_msg((part_info_request_msg_t **) & (msg->data), buffer, @@ -1302,6 +1341,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) msg->protocol_version); break; case RESPONSE_ACCT_GATHER_UPDATE: + case RESPONSE_ACCT_GATHER_ENERGY: rc = _unpack_acct_gather_node_resp_msg( (acct_gather_node_resp_msg_t **)&(msg->data), buffer, msg->protocol_version); @@ -1341,6 +1381,11 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) case REQUEST_TOPO_INFO: /* Message contains no body/information */ break; + case REQUEST_ACCT_GATHER_ENERGY: + rc = _unpack_acct_gather_energy_req( + (acct_gather_energy_req_msg_t **) & (msg->data), + buffer, msg->protocol_version); + break; case REQUEST_REBOOT_NODES: rc = _unpack_reboot_msg((reboot_msg_t **) & (msg->data), buffer, msg->protocol_version); @@ -1669,6 +1714,13 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) & msg->data, buffer, msg->protocol_version); break; + + case REQUEST_JOB_USER_INFO: + rc = _unpack_job_user_msg((job_user_id_msg_t **) + &msg->data, buffer, + msg->protocol_version); + break; + case REQUEST_SHARE_INFO: rc = _unpack_shares_request_msg( (shares_request_msg_t **)&msg->data, @@ -2459,6 +2511,36 @@ unpack_error: return SLURM_ERROR; } +static void +_pack_acct_gather_energy_req(acct_gather_energy_req_msg_t *msg, + Buf buffer, uint16_t protocol_version) +{ + xassert(msg != NULL); + pack16(msg->delta, buffer); +} + +static int +_unpack_acct_gather_energy_req(acct_gather_energy_req_msg_t **msg, + Buf buffer, uint16_t protocol_version) +{ + acct_gather_energy_req_msg_t *msg_ptr; + + xassert(msg != NULL); + + msg_ptr = xmalloc(sizeof(acct_gather_energy_req_msg_t)); + *msg = msg_ptr; + + safe_unpack16(&msg_ptr->delta, buffer); + + return SLURM_SUCCESS; + +unpack_error: + 
slurm_free_acct_gather_energy_req_msg(msg_ptr); + *msg = NULL; + return SLURM_ERROR; + +} + static void _pack_node_registration_status_msg(slurm_node_registration_status_msg_t * msg, Buf buffer, @@ -3006,7 +3088,45 @@ _unpack_node_info_members(node_info_t * node, Buf buffer, xassert(node != NULL); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpackstr_xmalloc(&node->name, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node->node_hostname, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&node->node_addr, &uint32_tmp, buffer); + safe_unpack16(&node->node_state, buffer); + safe_unpack16(&node->cpus, buffer); + safe_unpack16(&node->boards, buffer); + safe_unpack16(&node->sockets, buffer); + safe_unpack16(&node->cores, buffer); + safe_unpack16(&node->threads, buffer); + + safe_unpack32(&node->real_memory, buffer); + safe_unpack32(&node->tmp_disk, buffer); + safe_unpack32(&node->cpu_load, buffer); + safe_unpack32(&node->weight, buffer); + safe_unpack32(&node->reason_uid, buffer); + + safe_unpack_time(&node->boot_time, buffer); + safe_unpack_time(&node->reason_time, buffer); + safe_unpack_time(&node->slurmd_start_time, buffer); + + select_g_select_nodeinfo_unpack(&node->select_nodeinfo, buffer, + protocol_version); + + safe_unpackstr_xmalloc(&node->arch, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node->features, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node->gres, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node->os, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node->reason, &uint32_tmp, buffer); + if (acct_gather_energy_unpack(&node->energy, buffer, + protocol_version) + != SLURM_SUCCESS) + goto unpack_error; + if (ext_sensors_data_unpack(&node->ext_sensors, buffer, + protocol_version) + != SLURM_SUCCESS) + goto unpack_error; + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&node->name, &uint32_tmp, buffer); 
safe_unpackstr_xmalloc(&node->node_hostname, &uint32_tmp, buffer); @@ -3087,7 +3207,27 @@ _pack_update_partition_msg(update_part_msg_t * msg, Buf buffer, { xassert(msg != NULL); - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + packstr(msg->allow_groups, buffer); + packstr(msg->alternate, buffer); + pack32(msg-> grace_time, buffer); + pack32(msg-> max_time, buffer); + pack32(msg-> default_time, buffer); + pack32(msg-> max_nodes, buffer); + pack32(msg-> min_nodes, buffer); + pack32(msg-> max_cpus_per_node, buffer); + pack32(msg-> def_mem_per_cpu, buffer); + pack32(msg-> max_mem_per_cpu, buffer); + packstr(msg->name, buffer); + packstr(msg->nodes, buffer); + pack16(msg-> flags, buffer); + pack16(msg-> max_share, buffer); + pack16(msg-> preempt_mode, buffer); + pack16(msg-> priority, buffer); + pack16(msg-> state_up, buffer); + + packstr(msg->allow_alloc_nodes, buffer); + } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { packstr(msg->allow_groups, buffer); packstr(msg->alternate, buffer); pack32(msg-> grace_time, buffer); @@ -3125,7 +3265,30 @@ _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer, tmp_ptr = xmalloc(sizeof(update_part_msg_t)); *msg = tmp_ptr; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpackstr_xmalloc(&tmp_ptr->allow_groups, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&tmp_ptr->alternate, &uint32_tmp, buffer); + safe_unpack32(&tmp_ptr->grace_time, buffer); + safe_unpack32(&tmp_ptr->max_time, buffer); + safe_unpack32(&tmp_ptr->default_time, buffer); + safe_unpack32(&tmp_ptr->max_nodes, buffer); + safe_unpack32(&tmp_ptr->min_nodes, buffer); + safe_unpack32(&tmp_ptr->max_cpus_per_node, buffer); + safe_unpack32(&tmp_ptr->def_mem_per_cpu, buffer); + safe_unpack32(&tmp_ptr->max_mem_per_cpu, buffer); + safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer); + 
safe_unpackstr_xmalloc(&tmp_ptr->nodes, &uint32_tmp, buffer); + + safe_unpack16(&tmp_ptr->flags, buffer); + safe_unpack16(&tmp_ptr->max_share, buffer); + safe_unpack16(&tmp_ptr->preempt_mode, buffer); + safe_unpack16(&tmp_ptr->priority, buffer); + safe_unpack16(&tmp_ptr->state_up, buffer); + + safe_unpackstr_xmalloc(&tmp_ptr->allow_alloc_nodes, + &uint32_tmp, buffer); + } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&tmp_ptr->allow_groups, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&tmp_ptr->alternate, &uint32_tmp, buffer); @@ -3164,10 +3327,10 @@ static void _pack_update_resv_msg(resv_desc_msg_t * msg, Buf buffer, uint16_t protocol_version) { - uint32_t array_len; + uint32_t array_len, core_cnt = 0; xassert(msg != NULL); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { packstr(msg->name, buffer); pack_time(msg->start_time, buffer); pack_time(msg->end_time, buffer); @@ -3182,7 +3345,15 @@ _pack_update_resv_msg(resv_desc_msg_t * msg, Buf buffer, } else array_len = 0; pack32_array(msg->node_cnt, array_len, buffer); - pack32(msg->core_cnt, buffer); + if (msg->core_cnt) { + for (array_len = 0; msg->core_cnt[array_len]; + array_len++) { + /* determine array length */ + } + array_len++; /* Include trailing zero */ + } else + array_len = 0; + pack32_array(msg->core_cnt, array_len, buffer); packstr(msg->node_list, buffer); packstr(msg->features, buffer); packstr(msg->licenses, buffer); @@ -3190,7 +3361,7 @@ _pack_update_resv_msg(resv_desc_msg_t * msg, Buf buffer, packstr(msg->users, buffer); packstr(msg->accounts, buffer); - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { packstr(msg->name, buffer); pack_time(msg->start_time, buffer); pack_time(msg->end_time, buffer); @@ -3205,7 +3376,13 @@ _pack_update_resv_msg(resv_desc_msg_t * msg, Buf buffer, } else array_len = 0; pack32_array(msg->node_cnt, 
array_len, buffer); - pack32(msg->core_cnt, buffer); + if (msg->core_cnt) { + for (array_len = 0; msg->core_cnt[array_len]; + array_len++) { + core_cnt += msg->core_cnt[array_len]; + } + } + pack32(core_cnt, buffer); packstr(msg->node_list, buffer); packstr(msg->features, buffer); packstr(msg->licenses, buffer); @@ -3213,16 +3390,28 @@ _pack_update_resv_msg(resv_desc_msg_t * msg, Buf buffer, packstr(msg->users, buffer); packstr(msg->accounts, buffer); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { packstr(msg->name, buffer); pack_time(msg->start_time, buffer); pack_time(msg->end_time, buffer); pack32(msg->duration, buffer); pack16(msg->flags, buffer); - if (msg->node_cnt) - pack32(msg->node_cnt[0], buffer); - else - pack32(NO_VAL, buffer); + if (msg->node_cnt) { + for (array_len = 0; msg->node_cnt[array_len]; + array_len++) { + /* determine array length */ + } + array_len++; /* Include trailing zero */ + } else + array_len = 0; + pack32_array(msg->node_cnt, array_len, buffer); + if (msg->core_cnt) { + for (array_len = 0; msg->core_cnt[array_len]; + array_len++) { + core_cnt += msg->core_cnt[array_len]; + } + } + pack32(core_cnt, buffer); packstr(msg->node_list, buffer); packstr(msg->features, buffer); packstr(msg->licenses, buffer); @@ -3249,7 +3438,7 @@ _unpack_update_resv_msg(resv_desc_msg_t ** msg, Buf buffer, tmp_ptr = xmalloc(sizeof(resv_desc_msg_t)); *msg = tmp_ptr; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer); safe_unpack_time(&tmp_ptr->start_time, buffer); safe_unpack_time(&tmp_ptr->end_time, buffer); @@ -3264,7 +3453,15 @@ _unpack_update_resv_msg(resv_desc_msg_t ** msg, Buf buffer, /* This avoids a pointer to a zero length buffer */ xfree(tmp_ptr->node_cnt); } - safe_unpack32(&tmp_ptr->core_cnt, buffer); +
safe_unpack32_array(&tmp_ptr->core_cnt, &uint32_tmp, buffer); + if (uint32_tmp > 0) { + /* Must be zero terminated */ + if (tmp_ptr->core_cnt[uint32_tmp-1] != 0) + goto unpack_error; + } else { + /* This avoids a pointer to a zero length buffer */ + xfree(tmp_ptr->core_cnt); + } safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&tmp_ptr->features, @@ -3278,22 +3475,23 @@ _unpack_update_resv_msg(resv_desc_msg_t ** msg, Buf buffer, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&tmp_ptr->accounts, &uint32_tmp, buffer); - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer); safe_unpack_time(&tmp_ptr->start_time, buffer); safe_unpack_time(&tmp_ptr->end_time, buffer); safe_unpack32(&tmp_ptr->duration, buffer); safe_unpack16(&tmp_ptr->flags, buffer); safe_unpack32_array(&tmp_ptr->node_cnt, &uint32_tmp, buffer); if (uint32_tmp > 0) { /* Must be zero terminated */ if (tmp_ptr->node_cnt[uint32_tmp-1] != 0) goto unpack_error; } else { /* This avoids a pointer to a zero length buffer */ xfree(tmp_ptr->node_cnt); } - safe_unpack32(&tmp_ptr->core_cnt, buffer); + tmp_ptr->core_cnt = xmalloc(sizeof(uint32_t) * 2); + safe_unpack32(&tmp_ptr->core_cnt[0], buffer); safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&tmp_ptr->features, @@ -3307,18 +3505,23 @@ _unpack_update_resv_msg(resv_desc_msg_t ** msg, Buf buffer, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&tmp_ptr->accounts, &uint32_tmp, buffer); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { - uint32_t node_cnt; + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer); safe_unpack_time(&tmp_ptr->start_time, buffer); safe_unpack_time(&tmp_ptr->end_time, buffer);
safe_unpack32(&tmp_ptr->duration, buffer); safe_unpack16(&tmp_ptr->flags, buffer); - safe_unpack32(&node_cnt, buffer); - if (node_cnt != NO_VAL) { - tmp_ptr->node_cnt = xmalloc(sizeof(uint32_t) * 2); - tmp_ptr->node_cnt[0] = node_cnt; + safe_unpack32_array(&tmp_ptr->node_cnt, &uint32_tmp, buffer); + if (uint32_tmp > 0) { + /* Must be zero terminated */ + if (tmp_ptr->node_cnt[uint32_tmp-1] != 0) + goto unpack_error; + } else { + /* This avoids a pointer to a zero length buffer */ + xfree(tmp_ptr->node_cnt); } + tmp_ptr->core_cnt = xmalloc(sizeof(uint32_t) * 2); + safe_unpack32(&tmp_ptr->core_cnt[0], buffer); safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&tmp_ptr->features, @@ -3444,7 +3647,7 @@ pack_job_step_create_request_msg(job_step_create_request_msg_t * msg, pack32(msg->cpu_count, buffer); pack32(msg->cpu_freq, buffer); pack32(msg->num_tasks, buffer); - pack32(msg->mem_per_cpu, buffer); + pack32(msg->pn_min_memory, buffer); pack32(msg->time_limit, buffer); pack16(msg->relative, buffer); @@ -3473,7 +3676,7 @@ pack_job_step_create_request_msg(job_step_create_request_msg_t * msg, pack32(msg->max_nodes, buffer); pack32(msg->cpu_count, buffer); pack32(msg->num_tasks, buffer); - pack32(msg->mem_per_cpu, buffer); + pack32(msg->pn_min_memory, buffer); pack32(msg->time_limit, buffer); pack16(msg->relative, buffer); @@ -3522,7 +3725,7 @@ unpack_job_step_create_request_msg(job_step_create_request_msg_t ** msg, safe_unpack32(&(tmp_ptr->cpu_count), buffer); safe_unpack32(&(tmp_ptr->cpu_freq), buffer); safe_unpack32(&(tmp_ptr->num_tasks), buffer); - safe_unpack32(&(tmp_ptr->mem_per_cpu), buffer); + safe_unpack32(&(tmp_ptr->pn_min_memory), buffer); safe_unpack32(&(tmp_ptr->time_limit), buffer); safe_unpack16(&(tmp_ptr->relative), buffer); @@ -3555,7 +3758,7 @@ unpack_job_step_create_request_msg(job_step_create_request_msg_t ** msg, safe_unpack32(&(tmp_ptr->max_nodes), buffer); safe_unpack32(&(tmp_ptr->cpu_count), buffer); 
safe_unpack32(&(tmp_ptr->num_tasks), buffer); - safe_unpack32(&(tmp_ptr->mem_per_cpu), buffer); + safe_unpack32(&(tmp_ptr->pn_min_memory), buffer); safe_unpack32(&(tmp_ptr->time_limit), buffer); safe_unpack16(&(tmp_ptr->relative), buffer); @@ -3896,7 +4099,42 @@ _unpack_partition_info_members(partition_info_t * part, Buf buffer, uint32_t uint32_tmp; char *node_inx_str = NULL; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpackstr_xmalloc(&part->name, &uint32_tmp, buffer); + if (part->name == NULL) + part->name = xmalloc(1);/* part->name = "" implicit */ + safe_unpack32(&part->grace_time, buffer); + safe_unpack32(&part->max_time, buffer); + safe_unpack32(&part->default_time, buffer); + safe_unpack32(&part->max_nodes, buffer); + safe_unpack32(&part->min_nodes, buffer); + safe_unpack32(&part->total_nodes, buffer); + safe_unpack32(&part->total_cpus, buffer); + safe_unpack32(&part->def_mem_per_cpu, buffer); + safe_unpack32(&part->max_cpus_per_node, buffer); + safe_unpack32(&part->max_mem_per_cpu, buffer); + safe_unpack16(&part->flags, buffer); + safe_unpack16(&part->max_share, buffer); + safe_unpack16(&part->preempt_mode, buffer); + safe_unpack16(&part->priority, buffer); + safe_unpack16(&part->state_up, buffer); + safe_unpack16(&part->cr_type , buffer); + + safe_unpackstr_xmalloc(&part->allow_groups, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&part->allow_alloc_nodes, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&part->alternate, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&part->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + if (node_inx_str == NULL) + part->node_inx = bitfmt2int(""); + else { + part->node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + node_inx_str = NULL; + } + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&part->name, &uint32_tmp, buffer); if (part->name == NULL) 
part->name = xmalloc(1);/* part->name = "" implicit */ @@ -3909,12 +4147,13 @@ _unpack_partition_info_members(partition_info_t * part, Buf buffer, safe_unpack32(&part->total_cpus, buffer); safe_unpack32(&part->def_mem_per_cpu, buffer); safe_unpack32(&part->max_mem_per_cpu, buffer); + safe_unpack16(&part->flags, buffer); safe_unpack16(&part->max_share, buffer); safe_unpack16(&part->preempt_mode, buffer); safe_unpack16(&part->priority, buffer); + safe_unpack16(&part->state_up, buffer); - safe_unpack16(&part->state_up, buffer); safe_unpackstr_xmalloc(&part->allow_groups, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&part->allow_alloc_nodes, &uint32_tmp, @@ -4054,7 +4293,9 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer, uint32_t uint32_tmp = 0; char *node_inx_str; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack32(&step->array_job_id, buffer); + safe_unpack16(&step->array_task_id, buffer); safe_unpack32(&step->job_id, buffer); safe_unpack32(&step->step_id, buffer); safe_unpack16(&step->ckpt_interval, buffer); @@ -4063,6 +4304,7 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer, safe_unpack32(&step->cpu_freq, buffer); safe_unpack32(&step->num_tasks, buffer); safe_unpack32(&step->time_limit, buffer); + safe_unpack16(&step->state, buffer); safe_unpack_time(&step->start_time, buffer); safe_unpack_time(&step->run_time, buffer); @@ -4084,12 +4326,13 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer, if (select_g_select_jobinfo_unpack(&step->select_jobinfo, buffer, protocol_version)) goto unpack_error; - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpack32(&step->job_id, buffer); safe_unpack32(&step->step_id, buffer); safe_unpack16(&step->ckpt_interval, buffer); safe_unpack32(&step->user_id, buffer); safe_unpack32(&step->num_cpus, buffer); + 
safe_unpack32(&step->cpu_freq, buffer); safe_unpack32(&step->num_tasks, buffer); safe_unpack32(&step->time_limit, buffer); @@ -4113,39 +4356,98 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer, if (select_g_select_jobinfo_unpack(&step->select_jobinfo, buffer, protocol_version)) goto unpack_error; - } else { - error("_unpack_job_step_info_members: protocol_version " - "%hu not supported", protocol_version); - goto unpack_error; - } - - return SLURM_SUCCESS; - -unpack_error: - /* no need to free here. (we will just be freeing it 2 times - since this is freed in _unpack_job_step_info_response_msg - */ - //slurm_free_job_step_info_members(step); - return SLURM_ERROR; -} - -static int -_unpack_job_step_info_response_msg(job_step_info_response_msg_t** msg, - Buf buffer, - uint16_t protocol_version) -{ - int i = 0; - job_step_info_t *step; - - xassert(msg != NULL); - *msg = xmalloc(sizeof(job_step_info_response_msg_t)); - - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { - safe_unpack_time(&(*msg)->last_update, buffer); - safe_unpack32(&(*msg)->job_step_count, buffer); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + safe_unpack32(&step->job_id, buffer); + safe_unpack32(&step->step_id, buffer); + safe_unpack16(&step->ckpt_interval, buffer); + safe_unpack32(&step->user_id, buffer); + safe_unpack32(&step->num_cpus, buffer); + safe_unpack32(&step->cpu_freq, buffer); //NLK Don Power okay + safe_unpack32(&step->num_tasks, buffer); + safe_unpack32(&step->time_limit, buffer); - step = (*msg)->job_steps = xmalloc(sizeof(job_step_info_t) - * (*msg)->job_step_count); + safe_unpack_time(&step->start_time, buffer); + safe_unpack_time(&step->run_time, buffer); + + safe_unpackstr_xmalloc(&step->partition, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->resv_ports, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->name, &uint32_tmp, buffer); + 
safe_unpackstr_xmalloc(&step->network, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->ckpt_dir, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->gres, &uint32_tmp, buffer); + if (node_inx_str == NULL) + step->node_inx = bitfmt2int(""); + else { + step->node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + } + if (select_g_select_jobinfo_unpack(&step->select_jobinfo, + buffer, protocol_version)) + goto unpack_error; + } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + safe_unpack32(&step->job_id, buffer); + safe_unpack32(&step->step_id, buffer); + safe_unpack16(&step->ckpt_interval, buffer); + safe_unpack32(&step->user_id, buffer); + safe_unpack32(&step->num_cpus, buffer); + safe_unpack32(&step->num_tasks, buffer); + safe_unpack32(&step->time_limit, buffer); + + safe_unpack_time(&step->start_time, buffer); + safe_unpack_time(&step->run_time, buffer); + + safe_unpackstr_xmalloc(&step->partition, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->resv_ports, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->name, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->network, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->ckpt_dir, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->gres, &uint32_tmp, buffer); + if (node_inx_str == NULL) + step->node_inx = bitfmt2int(""); + else { + step->node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + } + if (select_g_select_jobinfo_unpack(&step->select_jobinfo, + buffer, protocol_version)) + goto unpack_error; + } else { + error("_unpack_job_step_info_members: protocol_version " + "%hu not supported", protocol_version); + goto unpack_error; + } + + return SLURM_SUCCESS; + +unpack_error: + /* no need to free here. 
(we will just be freeing it 2 times + since this is freed in _unpack_job_step_info_response_msg + */ + //slurm_free_job_step_info_members(step); + return SLURM_ERROR; +} + +static int +_unpack_job_step_info_response_msg(job_step_info_response_msg_t** msg, + Buf buffer, + uint16_t protocol_version) +{ + int i = 0; + job_step_info_t *step; + + xassert(msg != NULL); + *msg = xmalloc(sizeof(job_step_info_response_msg_t)); + + if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + safe_unpack_time(&(*msg)->last_update, buffer); + safe_unpack32(&(*msg)->job_step_count, buffer); + + step = (*msg)->job_steps = xmalloc(sizeof(job_step_info_t) + * (*msg)->job_step_count); for (i = 0; i < (*msg)->job_step_count; i++) if (_unpack_job_step_info_members(&step[i], buffer, @@ -4182,7 +4484,7 @@ _unpack_job_info_msg(job_info_msg_t ** msg, Buf buffer, *msg = xmalloc(sizeof(job_info_msg_t)); /* load buffer's header (data structure version and time) */ - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack32(&((*msg)->record_count), buffer); safe_unpack_time(&((*msg)->last_update), buffer); job = (*msg)->job_array = @@ -4221,7 +4523,127 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, char *node_inx_str; multi_core_data_t *mc_ptr; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + job->ntasks_per_node = (uint16_t)NO_VAL; + + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack32(&job->array_job_id, buffer); + safe_unpack16(&job->array_task_id, buffer); + safe_unpack32(&job->assoc_id, buffer); + safe_unpack32(&job->job_id, buffer); + safe_unpack32(&job->user_id, buffer); + safe_unpack32(&job->group_id, buffer); + safe_unpack32(&job->profile, buffer); + + safe_unpack16(&job->job_state, buffer); + safe_unpack16(&job->batch_flag, buffer); + safe_unpack16(&job->state_reason, buffer); + safe_unpack16(&job->restart_cnt, buffer); + safe_unpack16(&job->show_flags, buffer); + + 
safe_unpack32(&job->alloc_sid, buffer); + safe_unpack32(&job->time_limit, buffer); + safe_unpack32(&job->time_min, buffer); + + safe_unpack16(&job->nice, buffer); + + safe_unpack_time(&job->submit_time, buffer); + safe_unpack_time(&job->eligible_time, buffer); + safe_unpack_time(&job->start_time, buffer); + safe_unpack_time(&job->end_time, buffer); + safe_unpack_time(&job->suspend_time, buffer); + safe_unpack_time(&job->pre_sus_time, buffer); + safe_unpack_time(&job->resize_time, buffer); + safe_unpack_time(&job->preempt_time, buffer); + safe_unpack32(&job->priority, buffer); + safe_unpackstr_xmalloc(&job->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->partition, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->account, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->network, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->comment, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->gres, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->batch_host, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->batch_script, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->qos, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->licenses, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->state_desc, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->resv_name, &uint32_tmp, buffer); + + safe_unpack32(&job->exit_code, buffer); + safe_unpack32(&job->derived_ec, buffer); + unpack_job_resources(&job->job_resrcs, buffer, + protocol_version); + + safe_unpackstr_xmalloc(&job->name, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->wckey, &uint32_tmp, buffer); + safe_unpack32(&job->req_switch, buffer); + safe_unpack32(&job->wait4switch, buffer); + + safe_unpackstr_xmalloc(&job->alloc_node, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + if (node_inx_str == NULL) + job->node_inx = bitfmt2int(""); + else { + job->node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + } + + if 
(select_g_select_jobinfo_unpack(&job->select_jobinfo, + buffer, protocol_version)) + goto unpack_error; + + /*** unpack default job details ***/ + safe_unpackstr_xmalloc(&job->features, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->work_dir, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->dependency, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->command, &uint32_tmp, buffer); + + safe_unpack32(&job->num_cpus, buffer); + safe_unpack32(&job->max_cpus, buffer); + safe_unpack32(&job->num_nodes, buffer); + safe_unpack32(&job->max_nodes, buffer); + safe_unpack16(&job->requeue, buffer); + safe_unpack16(&job->ntasks_per_node, buffer); + + /*** unpack pending job details ***/ + safe_unpack16(&job->shared, buffer); + safe_unpack16(&job->contiguous, buffer); + safe_unpack16(&job->cpus_per_task, buffer); + safe_unpack16(&job->pn_min_cpus, buffer); + + safe_unpack32(&job->pn_min_memory, buffer); + safe_unpack32(&job->pn_min_tmp_disk, buffer); + + safe_unpackstr_xmalloc(&job->req_nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + if (node_inx_str == NULL) + job->req_node_inx = bitfmt2int(""); + else { + job->req_node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + } + safe_unpackstr_xmalloc(&job->exc_nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + if (node_inx_str == NULL) + job->exc_node_inx = bitfmt2int(""); + else { + job->exc_node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + } + + if (unpack_multi_core_data(&mc_ptr, buffer, protocol_version)) + goto unpack_error; + if (mc_ptr) { + job->boards_per_node = mc_ptr->boards_per_node; + job->sockets_per_board = mc_ptr->sockets_per_board; + job->sockets_per_node = mc_ptr->sockets_per_node; + job->cores_per_socket = mc_ptr->cores_per_socket; + job->threads_per_core = mc_ptr->threads_per_core; + job->ntasks_per_board = mc_ptr->ntasks_per_board; + job->ntasks_per_socket = mc_ptr->ntasks_per_socket; + 
job->ntasks_per_core = mc_ptr->ntasks_per_core; + xfree(mc_ptr); + } + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpack32(&job->assoc_id, buffer); safe_unpack32(&job->job_id, buffer); safe_unpack32(&job->user_id, buffer); @@ -4336,7 +4758,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, job->ntasks_per_core = mc_ptr->ntasks_per_core; xfree(mc_ptr); } - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack32(&job->assoc_id, buffer); safe_unpack32(&job->job_id, buffer); safe_unpack32(&job->user_id, buffer); @@ -4465,9 +4887,10 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, uint16_t protocol_version) { uint32_t count = NO_VAL; + uint16_t uint16 = 0; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { pack_time(build_ptr->last_update, buffer); pack16(build_ptr->accounting_storage_enforce, buffer); @@ -4479,7 +4902,10 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->accounting_storage_user, buffer); pack16(build_ptr->acctng_store_job_comment, buffer); packstr(build_ptr->acct_gather_energy_type, buffer); + packstr(build_ptr->acct_gather_filesystem_type, buffer); + packstr(build_ptr->acct_gather_infiniband_type, buffer); pack16(build_ptr->acct_gather_node_freq, buffer); + packstr(build_ptr->acct_gather_profile_type, buffer); packstr(build_ptr->authtype, buffer); @@ -4498,11 +4924,14 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack32(build_ptr->def_mem_per_cpu, buffer); pack32(build_ptr->debug_flags, buffer); pack16(build_ptr->disable_root_jobs, buffer); + pack16(build_ptr->dynalloc_port, buffer); pack16(build_ptr->enforce_part_limits, buffer); packstr(build_ptr->epilog, buffer); pack32(build_ptr->epilog_msg_time, buffer); 
packstr(build_ptr->epilog_slurmctld, buffer); + packstr(build_ptr->ext_sensors_type, buffer); + pack16(build_ptr->ext_sensors_freq, buffer); pack16(build_ptr->fast_schedule, buffer); pack32(build_ptr->first_job_id, buffer); @@ -4514,11 +4943,12 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack32(build_ptr->hash_val, buffer); pack16(build_ptr->health_check_interval, buffer); + pack16(build_ptr->health_check_node_state, buffer); packstr(build_ptr->health_check_program, buffer); pack16(build_ptr->inactive_limit, buffer); - pack16(build_ptr->job_acct_gather_freq, buffer); + packstr(build_ptr->job_acct_gather_freq, buffer); packstr(build_ptr->job_acct_gather_type, buffer); packstr(build_ptr->job_ckpt_dir, buffer); @@ -4535,6 +4965,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->job_requeue, buffer); packstr(build_ptr->job_submit_plugins, buffer); + pack16(build_ptr->keep_alive_time, buffer); pack16(build_ptr->kill_on_bad_exit, buffer); pack16(build_ptr->kill_wait, buffer); @@ -4542,6 +4973,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->licenses, buffer); packstr(build_ptr->licenses_used, buffer); + pack16(build_ptr->max_array_sz, buffer); packstr(build_ptr->mail_prog, buffer); pack32(build_ptr->max_job_cnt, buffer); pack32(build_ptr->max_job_id, buffer); @@ -4589,7 +5021,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->resume_program, buffer); pack16(build_ptr->resume_rate, buffer); pack16(build_ptr->resume_timeout, buffer); + packstr(build_ptr->resv_epilog, buffer); pack16(build_ptr->resv_over_run, buffer); + packstr(build_ptr->resv_prolog, buffer); pack16(build_ptr->ret2service, buffer); packstr(build_ptr->salloc_default_command, buffer); @@ -4628,6 +5062,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->slurmctld_debug, buffer); 
packstr(build_ptr->slurmctld_logfile, buffer); packstr(build_ptr->slurmctld_pidfile, buffer); + packstr(build_ptr->slurmctld_plugstack, buffer); pack32(build_ptr->slurmctld_port, buffer); pack16(build_ptr->slurmctld_port_count, buffer); pack16(build_ptr->slurmctld_timeout, buffer); @@ -4670,7 +5105,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->z_16, buffer); pack32(build_ptr->z_32, buffer); packstr(build_ptr->z_char, buffer); - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { pack_time(build_ptr->last_update, buffer); pack16(build_ptr->accounting_storage_enforce, buffer); @@ -4681,6 +5116,8 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->accounting_storage_type, buffer); packstr(build_ptr->accounting_storage_user, buffer); pack16(build_ptr->acctng_store_job_comment, buffer); + packstr(build_ptr->acct_gather_energy_type, buffer); + pack16(build_ptr->acct_gather_node_freq, buffer); packstr(build_ptr->authtype, buffer); @@ -4718,8 +5155,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->health_check_program, buffer); pack16(build_ptr->inactive_limit, buffer); - - pack16(build_ptr->job_acct_gather_freq, buffer); + if (build_ptr->job_acct_gather_freq) + uint16 = atoi(build_ptr->job_acct_gather_freq); + pack16(uint16, buffer); packstr(build_ptr->job_acct_gather_type, buffer); packstr(build_ptr->job_ckpt_dir, buffer); @@ -4739,7 +5177,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->kill_on_bad_exit, buffer); pack16(build_ptr->kill_wait, buffer); + packstr(build_ptr->launch_type, buffer); packstr(build_ptr->licenses, buffer); + packstr(build_ptr->licenses_used, buffer); packstr(build_ptr->mail_prog, buffer); pack32(build_ptr->max_job_cnt, buffer); @@ -4866,11 +5306,10 @@ 
_pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->vsize_factor, buffer); pack16(build_ptr->wait_time, buffer); - pack16(build_ptr->z_16, buffer); pack32(build_ptr->z_32, buffer); packstr(build_ptr->z_char, buffer); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { pack_time(build_ptr->last_update, buffer); pack16(build_ptr->accounting_storage_enforce, buffer); @@ -4919,7 +5358,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->inactive_limit, buffer); - pack16(build_ptr->job_acct_gather_freq, buffer); + if (build_ptr->job_acct_gather_freq) + uint16 = atoi(build_ptr->job_acct_gather_freq); + pack16(uint16, buffer); packstr(build_ptr->job_acct_gather_type, buffer); packstr(build_ptr->job_ckpt_dir, buffer); @@ -4965,6 +5406,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack32(build_ptr->priority_decay_hl, buffer); pack32(build_ptr->priority_calc_period, buffer); pack16(build_ptr->priority_favor_small, buffer); + pack16(build_ptr->priority_flags, buffer); pack32(build_ptr->priority_max_age, buffer); pack16(build_ptr->priority_reset_period, buffer); packstr(build_ptr->priority_type, buffer); @@ -4982,6 +5424,8 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->propagate_rlimits, buffer); packstr(build_ptr->propagate_rlimits_except, buffer); + packstr(build_ptr->reboot_program, buffer); + pack16(build_ptr->reconfig_flags, buffer); packstr(build_ptr->resume_program, buffer); pack16(build_ptr->resume_rate, buffer); pack16(build_ptr->resume_timeout, buffer); @@ -5079,6 +5523,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, { uint32_t count = NO_VAL; uint32_t uint32_tmp; + uint16_t uint16 = 0; slurm_ctl_conf_info_msg_t *build_ptr; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); @@ -5090,7 
+5535,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, build_ptr->hash_val = NO_VAL; /* load the data values */ - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { /* unpack timestamp of snapshot */ safe_unpack_time(&build_ptr->last_update, buffer); @@ -5110,7 +5555,13 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->acctng_store_job_comment, buffer); safe_unpackstr_xmalloc(&build_ptr->acct_gather_energy_type, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->acct_gather_filesystem_type, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->acct_gather_infiniband_type, + &uint32_tmp, buffer); safe_unpack16(&build_ptr->acct_gather_node_freq, buffer); + safe_unpackstr_xmalloc(&build_ptr->acct_gather_profile_type, + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&build_ptr->authtype, &uint32_tmp, buffer); @@ -5137,6 +5588,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->def_mem_per_cpu, buffer); safe_unpack32(&build_ptr->debug_flags, buffer); safe_unpack16(&build_ptr->disable_root_jobs, buffer); + safe_unpack16(&build_ptr->dynalloc_port, buffer); safe_unpack16(&build_ptr->enforce_part_limits, buffer); safe_unpackstr_xmalloc(&build_ptr->epilog, &uint32_tmp, @@ -5144,6 +5596,9 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->epilog_msg_time, buffer); safe_unpackstr_xmalloc(&build_ptr->epilog_slurmctld, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->ext_sensors_type, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->ext_sensors_freq, buffer); safe_unpack16(&build_ptr->fast_schedule, buffer); safe_unpack32(&build_ptr->first_job_id, buffer); @@ -5156,12 +5611,14 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->hash_val, buffer); 
safe_unpack16(&build_ptr->health_check_interval, buffer); + safe_unpack16(&build_ptr->health_check_node_state, buffer); safe_unpackstr_xmalloc(&build_ptr->health_check_program, &uint32_tmp, buffer); safe_unpack16(&build_ptr->inactive_limit, buffer); - safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_freq, + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, &uint32_tmp, buffer); @@ -5188,6 +5645,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->job_submit_plugins, &uint32_tmp, buffer); + safe_unpack16(&build_ptr->keep_alive_time, buffer); safe_unpack16(&build_ptr->kill_on_bad_exit, buffer); safe_unpack16(&build_ptr->kill_wait, buffer); @@ -5198,6 +5656,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->licenses_used, &uint32_tmp, buffer); + safe_unpack16(&build_ptr->max_array_sz, buffer); safe_unpackstr_xmalloc(&build_ptr->mail_prog, &uint32_tmp, buffer); safe_unpack32(&build_ptr->max_job_cnt, buffer); @@ -5260,7 +5719,11 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, &uint32_tmp, buffer); safe_unpack16(&build_ptr->resume_rate, buffer); safe_unpack16(&build_ptr->resume_timeout, buffer); + safe_unpackstr_xmalloc(&build_ptr->resv_epilog, &uint32_tmp, + buffer); safe_unpack16(&build_ptr->resv_over_run, buffer); + safe_unpackstr_xmalloc(&build_ptr->resv_prolog, &uint32_tmp, + buffer); safe_unpack16(&build_ptr->ret2service, buffer); safe_unpackstr_xmalloc(&build_ptr->salloc_default_command, @@ -5309,6 +5772,8 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&build_ptr->slurmctld_pidfile, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->slurmctld_plugstack, + &uint32_tmp, buffer); safe_unpack32(&build_ptr->slurmctld_port, buffer); 
safe_unpack16(&build_ptr->slurmctld_port_count, buffer); safe_unpack16(&build_ptr->slurmctld_timeout, buffer); @@ -5371,7 +5836,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->z_32, buffer); safe_unpackstr_xmalloc(&build_ptr->z_char, &uint32_tmp, buffer); - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { /* unpack timestamp of snapshot */ safe_unpack_time(&build_ptr->last_update, buffer); @@ -5389,6 +5854,9 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->accounting_storage_user, &uint32_tmp, buffer); safe_unpack16(&build_ptr->acctng_store_job_comment, buffer); + safe_unpackstr_xmalloc(&build_ptr->acct_gather_energy_type, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->acct_gather_node_freq, buffer); safe_unpackstr_xmalloc(&build_ptr->authtype, &uint32_tmp, buffer); @@ -5439,7 +5907,10 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->inactive_limit, buffer); - safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + build_ptr->job_acct_gather_freq = + xstrdup_printf("%u", uint16); safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, &uint32_tmp, buffer); @@ -5469,8 +5940,12 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->kill_on_bad_exit, buffer); safe_unpack16(&build_ptr->kill_wait, buffer); + safe_unpackstr_xmalloc(&build_ptr->launch_type, + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&build_ptr->licenses, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->licenses_used, + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&build_ptr->mail_prog, &uint32_tmp, buffer); @@ -5645,7 +6120,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->z_32, buffer); 
safe_unpackstr_xmalloc(&build_ptr->z_char, &uint32_tmp, buffer); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { /* unpack timestamp of snapshot */ safe_unpack_time(&build_ptr->last_update, buffer); @@ -5713,7 +6188,10 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->inactive_limit, buffer); - safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + build_ptr->job_acct_gather_freq = + xstrdup_printf("%u", uint16); safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, &uint32_tmp, buffer); @@ -5777,6 +6255,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->priority_decay_hl, buffer); safe_unpack32(&build_ptr->priority_calc_period, buffer); safe_unpack16(&build_ptr->priority_favor_small, buffer); + safe_unpack16(&build_ptr->priority_flags, buffer); safe_unpack32(&build_ptr->priority_max_age, buffer); safe_unpack16(&build_ptr->priority_reset_period, buffer); safe_unpackstr_xmalloc(&build_ptr->priority_type, &uint32_tmp, @@ -5800,6 +6279,9 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->propagate_rlimits_except, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->reboot_program, &uint32_tmp, + buffer); + safe_unpack16(&build_ptr->reconfig_flags, buffer); safe_unpackstr_xmalloc(&build_ptr->resume_program, &uint32_tmp, buffer); safe_unpack16(&build_ptr->resume_rate, buffer); @@ -5828,9 +6310,9 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, int i; for (i=0; i<count; i++) { if (unpack_config_key_pair( - (void *)&object, protocol_version, - buffer) - == SLURM_ERROR) + (void *)&object, protocol_version, + buffer) + == SLURM_ERROR) goto unpack_error; list_append(tmp_list, object); } @@ -5938,8 +6420,10 @@ static void 
_pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, uint16_t protocol_version) { + uint16_t uint16 = 0; + /* load the data values */ - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { pack16(job_desc_ptr->contiguous, buffer); pack16(job_desc_ptr->task_dist, buffer); pack16(job_desc_ptr->kill_on_node_fail, buffer); @@ -5950,6 +6434,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, packstr(job_desc_ptr->alloc_node, buffer); pack32(job_desc_ptr->alloc_sid, buffer); + packstr(job_desc_ptr->array_inx, buffer); pack16(job_desc_ptr->pn_min_cpus, buffer); pack32(job_desc_ptr->pn_min_memory, buffer); pack32(job_desc_ptr->pn_min_tmp_disk, buffer); @@ -5960,11 +6445,12 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, packstr(job_desc_ptr->account, buffer); packstr(job_desc_ptr->comment, buffer); pack16(job_desc_ptr->nice, buffer); + pack32(job_desc_ptr->profile, buffer); packstr(job_desc_ptr->qos, buffer); pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); - pack16(job_desc_ptr->acctg_freq, buffer); + packstr(job_desc_ptr->acctg_freq, buffer); pack32(job_desc_ptr->num_tasks, buffer); pack16(job_desc_ptr->ckpt_interval, buffer); @@ -6085,7 +6571,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, job_desc_ptr->select_jobinfo = NULL; } pack16(job_desc_ptr->wait_all_nodes, buffer); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { pack16(job_desc_ptr->contiguous, buffer); pack16(job_desc_ptr->task_dist, buffer); pack16(job_desc_ptr->kill_on_node_fail, buffer); @@ -6110,7 +6596,9 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); - pack16(job_desc_ptr->acctg_freq, buffer); + if (job_desc_ptr->acctg_freq) + uint16 = atoi(job_desc_ptr->acctg_freq); + pack16(uint16, 
buffer); pack32(job_desc_ptr->num_tasks, buffer); pack16(job_desc_ptr->ckpt_interval, buffer); @@ -6134,6 +6622,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack16(job_desc_ptr->shared, buffer); pack16(job_desc_ptr->cpus_per_task, buffer); pack16(job_desc_ptr->ntasks_per_node, buffer); + pack16(job_desc_ptr->ntasks_per_board, buffer); pack16(job_desc_ptr->ntasks_per_socket, buffer); pack16(job_desc_ptr->ntasks_per_core, buffer); @@ -6149,6 +6638,8 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack32(job_desc_ptr->max_cpus, buffer); pack32(job_desc_ptr->min_nodes, buffer); pack32(job_desc_ptr->max_nodes, buffer); + pack16(job_desc_ptr->boards_per_node, buffer); + pack16(job_desc_ptr->sockets_per_board, buffer); pack16(job_desc_ptr->sockets_per_node, buffer); pack16(job_desc_ptr->cores_per_socket, buffer); pack16(job_desc_ptr->threads_per_core, buffer); @@ -6161,94 +6652,379 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack_time(job_desc_ptr->begin_time, buffer); pack_time(job_desc_ptr->end_time, buffer); - packstr(job_desc_ptr->licenses, buffer); - pack16(job_desc_ptr->mail_type, buffer); - packstr(job_desc_ptr->mail_user, buffer); - packstr(job_desc_ptr->reservation, buffer); - pack16(job_desc_ptr->warn_signal, buffer); - pack16(job_desc_ptr->warn_time, buffer); - packstr(job_desc_ptr->wckey, buffer); - pack32(job_desc_ptr->req_switch, buffer); - pack32(job_desc_ptr->wait4switch, buffer); + packstr(job_desc_ptr->licenses, buffer); + pack16(job_desc_ptr->mail_type, buffer); + packstr(job_desc_ptr->mail_user, buffer); + packstr(job_desc_ptr->reservation, buffer); + pack16(job_desc_ptr->warn_signal, buffer); + pack16(job_desc_ptr->warn_time, buffer); + packstr(job_desc_ptr->wckey, buffer); + pack32(job_desc_ptr->req_switch, buffer); + pack32(job_desc_ptr->wait4switch, buffer); + + if (job_desc_ptr->select_jobinfo) { + select_g_select_jobinfo_pack( + job_desc_ptr->select_jobinfo, + buffer, protocol_version); 
+ } else { + job_desc_ptr->select_jobinfo = + select_g_select_jobinfo_alloc(); + if (job_desc_ptr->geometry[0] != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_GEOMETRY, + job_desc_ptr->geometry); + + if (job_desc_ptr->conn_type[0] != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_CONN_TYPE, + &(job_desc_ptr->conn_type)); + if (job_desc_ptr->reboot != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_REBOOT, + &(job_desc_ptr->reboot)); + if (job_desc_ptr->rotate != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_ROTATE, + &(job_desc_ptr->rotate)); + if (job_desc_ptr->blrtsimage) { + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_BLRTS_IMAGE, + job_desc_ptr->blrtsimage); + } + if (job_desc_ptr->linuximage) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_LINUX_IMAGE, + job_desc_ptr->linuximage); + if (job_desc_ptr->mloaderimage) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_MLOADER_IMAGE, + job_desc_ptr->mloaderimage); + if (job_desc_ptr->ramdiskimage) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_RAMDISK_IMAGE, + job_desc_ptr->ramdiskimage); + select_g_select_jobinfo_pack( + job_desc_ptr->select_jobinfo, + buffer, protocol_version); + select_g_select_jobinfo_free( + job_desc_ptr->select_jobinfo); + job_desc_ptr->select_jobinfo = NULL; + } + pack16(job_desc_ptr->wait_all_nodes, buffer); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + pack16(job_desc_ptr->contiguous, buffer); + pack16(job_desc_ptr->task_dist, buffer); + pack16(job_desc_ptr->kill_on_node_fail, buffer); + packstr(job_desc_ptr->features, buffer); + packstr(job_desc_ptr->gres, buffer); + pack32(job_desc_ptr->job_id, buffer); + packstr(job_desc_ptr->name, 
buffer); + + packstr(job_desc_ptr->alloc_node, buffer); + pack32(job_desc_ptr->alloc_sid, buffer); + pack16(job_desc_ptr->pn_min_cpus, buffer); + pack32(job_desc_ptr->pn_min_memory, buffer); + pack32(job_desc_ptr->pn_min_tmp_disk, buffer); + + packstr(job_desc_ptr->partition, buffer); + pack32(job_desc_ptr->priority, buffer); + packstr(job_desc_ptr->dependency, buffer); + packstr(job_desc_ptr->account, buffer); + packstr(job_desc_ptr->comment, buffer); + pack16(job_desc_ptr->nice, buffer); + packstr(job_desc_ptr->qos, buffer); + + pack8(job_desc_ptr->open_mode, buffer); + pack8(job_desc_ptr->overcommit, buffer); + if (job_desc_ptr->acctg_freq) + uint16 = atoi(job_desc_ptr->acctg_freq); + pack16(uint16, buffer); + pack32(job_desc_ptr->num_tasks, buffer); + pack16(job_desc_ptr->ckpt_interval, buffer); + + packstr(job_desc_ptr->req_nodes, buffer); + packstr(job_desc_ptr->exc_nodes, buffer); + packstr_array(job_desc_ptr->environment, + job_desc_ptr->env_size, buffer); + packstr_array(job_desc_ptr->spank_job_env, + job_desc_ptr->spank_job_env_size, buffer); + packstr(job_desc_ptr->script, buffer); + packstr_array(job_desc_ptr->argv, job_desc_ptr->argc, buffer); + + packstr(job_desc_ptr->std_err, buffer); + packstr(job_desc_ptr->std_in, buffer); + packstr(job_desc_ptr->std_out, buffer); + packstr(job_desc_ptr->work_dir, buffer); + packstr(job_desc_ptr->ckpt_dir, buffer); + + pack16(job_desc_ptr->immediate, buffer); + pack16(job_desc_ptr->requeue, buffer); + pack16(job_desc_ptr->shared, buffer); + pack16(job_desc_ptr->cpus_per_task, buffer); + pack16(job_desc_ptr->ntasks_per_node, buffer); + pack16(job_desc_ptr->ntasks_per_socket, buffer); + pack16(job_desc_ptr->ntasks_per_core, buffer); + + pack16(job_desc_ptr->plane_size, buffer); + pack16(job_desc_ptr->cpu_bind_type, buffer); + pack16(job_desc_ptr->mem_bind_type, buffer); + packstr(job_desc_ptr->cpu_bind, buffer); + packstr(job_desc_ptr->mem_bind, buffer); + + pack32(job_desc_ptr->time_limit, buffer); + 
pack32(job_desc_ptr->time_min, buffer); + pack32(job_desc_ptr->min_cpus, buffer); + pack32(job_desc_ptr->max_cpus, buffer); + pack32(job_desc_ptr->min_nodes, buffer); + pack32(job_desc_ptr->max_nodes, buffer); + pack16(job_desc_ptr->sockets_per_node, buffer); + pack16(job_desc_ptr->cores_per_socket, buffer); + pack16(job_desc_ptr->threads_per_core, buffer); + pack32(job_desc_ptr->user_id, buffer); + pack32(job_desc_ptr->group_id, buffer); + + pack16(job_desc_ptr->alloc_resp_port, buffer); + pack16(job_desc_ptr->other_port, buffer); + packstr(job_desc_ptr->network, buffer); + pack_time(job_desc_ptr->begin_time, buffer); + pack_time(job_desc_ptr->end_time, buffer); + + packstr(job_desc_ptr->licenses, buffer); + pack16(job_desc_ptr->mail_type, buffer); + packstr(job_desc_ptr->mail_user, buffer); + packstr(job_desc_ptr->reservation, buffer); + pack16(job_desc_ptr->warn_signal, buffer); + pack16(job_desc_ptr->warn_time, buffer); + packstr(job_desc_ptr->wckey, buffer); + pack32(job_desc_ptr->req_switch, buffer); + pack32(job_desc_ptr->wait4switch, buffer); + + if (job_desc_ptr->select_jobinfo) { + select_g_select_jobinfo_pack( + job_desc_ptr->select_jobinfo, + buffer, protocol_version); + } else { + job_desc_ptr->select_jobinfo = + select_g_select_jobinfo_alloc(); + if (job_desc_ptr->geometry[0] != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_GEOMETRY, + job_desc_ptr->geometry); + + if (job_desc_ptr->conn_type[0] != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_CONN_TYPE, + &(job_desc_ptr->conn_type)); + if (job_desc_ptr->reboot != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_REBOOT, + &(job_desc_ptr->reboot)); + if (job_desc_ptr->rotate != (uint16_t) NO_VAL) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_ROTATE, + &(job_desc_ptr->rotate)); + if (job_desc_ptr->blrtsimage) { + 
select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_BLRTS_IMAGE, + job_desc_ptr->blrtsimage); + } + if (job_desc_ptr->linuximage) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_LINUX_IMAGE, + job_desc_ptr->linuximage); + if (job_desc_ptr->mloaderimage) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_MLOADER_IMAGE, + job_desc_ptr->mloaderimage); + if (job_desc_ptr->ramdiskimage) + select_g_select_jobinfo_set( + job_desc_ptr->select_jobinfo, + SELECT_JOBDATA_RAMDISK_IMAGE, + job_desc_ptr->ramdiskimage); + select_g_select_jobinfo_pack( + job_desc_ptr->select_jobinfo, + buffer, protocol_version); + select_g_select_jobinfo_free( + job_desc_ptr->select_jobinfo); + job_desc_ptr->select_jobinfo = NULL; + } + pack16(job_desc_ptr->wait_all_nodes, buffer); + } else { + error("_pack_job_desc_msg: protocol_version " + "%hu not supported", protocol_version); + } +} + +/* _unpack_job_desc_msg + * unpacks a job_desc struct + * OUT job_desc_buffer_ptr - place to put pointer to allocated job desc struct + * IN/OUT buffer - source of the unpack, contains pointers that are + * automatically updated + */ +static int +_unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, + uint16_t protocol_version) +{ + uint32_t uint32_tmp; + uint16_t uint16 = 0; + job_desc_msg_t *job_desc_ptr = NULL; + + /* alloc memory for structure */ + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + job_desc_ptr = xmalloc(sizeof(job_desc_msg_t)); + *job_desc_buffer_ptr = job_desc_ptr; + + /* load the data values */ + safe_unpack16(&job_desc_ptr->contiguous, buffer); + safe_unpack16(&job_desc_ptr->task_dist, buffer); + safe_unpack16(&job_desc_ptr->kill_on_node_fail, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->features, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->gres, &uint32_tmp,buffer); + safe_unpack32(&job_desc_ptr->job_id, buffer); + 
safe_unpackstr_xmalloc(&job_desc_ptr->name, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&job_desc_ptr->alloc_node, + &uint32_tmp, buffer); + safe_unpack32(&job_desc_ptr->alloc_sid, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->array_inx, + &uint32_tmp, buffer); + safe_unpack16(&job_desc_ptr->pn_min_cpus, buffer); + safe_unpack32(&job_desc_ptr->pn_min_memory, buffer); + safe_unpack32(&job_desc_ptr->pn_min_tmp_disk, buffer); + + safe_unpackstr_xmalloc(&job_desc_ptr->partition, + &uint32_tmp, buffer); + safe_unpack32(&job_desc_ptr->priority, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->dependency, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->account, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->comment, + &uint32_tmp, buffer); + safe_unpack16(&job_desc_ptr->nice, buffer); + safe_unpack32(&job_desc_ptr->profile, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->qos, &uint32_tmp, + buffer); + + safe_unpack8(&job_desc_ptr->open_mode, buffer); + safe_unpack8(&job_desc_ptr->overcommit, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->acctg_freq, + &uint32_tmp, buffer); + safe_unpack32(&job_desc_ptr->num_tasks, buffer); + safe_unpack16(&job_desc_ptr->ckpt_interval, buffer); + + safe_unpackstr_xmalloc(&job_desc_ptr->req_nodes, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->exc_nodes, + &uint32_tmp, buffer); + safe_unpackstr_array(&job_desc_ptr->environment, + &job_desc_ptr->env_size, buffer); + safe_unpackstr_array(&job_desc_ptr->spank_job_env, + &job_desc_ptr->spank_job_env_size, + buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->script, + &uint32_tmp, buffer); + safe_unpackstr_array(&job_desc_ptr->argv, + &job_desc_ptr->argc, buffer); + + safe_unpackstr_xmalloc(&job_desc_ptr->std_err, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->std_in, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->std_out, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->work_dir, + 
&uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->ckpt_dir, + &uint32_tmp, buffer); + + safe_unpack16(&job_desc_ptr->immediate, buffer); + safe_unpack16(&job_desc_ptr->requeue, buffer); + safe_unpack16(&job_desc_ptr->shared, buffer); + safe_unpack16(&job_desc_ptr->cpus_per_task, buffer); + safe_unpack16(&job_desc_ptr->ntasks_per_node, buffer); + safe_unpack16(&job_desc_ptr->ntasks_per_board, buffer); + safe_unpack16(&job_desc_ptr->ntasks_per_socket, buffer); + safe_unpack16(&job_desc_ptr->ntasks_per_core, buffer); + + safe_unpack16(&job_desc_ptr->plane_size, buffer); + safe_unpack16(&job_desc_ptr->cpu_bind_type, buffer); + safe_unpack16(&job_desc_ptr->mem_bind_type, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->cpu_bind, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->mem_bind, + &uint32_tmp, buffer); + + safe_unpack32(&job_desc_ptr->time_limit, buffer); + safe_unpack32(&job_desc_ptr->time_min, buffer); + safe_unpack32(&job_desc_ptr->min_cpus, buffer); + safe_unpack32(&job_desc_ptr->max_cpus, buffer); + safe_unpack32(&job_desc_ptr->min_nodes, buffer); + safe_unpack32(&job_desc_ptr->max_nodes, buffer); + safe_unpack16(&job_desc_ptr->boards_per_node, buffer); + safe_unpack16(&job_desc_ptr->sockets_per_board, buffer); + safe_unpack16(&job_desc_ptr->sockets_per_node, buffer); + safe_unpack16(&job_desc_ptr->cores_per_socket, buffer); + safe_unpack16(&job_desc_ptr->threads_per_core, buffer); + safe_unpack32(&job_desc_ptr->user_id, buffer); + safe_unpack32(&job_desc_ptr->group_id, buffer); - if (job_desc_ptr->select_jobinfo) { - select_g_select_jobinfo_pack( - job_desc_ptr->select_jobinfo, - buffer, protocol_version); - } else { - job_desc_ptr->select_jobinfo = - select_g_select_jobinfo_alloc(); - if (job_desc_ptr->geometry[0] != (uint16_t) NO_VAL) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_GEOMETRY, - job_desc_ptr->geometry); + safe_unpack16(&job_desc_ptr->alloc_resp_port, buffer); + 
safe_unpack16(&job_desc_ptr->other_port, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->network, + &uint32_tmp, buffer); + safe_unpack_time(&job_desc_ptr->begin_time, buffer); + safe_unpack_time(&job_desc_ptr->end_time, buffer); - if (job_desc_ptr->conn_type[0] != (uint16_t) NO_VAL) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_CONN_TYPE, - &(job_desc_ptr->conn_type)); - if (job_desc_ptr->reboot != (uint16_t) NO_VAL) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_REBOOT, - &(job_desc_ptr->reboot)); - if (job_desc_ptr->rotate != (uint16_t) NO_VAL) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_ROTATE, - &(job_desc_ptr->rotate)); - if (job_desc_ptr->blrtsimage) { - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_BLRTS_IMAGE, - job_desc_ptr->blrtsimage); - } - if (job_desc_ptr->linuximage) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_LINUX_IMAGE, - job_desc_ptr->linuximage); - if (job_desc_ptr->mloaderimage) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_MLOADER_IMAGE, - job_desc_ptr->mloaderimage); - if (job_desc_ptr->ramdiskimage) - select_g_select_jobinfo_set( - job_desc_ptr->select_jobinfo, - SELECT_JOBDATA_RAMDISK_IMAGE, - job_desc_ptr->ramdiskimage); - select_g_select_jobinfo_pack( - job_desc_ptr->select_jobinfo, - buffer, protocol_version); - select_g_select_jobinfo_free( - job_desc_ptr->select_jobinfo); - job_desc_ptr->select_jobinfo = NULL; - } - pack16(job_desc_ptr->wait_all_nodes, buffer); - } else { - error("_pack_job_desc_msg: protocol_version " - "%hu not supported", protocol_version); - } -} + safe_unpackstr_xmalloc(&job_desc_ptr->licenses, + &uint32_tmp, buffer); + safe_unpack16(&job_desc_ptr->mail_type, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->mail_user, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->reservation, + &uint32_tmp, 
buffer); + safe_unpack16(&job_desc_ptr->warn_signal, buffer); + safe_unpack16(&job_desc_ptr->warn_time, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->wckey, + &uint32_tmp, buffer); + safe_unpack32(&job_desc_ptr->req_switch, buffer); + safe_unpack32(&job_desc_ptr->wait4switch, buffer); -/* _unpack_job_desc_msg - * unpacks a job_desc struct - * OUT job_desc_buffer_ptr - place to put pointer to allocated job desc struct - * IN/OUT buffer - source of the unpack, contains pointers that are - * automatically updated - */ -static int -_unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, - uint16_t protocol_version) -{ - uint32_t uint32_tmp; - job_desc_msg_t *job_desc_ptr = NULL; + if (select_g_select_jobinfo_unpack( + &job_desc_ptr->select_jobinfo, + buffer, protocol_version)) + goto unpack_error; - /* alloc memory for structure */ - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + /* These are set so we don't confuse them later for what is + * set in the select_jobinfo structure. 
+ */ + job_desc_ptr->geometry[0] = (uint16_t)NO_VAL; + job_desc_ptr->conn_type[0] = (uint16_t)NO_VAL; + job_desc_ptr->reboot = (uint16_t)NO_VAL; + job_desc_ptr->rotate = (uint16_t)NO_VAL; + job_desc_ptr->blrtsimage = NULL; + job_desc_ptr->linuximage = NULL; + job_desc_ptr->mloaderimage = NULL; + job_desc_ptr->ramdiskimage = NULL; + safe_unpack16(&job_desc_ptr->wait_all_nodes, buffer); + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { job_desc_ptr = xmalloc(sizeof(job_desc_msg_t)); *job_desc_buffer_ptr = job_desc_ptr; @@ -6285,7 +7061,9 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpack8(&job_desc_ptr->open_mode, buffer); safe_unpack8(&job_desc_ptr->overcommit, buffer); - safe_unpack16(&job_desc_ptr->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + job_desc_ptr->acctg_freq = xstrdup_printf("%u", uint16); safe_unpack32(&job_desc_ptr->num_tasks, buffer); safe_unpack16(&job_desc_ptr->ckpt_interval, buffer); @@ -6383,7 +7161,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, job_desc_ptr->mloaderimage = NULL; job_desc_ptr->ramdiskimage = NULL; safe_unpack16(&job_desc_ptr->wait_all_nodes, buffer); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { job_desc_ptr = xmalloc(sizeof(job_desc_msg_t)); *job_desc_buffer_ptr = job_desc_ptr; @@ -6420,7 +7198,9 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpack8(&job_desc_ptr->open_mode, buffer); safe_unpack8(&job_desc_ptr->overcommit, buffer); - safe_unpack16(&job_desc_ptr->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + job_desc_ptr->acctg_freq = xstrdup_printf("%u", uint16); safe_unpack32(&job_desc_ptr->num_tasks, buffer); safe_unpack16(&job_desc_ptr->ckpt_interval, buffer); @@ -6811,9 +7591,11 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, { uint32_t cluster_flags = 
slurmdb_setup_cluster_flags(); int i = 0; + uint16_t uint16 = 0; + xassert(msg != NULL); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->job_step_id, buffer); pack32(msg->ntasks, buffer); @@ -6860,6 +7642,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, for (i = 0; i < msg->num_io_port; i++) pack16(msg->io_port[i], buffer); } + pack32(msg->profile, buffer); packstr(msg->task_prolog, buffer); packstr(msg->task_epilog, buffer); pack16(msg->slurmd_debug, buffer); @@ -6870,7 +7653,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->pty, buffer); - pack16(msg->acctg_freq, buffer); + packstr(msg->acctg_freq, buffer); pack32(msg->cpu_freq, buffer); packstr(msg->ckpt_dir, buffer); packstr(msg->restart_dir, buffer); @@ -6885,7 +7668,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, buffer, protocol_version); } - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->job_step_id, buffer); pack32(msg->ntasks, buffer); @@ -6942,7 +7725,10 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->pty, buffer); - pack16(msg->acctg_freq, buffer); + if (msg->acctg_freq) + uint16 = atoi(msg->acctg_freq); + pack16(uint16, buffer); + pack32(msg->cpu_freq, buffer); packstr(msg->ckpt_dir, buffer); packstr(msg->restart_dir, buffer); if (!(cluster_flags & CLUSTER_FLAG_BG)) { @@ -6956,7 +7742,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, buffer, protocol_version); } - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->job_step_id, 
buffer); pack32(msg->ntasks, buffer); @@ -6970,7 +7756,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack16(msg->task_dist, buffer); slurm_cred_pack(msg->cred, buffer); - for (i=0; i<msg->nnodes; i++) { + for (i = 0; i < msg->nnodes; i++) { pack16(msg->tasks_to_launch[i], buffer); pack16(msg->cpus_allocated[i], buffer); pack32_array(msg->global_task_ids[i], @@ -7008,21 +7794,23 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack16(msg->slurmd_debug, buffer); switch_pack_jobinfo(msg->switch_job, buffer); job_options_pack(msg->options, buffer); + packstr(msg->alias_list, buffer); packstr(msg->complete_nodelist, buffer); pack8(msg->open_mode, buffer); pack8(msg->pty, buffer); - pack16(msg->acctg_freq, buffer); + if (msg->acctg_freq) + uint16 = atoi(msg->acctg_freq); + pack16(uint16, buffer); packstr(msg->ckpt_dir, buffer); packstr(msg->restart_dir, buffer); if (!(cluster_flags & CLUSTER_FLAG_BG)) { /* If on a Blue Gene cluster do not send this to the - slurmstepd, it will overwrite the environment that is - already set up correctly for both the job - and the step. The slurmstep treats this - select_jobinfo as if it were for the job - instead of for the step. - */ + * slurmstepd, it will overwrite the environment that + * is already set up correctly for both the job and the + * step. The slurmstep treats this select_jobinfo as if + * it were for the job instead of for the step. 
+ */ select_g_select_jobinfo_pack(msg->select_jobinfo, buffer, protocol_version); @@ -7040,6 +7828,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** { uint32_t cluster_flags = slurmdb_setup_cluster_flags(); uint32_t uint32_tmp; + uint16_t uint16 = 0; launch_tasks_request_msg_t *msg; int i = 0; @@ -7047,7 +7836,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** msg = xmalloc(sizeof(launch_tasks_request_msg_t)); *msg_ptr = msg; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); safe_unpack32(&msg->ntasks, buffer); @@ -7113,6 +7902,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** buffer); } } + safe_unpack32(&msg->profile, buffer); safe_unpackstr_xmalloc(&msg->task_prolog, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->task_epilog, &uint32_tmp, buffer); safe_unpack16(&msg->slurmd_debug, buffer); @@ -7134,7 +7924,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); - safe_unpack16(&msg->acctg_freq, buffer); + safe_unpackstr_xmalloc(&msg->acctg_freq, &uint32_tmp, buffer); safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); @@ -7143,7 +7933,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** buffer, protocol_version); } - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); safe_unpack32(&msg->ntasks, buffer); @@ -7230,7 +8020,10 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); - safe_unpack16(&msg->acctg_freq, buffer); + 
safe_unpack16(&uint16, buffer); + if (uint16) + msg->acctg_freq = xstrdup_printf("%u", uint16); + safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); if (!(cluster_flags & CLUSTER_FLAG_BG)) { @@ -7238,7 +8031,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** buffer, protocol_version); } - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); safe_unpack32(&msg->ntasks, buffer); @@ -7257,7 +8050,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** msg->cpus_allocated = xmalloc(sizeof(uint16_t) * msg->nnodes); msg->global_task_ids = xmalloc(sizeof(uint32_t *) * msg->nnodes); - for (i=0; i<msg->nnodes; i++) { + for (i = 0; i < msg->nnodes; i++) { safe_unpack16(&msg->tasks_to_launch[i], buffer); safe_unpack16(&msg->cpus_allocated[i], buffer); safe_unpack32_array(&msg->global_task_ids[i], @@ -7319,12 +8112,16 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** error("Unable to unpack extra job options: %m"); goto unpack_error; } + safe_unpackstr_xmalloc(&msg->alias_list, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->complete_nodelist, &uint32_tmp, buffer); safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); - safe_unpack16(&msg->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + msg->acctg_freq = xstrdup_printf("%u", uint16); + safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); if (!(cluster_flags & CLUSTER_FLAG_BG)) { @@ -7525,7 +8322,7 @@ _pack_job_step_kill_msg(job_step_kill_msg_t * msg, Buf buffer, pack32((uint32_t)msg->job_id, buffer); pack32((uint32_t)msg->job_step_id, buffer); pack16((uint16_t)msg->signal, 
buffer); - pack16((uint16_t)msg->batch_flag, buffer); + pack16((uint16_t)msg->flags, buffer); } /* _unpack_job_step_kill_msg @@ -7546,7 +8343,7 @@ _unpack_job_step_kill_msg(job_step_kill_msg_t ** msg_ptr, Buf buffer, safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); safe_unpack16(&msg->signal, buffer); - safe_unpack16(&msg->batch_flag, buffer); + safe_unpack16(&msg->flags, buffer); return SLURM_SUCCESS; unpack_error: @@ -7559,9 +8356,27 @@ static void _pack_update_job_step_msg(step_update_request_msg_t * msg, Buf buffer, uint16_t protocol_version) { - pack32(msg->job_id, buffer); - pack32(msg->step_id, buffer); - pack32(msg->time_limit, buffer); + uint8_t with_jobacct = 0; + + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + pack_time(msg->end_time, buffer); + pack32(msg->exit_code, buffer); + pack32(msg->job_id, buffer); + if (msg->jobacct) + with_jobacct = 1; + pack8(with_jobacct, buffer); + if (with_jobacct) + jobacctinfo_pack(msg->jobacct, protocol_version, + PROTOCOL_TYPE_SLURM, buffer); + packstr(msg->name, buffer); + pack_time(msg->start_time, buffer); + pack32(msg->step_id, buffer); + pack32(msg->time_limit, buffer); + } else { + pack32(msg->job_id, buffer); + pack32(msg->step_id, buffer); + pack32(msg->time_limit, buffer); + } } static int @@ -7569,13 +8384,31 @@ _unpack_update_job_step_msg(step_update_request_msg_t ** msg_ptr, Buf buffer, uint16_t protocol_version) { step_update_request_msg_t *msg; + uint8_t with_jobacct = 0; + uint32_t uint32_tmp; msg = xmalloc(sizeof(step_update_request_msg_t)); *msg_ptr = msg; - safe_unpack32(&msg->job_id, buffer); - safe_unpack32(&msg->step_id, buffer); - safe_unpack32(&msg->time_limit, buffer); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + unpack_time(&msg->end_time, buffer); + safe_unpack32(&msg->exit_code, buffer); + safe_unpack32(&msg->job_id, buffer); + safe_unpack8(&with_jobacct, buffer); + if (with_jobacct) + if (jobacctinfo_unpack(&msg->jobacct, protocol_version, 
+ PROTOCOL_TYPE_SLURM, buffer) + != SLURM_SUCCESS) + goto unpack_error; + safe_unpackstr_xmalloc(&msg->name, &uint32_tmp, buffer); + unpack_time(&msg->start_time, buffer); + safe_unpack32(&msg->step_id, buffer); + safe_unpack32(&msg->time_limit, buffer); + } else { + safe_unpack32(&msg->job_id, buffer); + safe_unpack32(&msg->step_id, buffer); + safe_unpack32(&msg->time_limit, buffer); + } return SLURM_SUCCESS; unpack_error: @@ -8356,7 +9189,7 @@ _pack_node_info_request_msg(node_info_request_msg_t * msg, Buf buffer, uint16_t protocol_version) { pack_time(msg->last_update, buffer); - pack16((uint16_t)msg->show_flags, buffer); + pack16(msg->show_flags, buffer); } static int @@ -8378,6 +9211,34 @@ unpack_error: return SLURM_ERROR; } +static void +_pack_node_info_single_msg(node_info_single_msg_t * msg, Buf buffer, + uint16_t protocol_version) +{ + packstr(msg->node_name, buffer); + pack16(msg->show_flags, buffer); +} + +static int +_unpack_node_info_single_msg(node_info_single_msg_t ** msg, Buf buffer, + uint16_t protocol_version) +{ + node_info_single_msg_t* node_info; + uint32_t uint32_tmp; + + node_info = xmalloc(sizeof(node_info_single_msg_t)); + *msg = node_info; + + safe_unpackstr_xmalloc(&node_info->node_name, &uint32_tmp, buffer); + safe_unpack16(&node_info->show_flags, buffer); + return SLURM_SUCCESS; + +unpack_error: + slurm_free_node_info_single_msg(node_info); + *msg = NULL; + return SLURM_ERROR; +} + static void _pack_front_end_info_request_msg(front_end_info_request_msg_t * msg, Buf buffer, uint16_t protocol_version) @@ -8414,7 +9275,7 @@ _unpack_front_end_info_msg(front_end_info_msg_t ** msg, Buf buffer, *msg = xmalloc(sizeof(front_end_info_msg_t)); /* load buffer's header (data structure version and time) */ - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack32(&((*msg)->record_count), buffer); safe_unpack_time(&((*msg)->last_update), buffer); front_end = 
xmalloc(sizeof(front_end_info_t) * @@ -8449,7 +9310,25 @@ _unpack_front_end_info_members(front_end_info_t *front_end, Buf buffer, xassert(front_end != NULL); - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpackstr_xmalloc(&front_end->allow_groups, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&front_end->allow_users, &uint32_tmp, + buffer); + safe_unpack_time(&front_end->boot_time, buffer); + safe_unpackstr_xmalloc(&front_end->deny_groups, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&front_end->deny_users, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&front_end->name, &uint32_tmp, buffer); + safe_unpack16(&front_end->node_state, buffer); + + safe_unpackstr_xmalloc(&front_end->reason, &uint32_tmp, buffer); + safe_unpack_time(&front_end->reason_time, buffer); + safe_unpack32(&front_end->reason_uid, buffer); + + safe_unpack_time(&front_end->slurmd_start_time, buffer); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack_time(&front_end->boot_time, buffer); safe_unpackstr_xmalloc(&front_end->name, &uint32_tmp, buffer); safe_unpack16(&front_end->node_state, buffer); @@ -8609,9 +9488,11 @@ static void _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, uint16_t protocol_version) { + uint16_t uint16 = 0; + xassert(msg != NULL); - if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->step_id, buffer); pack32(msg->uid, buffer); @@ -8622,7 +9503,10 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->overcommit, buffer); - pack16(msg->acctg_freq, buffer); + pack32(msg->array_job_id, buffer); + pack16(msg->array_task_id, buffer); + + packstr(msg->acctg_freq, buffer); pack16(msg->cpu_bind_type, buffer); pack16(msg->cpus_per_task, buffer); pack16(msg->restart_cnt, buffer); @@ -8661,7 +9545,7 @@ 
_pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, select_g_select_jobinfo_pack(msg->select_jobinfo, buffer, protocol_version); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->step_id, buffer); pack32(msg->uid, buffer); @@ -8672,7 +9556,9 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->overcommit, buffer); - pack16(msg->acctg_freq, buffer); + if (msg->acctg_freq) + uint16 = atoi(msg->acctg_freq); + pack16(uint16, buffer); pack16(msg->cpu_bind_type, buffer); pack16(msg->cpus_per_task, buffer); pack16(msg->restart_cnt, buffer); @@ -8685,6 +9571,7 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, buffer); } + packstr(msg->alias_list, buffer); packstr(msg->cpu_bind, buffer); packstr(msg->nodes, buffer); packstr(msg->script, buffer); @@ -8721,13 +9608,14 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, uint16_t protocol_version) { uint32_t uint32_tmp; + uint16_t uint16 = 0; batch_job_launch_msg_t *launch_msg_ptr; xassert(msg != NULL); launch_msg_ptr = xmalloc(sizeof(batch_job_launch_msg_t)); *msg = launch_msg_ptr; - if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpack32(&launch_msg_ptr->job_id, buffer); safe_unpack32(&launch_msg_ptr->step_id, buffer); safe_unpack32(&launch_msg_ptr->uid, buffer); @@ -8738,7 +9626,11 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, safe_unpack8(&launch_msg_ptr->open_mode, buffer); safe_unpack8(&launch_msg_ptr->overcommit, buffer); - safe_unpack16(&launch_msg_ptr->acctg_freq, buffer); + safe_unpack32(&launch_msg_ptr->array_job_id, buffer); + safe_unpack16(&launch_msg_ptr->array_task_id, buffer); + + safe_unpackstr_xmalloc(&launch_msg_ptr->acctg_freq, + &uint32_tmp, buffer); 
safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer); safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer); safe_unpack16(&launch_msg_ptr->restart_cnt, buffer); @@ -8799,7 +9691,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, select_jobinfo, buffer, protocol_version)) goto unpack_error; - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack32(&launch_msg_ptr->job_id, buffer); safe_unpack32(&launch_msg_ptr->step_id, buffer); safe_unpack32(&launch_msg_ptr->uid, buffer); @@ -8810,7 +9702,11 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, safe_unpack8(&launch_msg_ptr->open_mode, buffer); safe_unpack8(&launch_msg_ptr->overcommit, buffer); - safe_unpack16(&launch_msg_ptr->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + launch_msg_ptr->acctg_freq = + xstrdup_printf("%u", uint16); + safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer); safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer); safe_unpack16(&launch_msg_ptr->restart_cnt, buffer); @@ -8827,6 +9723,9 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, goto unpack_error; } + + safe_unpackstr_xmalloc(&launch_msg_ptr->alias_list, + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&launch_msg_ptr->cpu_bind, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&launch_msg_ptr->nodes, &uint32_tmp, @@ -9080,8 +9979,8 @@ _pack_job_ready_msg(job_id_msg_t * msg, Buf buffer, { xassert ( msg != NULL ); - pack32((uint32_t)msg->job_id , buffer ) ; - pack16((uint16_t)msg->show_flags, buffer); + pack32(msg->job_id , buffer ) ; + pack16(msg->show_flags, buffer); } static int @@ -9104,6 +10003,36 @@ unpack_error: return SLURM_ERROR; } +static void +_pack_job_user_msg(job_user_id_msg_t * msg, Buf buffer, + uint16_t protocol_version) +{ + xassert ( msg != NULL ); + + pack32(msg->user_id , buffer ) ; + pack16(msg->show_flags, buffer); +} + +static int 
+_unpack_job_user_msg(job_user_id_msg_t ** msg_ptr, Buf buffer, + uint16_t protocol_version) +{ + job_user_id_msg_t * msg; + xassert ( msg_ptr != NULL ); + + msg = xmalloc ( sizeof (job_user_id_msg_t) ); + *msg_ptr = msg ; + + safe_unpack32(&msg->user_id , buffer ) ; + safe_unpack16(&msg->show_flags, buffer); + return SLURM_SUCCESS; + +unpack_error: + *msg_ptr = NULL; + slurm_free_job_user_id_msg(msg); + return SLURM_ERROR; +} + static void _pack_srun_timeout_msg(srun_timeout_msg_t * msg, Buf buffer, uint16_t protocol_version) @@ -10040,8 +10969,6 @@ _pack_will_run_response_msg(will_run_response_msg_t *msg, Buf buffer, pack32(count, buffer); if (count && (count != NO_VAL)) { ListIterator itr = list_iterator_create(msg->preemptee_job_id); - if (itr == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_id_ptr = list_next(itr))) pack32(job_id_ptr[0], buffer); list_iterator_destroy(itr); diff --git a/src/common/slurm_protocol_pack.h b/src/common/slurm_protocol_pack.h index 0dd05f6d5c5ce337dfb232889c51c0e3dfcde782..d3e9efee8a2203cfa99283abc180811fb0c11b15 100644 --- a/src/common/slurm_protocol_pack.h +++ b/src/common/slurm_protocol_pack.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_protocol_socket_common.h b/src/common/slurm_protocol_socket_common.h index 15435293cdb1b9e70806ec0cd27c882c32140fb6..ebf74df833a46a45ed3ba6db83cf3a85df7ecfaf 100644 --- a/src/common/slurm_protocol_socket_common.h +++ b/src/common/slurm_protocol_socket_common.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c index 3a48370f7fb49c7ae6f86af8d145e481a287e2c3..04de75e1b3caff5fd27642bba14264fbb64f6c0d 100644 --- a/src/common/slurm_protocol_socket_implementation.c +++ b/src/common/slurm_protocol_socket_implementation.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -88,7 +88,7 @@ * Maximum message size. Messages larger than this value (in bytes) * will not be received. */ -#define MAX_MSG_SIZE (128*1024*1024) +#define MAX_MSG_SIZE (1024*1024*1024) /**************************************************************** * MIDDLE LAYER MSG FUNCTIONS diff --git a/src/common/slurm_protocol_util.c b/src/common/slurm_protocol_util.c index 14a4bad457d7ece1c5b039181286d02d7a262b1e..33c880b3ad3a2f487accab7ae8bdb9ba6bf6b35f 100644 --- a/src/common/slurm_protocol_util.c +++ b/src/common/slurm_protocol_util.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -52,8 +52,10 @@ uint16_t _get_slurm_version(uint32_t rpc_version) { uint16_t version; - if (rpc_version >= 11) + if (rpc_version >= 12) version = SLURM_PROTOCOL_VERSION; + else if (rpc_version >= 11) + version = SLURM_2_5_PROTOCOL_VERSION; else if (rpc_version >= 10) version = SLURM_2_4_PROTOCOL_VERSION; else @@ -72,88 +74,45 @@ int check_header_version(header_t * header) { uint16_t check_version = SLURM_PROTOCOL_VERSION; - if (working_cluster_rec) + if (working_cluster_rec) { check_version = _get_slurm_version( working_cluster_rec->rpc_version); + } if (slurmdbd_conf) { if ((header->version != SLURM_PROTOCOL_VERSION) && - (header->version != SLURM_2_4_PROTOCOL_VERSION) && - (header->version != SLURM_2_3_PROTOCOL_VERSION)) + (header->version != SLURM_2_5_PROTOCOL_VERSION) && + (header->version != SLURM_2_4_PROTOCOL_VERSION)) { + debug("unsupported RPC version %hu msg type %u", + header->version, header->msg_type); slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR); + } } else if (header->version != check_version) { - /* Starting with 2.2 we will handle previous versions - * of SLURM for some calls */ - switch(header->msg_type) { - case MESSAGE_NODE_REGISTRATION_STATUS: - case REQUEST_ACCT_GATHER_UPDATE: - case REQUEST_BLOCK_INFO: - case REQUEST_BUILD_INFO: - case REQUEST_CANCEL_JOB_STEP: - case REQUEST_CHECKPOINT: - case REQUEST_CHECKPOINT_COMP: - case REQUEST_CHECKPOINT_TASK_COMP: - case REQUEST_COMPLETE_BATCH_SCRIPT: /* From slurmstepd */ - case REQUEST_COMPLETE_JOB_ALLOCATION: - case REQUEST_CREATE_PARTITION: - case REQUEST_CREATE_RESERVATION: - case REQUEST_DELETE_PARTITION: - case REQUEST_DELETE_RESERVATION: - case REQUEST_FRONT_END_INFO: - case REQUEST_JOB_ALLOCATION_INFO: - case REQUEST_JOB_ALLOCATION_INFO_LITE: - case REQUEST_JOB_END_TIME: - case REQUEST_JOB_INFO: - case REQUEST_JOB_INFO_SINGLE: - case REQUEST_JOB_NOTIFY: - case REQUEST_JOB_READY: - case REQUEST_JOB_REQUEUE: - case 
REQUEST_JOB_STEP_INFO: - case REQUEST_JOB_WILL_RUN: - case REQUEST_NODE_INFO: - case REQUEST_PARTITION_INFO: - case REQUEST_PING: - case REQUEST_PRIORITY_FACTORS: - case REQUEST_REBOOT_NODES: - case REQUEST_RECONFIGURE: - case REQUEST_RESERVATION_INFO: - case REQUEST_SET_DEBUG_FLAGS: - case REQUEST_SET_DEBUG_LEVEL: - case REQUEST_SET_SCHEDLOG_LEVEL: - case REQUEST_SHARE_INFO: - case REQUEST_SHUTDOWN: - case REQUEST_SHUTDOWN_IMMEDIATE: - case REQUEST_SPANK_ENVIRONMENT: - case REQUEST_STEP_COMPLETE: /* From slurmstepd */ - case REQUEST_STEP_LAYOUT: - case REQUEST_SUBMIT_BATCH_JOB: - case REQUEST_SUSPEND: - case REQUEST_TERMINATE_JOB: - case REQUEST_TERMINATE_TASKS: - case REQUEST_TOPO_INFO: - case REQUEST_TRIGGER_CLEAR: - case REQUEST_TRIGGER_GET: - case REQUEST_TRIGGER_PULL: - case REQUEST_TRIGGER_SET: - case REQUEST_UPDATE_BLOCK: - case REQUEST_UPDATE_FRONT_END: - case REQUEST_UPDATE_JOB: - case REQUEST_UPDATE_JOB_STEP: - case REQUEST_UPDATE_NODE: - case REQUEST_UPDATE_PARTITION: - case REQUEST_UPDATE_RESERVATION: - case RESPONSE_ACCT_GATHER_UPDATE: - case RESPONSE_SLURM_RC: - if ((header->version == SLURM_2_5_PROTOCOL_VERSION) || - (header->version == SLURM_2_4_PROTOCOL_VERSION) || - (header->version == SLURM_2_3_PROTOCOL_VERSION)) - break; - default: - debug("unsupported RPC %d", header->msg_type); + switch (header->msg_type) { + case REQUEST_JOB_STEP_CREATE: + case REQUEST_LAUNCH_TASKS: + case REQUEST_RUN_JOB_STEP: + case RESPONSE_JOB_STEP_CREATE: + case RESPONSE_LAUNCH_TASKS: + case RESPONSE_RUN_JOB_STEP: + /* Disable job step creation/launch between major + * releases. Other RPCs should all be supported. 
*/ + debug("unsupported RPC type %hu", header->msg_type); slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR); break; + default: + if ((header->version != SLURM_PROTOCOL_VERSION) && + (header->version != SLURM_2_5_PROTOCOL_VERSION) && + (header->version != SLURM_2_4_PROTOCOL_VERSION)) { + debug("Unsupported RPC version %hu msg type %u", + header->version, header->msg_type); + slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR); + } + break; + } } + return SLURM_PROTOCOL_SUCCESS; } @@ -166,7 +125,7 @@ int check_header_version(header_t * header) */ void init_header(header_t *header, slurm_msg_t *msg, uint16_t flags) { - memset(header, 0, sizeof(header)); + memset(header, 0, sizeof(header_t)); /* Since the slurmdbd could talk to a host of different versions of slurm this needs to be kept current when the protocol version changes. */ diff --git a/src/common/slurm_protocol_util.h b/src/common/slurm_protocol_util.h index b535cc132d3995a74ad3164d7d7d7f07807d7ed7..98965c72dc73bd6b97201487dee861b54d79794c 100644 --- a/src/common/slurm_protocol_util.h +++ b/src/common/slurm_protocol_util.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_resource_info.c b/src/common/slurm_resource_info.c index 031a54382df001049fa5507aee96c0aeadad260b..c7feeaf01ad72acf172f839fa247c53efbadfb7b 100644 --- a/src/common/slurm_resource_info.c +++ b/src/common/slurm_resource_info.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -54,7 +54,7 @@ #include "src/common/xmalloc.h" #include "src/common/xstring.h" -#if(0) +#if (0) #define DEBUG 1 #endif @@ -98,216 +98,6 @@ static bool _have_task_affinity(void) return rc; } -/* - * slurm_get_avail_procs - Get the number of "available" cpus on a node - * given this number given the number of cpus_per_task and - * maximum sockets, cores, threads. Note that the value of - * cpus is the lowest-level logical processor (LLLP). - * IN socket_cnt - Job requested socket count - * IN core_cnt - Job requested core count - * IN threads_cnt - Job requested thread count - * IN cpus_per_task - Job requested cpus per task - * IN ntaskspernode - number of tasks per node - * IN ntaskspersocket- number of tasks per socket - * IN ntaskspercore - number of tasks per core - * IN/OUT cpus - Available cpu count - * IN/OUT sockets - Available socket count - * IN/OUT cores - Available core count - * IN/OUT threads - Available thread count - * IN alloc_cores - Allocated cores (per socket) count to other jobs - * IN cr_type - Consumable Resource type - * IN job_id - job ID - * IN name - Node name - * - * Note: currently only used in the select/linear plugin. 
- */ -int slurm_get_avail_procs(const uint16_t socket_cnt, - const uint16_t core_cnt, - const uint16_t thread_cnt, - uint16_t cpus_per_task, - const uint16_t ntaskspernode, - const uint16_t ntaskspersocket, - const uint16_t ntaskspercore, - uint16_t *cpus, - uint16_t *sockets, - uint16_t *cores, - uint16_t *threads, - const uint16_t *alloc_cores, - const uint16_t cr_type, - uint32_t job_id, - char *name) -{ - uint16_t avail_cpus = 0, max_cpus = 0; - uint16_t allocated_cpus = 0, allocated_cores = 0, allocated_sockets = 0; - uint16_t max_avail_cpus = 0xffff; /* for alloc_* accounting */ - uint16_t min_sockets = 1, max_sockets = 0xffff; - uint16_t min_cores = 1, max_cores = 0xffff; - uint16_t max_threads = 0xffff; - int i; - - /* pick defaults for any unspecified items */ - if (socket_cnt != (uint16_t) NO_VAL) - min_sockets = max_sockets = socket_cnt; - if (core_cnt != (uint16_t) NO_VAL) - min_cores = max_cores = core_cnt; - if (thread_cnt != (uint16_t) NO_VAL) - max_threads = thread_cnt; - if (cpus_per_task <= 0) - cpus_per_task = 1; - if (*threads <= 0) - *threads = 1; - if (*cores <= 0) - *cores = 1; - if (*sockets <= 0) - *sockets = *cpus / *cores / *threads; - for (i = 0 ; alloc_cores && i < *sockets; i++) { - allocated_cores += alloc_cores[i]; - if (alloc_cores[i]) - allocated_sockets++; - } -#if(DEBUG) - info("get_avail_procs %u %s User_ sockets %u cores %u threads %u", - job_id, name, socket_cnt, core_cnt, thread_cnt); - info("get_avail_procs %u %s HW_ sockets %u cores %u threads %u", - job_id, name, *sockets, *cores, *threads); - info("get_avail_procs %u %s Ntask node %u sockets %u core %u", - job_id, name, ntaskspernode, ntaskspersocket, - ntaskspercore); - info("get_avail_procs %u %s cr_type %d cpus %u alloc_ c %u s %u", - job_id, name, cr_type, *cpus, allocated_cores, - allocated_sockets); - for (i = 0; alloc_cores && i < *sockets; i++) - info("get_avail_procs %u %s alloc_cores[%d] = %u", - job_id, name, i, alloc_cores[i]); -#endif - allocated_cpus = 
allocated_cores * (*threads); - - /* For the following CR types, nodes have no notion of socket, core, - and thread. Only one level of logical processors */ - if (cr_type & CR_CORE) { - if (*cpus >= allocated_cpus) - *cpus -= allocated_cpus; - else { - *cpus = 0; - error("cons_res: *cpus underflow"); - } - if (allocated_cores > 0) { - max_avail_cpus = 0; - int tmp_diff = 0; - for (i=0; i<*sockets; i++) { - tmp_diff = *cores - alloc_cores[i]; - if (min_cores <= tmp_diff) { - tmp_diff *= (*threads); - max_avail_cpus += tmp_diff; - } - } - } - - /*** honor socket/core/thread maximums ***/ - *sockets = MIN(*sockets, max_sockets); - *cores = MIN(*cores, max_cores); - *threads = MIN(*threads, max_threads); - - if (min_sockets > *sockets) { - *cpus = 0; - } else { - int max_cpus_socket = 0; - max_cpus = 0; - for (i=0; i<*sockets; i++) { - max_cpus_socket = 0; - if (min_cores <= *cores) { - int num_threads = *threads; - if (ntaskspercore > 0) { - num_threads = MIN(num_threads, - ntaskspercore); - } - max_cpus_socket = *cores * num_threads; - } - if (ntaskspersocket > 0) { - max_cpus_socket = MIN(max_cpus_socket, - ntaskspersocket); - } - max_cpus += max_cpus_socket; - } - max_cpus = MIN(max_cpus, max_avail_cpus); - } - - /*** honor any availability maximum ***/ - max_cpus = MIN(max_cpus, max_avail_cpus); - - if (ntaskspernode > 0) { - max_cpus = MIN(max_cpus, ntaskspernode); - } - } else if (cr_type & CR_SOCKET) { - if (*sockets >= allocated_sockets) - *sockets -= allocated_sockets; /* sockets count */ - else { - *sockets = 0; - error("cons_res: *sockets underflow"); - } - if (*cpus >= allocated_cpus) - *cpus -= allocated_cpus; - else { - *cpus = 0; - error("cons_res: *cpus underflow"); - } - - if (min_sockets > *sockets) - *cpus = 0; - - /*** honor socket/core/thread maximums ***/ - *sockets = MIN(*sockets, max_sockets); - *cores = MIN(*cores, max_cores); - *threads = MIN(*threads, max_threads); - - /*** compute an overall maximum cpu count honoring ntasks* ***/ - 
max_cpus = *threads; - if (ntaskspercore > 0) { - max_cpus = MIN(max_cpus, ntaskspercore); - } - max_cpus *= *cores; - if (ntaskspersocket > 0) { - max_cpus = MIN(max_cpus, ntaskspersocket); - } - max_cpus *= *sockets; - if (ntaskspernode > 0) { - max_cpus = MIN(max_cpus, ntaskspernode); - } - - /*** honor any availability maximum ***/ - max_cpus = MIN(max_cpus, max_avail_cpus); - } else { /* CR_CPU (default) */ - if ((cr_type & CR_CPU) || - (!(cr_type & CR_MEMORY))) { - if (*cpus >= allocated_cpus) - *cpus -= allocated_cpus; - else { - *cpus = 0; - error("cons_res: *cpus underflow"); - } - } - - /*** compute an overall maximum cpu count honoring ntasks* ***/ - max_cpus = *cpus; - if (ntaskspernode > 0) { - max_cpus = MIN(max_cpus, ntaskspernode); - } - } - - /*** factor cpus_per_task into max_cpus ***/ - max_cpus *= cpus_per_task; - /*** round down available based on cpus_per_task ***/ - avail_cpus = (*cpus / cpus_per_task) * cpus_per_task; - avail_cpus = MIN(avail_cpus, max_cpus); - -#if(DEBUG) - info("get_avail_procs %u %s return cpus %u sockets %u cores %u threads %u", - job_id, name, *cpus, *sockets, *cores, *threads); - info("get_avail_procs %d %s avail_cpus %u", job_id, name, avail_cpus); -#endif - return(avail_cpus); -} - /* * slurm_sprint_cpu_bind_type * @@ -515,7 +305,7 @@ int slurm_verify_cpu_bind(const char *arg, char **cpu_bind, (strncasecmp(tok, "mapcpu", 6) == 0)) { char *list; list = strsep(&tok, ":="); - list = strsep(&tok, ":="); + list = strsep(&tok, ":="); /* THIS IS NOT REDUNDANT */ _clear_then_set((int *)flags, bind_bits, CPU_BIND_MAP); xfree(*cpu_bind); if (list && *list) { @@ -530,7 +320,7 @@ int slurm_verify_cpu_bind(const char *arg, char **cpu_bind, (strncasecmp(tok, "maskcpu", 7) == 0)) { char *list; list = strsep(&tok, ":="); - list = strsep(&tok, ":="); + list = strsep(&tok, ":="); /* THIS IS NOT REDUNDANT */ _clear_then_set((int *)flags, bind_bits, CPU_BIND_MASK); xfree(*cpu_bind); if (list && *list) { @@ -549,7 +339,7 @@ int 
slurm_verify_cpu_bind(const char *arg, char **cpu_bind, (strncasecmp(tok, "mapldom", 7) == 0)) { char *list; list = strsep(&tok, ":="); - list = strsep(&tok, ":="); + list = strsep(&tok, ":="); /* THIS IS NOT REDUNDANT */ _clear_then_set((int *)flags, bind_bits, CPU_BIND_LDMAP); xfree(*cpu_bind); @@ -565,7 +355,7 @@ int slurm_verify_cpu_bind(const char *arg, char **cpu_bind, (strncasecmp(tok, "maskldom", 8) == 0)) { char *list; list = strsep(&tok, ":="); - list = strsep(&tok, ":="); + list = strsep(&tok, ":="); /* THIS IS NOT REDUNDANT */ _clear_then_set((int *)flags, bind_bits, CPU_BIND_LDMASK); xfree(*cpu_bind); @@ -712,7 +502,7 @@ int slurm_verify_mem_bind(const char *arg, char **mem_bind, (strncasecmp(tok, "mapmem", 6) == 0)) { char *list; list = strsep(&tok, ":="); - list = strsep(&tok, ":="); + list = strsep(&tok, ":="); /* THIS IS NOT REDUNDANT */ _clear_then_set((int *)flags, bind_bits, MEM_BIND_MAP); xfree(*mem_bind); if (list && *list) { @@ -726,7 +516,7 @@ int slurm_verify_mem_bind(const char *arg, char **mem_bind, (strncasecmp(tok, "maskmem", 7) == 0)) { char *list; list = strsep(&tok, ":="); - list = strsep(&tok, ":="); + list = strsep(&tok, ":="); /* THIS IS NOT REDUNDANT */ _clear_then_set((int *)flags, bind_bits, MEM_BIND_MASK); xfree(*mem_bind); if (list && *list) { diff --git a/src/common/slurm_resource_info.h b/src/common/slurm_resource_info.h index d043e5c540cc14d1fd1d492f2145806027486278..c979a4c88b78e70fcc635e145ab5cf86977d6030 100644 --- a/src/common/slurm_resource_info.h +++ b/src/common/slurm_resource_info.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -50,21 +50,6 @@ # endif /* HAVE_INTTYPES_H */ #endif -int slurm_get_avail_procs(const uint16_t socket_cnt, - const uint16_t core_cnt, - const uint16_t thread_cnt, - const uint16_t cpuspertask, - const uint16_t ntaskspernode, - const uint16_t ntaskspersocket, - const uint16_t ntaskspercore, - uint16_t *cpus, - uint16_t *sockets, - uint16_t *cores, - uint16_t *threads, - const uint16_t *alloc_cores, - const uint16_t cr_type, - uint32_t job_id, char *name); - void slurm_print_cpu_bind_help(void); void slurm_print_mem_bind_help(void); diff --git a/src/common/slurm_rlimits_info.c b/src/common/slurm_rlimits_info.c index e2b10911543f493fc3763c7126854e1dbc97775b..8276920e4954cf272aecb7f852594802075fd13c 100644 --- a/src/common/slurm_rlimits_info.c +++ b/src/common/slurm_rlimits_info.c @@ -6,7 +6,7 @@ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_rlimits_info.h b/src/common/slurm_rlimits_info.h index b102a0b4cc45812878d693ab730fa972ae078479..2b5c07a8b679fca928e323160b239261db6df67f 100644 --- a/src/common/slurm_rlimits_info.h +++ b/src/common/slurm_rlimits_info.h @@ -6,7 +6,7 @@ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_selecttype_info.c b/src/common/slurm_selecttype_info.c index 274c9e82204086994a21e35eed8faeeb762b351b..d0a53391756f9c897170a297c48f79f49758c6b1 100644 --- a/src/common/slurm_selecttype_info.c +++ b/src/common/slurm_selecttype_info.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -81,6 +81,9 @@ int parse_select_type_param(char *select_type_parameters, uint16_t *param) *param |= CR_CPU; *param |= CR_MEMORY; param_cnt++; + } else if (!strcasecmp(str_parameters, + "CR_ALLOCATE_FULL_SOCKET")) { + *param |= CR_ALLOCATE_FULL_SOCKET; } else if (!strcasecmp(str_parameters, "CR_ONE_TASK_PER_CORE")) { *param |= CR_ONE_TASK_PER_CORE; diff --git a/src/common/slurm_selecttype_info.h b/src/common/slurm_selecttype_info.h index 4cee33cd24b73914ae905cc7f2ebd4cc746de963..5b08341f6de1b9607289dcde8a13b36daf330f3d 100644 --- a/src/common/slurm_selecttype_info.h +++ b/src/common/slurm_selecttype_info.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_step_layout.c b/src/common/slurm_step_layout.c index f13bd1cdd256b48c9e4bd78208b674df50c25188..910373f439072007d3321dbf48daffe6b195caee 100644 --- a/src/common/slurm_step_layout.c +++ b/src/common/slurm_step_layout.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -354,7 +354,7 @@ int slurm_step_layout_host_id (slurm_step_layout_t *s, int taskid) return SLURM_ERROR; for (i=0; i < s->node_cnt; i++) for (j=0; j<s->tasks[i]; j++) - if(s->tids[i][j] == taskid) + if (s->tids[i][j] == taskid) return i; return SLURM_ERROR; @@ -442,10 +442,10 @@ static int _init_task_layout(slurm_step_layout_t *step_layout, (task_dist == SLURM_DIST_CYCLIC_CYCLIC) || (task_dist == SLURM_DIST_CYCLIC_BLOCK)) return _task_layout_cyclic(step_layout, cpus); - else if(task_dist == SLURM_DIST_ARBITRARY + else if (task_dist == SLURM_DIST_ARBITRARY && !(cluster_flags & CLUSTER_FLAG_FE)) return _task_layout_hostfile(step_layout, arbitrary_nodes); - else if(task_dist == SLURM_DIST_PLANE) + else if (task_dist == SLURM_DIST_PLANE) return _task_layout_plane(step_layout, cpus); else return _task_layout_block(step_layout, cpus); @@ -467,14 +467,14 @@ static int _task_layout_hostfile(slurm_step_layout_t *step_layout, debug2("job list is %s", step_layout->node_list); job_alloc_hosts = hostlist_create(step_layout->node_list); itr = hostlist_iterator_create(job_alloc_hosts); - if(!arbitrary_nodes) { + if (!arbitrary_nodes) { error("no hostlist given for arbitrary dist"); return SLURM_ERROR; } debug2("list is %s", arbitrary_nodes); step_alloc_hosts = hostlist_create(arbitrary_nodes); - if(hostlist_count(step_alloc_hosts) != step_layout->task_cnt) { + if (hostlist_count(step_alloc_hosts) != step_layout->task_cnt) { error("Asked for %u tasks have %d in the nodelist. 
" "Check your nodelist, or set the -n option to be %d", step_layout->task_cnt, @@ -486,16 +486,16 @@ static int _task_layout_hostfile(slurm_step_layout_t *step_layout, while((host = hostlist_next(itr))) { step_layout->tasks[i] = 0; while((host_task = hostlist_next(itr_task))) { - if(!strcmp(host, host_task)) { + if (!strcmp(host, host_task)) { step_layout->tasks[i]++; task_cnt++; } free(host_task); - if(task_cnt >= step_layout->task_cnt) + if (task_cnt >= step_layout->task_cnt) break; } debug3("%s got %u tasks", host, step_layout->tasks[i]); - if(step_layout->tasks[i] == 0) + if (step_layout->tasks[i] == 0) goto reset_hosts; step_layout->tids[i] = xmalloc(sizeof(uint32_t) * step_layout->tasks[i]); @@ -503,27 +503,27 @@ static int _task_layout_hostfile(slurm_step_layout_t *step_layout, j = 0; hostlist_iterator_reset(itr_task); while((host_task = hostlist_next(itr_task))) { - if(!strcmp(host, host_task)) { + if (!strcmp(host, host_task)) { step_layout->tids[i][j] = taskid; j++; } taskid++; free(host_task); - if(j >= step_layout->tasks[i]) + if (j >= step_layout->tasks[i]) break; } i++; reset_hosts: hostlist_iterator_reset(itr_task); free(host); - if(i > step_layout->task_cnt) + if (i > step_layout->task_cnt) break; } hostlist_iterator_destroy(itr); hostlist_iterator_destroy(itr_task); hostlist_destroy(job_alloc_hosts); hostlist_destroy(step_alloc_hosts); - if(task_cnt != step_layout->task_cnt) { + if (task_cnt != step_layout->task_cnt) { error("Asked for %u tasks but placed %d. 
Check your nodelist", step_layout->task_cnt, task_cnt); return SLURM_ERROR; @@ -688,7 +688,7 @@ static int _task_layout_plane(slurm_step_layout_t *step_layout, return SLURM_ERROR; } -#if(0) +#if (0) /* debugging only */ for (i=0; i < step_layout->node_cnt; i++) { info("tasks[%d]: %u", i, step_layout->tasks[i]); diff --git a/src/common/slurm_step_layout.h b/src/common/slurm_step_layout.h index 5ad537663feb6d50a1b003dde05f0bd0a0c43d2a..1866632a725b6022b1d2b639cc3dedc50f14aaff 100644 --- a/src/common/slurm_step_layout.h +++ b/src/common/slurm_step_layout.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_strcasestr.c b/src/common/slurm_strcasestr.c index a44679a11bf9d252d76e5a43e1fa8569dfd5d838..3dcc8a41030552f382d33d02f39b44b517e2393a 100644 --- a/src/common/slurm_strcasestr.c +++ b/src/common/slurm_strcasestr.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_strcasestr.h b/src/common/slurm_strcasestr.h index 6a8e5009822491068112cb2f6c3d527cf76491a3..664b08e1a65f838607423472fe656d7b7f17faf2 100644 --- a/src/common/slurm_strcasestr.h +++ b/src/common/slurm_strcasestr.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_topology.c b/src/common/slurm_topology.c index 264ee2789e12419b0e82d8501a7b5026f0bc4a0e..69d01c80755c1bca0db3fbf6c17a3c6e1aaf9dc3 100644 --- a/src/common/slurm_topology.c +++ b/src/common/slurm_topology.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurm_topology.h b/src/common/slurm_topology.h index ecde1e772fea6ac2f771969e59ffc013f18015ff..c33698ef5f9d86cba3490fcffdf7ea33bcd1da78 100644 --- a/src/common/slurm_topology.h +++ b/src/common/slurm_topology.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -47,6 +47,7 @@ * defined here but is really tree plugin related \*****************************************************************************/ struct switch_record { + uint32_t consumed_energy; /* consumed energy, in joules */ int level; /* level in hierarchy, leaf=0 */ uint32_t link_speed; /* link speed, arbitrary units */ char *name; /* switch name */ @@ -54,6 +55,7 @@ struct switch_record { * this switch */ char *nodes; /* name if direct descendent nodes */ char *switches; /* name if direct descendent switches */ + uint32_t temp; /* temperature, in celsius */ }; extern struct switch_record *switch_record_table; /* ptr to switch records */ diff --git a/src/common/slurm_xlator.h b/src/common/slurm_xlator.h index 593650b4a7424204869d4ca7dbcf47e81e061209..8832724f056ce1ceda95f24896d99fba4b9cf26d 100644 --- a/src/common/slurm_xlator.h +++ b/src/common/slurm_xlator.h @@ -34,7 +34,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -272,8 +272,9 @@ #define env_array_overwrite_fmt slurm_env_array_overwrite_fmt /* read_config.[ch] functions */ -#define destroy_config_key_pair slurm_destroy_config_key_pair -#define sort_key_pairs slurm_sort_key_pairs +#define destroy_config_key_pair slurm_destroy_config_key_pair +#define get_extra_conf_path slurm_get_extra_conf_path +#define sort_key_pairs slurm_sort_key_pairs /* slurm_auth.[ch] functions * None exported today. 
@@ -303,6 +304,7 @@ #define _xstrcatchar slurm_xstrcatchar #define _xslurm_strerrorcat slurm_xslurm_strerrorcat #define _xstrftimecat slurm_xstrftimecat +#define _xiso8601timecat slurm_xiso8601timecat #define _xrfc5424timecat slurm_xrfc5424timecat #define _xstrfmtcat slurm_xstrfmtcat #define _xmemcat slurm_xmemcat diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index 70d77b7037a38cec178b9ca6733524558dbb334b..c4de318cc101ac68c2f8ab5f917130b97b76c569 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -54,13 +54,13 @@ slurmdb_cluster_rec_t *working_cluster_rec = NULL; static void _free_assoc_rec_members(slurmdb_association_rec_t *assoc) { if (assoc) { - if(assoc->accounting_list) + if (assoc->accounting_list) list_destroy(assoc->accounting_list); xfree(assoc->acct); xfree(assoc->cluster); xfree(assoc->parent_acct); xfree(assoc->partition); - if(assoc->qos_list) + if (assoc->qos_list) list_destroy(assoc->qos_list); xfree(assoc->user); @@ -71,7 +71,7 @@ static void _free_assoc_rec_members(slurmdb_association_rec_t *assoc) static void _free_cluster_rec_members(slurmdb_cluster_rec_t *cluster) { if (cluster) { - if(cluster->accounting_list) + if (cluster->accounting_list) list_destroy(cluster->accounting_list); xfree(cluster->control_host); xfree(cluster->dim_size); @@ -87,7 +87,7 @@ static void _free_qos_rec_members(slurmdb_qos_rec_t *qos) xfree(qos->description); xfree(qos->name); FREE_NULL_BITMAP(qos->preempt_bitstr); - if(qos->preempt_list) + if (qos->preempt_list) list_destroy(qos->preempt_list); destroy_assoc_mgr_qos_usage(qos->usage); } @@ -96,7 +96,7 @@ static void 
_free_qos_rec_members(slurmdb_qos_rec_t *qos) static void _free_wckey_rec_members(slurmdb_wckey_rec_t *wckey) { if (wckey) { - if(wckey->accounting_list) + if (wckey->accounting_list) list_destroy(wckey->accounting_list); xfree(wckey->cluster); xfree(wckey->name); @@ -115,7 +115,7 @@ static void _free_cluster_cond_members(slurmdb_cluster_cond_t *cluster_cond) /* * Comparator used for sorting immediate childern of acct_hierarchical_recs * - * returns: -1: assoc_a > assoc_b 0: assoc_a == assoc_b 1: assoc_a < assoc_b + * returns: -1 assoc_a < assoc_b 0: assoc_a == assoc_b 1: assoc_a > assoc_b * */ @@ -131,9 +131,9 @@ static int _sort_childern_list(slurmdb_hierarchical_rec_t *assoc_a, /* check to see if this is a user association or an account. * We want the accounts at the bottom */ - if(assoc_a->assoc->user && !assoc_b->assoc->user) + if (assoc_a->assoc->user && !assoc_b->assoc->user) return -1; - else if(!assoc_a->assoc->user && assoc_b->assoc->user) + else if (!assoc_a->assoc->user && assoc_b->assoc->user) return 1; /* Sort by alpha */ @@ -151,14 +151,16 @@ static int _sort_childern_list(slurmdb_hierarchical_rec_t *assoc_a, /* * Comparator used for sorting immediate childern of acct_hierarchical_recs * - * returns: -1: assoc_a > assoc_b 0: assoc_a == assoc_b 1: assoc_a < assoc_b + * returns: -1 assoc_a < assoc_b 0: assoc_a == assoc_b 1: assoc_a > assoc_b * */ static int _sort_assoc_by_lft_dec(slurmdb_association_rec_t *assoc_a, slurmdb_association_rec_t *assoc_b) { - if(assoc_a->lft > assoc_b->lft) + if (assoc_a->lft == assoc_b->lft) + return 0; + if (assoc_a->lft > assoc_b->lft) return 1; return -1; } @@ -169,14 +171,14 @@ static int _sort_slurmdb_hierarchical_rec_list( slurmdb_hierarchical_rec_t *slurmdb_hierarchical_rec = NULL; ListIterator itr; - if(!list_count(slurmdb_hierarchical_rec_list)) + if (!list_count(slurmdb_hierarchical_rec_list)) return SLURM_SUCCESS; list_sort(slurmdb_hierarchical_rec_list, (ListCmpF)_sort_childern_list); itr = 
list_iterator_create(slurmdb_hierarchical_rec_list); while((slurmdb_hierarchical_rec = list_next(itr))) { - if(list_count(slurmdb_hierarchical_rec->childern)) + if (list_count(slurmdb_hierarchical_rec->childern)) _sort_slurmdb_hierarchical_rec_list( slurmdb_hierarchical_rec->childern); } @@ -191,17 +193,17 @@ static int _append_hierarchical_childern_ret_list( slurmdb_hierarchical_rec_t *slurmdb_hierarchical_rec = NULL; ListIterator itr; - if(!ret_list) + if (!ret_list) return SLURM_ERROR; - if(!list_count(slurmdb_hierarchical_rec_list)) + if (!list_count(slurmdb_hierarchical_rec_list)) return SLURM_SUCCESS; itr = list_iterator_create(slurmdb_hierarchical_rec_list); while((slurmdb_hierarchical_rec = list_next(itr))) { list_append(ret_list, slurmdb_hierarchical_rec->assoc); - if(list_count(slurmdb_hierarchical_rec->childern)) + if (list_count(slurmdb_hierarchical_rec->childern)) _append_hierarchical_childern_ret_list( ret_list, slurmdb_hierarchical_rec->childern); } @@ -216,12 +218,12 @@ static char *_get_qos_list_str(List qos_list) ListIterator itr = NULL; slurmdb_qos_rec_t *qos = NULL; - if(!qos_list) + if (!qos_list) return NULL; itr = list_iterator_create(qos_list); while((qos = list_next(itr))) { - if(qos_char) + if (qos_char) xstrfmtcat(qos_char, ",%s", qos->name); else xstrcat(qos_char, qos->name); @@ -361,16 +363,16 @@ extern void slurmdb_destroy_user_rec(void *object) { slurmdb_user_rec_t *slurmdb_user = (slurmdb_user_rec_t *)object; - if(slurmdb_user) { - if(slurmdb_user->assoc_list) + if (slurmdb_user) { + if (slurmdb_user->assoc_list) list_destroy(slurmdb_user->assoc_list); - if(slurmdb_user->coord_accts) + if (slurmdb_user->coord_accts) list_destroy(slurmdb_user->coord_accts); xfree(slurmdb_user->default_acct); xfree(slurmdb_user->default_wckey); xfree(slurmdb_user->name); xfree(slurmdb_user->old_name); - if(slurmdb_user->wckey_list) + if (slurmdb_user->wckey_list) list_destroy(slurmdb_user->wckey_list); xfree(slurmdb_user); } @@ -381,10 +383,10 @@ 
extern void slurmdb_destroy_account_rec(void *object) slurmdb_account_rec_t *slurmdb_account = (slurmdb_account_rec_t *)object; - if(slurmdb_account) { - if(slurmdb_account->assoc_list) + if (slurmdb_account) { + if (slurmdb_account->assoc_list) list_destroy(slurmdb_account->assoc_list); - if(slurmdb_account->coordinators) + if (slurmdb_account->coordinators) list_destroy(slurmdb_account->coordinators); xfree(slurmdb_account->description); xfree(slurmdb_account->name); @@ -398,7 +400,7 @@ extern void slurmdb_destroy_coord_rec(void *object) slurmdb_coord_rec_t *slurmdb_coord = (slurmdb_coord_rec_t *)object; - if(slurmdb_coord) { + if (slurmdb_coord) { xfree(slurmdb_coord->name); xfree(slurmdb_coord); } @@ -409,7 +411,7 @@ extern void slurmdb_destroy_cluster_accounting_rec(void *object) slurmdb_cluster_accounting_rec_t *clusteracct_rec = (slurmdb_cluster_accounting_rec_t *)object; - if(clusteracct_rec) { + if (clusteracct_rec) { xfree(clusteracct_rec); } } @@ -419,7 +421,7 @@ extern void slurmdb_destroy_cluster_rec(void *object) slurmdb_cluster_rec_t *slurmdb_cluster = (slurmdb_cluster_rec_t *)object; - if(slurmdb_cluster) { + if (slurmdb_cluster) { _free_cluster_rec_members(slurmdb_cluster); xfree(slurmdb_cluster); } @@ -430,7 +432,7 @@ extern void slurmdb_destroy_accounting_rec(void *object) slurmdb_accounting_rec_t *slurmdb_accounting = (slurmdb_accounting_rec_t *)object; - if(slurmdb_accounting) { + if (slurmdb_accounting) { xfree(slurmdb_accounting); } } @@ -440,7 +442,7 @@ extern void slurmdb_destroy_association_rec(void *object) slurmdb_association_rec_t *slurmdb_association = (slurmdb_association_rec_t *)object; - if(slurmdb_association) { + if (slurmdb_association) { _free_assoc_rec_members(slurmdb_association); xfree(slurmdb_association); } @@ -451,7 +453,7 @@ extern void slurmdb_destroy_event_rec(void *object) slurmdb_event_rec_t *slurmdb_event = (slurmdb_event_rec_t *)object; - if(slurmdb_event) { + if (slurmdb_event) { xfree(slurmdb_event->cluster); 
xfree(slurmdb_event->cluster_nodes); xfree(slurmdb_event->node_name); @@ -472,7 +474,7 @@ extern void slurmdb_destroy_job_rec(void *object) xfree(job->jobname); xfree(job->partition); xfree(job->nodes); - if(job->steps) { + if (job->steps) { list_destroy(job->steps); job->steps = NULL; } @@ -485,7 +487,7 @@ extern void slurmdb_destroy_job_rec(void *object) extern void slurmdb_destroy_qos_rec(void *object) { slurmdb_qos_rec_t *slurmdb_qos = (slurmdb_qos_rec_t *)object; - if(slurmdb_qos) { + if (slurmdb_qos) { _free_qos_rec_members(slurmdb_qos); xfree(slurmdb_qos); } @@ -495,7 +497,7 @@ extern void slurmdb_destroy_reservation_rec(void *object) { slurmdb_reservation_rec_t *slurmdb_resv = (slurmdb_reservation_rec_t *)object; - if(slurmdb_resv) { + if (slurmdb_resv) { xfree(slurmdb_resv->assocs); xfree(slurmdb_resv->cluster); xfree(slurmdb_resv->name); @@ -519,7 +521,7 @@ extern void slurmdb_destroy_step_rec(void *object) extern void slurmdb_destroy_txn_rec(void *object) { slurmdb_txn_rec_t *slurmdb_txn = (slurmdb_txn_rec_t *)object; - if(slurmdb_txn) { + if (slurmdb_txn) { xfree(slurmdb_txn->accts); xfree(slurmdb_txn->actor_name); xfree(slurmdb_txn->clusters); @@ -534,7 +536,7 @@ extern void slurmdb_destroy_wckey_rec(void *object) { slurmdb_wckey_rec_t *wckey = (slurmdb_wckey_rec_t *)object; - if(wckey) { + if (wckey) { _free_wckey_rec_members(wckey); xfree(wckey); } @@ -544,7 +546,7 @@ extern void slurmdb_destroy_archive_rec(void *object) { slurmdb_archive_rec_t *arch_rec = (slurmdb_archive_rec_t *)object; - if(arch_rec) { + if (arch_rec) { xfree(arch_rec->archive_file); xfree(arch_rec->insert); xfree(arch_rec); @@ -555,7 +557,7 @@ extern void slurmdb_destroy_report_assoc_rec(void *object) { slurmdb_report_assoc_rec_t *slurmdb_report_assoc = (slurmdb_report_assoc_rec_t *)object; - if(slurmdb_report_assoc) { + if (slurmdb_report_assoc) { xfree(slurmdb_report_assoc->acct); xfree(slurmdb_report_assoc->cluster); xfree(slurmdb_report_assoc->parent_acct); @@ -568,11 +570,11 
@@ extern void slurmdb_destroy_report_user_rec(void *object) { slurmdb_report_user_rec_t *slurmdb_report_user = (slurmdb_report_user_rec_t *)object; - if(slurmdb_report_user) { + if (slurmdb_report_user) { xfree(slurmdb_report_user->acct); - if(slurmdb_report_user->acct_list) + if (slurmdb_report_user->acct_list) list_destroy(slurmdb_report_user->acct_list); - if(slurmdb_report_user->assoc_list) + if (slurmdb_report_user->assoc_list) list_destroy(slurmdb_report_user->assoc_list); xfree(slurmdb_report_user->name); xfree(slurmdb_report_user); @@ -583,11 +585,11 @@ extern void slurmdb_destroy_report_cluster_rec(void *object) { slurmdb_report_cluster_rec_t *slurmdb_report_cluster = (slurmdb_report_cluster_rec_t *)object; - if(slurmdb_report_cluster) { - if(slurmdb_report_cluster->assoc_list) + if (slurmdb_report_cluster) { + if (slurmdb_report_cluster->assoc_list) list_destroy(slurmdb_report_cluster->assoc_list); xfree(slurmdb_report_cluster->name); - if(slurmdb_report_cluster->user_list) + if (slurmdb_report_cluster->user_list) list_destroy(slurmdb_report_cluster->user_list); xfree(slurmdb_report_cluster); } @@ -597,11 +599,11 @@ extern void slurmdb_destroy_user_cond(void *object) { slurmdb_user_cond_t *slurmdb_user = (slurmdb_user_cond_t *)object; - if(slurmdb_user) { + if (slurmdb_user) { slurmdb_destroy_association_cond(slurmdb_user->assoc_cond); - if(slurmdb_user->def_acct_list) + if (slurmdb_user->def_acct_list) list_destroy(slurmdb_user->def_acct_list); - if(slurmdb_user->def_wckey_list) + if (slurmdb_user->def_wckey_list) list_destroy(slurmdb_user->def_wckey_list); xfree(slurmdb_user); } @@ -612,11 +614,11 @@ extern void slurmdb_destroy_account_cond(void *object) slurmdb_account_cond_t *slurmdb_account = (slurmdb_account_cond_t *)object; - if(slurmdb_account) { + if (slurmdb_account) { slurmdb_destroy_association_cond(slurmdb_account->assoc_cond); - if(slurmdb_account->description_list) + if (slurmdb_account->description_list) 
list_destroy(slurmdb_account->description_list); - if(slurmdb_account->organization_list) + if (slurmdb_account->organization_list) list_destroy(slurmdb_account->organization_list); xfree(slurmdb_account); } @@ -627,7 +629,7 @@ extern void slurmdb_destroy_cluster_cond(void *object) slurmdb_cluster_cond_t *slurmdb_cluster = (slurmdb_cluster_cond_t *)object; - if(slurmdb_cluster) { + if (slurmdb_cluster) { _free_cluster_cond_members(slurmdb_cluster); xfree(slurmdb_cluster); } @@ -638,63 +640,63 @@ extern void slurmdb_destroy_association_cond(void *object) slurmdb_association_cond_t *slurmdb_association = (slurmdb_association_cond_t *)object; - if(slurmdb_association) { - if(slurmdb_association->acct_list) + if (slurmdb_association) { + if (slurmdb_association->acct_list) list_destroy(slurmdb_association->acct_list); - if(slurmdb_association->cluster_list) + if (slurmdb_association->cluster_list) list_destroy(slurmdb_association->cluster_list); - if(slurmdb_association->def_qos_id_list) + if (slurmdb_association->def_qos_id_list) list_destroy(slurmdb_association->def_qos_id_list); - if(slurmdb_association->fairshare_list) + if (slurmdb_association->fairshare_list) list_destroy(slurmdb_association->fairshare_list); - if(slurmdb_association->grp_cpu_mins_list) + if (slurmdb_association->grp_cpu_mins_list) list_destroy(slurmdb_association->grp_cpu_mins_list); - if(slurmdb_association->grp_cpu_run_mins_list) + if (slurmdb_association->grp_cpu_run_mins_list) list_destroy(slurmdb_association-> grp_cpu_run_mins_list); - if(slurmdb_association->grp_cpus_list) + if (slurmdb_association->grp_cpus_list) list_destroy(slurmdb_association->grp_cpus_list); - if(slurmdb_association->grp_jobs_list) + if (slurmdb_association->grp_jobs_list) list_destroy(slurmdb_association->grp_jobs_list); - if(slurmdb_association->grp_mem_list) + if (slurmdb_association->grp_mem_list) list_destroy(slurmdb_association->grp_mem_list); - if(slurmdb_association->grp_nodes_list) + if 
(slurmdb_association->grp_nodes_list) list_destroy(slurmdb_association->grp_nodes_list); - if(slurmdb_association->grp_submit_jobs_list) + if (slurmdb_association->grp_submit_jobs_list) list_destroy(slurmdb_association->grp_submit_jobs_list); - if(slurmdb_association->grp_wall_list) + if (slurmdb_association->grp_wall_list) list_destroy(slurmdb_association->grp_wall_list); - if(slurmdb_association->id_list) + if (slurmdb_association->id_list) list_destroy(slurmdb_association->id_list); - if(slurmdb_association->max_cpu_mins_pj_list) + if (slurmdb_association->max_cpu_mins_pj_list) list_destroy(slurmdb_association->max_cpu_mins_pj_list); - if(slurmdb_association->max_cpu_run_mins_list) + if (slurmdb_association->max_cpu_run_mins_list) list_destroy(slurmdb_association-> max_cpu_run_mins_list); - if(slurmdb_association->max_cpus_pj_list) + if (slurmdb_association->max_cpus_pj_list) list_destroy(slurmdb_association->max_cpus_pj_list); - if(slurmdb_association->max_jobs_list) + if (slurmdb_association->max_jobs_list) list_destroy(slurmdb_association->max_jobs_list); - if(slurmdb_association->max_nodes_pj_list) + if (slurmdb_association->max_nodes_pj_list) list_destroy(slurmdb_association->max_nodes_pj_list); - if(slurmdb_association->max_submit_jobs_list) + if (slurmdb_association->max_submit_jobs_list) list_destroy(slurmdb_association->max_submit_jobs_list); - if(slurmdb_association->max_wall_pj_list) + if (slurmdb_association->max_wall_pj_list) list_destroy(slurmdb_association->max_wall_pj_list); - if(slurmdb_association->partition_list) + if (slurmdb_association->partition_list) list_destroy(slurmdb_association->partition_list); - if(slurmdb_association->parent_acct_list) + if (slurmdb_association->parent_acct_list) list_destroy(slurmdb_association->parent_acct_list); - if(slurmdb_association->qos_list) + if (slurmdb_association->qos_list) list_destroy(slurmdb_association->qos_list); - if(slurmdb_association->user_list) + if (slurmdb_association->user_list) 
list_destroy(slurmdb_association->user_list); xfree(slurmdb_association); } @@ -705,16 +707,16 @@ extern void slurmdb_destroy_event_cond(void *object) slurmdb_event_cond_t *slurmdb_event = (slurmdb_event_cond_t *)object; - if(slurmdb_event) { - if(slurmdb_event->cluster_list) + if (slurmdb_event) { + if (slurmdb_event->cluster_list) list_destroy(slurmdb_event->cluster_list); - if(slurmdb_event->node_list) + if (slurmdb_event->node_list) list_destroy(slurmdb_event->node_list); - if(slurmdb_event->reason_list) + if (slurmdb_event->reason_list) list_destroy(slurmdb_event->reason_list); - if(slurmdb_event->reason_uid_list) + if (slurmdb_event->reason_uid_list) list_destroy(slurmdb_event->reason_uid_list); - if(slurmdb_event->state_list) + if (slurmdb_event->state_list) list_destroy(slurmdb_event->state_list); xfree(slurmdb_event); } @@ -725,33 +727,33 @@ extern void slurmdb_destroy_job_cond(void *object) slurmdb_job_cond_t *job_cond = (slurmdb_job_cond_t *)object; - if(job_cond) { - if(job_cond->acct_list) + if (job_cond) { + if (job_cond->acct_list) list_destroy(job_cond->acct_list); - if(job_cond->associd_list) + if (job_cond->associd_list) list_destroy(job_cond->associd_list); - if(job_cond->cluster_list) + if (job_cond->cluster_list) list_destroy(job_cond->cluster_list); - if(job_cond->groupid_list) + if (job_cond->groupid_list) list_destroy(job_cond->groupid_list); - if(job_cond->jobname_list) + if (job_cond->jobname_list) list_destroy(job_cond->jobname_list); - if(job_cond->partition_list) + if (job_cond->partition_list) list_destroy(job_cond->partition_list); - if(job_cond->qos_list) + if (job_cond->qos_list) list_destroy(job_cond->qos_list); - if(job_cond->resv_list) + if (job_cond->resv_list) list_destroy(job_cond->resv_list); - if(job_cond->resvid_list) + if (job_cond->resvid_list) list_destroy(job_cond->resvid_list); - if(job_cond->step_list) + if (job_cond->step_list) list_destroy(job_cond->step_list); - if(job_cond->state_list) + if (job_cond->state_list) 
list_destroy(job_cond->state_list); xfree(job_cond->used_nodes); - if(job_cond->userid_list) + if (job_cond->userid_list) list_destroy(job_cond->userid_list); - if(job_cond->wckey_list) + if (job_cond->wckey_list) list_destroy(job_cond->wckey_list); xfree(job_cond); } @@ -762,7 +764,7 @@ extern void slurmdb_destroy_job_modify_cond(void *object) slurmdb_job_modify_cond_t *job_cond = (slurmdb_job_modify_cond_t *)object; - if(job_cond) { + if (job_cond) { xfree(job_cond->cluster); xfree(job_cond); } @@ -771,10 +773,10 @@ extern void slurmdb_destroy_job_modify_cond(void *object) extern void slurmdb_destroy_qos_cond(void *object) { slurmdb_qos_cond_t *slurmdb_qos = (slurmdb_qos_cond_t *)object; - if(slurmdb_qos) { - if(slurmdb_qos->id_list) + if (slurmdb_qos) { + if (slurmdb_qos->id_list) list_destroy(slurmdb_qos->id_list); - if(slurmdb_qos->name_list) + if (slurmdb_qos->name_list) list_destroy(slurmdb_qos->name_list); xfree(slurmdb_qos); } @@ -784,12 +786,12 @@ extern void slurmdb_destroy_reservation_cond(void *object) { slurmdb_reservation_cond_t *slurmdb_resv = (slurmdb_reservation_cond_t *)object; - if(slurmdb_resv) { - if(slurmdb_resv->cluster_list) + if (slurmdb_resv) { + if (slurmdb_resv->cluster_list) list_destroy(slurmdb_resv->cluster_list); - if(slurmdb_resv->id_list) + if (slurmdb_resv->id_list) list_destroy(slurmdb_resv->id_list); - if(slurmdb_resv->name_list) + if (slurmdb_resv->name_list) list_destroy(slurmdb_resv->name_list); xfree(slurmdb_resv->nodes); xfree(slurmdb_resv); @@ -799,22 +801,22 @@ extern void slurmdb_destroy_reservation_cond(void *object) extern void slurmdb_destroy_txn_cond(void *object) { slurmdb_txn_cond_t *slurmdb_txn = (slurmdb_txn_cond_t *)object; - if(slurmdb_txn) { - if(slurmdb_txn->acct_list) + if (slurmdb_txn) { + if (slurmdb_txn->acct_list) list_destroy(slurmdb_txn->acct_list); - if(slurmdb_txn->action_list) + if (slurmdb_txn->action_list) list_destroy(slurmdb_txn->action_list); - if(slurmdb_txn->actor_list) + if 
(slurmdb_txn->actor_list) list_destroy(slurmdb_txn->actor_list); - if(slurmdb_txn->cluster_list) + if (slurmdb_txn->cluster_list) list_destroy(slurmdb_txn->cluster_list); - if(slurmdb_txn->id_list) + if (slurmdb_txn->id_list) list_destroy(slurmdb_txn->id_list); - if(slurmdb_txn->info_list) + if (slurmdb_txn->info_list) list_destroy(slurmdb_txn->info_list); - if(slurmdb_txn->name_list) + if (slurmdb_txn->name_list) list_destroy(slurmdb_txn->name_list); - if(slurmdb_txn->user_list) + if (slurmdb_txn->user_list) list_destroy(slurmdb_txn->user_list); xfree(slurmdb_txn); } @@ -824,14 +826,14 @@ extern void slurmdb_destroy_wckey_cond(void *object) { slurmdb_wckey_cond_t *wckey = (slurmdb_wckey_cond_t *)object; - if(wckey) { - if(wckey->cluster_list) + if (wckey) { + if (wckey->cluster_list) list_destroy(wckey->cluster_list); - if(wckey->id_list) + if (wckey->id_list) list_destroy(wckey->id_list); - if(wckey->name_list) + if (wckey->name_list) list_destroy(wckey->name_list); - if(wckey->user_list) + if (wckey->user_list) list_destroy(wckey->user_list); xfree(wckey); } @@ -841,7 +843,7 @@ extern void slurmdb_destroy_archive_cond(void *object) { slurmdb_archive_cond_t *arch_cond = (slurmdb_archive_cond_t *)object; - if(arch_cond) { + if (arch_cond) { xfree(arch_cond->archive_dir); xfree(arch_cond->archive_script); slurmdb_destroy_job_cond(arch_cond->job_cond); @@ -855,8 +857,8 @@ extern void slurmdb_destroy_update_object(void *object) slurmdb_update_object_t *slurmdb_update = (slurmdb_update_object_t *) object; - if(slurmdb_update) { - if(slurmdb_update->objects) + if (slurmdb_update) { + if (slurmdb_update->objects) list_destroy(slurmdb_update->objects); xfree(slurmdb_update); @@ -868,7 +870,7 @@ extern void slurmdb_destroy_used_limits(void *object) slurmdb_used_limits_t *slurmdb_used_limits = (slurmdb_used_limits_t *)object; - if(slurmdb_used_limits) { + if (slurmdb_used_limits) { xfree(slurmdb_used_limits); } } @@ -883,7 +885,7 @@ extern void 
slurmdb_destroy_print_tree(void *object) slurmdb_print_tree_t *slurmdb_print_tree = (slurmdb_print_tree_t *)object; - if(slurmdb_print_tree) { + if (slurmdb_print_tree) { xfree(slurmdb_print_tree->name); xfree(slurmdb_print_tree->print_name); xfree(slurmdb_print_tree->spaces); @@ -898,8 +900,8 @@ extern void slurmdb_destroy_hierarchical_rec(void *object) */ slurmdb_hierarchical_rec_t *slurmdb_hierarchical_rec = (slurmdb_hierarchical_rec_t *)object; - if(slurmdb_hierarchical_rec) { - if(slurmdb_hierarchical_rec->childern) { + if (slurmdb_hierarchical_rec) { + if (slurmdb_hierarchical_rec->childern) { list_destroy(slurmdb_hierarchical_rec->childern); } xfree(slurmdb_hierarchical_rec); @@ -918,8 +920,8 @@ extern void slurmdb_destroy_report_job_grouping(void *object) { slurmdb_report_job_grouping_t *job_grouping = (slurmdb_report_job_grouping_t *)object; - if(job_grouping) { - if(job_grouping->jobs) + if (job_grouping) { + if (job_grouping->jobs) list_destroy(job_grouping->jobs); xfree(job_grouping); } @@ -929,9 +931,9 @@ extern void slurmdb_destroy_report_acct_grouping(void *object) { slurmdb_report_acct_grouping_t *acct_grouping = (slurmdb_report_acct_grouping_t *)object; - if(acct_grouping) { + if (acct_grouping) { xfree(acct_grouping->acct); - if(acct_grouping->groups) + if (acct_grouping->groups) list_destroy(acct_grouping->groups); xfree(acct_grouping); } @@ -941,9 +943,9 @@ extern void slurmdb_destroy_report_cluster_grouping(void *object) { slurmdb_report_cluster_grouping_t *cluster_grouping = (slurmdb_report_cluster_grouping_t *)object; - if(cluster_grouping) { + if (cluster_grouping) { xfree(cluster_grouping->cluster); - if(cluster_grouping->acct_list) + if (cluster_grouping->acct_list) list_destroy(cluster_grouping->acct_list); xfree(cluster_grouping); } @@ -986,8 +988,6 @@ extern List slurmdb_get_info_cluster(char *cluster_names) } } else { itr2 = list_iterator_create(cluster_cond.cluster_list); - if (itr2 == NULL) - fatal("list_iterator_create: malloc 
failure"); while ((cluster_name = list_next(itr2))) { while ((cluster_rec = list_next(itr))) { if (!strcmp(cluster_name, cluster_rec->name)) @@ -1129,7 +1129,7 @@ extern void slurmdb_init_wckey_rec(slurmdb_wckey_rec_t *wckey, bool free_it) extern void slurmdb_init_cluster_cond(slurmdb_cluster_cond_t *cluster, bool free_it) { - if(!cluster) + if (!cluster) return; if (free_it) @@ -1143,21 +1143,21 @@ extern char *slurmdb_qos_str(List qos_list, uint32_t level) ListIterator itr = NULL; slurmdb_qos_rec_t *qos = NULL; - if(!qos_list) { + if (!qos_list) { error("We need a qos list to translate"); return NULL; - } else if(!level) { + } else if (!level) { debug2("no level"); return ""; } itr = list_iterator_create(qos_list); while((qos = list_next(itr))) { - if(level == qos->id) + if (level == qos->id) break; } list_iterator_destroy(itr); - if(qos) + if (qos) return qos->name; else return NULL; @@ -1169,14 +1169,14 @@ extern uint32_t str_2_slurmdb_qos(List qos_list, char *level) slurmdb_qos_rec_t *qos = NULL; char *working_level = NULL; - if(!qos_list) { + if (!qos_list) { error("We need a qos list to translate"); return NO_VAL; - } else if(!level) { + } else if (!level) { debug2("no level"); return 0; } - if(level[0] == '+' || level[0] == '-') + if (level[0] == '+' || level[0] == '-') working_level = level+1; else working_level = level; @@ -1187,7 +1187,7 @@ extern uint32_t str_2_slurmdb_qos(List qos_list, char *level) break; } list_iterator_destroy(itr); - if(qos) + if (qos) return qos->id; else return NO_VAL; @@ -1284,13 +1284,13 @@ extern char *slurmdb_admin_level_str(slurmdb_admin_level_t level) extern slurmdb_admin_level_t str_2_slurmdb_admin_level(char *level) { - if(!level) { + if (!level) { return SLURMDB_ADMIN_NOTSET; - } else if(!strncasecmp(level, "None", 1)) { + } else if (!strncasecmp(level, "None", 1)) { return SLURMDB_ADMIN_NONE; - } else if(!strncasecmp(level, "Operator", 1)) { + } else if (!strncasecmp(level, "Operator", 1)) { return 
SLURMDB_ADMIN_OPERATOR; - } else if(!strncasecmp(level, "SuperUser", 1) + } else if (!strncasecmp(level, "SuperUser", 1) || !strncasecmp(level, "Admin", 1)) { return SLURMDB_ADMIN_SUPER_USER; } else { @@ -1361,7 +1361,7 @@ extern List slurmdb_get_acct_hierarchical_rec_list(List assoc_list) will look for it in the list. If it isn't there we will just add it to the parent and call it good */ - if(!assoc->parent_id) { + if (!assoc->parent_id) { arch_rec->sort_name = assoc->cluster; list_append(arch_rec_list, arch_rec); @@ -1370,15 +1370,15 @@ extern List slurmdb_get_acct_hierarchical_rec_list(List assoc_list) continue; } - if(assoc->user) + if (assoc->user) arch_rec->sort_name = assoc->user; else arch_rec->sort_name = assoc->acct; - if(last_parent && assoc->parent_id == last_parent->assoc->id + if (last_parent && assoc->parent_id == last_parent->assoc->id && !strcmp(assoc->cluster, last_parent->assoc->cluster)) { par_arch_rec = last_parent; - } else if(last_acct_parent + } else if (last_acct_parent && (assoc->parent_id == last_acct_parent->assoc->id) && !strcmp(assoc->cluster, last_acct_parent->assoc->cluster)) { @@ -1386,10 +1386,10 @@ extern List slurmdb_get_acct_hierarchical_rec_list(List assoc_list) } else { list_iterator_reset(itr2); while((par_arch_rec = list_next(itr2))) { - if(assoc->parent_id == par_arch_rec->assoc->id + if (assoc->parent_id == par_arch_rec->assoc->id && !strcmp(assoc->cluster, par_arch_rec->assoc->cluster)) { - if(assoc->user) + if (assoc->user) last_parent = par_arch_rec; else last_parent @@ -1400,7 +1400,7 @@ extern List slurmdb_get_acct_hierarchical_rec_list(List assoc_list) } } - if(!par_arch_rec) { + if (!par_arch_rec) { list_append(arch_rec_list, arch_rec); last_parent = last_acct_parent = arch_rec; } else @@ -1425,37 +1425,37 @@ extern char *slurmdb_tree_name_get(char *name, char *parent, List tree_list) slurmdb_print_tree_t *slurmdb_print_tree = NULL; slurmdb_print_tree_t *par_slurmdb_print_tree = NULL; - if(!tree_list) + if 
(!tree_list) return NULL; itr = list_iterator_create(tree_list); while((slurmdb_print_tree = list_next(itr))) { /* we don't care about users in this list. They are only there so we don't leak memory */ - if(slurmdb_print_tree->user) + if (slurmdb_print_tree->user) continue; - if(!strcmp(name, slurmdb_print_tree->name)) + if (!strcmp(name, slurmdb_print_tree->name)) break; - else if(parent && !strcmp(parent, slurmdb_print_tree->name)) + else if (parent && !strcmp(parent, slurmdb_print_tree->name)) par_slurmdb_print_tree = slurmdb_print_tree; } list_iterator_destroy(itr); - if(parent && slurmdb_print_tree) + if (parent && slurmdb_print_tree) return slurmdb_print_tree->print_name; slurmdb_print_tree = xmalloc(sizeof(slurmdb_print_tree_t)); slurmdb_print_tree->name = xstrdup(name); - if(par_slurmdb_print_tree) + if (par_slurmdb_print_tree) slurmdb_print_tree->spaces = xstrdup_printf(" %s", par_slurmdb_print_tree->spaces); else slurmdb_print_tree->spaces = xstrdup(""); /* user account */ - if(name[0] == '|') { + if (name[0] == '|') { slurmdb_print_tree->print_name = xstrdup_printf( "%s%s", slurmdb_print_tree->spaces, parent); slurmdb_print_tree->user = 1; @@ -1478,21 +1478,21 @@ extern int set_qos_bitstr_from_list(bitstr_t *valid_qos, List qos_list) xassert(valid_qos); - if(!qos_list) + if (!qos_list) return SLURM_ERROR; itr = list_iterator_create(qos_list); while((temp_char = list_next(itr))) { - if(temp_char[0] == '-') { + if (temp_char[0] == '-') { temp_char++; my_function = bit_clear; - } else if(temp_char[0] == '+') { + } else if (temp_char[0] == '+') { temp_char++; my_function = bit_set; } else my_function = bit_set; bit = atoi(temp_char); - if(bit >= bit_size(valid_qos)) { + if (bit >= bit_size(valid_qos)) { rc = SLURM_ERROR; break; } @@ -1511,22 +1511,22 @@ extern char *get_qos_complete_str_bitstr(List qos_list, bitstr_t *valid_qos) ListIterator itr = NULL; int i = 0; - if(!qos_list || !list_count(qos_list) + if (!qos_list || !list_count(qos_list) || !valid_qos 
|| (bit_ffs(valid_qos) == -1)) return xstrdup(""); temp_list = list_create(NULL); for(i=0; i<bit_size(valid_qos); i++) { - if(!bit_test(valid_qos, i)) + if (!bit_test(valid_qos, i)) continue; - if((temp_char = slurmdb_qos_str(qos_list, i))) + if ((temp_char = slurmdb_qos_str(qos_list, i))) list_append(temp_list, temp_char); } list_sort(temp_list, (ListCmpF)slurm_sort_char_list_asc); itr = list_iterator_create(temp_list); while((temp_char = list_next(itr))) { - if(print_this) + if (print_this) xstrfmtcat(print_this, ",%s", temp_char); else print_this = xstrdup(temp_char); @@ -1534,7 +1534,7 @@ extern char *get_qos_complete_str_bitstr(List qos_list, bitstr_t *valid_qos) list_iterator_destroy(itr); list_destroy(temp_list); - if(!print_this) + if (!print_this) return xstrdup(""); return print_this; @@ -1548,7 +1548,7 @@ extern char *get_qos_complete_str(List qos_list, List num_qos_list) ListIterator itr = NULL; int option = 0; - if(!qos_list || !list_count(qos_list) + if (!qos_list || !list_count(qos_list) || !num_qos_list || !list_count(num_qos_list)) return xstrdup(""); @@ -1557,13 +1557,13 @@ extern char *get_qos_complete_str(List qos_list, List num_qos_list) itr = list_iterator_create(num_qos_list); while((temp_char = list_next(itr))) { option = 0; - if(temp_char[0] == '+' || temp_char[0] == '-') { + if (temp_char[0] == '+' || temp_char[0] == '-') { option = temp_char[0]; temp_char++; } temp_char = slurmdb_qos_str(qos_list, atoi(temp_char)); - if(temp_char) { - if(option) + if (temp_char) { + if (option) list_append(temp_list, xstrdup_printf( "%c%s", option, temp_char)); else @@ -1574,7 +1574,7 @@ extern char *get_qos_complete_str(List qos_list, List num_qos_list) list_sort(temp_list, (ListCmpF)slurm_sort_char_list_asc); itr = list_iterator_create(temp_list); while((temp_char = list_next(itr))) { - if(print_this) + if (print_this) xstrfmtcat(print_this, ",%s", temp_char); else print_this = xstrdup(temp_char); @@ -1582,7 +1582,7 @@ extern char 
*get_qos_complete_str(List qos_list, List num_qos_list) list_iterator_destroy(itr); list_destroy(temp_list); - if(!print_this) + if (!print_this) return xstrdup(""); return print_this; @@ -1598,25 +1598,25 @@ extern char *get_classification_str(uint16_t class) return NULL; break; case SLURMDB_CLASS_CAPACITY: - if(classified) + if (classified) return "*Capacity"; else return "Capacity"; break; case SLURMDB_CLASS_CAPABILITY: - if(classified) + if (classified) return "*Capability"; else return "Capability"; break; case SLURMDB_CLASS_CAPAPACITY: - if(classified) + if (classified) return "*Capapacity"; else return "Capapacity"; break; default: - if(classified) + if (classified) return "*Unknown"; else return "Unknown"; @@ -1627,19 +1627,19 @@ extern char *get_classification_str(uint16_t class) extern uint16_t str_2_classification(char *class) { uint16_t type = 0; - if(!class) + if (!class) return type; - if(slurm_strcasestr(class, "capac")) + if (slurm_strcasestr(class, "capac")) type = SLURMDB_CLASS_CAPACITY; - else if(slurm_strcasestr(class, "capab")) + else if (slurm_strcasestr(class, "capab")) type = SLURMDB_CLASS_CAPABILITY; - else if(slurm_strcasestr(class, "capap")) + else if (slurm_strcasestr(class, "capap")) type = SLURMDB_CLASS_CAPAPACITY; - if(slurm_strcasestr(class, "*")) + if (slurm_strcasestr(class, "*")) type |= SLURMDB_CLASSIFIED_FLAG; - else if(slurm_strcasestr(class, "class")) + else if (slurm_strcasestr(class, "class")) type |= SLURMDB_CLASSIFIED_FLAG; return type; @@ -1675,16 +1675,16 @@ extern uint16_t str_2_slurmdb_problem(char *problem) { uint16_t type = 0; - if(!problem) + if (!problem) return type; - if(slurm_strcasestr(problem, "account no associations")) + if (slurm_strcasestr(problem, "account no associations")) type = SLURMDB_PROBLEM_USER_NO_ASSOC; - else if(slurm_strcasestr(problem, "account no users")) + else if (slurm_strcasestr(problem, "account no users")) type = SLURMDB_PROBLEM_ACCT_NO_USERS; - else if(slurm_strcasestr(problem, "user 
no associations")) + else if (slurm_strcasestr(problem, "user no associations")) type = SLURMDB_PROBLEM_USER_NO_ASSOC; - else if(slurm_strcasestr(problem, "user no uid")) + else if (slurm_strcasestr(problem, "user no uid")) type = SLURMDB_PROBLEM_USER_NO_UID; return type; @@ -1699,111 +1699,111 @@ extern void log_assoc_rec(slurmdb_association_rec_t *assoc_ptr, debug2(" acct : %s", assoc_ptr->acct); debug2(" cluster : %s", assoc_ptr->cluster); - if(assoc_ptr->shares_raw == INFINITE) + if (assoc_ptr->shares_raw == INFINITE) debug2(" RawShares : NONE"); - else if(assoc_ptr->shares_raw != NO_VAL) + else if (assoc_ptr->shares_raw != NO_VAL) debug2(" RawShares : %u", assoc_ptr->shares_raw); - if(assoc_ptr->def_qos_id) + if (assoc_ptr->def_qos_id) debug2(" Default QOS : %s", slurmdb_qos_str(qos_list, assoc_ptr->def_qos_id)); else debug2(" Default QOS : NONE"); - if(assoc_ptr->grp_cpu_mins == INFINITE) + if (assoc_ptr->grp_cpu_mins == INFINITE) debug2(" GrpCPUMins : NONE"); - else if(assoc_ptr->grp_cpu_mins != NO_VAL) + else if (assoc_ptr->grp_cpu_mins != NO_VAL) debug2(" GrpCPUMins : %"PRIu64"", assoc_ptr->grp_cpu_mins); - if(assoc_ptr->grp_cpu_run_mins == INFINITE) + if (assoc_ptr->grp_cpu_run_mins == INFINITE) debug2(" GrpCPURunMins : NONE"); - else if(assoc_ptr->grp_cpu_run_mins != NO_VAL) + else if (assoc_ptr->grp_cpu_run_mins != NO_VAL) debug2(" GrpCPURunMins : %"PRIu64"", assoc_ptr->grp_cpu_run_mins); - if(assoc_ptr->grp_cpus == INFINITE) + if (assoc_ptr->grp_cpus == INFINITE) debug2(" GrpCPUs : NONE"); - else if(assoc_ptr->grp_cpus != NO_VAL) + else if (assoc_ptr->grp_cpus != NO_VAL) debug2(" GrpCPUs : %u", assoc_ptr->grp_cpus); - if(assoc_ptr->grp_jobs == INFINITE) + if (assoc_ptr->grp_jobs == INFINITE) debug2(" GrpJobs : NONE"); - else if(assoc_ptr->grp_jobs != NO_VAL) + else if (assoc_ptr->grp_jobs != NO_VAL) debug2(" GrpJobs : %u", assoc_ptr->grp_jobs); - if(assoc_ptr->grp_mem == INFINITE) + if (assoc_ptr->grp_mem == INFINITE) debug2(" GrpMemory : NONE"); - 
else if(assoc_ptr->grp_mem != NO_VAL) + else if (assoc_ptr->grp_mem != NO_VAL) debug2(" GrpMemory : %u", assoc_ptr->grp_mem); - if(assoc_ptr->grp_nodes == INFINITE) + if (assoc_ptr->grp_nodes == INFINITE) debug2(" GrpNodes : NONE"); - else if(assoc_ptr->grp_nodes != NO_VAL) + else if (assoc_ptr->grp_nodes != NO_VAL) debug2(" GrpNodes : %u", assoc_ptr->grp_nodes); - if(assoc_ptr->grp_submit_jobs == INFINITE) + if (assoc_ptr->grp_submit_jobs == INFINITE) debug2(" GrpSubmitJobs : NONE"); - else if(assoc_ptr->grp_submit_jobs != NO_VAL) + else if (assoc_ptr->grp_submit_jobs != NO_VAL) debug2(" GrpSubmitJobs : %u", assoc_ptr->grp_submit_jobs); - if(assoc_ptr->grp_wall == INFINITE) + if (assoc_ptr->grp_wall == INFINITE) debug2(" GrpWall : NONE"); - else if(assoc_ptr->grp_wall != NO_VAL) { + else if (assoc_ptr->grp_wall != NO_VAL) { char time_buf[32]; mins2time_str((time_t) assoc_ptr->grp_wall, time_buf, sizeof(time_buf)); debug2(" GrpWall : %s", time_buf); } - if(assoc_ptr->max_cpu_mins_pj == INFINITE) + if (assoc_ptr->max_cpu_mins_pj == INFINITE) debug2(" MaxCPUMins : NONE"); - else if(assoc_ptr->max_cpu_mins_pj != NO_VAL) + else if (assoc_ptr->max_cpu_mins_pj != NO_VAL) debug2(" MaxCPUMins : %"PRIu64"", assoc_ptr->max_cpu_mins_pj); - if(assoc_ptr->max_cpu_run_mins == INFINITE) + if (assoc_ptr->max_cpu_run_mins == INFINITE) debug2(" MaxCPURunMins : NONE"); - else if(assoc_ptr->max_cpu_run_mins != NO_VAL) + else if (assoc_ptr->max_cpu_run_mins != NO_VAL) debug2(" MaxCPURunMins : %"PRIu64"", assoc_ptr->max_cpu_run_mins); - if(assoc_ptr->max_cpus_pj == INFINITE) + if (assoc_ptr->max_cpus_pj == INFINITE) debug2(" MaxCPUs : NONE"); - else if(assoc_ptr->max_cpus_pj != NO_VAL) + else if (assoc_ptr->max_cpus_pj != NO_VAL) debug2(" MaxCPUs : %u", assoc_ptr->max_cpus_pj); - if(assoc_ptr->max_jobs == INFINITE) + if (assoc_ptr->max_jobs == INFINITE) debug2(" MaxJobs : NONE"); - else if(assoc_ptr->max_jobs != NO_VAL) + else if (assoc_ptr->max_jobs != NO_VAL) debug2(" MaxJobs : %u", 
assoc_ptr->max_jobs); - if(assoc_ptr->max_nodes_pj == INFINITE) + if (assoc_ptr->max_nodes_pj == INFINITE) debug2(" MaxNodes : NONE"); - else if(assoc_ptr->max_nodes_pj != NO_VAL) + else if (assoc_ptr->max_nodes_pj != NO_VAL) debug2(" MaxNodes : %u", assoc_ptr->max_nodes_pj); - if(assoc_ptr->max_submit_jobs == INFINITE) + if (assoc_ptr->max_submit_jobs == INFINITE) debug2(" MaxSubmitJobs : NONE"); - else if(assoc_ptr->max_submit_jobs != NO_VAL) + else if (assoc_ptr->max_submit_jobs != NO_VAL) debug2(" MaxSubmitJobs : %u", assoc_ptr->max_submit_jobs); - if(assoc_ptr->max_wall_pj == INFINITE) + if (assoc_ptr->max_wall_pj == INFINITE) debug2(" MaxWall : NONE"); - else if(assoc_ptr->max_wall_pj != NO_VAL) { + else if (assoc_ptr->max_wall_pj != NO_VAL) { char time_buf[32]; mins2time_str((time_t) assoc_ptr->max_wall_pj, time_buf, sizeof(time_buf)); debug2(" MaxWall : %s", time_buf); } - if(assoc_ptr->qos_list) { + if (assoc_ptr->qos_list) { char *temp_char = get_qos_complete_str(qos_list, assoc_ptr->qos_list); - if(temp_char) { + if (temp_char) { debug2(" Qos : %s", temp_char); xfree(temp_char); - if(assoc_ptr->usage && assoc_ptr->usage->valid_qos) { + if (assoc_ptr->usage && assoc_ptr->usage->valid_qos) { temp_char = get_qos_complete_str_bitstr( qos_list, assoc_ptr->usage->valid_qos); debug3(" Valid Qos : %s", temp_char); @@ -1847,8 +1847,8 @@ extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) // info("now got %d and %d sent", (*start), (*end)); /* Default is going to be the last day */ - if(!sent_end) { - if(!localtime_r(&my_time, &end_tm)) { + if (!sent_end) { + if (!localtime_r(&my_time, &end_tm)) { error("Couldn't get localtime from end %ld", (long)my_time); return SLURM_ERROR; @@ -1857,14 +1857,14 @@ extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) //(*end) = mktime(&end_tm); } else { temp_time = sent_end; - if(!localtime_r(&temp_time, &end_tm)) { + if (!localtime_r(&temp_time, &end_tm)) { error("Couldn't get localtime 
from user end %ld", (long)my_time); return SLURM_ERROR; } - if(end_tm.tm_sec >= 30) + if (end_tm.tm_sec >= 30) end_tm.tm_min++; - if(end_tm.tm_min >= 30) + if (end_tm.tm_min >= 30) end_tm.tm_hour++; } @@ -1873,8 +1873,8 @@ extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) end_tm.tm_isdst = -1; (*end) = mktime(&end_tm); - if(!sent_start) { - if(!localtime_r(&my_time, &start_tm)) { + if (!sent_start) { + if (!localtime_r(&my_time, &start_tm)) { error("Couldn't get localtime from start %ld", (long)my_time); return SLURM_ERROR; @@ -1884,14 +1884,14 @@ extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) //(*start) = mktime(&start_tm); } else { temp_time = sent_start; - if(!localtime_r(&temp_time, &start_tm)) { + if (!localtime_r(&temp_time, &start_tm)) { error("Couldn't get localtime from user start %ld", (long)my_time); return SLURM_ERROR; } - if(start_tm.tm_sec >= 30) + if (start_tm.tm_sec >= 30) start_tm.tm_min++; - if(start_tm.tm_min >= 30) + if (start_tm.tm_min >= 30) start_tm.tm_hour++; } start_tm.tm_sec = 0; @@ -1899,7 +1899,7 @@ extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) start_tm.tm_isdst = -1; (*start) = mktime(&start_tm); - if((*end)-(*start) < 3600) + if ((*end)-(*start) < 3600) (*end) = (*start) + 3600; /* info("now got %d and %d sent", (*start), (*end)); */ /* char start_char[20]; */ @@ -1937,7 +1937,7 @@ extern uint32_t slurmdb_parse_purge(char *string) while(string[i]) { if ((string[i] >= '0') && (string[i] <= '9')) { - if(purge == NO_VAL) + if (purge == NO_VAL) purge = 0; purge = (purge * 10) + (string[i] - '0'); } else @@ -1947,11 +1947,11 @@ extern uint32_t slurmdb_parse_purge(char *string) if (purge != NO_VAL) { int len = strlen(string+i); - if(!len || !strncasecmp("months", string+i, MAX(len, 1))) { + if (!len || !strncasecmp("months", string+i, MAX(len, 1))) { purge |= SLURMDB_PURGE_MONTHS; - } else if(!strncasecmp("hours", string+i, MAX(len, 1))) { + } else if 
(!strncasecmp("hours", string+i, MAX(len, 1))) { purge |= SLURMDB_PURGE_HOURS; - } else if(!strncasecmp("days", string+i, MAX(len, 1))) { + } else if (!strncasecmp("days", string+i, MAX(len, 1))) { purge |= SLURMDB_PURGE_DAYS; } else { error("Invalid purge unit '%s', valid options " @@ -1969,18 +1969,18 @@ extern char *slurmdb_purge_string(uint32_t purge, char *string, int len, { uint32_t units = SLURMDB_PURGE_GET_UNITS(purge); - if(SLURMDB_PURGE_IN_HOURS(purge)) { - if(with_archive && SLURMDB_PURGE_ARCHIVE_SET(purge)) + if (SLURMDB_PURGE_IN_HOURS(purge)) { + if (with_archive && SLURMDB_PURGE_ARCHIVE_SET(purge)) snprintf(string, len, "%u hours*", units); else snprintf(string, len, "%u hours", units); - } else if(SLURMDB_PURGE_IN_DAYS(purge)) { - if(with_archive && SLURMDB_PURGE_ARCHIVE_SET(purge)) + } else if (SLURMDB_PURGE_IN_DAYS(purge)) { + if (with_archive && SLURMDB_PURGE_ARCHIVE_SET(purge)) snprintf(string, len, "%u days*", units); else snprintf(string, len, "%u days", units); } else { - if(with_archive && SLURMDB_PURGE_ARCHIVE_SET(purge)) + if (with_archive && SLURMDB_PURGE_ARCHIVE_SET(purge)) snprintf(string, len, "%u months*", units); else snprintf(string, len, "%u months", units); @@ -2002,18 +2002,18 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, int equal_set = 0; int add_set = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } - if(!qos_list || !list_count(qos_list)) { + if (!qos_list || !list_count(qos_list)) { debug2("No real qos_list"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -2021,14 +2021,14 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, } start = i; while(names[i]) { - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - 
if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { int tmp_option = option; - if(names[start] == '+' + if (names[start] == '+' || names[start] == '-') { tmp_option = names[start]; start++; @@ -2037,7 +2037,7 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, memcpy(name, names+start, (i-start)); id = str_2_slurmdb_qos(qos_list, name); - if(id == NO_VAL) { + if (id == NO_VAL) { char *tmp = _get_qos_list_str( qos_list); error("You gave a bad qos " @@ -2050,8 +2050,8 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, } xfree(name); - if(tmp_option) { - if(equal_set) { + if (tmp_option) { + if (equal_set) { error("You can't set " "qos equal to " "something and " @@ -2065,7 +2065,7 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, name = xstrdup_printf( "%c%u", tmp_option, id); } else { - if(add_set) { + if (add_set) { error("You can't set " "qos equal to " "something and " @@ -2079,12 +2079,12 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, name = xstrdup_printf("%u", id); } while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } list_iterator_reset(itr); - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -2096,7 +2096,7 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, i++; start = i; - if(!names[i]) { + if (!names[i]) { error("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -2105,9 +2105,9 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { int tmp_option = option; - if(names[start] == '+' || names[start] == '-') { + if (names[start] == '+' || names[start] == '-') { tmp_option = names[start]; start++; } @@ -2115,7 +2115,7 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, memcpy(name, names+start, (i-start)); id = str_2_slurmdb_qos(qos_list, name); - if(id == NO_VAL) { + if (id == NO_VAL) { char *tmp = _get_qos_list_str(qos_list); error("You gave a bad qos " "'%s'. Valid QOS's are " @@ -2127,8 +2127,8 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, } xfree(name); - if(tmp_option) { - if(equal_set) { + if (tmp_option) { + if (equal_set) { error("You can't set " "qos equal to " "something and " @@ -2141,7 +2141,7 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, name = xstrdup_printf( "%c%u", tmp_option, id); } else { - if(add_set) { + if (add_set) { error("You can't set " "qos equal to " "something and " @@ -2154,11 +2154,11 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, name = xstrdup_printf("%u", id); } while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -2168,7 +2168,7 @@ extern int slurmdb_addto_qos_char_list(List char_list, List qos_list, count++; } } - if(!count) { + if (!count) { error("You gave me an empty qos list"); } @@ -2209,7 +2209,7 @@ extern int slurmdb_send_accounting_update(List update_list, char *cluster, slurm_msg_t_init(&req); slurm_set_addr_char(&req.address, port, host); req.msg_type = ACCOUNTING_UPDATE_MSG; - if(slurmdbd_conf) + if (slurmdbd_conf) req.flags = SLURM_GLOBAL_AUTH_KEY; req.data = &msg; slurm_msg_t_init(&resp); @@ -2234,7 +2234,7 @@ 
extern int slurmdb_send_accounting_update(List update_list, char *cluster, slurm_free_return_code_msg(resp.data); break; default: - if(rc != SLURM_ERROR) + if (rc != SLURM_ERROR) error("Unknown response message %u", resp.msg_type); rc = SLURM_ERROR; break; diff --git a/src/common/slurmdb_defs.h b/src/common/slurmdb_defs.h index f853847c423b74a72a746e27c38e12debe1f4445..1edbf3a8bc29726c45b6638c0a97e908bd7fd834 100644 --- a/src/common/slurmdb_defs.h +++ b/src/common/slurmdb_defs.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurmdb_pack.c b/src/common/slurmdb_pack.c index 9761c5c1c351e3573d08f3078622967fbab722f8..18b78ece2063c6bc2bdda58da4a36e75c94d3e80 100644 --- a/src/common/slurmdb_pack.c +++ b/src/common/slurmdb_pack.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -48,7 +48,49 @@ static void _pack_slurmdb_stats(slurmdb_stats_t *stats, { int i=0; - if (rpc_version >= SLURMDBD_2_5_VERSION) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { + if (!stats) { + for (i=0; i<4; i++) + pack32((uint32_t) 0, buffer); + + for (i=0; i<10; i++) + packdouble(0, buffer); + + for (i=0; i<12; i++) { + pack32(0, buffer); + } + return; + } + + pack32(stats->vsize_max, buffer); + pack32(stats->rss_max, buffer); + pack32(stats->pages_max, buffer); + pack32(stats->cpu_min, buffer); + + packdouble(stats->vsize_ave, buffer); + packdouble(stats->rss_ave, buffer); + packdouble(stats->pages_ave, buffer); + packdouble(stats->cpu_ave, buffer); + packdouble(stats->act_cpufreq, buffer); + packdouble(stats->consumed_energy, buffer); + packdouble(stats->disk_read_max, buffer); + packdouble(stats->disk_read_ave, buffer); + packdouble(stats->disk_write_max, buffer); + packdouble(stats->disk_write_ave, buffer); + + pack32(stats->vsize_max_nodeid, buffer); + pack32(stats->vsize_max_taskid, buffer); + pack32(stats->rss_max_nodeid, buffer); + pack32(stats->rss_max_taskid, buffer); + pack32(stats->pages_max_nodeid, buffer); + pack32(stats->pages_max_taskid, buffer); + pack32(stats->cpu_min_nodeid, buffer); + pack32(stats->cpu_min_taskid, buffer); + pack32(stats->disk_read_max_nodeid, buffer); + pack32(stats->disk_read_max_taskid, buffer); + pack32(stats->disk_write_max_nodeid, buffer); + pack32(stats->disk_write_max_taskid, buffer); + } else if (rpc_version >= SLURMDBD_2_5_VERSION) { if (!stats) { for (i=0; i<4; i++) pack32((uint32_t) 0, buffer); @@ -76,15 +118,15 @@ static void _pack_slurmdb_stats(slurmdb_stats_t *stats, packdouble(stats->consumed_energy, buffer); pack32(stats->vsize_max_nodeid, buffer); - pack16(stats->vsize_max_taskid, buffer); + pack16((uint16_t)stats->vsize_max_taskid, buffer); pack32(stats->rss_max_nodeid, buffer); - pack16(stats->rss_max_taskid, buffer); + 
pack16((uint16_t)stats->rss_max_taskid, buffer); pack32(stats->pages_max_nodeid, buffer); - pack16(stats->pages_max_taskid, buffer); + pack16((uint16_t)stats->pages_max_taskid, buffer); pack32(stats->cpu_min_nodeid, buffer); - pack16(stats->cpu_min_taskid, buffer); + pack16((uint16_t)stats->cpu_min_taskid, buffer); } else if (rpc_version >= SLURMDBD_VERSION_MIN) { - if(!stats) { + if (!stats) { for(i=0; i<4; i++) pack32((uint32_t) 0, buffer); @@ -109,20 +151,22 @@ static void _pack_slurmdb_stats(slurmdb_stats_t *stats, packdouble(stats->cpu_ave, buffer); pack32(stats->vsize_max_nodeid, buffer); - pack16(stats->vsize_max_taskid, buffer); + pack16((uint16_t)stats->vsize_max_taskid, buffer); pack32(stats->rss_max_nodeid, buffer); - pack16(stats->rss_max_taskid, buffer); + pack16((uint16_t)stats->rss_max_taskid, buffer); pack32(stats->pages_max_nodeid, buffer); - pack16(stats->pages_max_taskid, buffer); + pack16((uint16_t)stats->pages_max_taskid, buffer); pack32(stats->cpu_min_nodeid, buffer); - pack16(stats->cpu_min_taskid, buffer); + pack16((uint16_t)stats->cpu_min_taskid, buffer); } } static int _unpack_slurmdb_stats(slurmdb_stats_t *stats, uint16_t rpc_version, Buf buffer) { - if (rpc_version >= SLURMDBD_2_5_VERSION) { + uint16_t tmp_uint16; + + if (rpc_version >= SLURMDBD_2_6_VERSION) { safe_unpack32(&stats->vsize_max, buffer); safe_unpack32(&stats->rss_max, buffer); safe_unpack32(&stats->pages_max, buffer); @@ -134,15 +178,48 @@ static int _unpack_slurmdb_stats(slurmdb_stats_t *stats, safe_unpackdouble(&stats->cpu_ave, buffer); safe_unpackdouble(&stats->act_cpufreq, buffer); safe_unpackdouble(&stats->consumed_energy, buffer); + safe_unpackdouble(&stats->disk_read_max, buffer); + safe_unpackdouble(&stats->disk_read_ave, buffer); + safe_unpackdouble(&stats->disk_write_max, buffer); + safe_unpackdouble(&stats->disk_write_ave, buffer); safe_unpack32(&stats->vsize_max_nodeid, buffer); - safe_unpack16(&stats->vsize_max_taskid, buffer); + 
safe_unpack32(&stats->vsize_max_taskid, buffer); safe_unpack32(&stats->rss_max_nodeid, buffer); - safe_unpack16(&stats->rss_max_taskid, buffer); + safe_unpack32(&stats->rss_max_taskid, buffer); safe_unpack32(&stats->pages_max_nodeid, buffer); - safe_unpack16(&stats->pages_max_taskid, buffer); + safe_unpack32(&stats->pages_max_taskid, buffer); safe_unpack32(&stats->cpu_min_nodeid, buffer); - safe_unpack16(&stats->cpu_min_taskid, buffer); + safe_unpack32(&stats->cpu_min_taskid, buffer); + safe_unpack32(&stats->disk_read_max_nodeid, buffer); + safe_unpack32(&stats->disk_read_max_taskid, buffer); + safe_unpack32(&stats->disk_write_max_nodeid, buffer); + safe_unpack32(&stats->disk_write_max_taskid, buffer); + } else if (rpc_version >= SLURMDBD_2_5_VERSION) { + safe_unpack32(&stats->vsize_max, buffer); + safe_unpack32(&stats->rss_max, buffer); + safe_unpack32(&stats->pages_max, buffer); + safe_unpack32(&stats->cpu_min, buffer); + + safe_unpackdouble(&stats->vsize_ave, buffer); + safe_unpackdouble(&stats->rss_ave, buffer); + safe_unpackdouble(&stats->pages_ave, buffer); + safe_unpackdouble(&stats->cpu_ave, buffer); + safe_unpackdouble(&stats->act_cpufreq, buffer); + safe_unpackdouble(&stats->consumed_energy, buffer); + + safe_unpack32(&stats->vsize_max_nodeid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->vsize_max_taskid = tmp_uint16; + safe_unpack32(&stats->rss_max_nodeid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->rss_max_taskid = tmp_uint16; + safe_unpack32(&stats->pages_max_nodeid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->pages_max_taskid = tmp_uint16; + safe_unpack32(&stats->cpu_min_nodeid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->cpu_min_taskid = tmp_uint16; } else if (rpc_version >= SLURMDBD_VERSION_MIN) { safe_unpack32(&stats->vsize_max, buffer); safe_unpack32(&stats->rss_max, buffer); @@ -155,13 +232,17 @@ static int _unpack_slurmdb_stats(slurmdb_stats_t *stats, safe_unpackdouble(&stats->cpu_ave, buffer); 
safe_unpack32(&stats->vsize_max_nodeid, buffer); - safe_unpack16(&stats->vsize_max_taskid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->vsize_max_taskid = tmp_uint16; safe_unpack32(&stats->rss_max_nodeid, buffer); - safe_unpack16(&stats->rss_max_taskid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->rss_max_taskid = tmp_uint16; safe_unpack32(&stats->pages_max_nodeid, buffer); - safe_unpack16(&stats->pages_max_taskid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->pages_max_taskid = tmp_uint16; safe_unpack32(&stats->cpu_min_nodeid, buffer); - safe_unpack16(&stats->cpu_min_taskid, buffer); + safe_unpack16(&tmp_uint16, buffer); + stats->cpu_min_taskid = tmp_uint16; } return SLURM_SUCCESS; @@ -181,8 +262,8 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) slurmdb_association_rec_t *assoc = NULL; slurmdb_wckey_rec_t *wckey = NULL; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { pack16(0, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -197,11 +278,11 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) pack16(object->admin_level, buffer); - if(object->assoc_list) + if (object->assoc_list) count = list_count(object->assoc_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->assoc_list); while((assoc = list_next(itr))) { slurmdb_pack_association_rec(assoc, rpc_version, @@ -211,11 +292,11 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->coord_accts) + if (object->coord_accts) count = list_count(object->coord_accts); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->coord_accts); while((coord = list_next(itr))) { slurmdb_pack_coord_rec(coord, @@ -232,11 +313,11 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t 
rpc_version, Buf buffer) pack32(object->uid, buffer); - if(object->wckey_list) + if (object->wckey_list) count = list_count(object->wckey_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->wckey_list); while((wckey = list_next(itr))) { slurmdb_pack_wckey_rec(wckey, rpc_version, @@ -245,8 +326,8 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) list_iterator_destroy(itr); } count = NO_VAL; - } else if(rpc_version >= 4) { - if(!object) { + } else if (rpc_version >= 4) { + if (!object) { pack16(0, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -260,11 +341,11 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) pack16(object->admin_level, buffer); - if(object->assoc_list) + if (object->assoc_list) count = list_count(object->assoc_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->assoc_list); while((assoc = list_next(itr))) { slurmdb_pack_association_rec(assoc, rpc_version, @@ -274,11 +355,11 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->coord_accts) + if (object->coord_accts) count = list_count(object->coord_accts); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->coord_accts); while((coord = list_next(itr))) { slurmdb_pack_coord_rec(coord, @@ -294,11 +375,11 @@ extern void slurmdb_pack_user_rec(void *in, uint16_t rpc_version, Buf buffer) pack32(object->uid, buffer); - if(object->wckey_list) + if (object->wckey_list) count = list_count(object->wckey_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->wckey_list); while((wckey = list_next(itr))) { slurmdb_pack_wckey_rec(wckey, rpc_version, @@ -323,14 +404,14 @@ extern 
int slurmdb_unpack_user_rec(void **object, uint16_t rpc_version, *object = object_ptr; - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpack16(&object_ptr->admin_level, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->assoc_list = list_create(slurmdb_destroy_association_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_association_rec( + if (slurmdb_unpack_association_rec( (void *)&assoc, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -338,11 +419,11 @@ extern int slurmdb_unpack_user_rec(void **object, uint16_t rpc_version, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->coord_accts = list_create(slurmdb_destroy_coord_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_coord_rec((void *)&coord, + if (slurmdb_unpack_coord_rec((void *)&coord, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -358,11 +439,11 @@ extern int slurmdb_unpack_user_rec(void **object, uint16_t rpc_version, &uint32_tmp, buffer); safe_unpack32(&object_ptr->uid, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->wckey_list = list_create(slurmdb_destroy_wckey_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_wckey_rec( + if (slurmdb_unpack_wckey_rec( (void *)&wckey, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -370,14 +451,14 @@ extern int slurmdb_unpack_user_rec(void **object, uint16_t rpc_version, } } - } else if(rpc_version >= 4) { + } else if (rpc_version >= 4) { safe_unpack16(&object_ptr->admin_level, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->assoc_list = list_create(slurmdb_destroy_association_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_association_rec( + if (slurmdb_unpack_association_rec( (void *)&assoc, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -385,11 +466,11 @@ extern int slurmdb_unpack_user_rec(void **object, 
uint16_t rpc_version, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->coord_accts = list_create(slurmdb_destroy_coord_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_coord_rec((void *)&coord, + if (slurmdb_unpack_coord_rec((void *)&coord, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -403,11 +484,11 @@ extern int slurmdb_unpack_user_rec(void **object, uint16_t rpc_version, safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpack32(&object_ptr->uid, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->wckey_list = list_create(slurmdb_destroy_wckey_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_wckey_rec( + if (slurmdb_unpack_wckey_rec( (void *)&wckey, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -429,8 +510,8 @@ extern void slurmdb_pack_used_limits(void *in, uint16_t rpc_version, Buf buffer) { slurmdb_used_limits_t *object = (slurmdb_used_limits_t *)in; - if(rpc_version >= 9) { - if(!object) { + if (rpc_version >= 9) { + if (!object) { pack64(0, buffer); pack32(0, buffer); pack32(0, buffer); @@ -447,7 +528,7 @@ extern void slurmdb_pack_used_limits(void *in, uint16_t rpc_version, Buf buffer) pack32(object->submit_jobs, buffer); pack32(object->uid, buffer); } else if (rpc_version >= 8) { - if(!object) { + if (!object) { pack64(0, buffer); pack32(0, buffer); pack32(0, buffer); @@ -459,8 +540,8 @@ extern void slurmdb_pack_used_limits(void *in, uint16_t rpc_version, Buf buffer) pack32(object->jobs, buffer); pack32(object->submit_jobs, buffer); pack32(object->uid, buffer); - } else if(rpc_version >= 6) { - if(!object) { + } else if (rpc_version >= 6) { + if (!object) { pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); @@ -493,7 +574,7 @@ extern int slurmdb_unpack_used_limits(void **object, safe_unpack32(&object_ptr->jobs, buffer); safe_unpack32(&object_ptr->submit_jobs, buffer); safe_unpack32(&object_ptr->uid, buffer); 
- } else if(rpc_version >= 6) { + } else if (rpc_version >= 6) { safe_unpack32(&object_ptr->jobs, buffer); safe_unpack32(&object_ptr->submit_jobs, buffer); safe_unpack32(&object_ptr->uid, buffer); @@ -514,8 +595,8 @@ extern void slurmdb_pack_account_rec(void *in, uint16_t rpc_version, Buf buffer) slurmdb_account_rec_t *object = (slurmdb_account_rec_t *)in; slurmdb_association_rec_t *assoc = NULL; - if(rpc_version >= 3) { - if(!object) { + if (rpc_version >= 3) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); packnull(buffer); @@ -524,11 +605,11 @@ extern void slurmdb_pack_account_rec(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->assoc_list) + if (object->assoc_list) count = list_count(object->assoc_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->assoc_list); while((assoc = list_next(itr))) { slurmdb_pack_association_rec(assoc, rpc_version, @@ -538,11 +619,11 @@ extern void slurmdb_pack_account_rec(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->coordinators) + if (object->coordinators) count = list_count(object->coordinators); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->coordinators); while((coord = list_next(itr))) { slurmdb_pack_coord_rec(coord, @@ -571,13 +652,13 @@ extern int slurmdb_unpack_account_rec(void **object, uint16_t rpc_version, *object = object_ptr; - if(rpc_version >= 3) { + if (rpc_version >= 3) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->assoc_list = list_create(slurmdb_destroy_association_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_association_rec( + if (slurmdb_unpack_association_rec( (void *)&assoc, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -585,11 +666,11 @@ extern int slurmdb_unpack_account_rec(void **object, uint16_t rpc_version, } } 
safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->coordinators = list_create(slurmdb_destroy_coord_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_coord_rec((void *)&coord, + if (slurmdb_unpack_coord_rec((void *)&coord, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; @@ -615,7 +696,7 @@ extern void slurmdb_pack_coord_rec(void *in, uint16_t rpc_version, Buf buffer) { slurmdb_coord_rec_t *object = (slurmdb_coord_rec_t *)in; - if(!object) { + if (!object) { packnull(buffer); pack16(0, buffer); return; @@ -648,8 +729,8 @@ extern void slurmdb_pack_cluster_accounting_rec(void *in, uint16_t rpc_version, slurmdb_cluster_accounting_rec_t *object = (slurmdb_cluster_accounting_rec_t *)in; - if(rpc_version >= 5) { - if(!object) { + if (rpc_version >= 5) { + if (!object) { pack64(0, buffer); pack32(0, buffer); pack64(0, buffer); @@ -681,7 +762,7 @@ extern int slurmdb_unpack_cluster_accounting_rec(void **object, *object = object_ptr; - if(rpc_version >= 5) { + if (rpc_version >= 5) { safe_unpack64(&object_ptr->alloc_secs, buffer); safe_unpack32(&object_ptr->cpu_count, buffer); safe_unpack64(&object_ptr->down_secs, buffer); @@ -707,8 +788,8 @@ extern void slurmdb_pack_cluster_rec(void *in, uint16_t rpc_version, Buf buffer) uint32_t count = NO_VAL; slurmdb_cluster_rec_t *object = (slurmdb_cluster_rec_t *)in; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { pack32(NO_VAL, buffer); pack16(0, buffer); packnull(buffer); @@ -728,12 +809,12 @@ extern void slurmdb_pack_cluster_rec(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->accounting_list) + if (object->accounting_list) count = list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_cluster_accounting_rec( @@ -759,8 +840,8 @@ extern void 
slurmdb_pack_cluster_rec(void *in, uint16_t rpc_version, Buf buffer) rpc_version, buffer); pack16(object->rpc_version, buffer); - } else if(rpc_version >= 5) { - if(!object) { + } else if (rpc_version >= 5) { + if (!object) { pack32(NO_VAL, buffer); pack16(0, buffer); packnull(buffer); @@ -776,12 +857,12 @@ extern void slurmdb_pack_cluster_rec(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->accounting_list) + if (object->accounting_list) count = list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_cluster_accounting_rec( @@ -819,9 +900,9 @@ extern int slurmdb_unpack_cluster_rec(void **object, uint16_t rpc_version, *object = object_ptr; slurmdb_init_cluster_rec(object_ptr, 0); - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create( slurmdb_destroy_cluster_accounting_rec); for(i=0; i<count; i++) { @@ -847,16 +928,16 @@ extern int slurmdb_unpack_cluster_rec(void **object, uint16_t rpc_version, safe_unpack32(&object_ptr->plugin_id_select, buffer); - if(slurmdb_unpack_association_rec( + if (slurmdb_unpack_association_rec( (void **)&object_ptr->root_assoc, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; safe_unpack16(&object_ptr->rpc_version, buffer); - } else if(rpc_version >= 5) { + } else if (rpc_version >= 5) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create( slurmdb_destroy_cluster_accounting_rec); for(i=0; i<count; i++) { @@ -878,7 +959,7 @@ extern int slurmdb_unpack_cluster_rec(void **object, uint16_t rpc_version, safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->nodes, &uint32_tmp, buffer); - 
if(slurmdb_unpack_association_rec( + if (slurmdb_unpack_association_rec( (void **)&object_ptr->root_assoc, rpc_version, buffer) == SLURM_ERROR) @@ -900,7 +981,7 @@ extern void slurmdb_pack_accounting_rec(void *in, uint16_t rpc_version, { slurmdb_accounting_rec_t *object = (slurmdb_accounting_rec_t *)in; - if(!object) { + if (!object) { pack64(0, buffer); pack32(0, buffer); pack_time(0, buffer); @@ -942,7 +1023,7 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, slurmdb_association_rec_t *object = (slurmdb_association_rec_t *)in; if (rpc_version >= 10) { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); packnull(buffer); packnull(buffer); @@ -985,12 +1066,12 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, return; } - if(object->accounting_list) + if (object->accounting_list) count = list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_accounting_rec(slurmdb_info, @@ -1036,12 +1117,12 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, pack32(object->parent_id, buffer); packstr(object->partition, buffer); - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -1055,7 +1136,7 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, packstr(object->user, buffer); } else if (rpc_version >= 8) { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); packnull(buffer); packnull(buffer); @@ -1097,12 +1178,12 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, return; } - if(object->accounting_list) + if (object->accounting_list) count = 
list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_accounting_rec(slurmdb_info, @@ -1147,12 +1228,12 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, pack32(object->parent_id, buffer); packstr(object->partition, buffer); - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -1166,7 +1247,7 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, packstr(object->user, buffer); } else if (rpc_version >= 4) { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); packnull(buffer); packnull(buffer); @@ -1203,12 +1284,12 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, return; } - if(object->accounting_list) + if (object->accounting_list) count = list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_accounting_rec(slurmdb_info, @@ -1248,12 +1329,12 @@ extern void slurmdb_pack_association_rec(void *in, uint16_t rpc_version, pack32(object->parent_id, buffer); packstr(object->partition, buffer); - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -1286,11 +1367,11 @@ extern int slurmdb_unpack_association_rec(void **object, uint16_t rpc_version, if (rpc_version >= 10) { 
safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create(slurmdb_destroy_accounting_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_accounting_rec( + if (slurmdb_unpack_accounting_rec( (void **)&slurmdb_info, rpc_version, buffer) == SLURM_ERROR) @@ -1338,7 +1419,7 @@ extern int slurmdb_unpack_association_rec(void **object, uint16_t rpc_version, safe_unpack32(&count, buffer); /* This needs to look for zero to tell if something has changed */ - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -1353,11 +1434,11 @@ extern int slurmdb_unpack_association_rec(void **object, uint16_t rpc_version, safe_unpackstr_xmalloc(&object_ptr->user, &uint32_tmp, buffer); } else if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create(slurmdb_destroy_accounting_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_accounting_rec( + if (slurmdb_unpack_accounting_rec( (void **)&slurmdb_info, rpc_version, buffer) == SLURM_ERROR) @@ -1404,7 +1485,7 @@ extern int slurmdb_unpack_association_rec(void **object, uint16_t rpc_version, safe_unpack32(&count, buffer); /* This needs to look for zero to tell if something has changed */ - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -1419,11 +1500,11 @@ extern int slurmdb_unpack_association_rec(void **object, uint16_t rpc_version, safe_unpackstr_xmalloc(&object_ptr->user, &uint32_tmp, buffer); } else if (rpc_version >= 4) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create(slurmdb_destroy_accounting_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_accounting_rec( + if 
(slurmdb_unpack_accounting_rec( (void **)&slurmdb_info, rpc_version, buffer) == SLURM_ERROR) @@ -1465,7 +1546,7 @@ extern int slurmdb_unpack_association_rec(void **object, uint16_t rpc_version, safe_unpack32(&count, buffer); /* This needs to look for zero to tell if something has changed */ - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -1492,7 +1573,7 @@ extern void slurmdb_pack_event_rec(void *in, uint16_t rpc_version, Buf buffer) { slurmdb_event_rec_t *object = (slurmdb_event_rec_t *)in; - if(!object) { + if (!object) { packnull(buffer); packnull(buffer); pack32(NO_VAL, buffer); @@ -1853,12 +1934,12 @@ extern void slurmdb_pack_qos_rec(void *in, uint16_t rpc_version, Buf buffer) pack_bit_str(object->preempt_bitstr, buffer); - if(object->preempt_list) + if (object->preempt_list) count = list_count(object->preempt_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->preempt_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2051,7 +2132,7 @@ extern int slurmdb_unpack_qos_rec(void **object, uint16_t rpc_version, unpack_bit_str(&object_ptr->preempt_bitstr, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->preempt_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2080,7 +2161,7 @@ extern void slurmdb_pack_reservation_rec(void *in, uint16_t rpc_version, { slurmdb_reservation_rec_t *object = (slurmdb_reservation_rec_t *)in; - if(!object) { + if (!object) { pack64(0, buffer); packnull(buffer); packnull(buffer); @@ -2147,8 +2228,8 @@ extern void slurmdb_pack_txn_rec(void *in, uint16_t rpc_version, Buf buffer) { slurmdb_txn_rec_t *object = (slurmdb_txn_rec_t *)in; - if(rpc_version >= 3) { - if(!object) { + if (rpc_version >= 3) { + if (!object) { packnull(buffer); 
pack16(0, buffer); packnull(buffer); @@ -2212,8 +2293,8 @@ extern void slurmdb_pack_wckey_rec(void *in, uint16_t rpc_version, Buf buffer) uint32_t count = NO_VAL; slurmdb_wckey_rec_t *object = (slurmdb_wckey_rec_t *)in; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { pack32(NO_VAL, buffer); packnull(buffer); @@ -2228,12 +2309,12 @@ extern void slurmdb_pack_wckey_rec(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->accounting_list) + if (object->accounting_list) count = list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_accounting_rec( @@ -2255,7 +2336,7 @@ extern void slurmdb_pack_wckey_rec(void *in, uint16_t rpc_version, Buf buffer) packstr(object->user, buffer); } else { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); packnull(buffer); @@ -2270,12 +2351,12 @@ extern void slurmdb_pack_wckey_rec(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->accounting_list) + if (object->accounting_list) count = list_count(object->accounting_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((slurmdb_info = list_next(itr))) { slurmdb_pack_accounting_rec( @@ -2309,13 +2390,13 @@ extern int slurmdb_unpack_wckey_rec(void **object, uint16_t rpc_version, *object = object_ptr; - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create(slurmdb_destroy_accounting_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_accounting_rec( + if (slurmdb_unpack_accounting_rec( (void **)&slurmdb_info, rpc_version, buffer) == SLURM_ERROR) @@ -2339,11 +2420,11 @@ extern int slurmdb_unpack_wckey_rec(void **object, uint16_t 
rpc_version, safe_unpackstr_xmalloc(&object_ptr->user, &uint32_tmp, buffer); } else { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->accounting_list = list_create(slurmdb_destroy_accounting_rec); for(i=0; i<count; i++) { - if(slurmdb_unpack_accounting_rec( + if (slurmdb_unpack_accounting_rec( (void **)&slurmdb_info, rpc_version, buffer) == SLURM_ERROR) @@ -2376,7 +2457,7 @@ extern void slurmdb_pack_archive_rec(void *in, uint16_t rpc_version, Buf buffer) { slurmdb_archive_rec_t *object = (slurmdb_archive_rec_t *)in; - if(!object) { + if (!object) { packnull(buffer); packnull(buffer); return; @@ -2447,7 +2528,7 @@ extern void slurmdb_pack_user_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->def_wckey_list) + if (object->def_wckey_list) count = list_count(object->def_wckey_list); pack32(count, buffer); @@ -2542,8 +2623,8 @@ extern void slurmdb_pack_account_cond(void *in, uint16_t rpc_version, slurmdb_account_cond_t *object = (slurmdb_account_cond_t *)in; uint32_t count = NO_VAL; - if(rpc_version >= 3) { - if(!object) { + if (rpc_version >= 3) { + if (!object) { slurmdb_pack_association_cond(NULL, rpc_version, buffer); pack32(NO_VAL, buffer); @@ -2557,12 +2638,12 @@ extern void slurmdb_pack_account_cond(void *in, uint16_t rpc_version, rpc_version, buffer); count = NO_VAL; - if(object->description_list) + if (object->description_list) count = list_count(object->description_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->description_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2571,12 +2652,12 @@ extern void slurmdb_pack_account_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->organization_list) + if (object->organization_list) count = list_count(object->organization_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { 
itr = list_iterator_create(object->organization_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2604,13 +2685,13 @@ extern int slurmdb_unpack_account_cond(void **object, uint16_t rpc_version, *object = object_ptr; if (rpc_version >= 3) { - if(slurmdb_unpack_association_cond( + if (slurmdb_unpack_association_cond( (void **)&object_ptr->assoc_cond, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->description_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2621,7 +2702,7 @@ extern int slurmdb_unpack_account_cond(void **object, uint16_t rpc_version, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->organization_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2652,8 +2733,8 @@ extern void slurmdb_pack_cluster_cond(void *in, uint16_t rpc_version, slurmdb_cluster_cond_t *object = (slurmdb_cluster_cond_t *)in; uint32_t count = NO_VAL; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { pack16(0, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -2668,12 +2749,12 @@ extern void slurmdb_pack_cluster_cond(void *in, uint16_t rpc_version, pack16(object->classification, buffer); - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2684,12 +2765,12 @@ extern void slurmdb_pack_cluster_cond(void *in, uint16_t rpc_version, pack32(object->flags, buffer); - if(object->plugin_id_select_list) + if (object->plugin_id_select_list) count = list_count(object->plugin_id_select_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = 
list_iterator_create( object->plugin_id_select_list); while((tmp_info = list_next(itr))) { @@ -2699,12 +2780,12 @@ extern void slurmdb_pack_cluster_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->rpc_version_list) + if (object->rpc_version_list) count = list_count(object->rpc_version_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->rpc_version_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2718,8 +2799,8 @@ extern void slurmdb_pack_cluster_cond(void *in, uint16_t rpc_version, pack16(object->with_usage, buffer); pack16(object->with_deleted, buffer); - } else if(rpc_version >= 5) { - if(!object) { + } else if (rpc_version >= 5) { + if (!object) { pack16(0, buffer); pack32(NO_VAL, buffer); pack_time(0, buffer); @@ -2731,12 +2812,12 @@ extern void slurmdb_pack_cluster_cond(void *in, uint16_t rpc_version, pack16(object->classification, buffer); - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2766,10 +2847,10 @@ extern int slurmdb_unpack_cluster_cond(void **object, uint16_t rpc_version, *object = object_ptr; slurmdb_init_cluster_cond(object_ptr, 0); - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpack16(&object_ptr->classification, buffer); safe_unpack32(&count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2782,7 +2863,7 @@ extern int slurmdb_unpack_cluster_cond(void **object, uint16_t rpc_version, safe_unpack32(&object_ptr->flags, buffer); safe_unpack32(&count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { 
object_ptr->plugin_id_select_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2794,7 +2875,7 @@ extern int slurmdb_unpack_cluster_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { object_ptr->rpc_version_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2810,10 +2891,10 @@ extern int slurmdb_unpack_cluster_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->with_usage, buffer); safe_unpack16(&object_ptr->with_deleted, buffer); - } else if(rpc_version >= 5) { + } else if (rpc_version >= 5) { safe_unpack16(&object_ptr->classification, buffer); safe_unpack32(&count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -2845,8 +2926,8 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, ListIterator itr = NULL; slurmdb_association_cond_t *object = (slurmdb_association_cond_t *)in; - if(rpc_version >= 10) { - if(!object) { + if (rpc_version >= 10) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -2894,11 +2975,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, return; } - if(object->acct_list) + if (object->acct_list) count = list_count(object->acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2907,11 +2988,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { 
packstr(tmp_info, buffer); @@ -2920,11 +3001,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->def_qos_id_list) + if (object->def_qos_id_list) count = list_count(object->def_qos_id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->def_qos_id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2933,11 +3014,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->fairshare_list) + if (object->fairshare_list) count = list_count(object->fairshare_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->fairshare_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2946,11 +3027,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpu_mins_list) + if (object->grp_cpu_mins_list) count = list_count(object->grp_cpu_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_cpu_mins_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2959,11 +3040,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpu_run_mins_list) + if (object->grp_cpu_run_mins_list) count = list_count(object->grp_cpu_run_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->grp_cpu_run_mins_list); while((tmp_info = list_next(itr))) { @@ -2973,11 +3054,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpus_list) + if (object->grp_cpus_list) count = list_count(object->grp_cpus_list); pack32(count, buffer); - if(count && 
count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_cpus_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2986,11 +3067,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_jobs_list) + if (object->grp_jobs_list) count = list_count(object->grp_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_jobs_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -2999,11 +3080,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_mem_list) + if (object->grp_mem_list) count = list_count(object->grp_mem_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_mem_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3012,11 +3093,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_nodes_list) + if (object->grp_nodes_list) count = list_count(object->grp_nodes_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_nodes_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3025,11 +3106,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_submit_jobs_list) + if (object->grp_submit_jobs_list) count = list_count(object->grp_submit_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->grp_submit_jobs_list); while((tmp_info = list_next(itr))) { @@ -3039,11 +3120,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_wall_list) + 
if (object->grp_wall_list) count = list_count(object->grp_wall_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_wall_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3052,11 +3133,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3064,11 +3145,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpu_mins_pj_list) + if (object->max_cpu_mins_pj_list) count = list_count(object->max_cpu_mins_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_cpu_mins_pj_list); while((tmp_info = list_next(itr))) { @@ -3078,11 +3159,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpu_run_mins_list) + if (object->max_cpu_run_mins_list) count = list_count(object->max_cpu_run_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_cpu_run_mins_list); while((tmp_info = list_next(itr))) { @@ -3092,11 +3173,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpus_pj_list) + if (object->max_cpus_pj_list) count = list_count(object->max_cpus_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_cpus_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3104,11 +3185,11 @@ extern void 
slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_jobs_list) + if (object->max_jobs_list) count = list_count(object->max_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_jobs_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3116,11 +3197,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_nodes_pj_list) + if (object->max_nodes_pj_list) count = list_count(object->max_nodes_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_nodes_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3128,11 +3209,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_submit_jobs_list) + if (object->max_submit_jobs_list) count = list_count(object->max_submit_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_submit_jobs_list); while((tmp_info = list_next(itr))) { @@ -3141,11 +3222,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_wall_pj_list) + if (object->max_wall_pj_list) count = list_count(object->max_wall_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_wall_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3156,11 +3237,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack16(object->only_defs, buffer); - if(object->partition_list) + if (object->partition_list) count = 
list_count(object->partition_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3169,11 +3250,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->parent_acct_list) + if (object->parent_acct_list) count = list_count(object->parent_acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->parent_acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3182,12 +3263,12 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3199,11 +3280,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack_time(object->usage_end, buffer); pack_time(object->usage_start, buffer); - if(object->user_list) + if (object->user_list) count = list_count(object->user_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3218,8 +3299,8 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack16(object->with_sub_accts, buffer); pack16(object->without_parent_info, buffer); pack16(object->without_parent_limits, buffer); - } else if(rpc_version >= 8) { - if(!object) { + } else if (rpc_version >= 8) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -3266,11 +3347,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t 
rpc_version, return; } - if(object->acct_list) + if (object->acct_list) count = list_count(object->acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3279,11 +3360,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3292,11 +3373,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->def_qos_id_list) + if (object->def_qos_id_list) count = list_count(object->def_qos_id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->def_qos_id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3305,11 +3386,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->fairshare_list) + if (object->fairshare_list) count = list_count(object->fairshare_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->fairshare_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3318,11 +3399,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpu_mins_list) + if (object->grp_cpu_mins_list) count = list_count(object->grp_cpu_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_cpu_mins_list); while((tmp_info = list_next(itr))) { 
packstr(tmp_info, buffer); @@ -3331,11 +3412,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpu_run_mins_list) + if (object->grp_cpu_run_mins_list) count = list_count(object->grp_cpu_run_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->grp_cpu_run_mins_list); while((tmp_info = list_next(itr))) { @@ -3345,11 +3426,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpus_list) + if (object->grp_cpus_list) count = list_count(object->grp_cpus_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_cpus_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3358,11 +3439,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_jobs_list) + if (object->grp_jobs_list) count = list_count(object->grp_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_jobs_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3371,11 +3452,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_nodes_list) + if (object->grp_nodes_list) count = list_count(object->grp_nodes_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_nodes_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3384,11 +3465,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_submit_jobs_list) + if (object->grp_submit_jobs_list) count = list_count(object->grp_submit_jobs_list); pack32(count, buffer); - if(count && 
count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->grp_submit_jobs_list); while((tmp_info = list_next(itr))) { @@ -3398,11 +3479,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_wall_list) + if (object->grp_wall_list) count = list_count(object->grp_wall_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_wall_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3411,11 +3492,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3423,11 +3504,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpu_mins_pj_list) + if (object->max_cpu_mins_pj_list) count = list_count(object->max_cpu_mins_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_cpu_mins_pj_list); while((tmp_info = list_next(itr))) { @@ -3437,11 +3518,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpu_run_mins_list) + if (object->max_cpu_run_mins_list) count = list_count(object->max_cpu_run_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_cpu_run_mins_list); while((tmp_info = list_next(itr))) { @@ -3451,11 +3532,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpus_pj_list) + if (object->max_cpus_pj_list) 
count = list_count(object->max_cpus_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_cpus_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3463,11 +3544,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_jobs_list) + if (object->max_jobs_list) count = list_count(object->max_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_jobs_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3475,11 +3556,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_nodes_pj_list) + if (object->max_nodes_pj_list) count = list_count(object->max_nodes_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_nodes_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3487,11 +3568,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_submit_jobs_list) + if (object->max_submit_jobs_list) count = list_count(object->max_submit_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_submit_jobs_list); while((tmp_info = list_next(itr))) { @@ -3500,11 +3581,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_wall_pj_list) + if (object->max_wall_pj_list) count = list_count(object->max_wall_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = 
list_iterator_create(object->max_wall_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3515,11 +3596,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack16(object->only_defs, buffer); - if(object->partition_list) + if (object->partition_list) count = list_count(object->partition_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3528,11 +3609,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->parent_acct_list) + if (object->parent_acct_list) count = list_count(object->parent_acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->parent_acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3541,12 +3622,12 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3558,11 +3639,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack_time(object->usage_end, buffer); pack_time(object->usage_start, buffer); - if(object->user_list) + if (object->user_list) count = list_count(object->user_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3577,8 +3658,8 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack16(object->with_sub_accts, buffer); 
pack16(object->without_parent_info, buffer); pack16(object->without_parent_limits, buffer); - } else if(rpc_version >= 5) { - if(!object) { + } else if (rpc_version >= 5) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -3619,11 +3700,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, return; } - if(object->acct_list) + if (object->acct_list) count = list_count(object->acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3632,11 +3713,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3645,11 +3726,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->fairshare_list) + if (object->fairshare_list) count = list_count(object->fairshare_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->fairshare_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3658,11 +3739,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_cpu_mins_list) + if (object->grp_cpu_mins_list) count = list_count(object->grp_cpu_mins_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_cpu_mins_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3671,11 +3752,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t 
rpc_version, } count = NO_VAL; - if(object->grp_cpus_list) + if (object->grp_cpus_list) count = list_count(object->grp_cpus_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_cpus_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3684,11 +3765,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_jobs_list) + if (object->grp_jobs_list) count = list_count(object->grp_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_jobs_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3697,11 +3778,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_nodes_list) + if (object->grp_nodes_list) count = list_count(object->grp_nodes_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_nodes_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3710,11 +3791,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_submit_jobs_list) + if (object->grp_submit_jobs_list) count = list_count(object->grp_submit_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->grp_submit_jobs_list); while((tmp_info = list_next(itr))) { @@ -3724,11 +3805,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->grp_wall_list) + if (object->grp_wall_list) count = list_count(object->grp_wall_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->grp_wall_list); while((tmp_info = list_next(itr))) { 
packstr(tmp_info, buffer); @@ -3737,11 +3818,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3749,11 +3830,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->max_cpu_mins_pj_list) + if (object->max_cpu_mins_pj_list) count = list_count(object->max_cpu_mins_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_cpu_mins_pj_list); while((tmp_info = list_next(itr))) { @@ -3762,11 +3843,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_cpus_pj_list) + if (object->max_cpus_pj_list) count = list_count(object->max_cpus_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_cpus_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3774,11 +3855,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_jobs_list) + if (object->max_jobs_list) count = list_count(object->max_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_jobs_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3786,11 +3867,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_nodes_pj_list) + if (object->max_nodes_pj_list) count = 
list_count(object->max_nodes_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_nodes_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3798,11 +3879,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_submit_jobs_list) + if (object->max_submit_jobs_list) count = list_count(object->max_submit_jobs_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create( object->max_submit_jobs_list); while((tmp_info = list_next(itr))) { @@ -3811,11 +3892,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, list_iterator_destroy(itr); } count = NO_VAL; - if(object->max_wall_pj_list) + if (object->max_wall_pj_list) count = list_count(object->max_wall_pj_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->max_wall_pj_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3824,11 +3905,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->partition_list) + if (object->partition_list) count = list_count(object->partition_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3837,11 +3918,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->parent_acct_list) + if (object->parent_acct_list) count = list_count(object->parent_acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->parent_acct_list); while((tmp_info = list_next(itr))) { 
packstr(tmp_info, buffer); @@ -3850,12 +3931,12 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3867,11 +3948,11 @@ extern void slurmdb_pack_association_cond(void *in, uint16_t rpc_version, pack_time(object->usage_end, buffer); pack_time(object->usage_start, buffer); - if(object->user_list) + if (object->user_list) count = list_count(object->user_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -3902,7 +3983,7 @@ extern int slurmdb_unpack_association_cond(void **object, if (rpc_version >= 10) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3912,7 +3993,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3924,7 +4005,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->def_qos_id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3936,7 +4017,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->fairshare_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3948,7 +4029,7 @@ extern int slurmdb_unpack_association_cond(void **object, } 
safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpu_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3959,7 +4040,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpu_run_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3970,7 +4051,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpus_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3981,7 +4062,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -3992,7 +4073,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_mem_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4003,7 +4084,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_nodes_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4014,7 +4095,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_submit_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4025,7 +4106,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_wall_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4037,7 +4118,7 @@ extern int 
slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4047,7 +4128,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpu_mins_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4058,7 +4139,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpu_run_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4069,7 +4150,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpus_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4080,7 +4161,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4091,7 +4172,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_nodes_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4102,7 +4183,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_submit_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4113,7 +4194,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_wall_pj_list = 
list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4127,7 +4208,7 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack16(&object_ptr->only_defs, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4139,7 +4220,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->parent_acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4151,7 +4232,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4164,7 +4245,7 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack_time(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4182,7 +4263,7 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack16(&object_ptr->without_parent_limits, buffer); } else if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4192,7 +4273,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4204,7 +4285,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->def_qos_id_list = list_create(slurm_destroy_char); 
for(i=0; i<count; i++) { @@ -4216,7 +4297,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->fairshare_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4228,7 +4309,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpu_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4239,7 +4320,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpu_run_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4250,7 +4331,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpus_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4261,7 +4342,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4272,7 +4353,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_nodes_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4283,7 +4364,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_submit_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4294,7 +4375,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_wall_list = 
list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4306,7 +4387,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4316,7 +4397,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpu_mins_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4327,7 +4408,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpu_run_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4338,7 +4419,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpus_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4349,7 +4430,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4360,7 +4441,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_nodes_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4371,7 +4452,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_submit_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4382,7 +4463,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count 
!= NO_VAL) { + if (count != NO_VAL) { object_ptr->max_wall_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4396,7 +4477,7 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack16(&object_ptr->only_defs, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4408,7 +4489,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->parent_acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4420,7 +4501,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4433,7 +4514,7 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack_time(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4449,9 +4530,9 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack16(&object_ptr->with_sub_accts, buffer); safe_unpack16(&object_ptr->without_parent_info, buffer); safe_unpack16(&object_ptr->without_parent_limits, buffer); - } else if(rpc_version >= 5) { + } else if (rpc_version >= 5) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4461,7 +4542,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4473,7 +4554,7 
@@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->fairshare_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4485,7 +4566,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpu_mins_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4496,7 +4577,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_cpus_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4507,7 +4588,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4518,7 +4599,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_nodes_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4529,7 +4610,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_submit_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4540,7 +4621,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->grp_wall_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4552,7 +4633,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { 
safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4562,7 +4643,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpu_mins_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4573,7 +4654,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_cpus_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4584,7 +4665,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4595,7 +4676,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_nodes_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4606,7 +4687,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_submit_jobs_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4617,7 +4698,7 @@ extern int slurmdb_unpack_association_cond(void **object, } } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->max_wall_pj_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4629,7 +4710,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4641,7 +4722,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { 
object_ptr->parent_acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4653,7 +4734,7 @@ extern int slurmdb_unpack_association_cond(void **object, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4666,7 +4747,7 @@ extern int slurmdb_unpack_association_cond(void **object, safe_unpack_time(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -4700,7 +4781,7 @@ extern void slurmdb_pack_event_cond(void *in, uint16_t rpc_version, Buf buffer) ListIterator itr = NULL; slurmdb_event_cond_t *object = (slurmdb_event_cond_t *)in; - if(!object) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -4714,11 +4795,11 @@ extern void slurmdb_pack_event_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -4731,11 +4812,11 @@ extern void slurmdb_pack_event_cond(void *in, uint16_t rpc_version, Buf buffer) pack32(object->cpus_min, buffer); pack16(object->event_type, buffer); - if(object->node_list) + if (object->node_list) count = list_count(object->node_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->node_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -4747,11 +4828,11 @@ extern void slurmdb_pack_event_cond(void *in, uint16_t rpc_version, Buf buffer) pack_time(object->period_end, buffer); 
pack_time(object->period_start, buffer); - if(object->reason_list) + if (object->reason_list) count = list_count(object->reason_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->reason_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -4760,11 +4841,11 @@ extern void slurmdb_pack_event_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->reason_uid_list) + if (object->reason_uid_list) count = list_count(object->reason_uid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->reason_uid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -4773,11 +4854,11 @@ extern void slurmdb_pack_event_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->state_list) + if (object->state_list) count = list_count(object->state_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->state_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -4800,7 +4881,7 @@ extern int slurmdb_unpack_event_cond(void **object, uint16_t rpc_version, *object = object_ptr; safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4813,7 +4894,7 @@ extern int slurmdb_unpack_event_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->event_type, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->node_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4826,7 +4907,7 @@ extern int slurmdb_unpack_event_cond(void **object, uint16_t rpc_version, 
safe_unpack_time(&object_ptr->period_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->reason_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4836,7 +4917,7 @@ extern int slurmdb_unpack_event_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->reason_uid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -4846,7 +4927,7 @@ extern int slurmdb_unpack_event_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->state_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -5090,7 +5171,7 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) pack16(object->without_steps, buffer); pack16(object->without_usage_truncation, buffer); } else if (rpc_version >= 8) { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -5119,11 +5200,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->acct_list) + if (object->acct_list) count = list_count(object->acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5132,11 +5213,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->associd_list) + if (object->associd_list) count = list_count(object->associd_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->associd_list); while((tmp_info = 
list_next(itr))) { packstr(tmp_info, buffer); @@ -5144,11 +5225,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5162,11 +5243,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) pack16(object->duplicates, buffer); pack32((uint32_t)object->exitcode, buffer); - if(object->groupid_list) + if (object->groupid_list) count = list_count(object->groupid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->groupid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5177,11 +5258,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) pack32(object->nodes_max, buffer); pack32(object->nodes_min, buffer); - if(object->partition_list) + if (object->partition_list) count = list_count(object->partition_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5190,11 +5271,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->qos_list) + if (object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5203,11 +5284,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->resv_list) + if 
(object->resv_list) count = list_count(object->resv_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->resv_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5216,11 +5297,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->resvid_list) + if (object->resvid_list) count = list_count(object->resvid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->resvid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5229,11 +5310,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->step_list) + if (object->step_list) count = list_count(object->step_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->step_list); while((job = list_next(itr))) { slurmdb_pack_selected_step(job, rpc_version, @@ -5243,11 +5324,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->state_list) + if (object->state_list) count = list_count(object->state_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->state_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5263,11 +5344,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) packstr(object->used_nodes, buffer); - if(object->userid_list) + if (object->userid_list) count = list_count(object->userid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->userid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5276,10 +5357,10 @@ extern void 
slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->wckey_list) + if (object->wckey_list) count = list_count(object->wckey_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->wckey_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5290,8 +5371,8 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) pack16(object->without_steps, buffer); pack16(object->without_usage_truncation, buffer); - } else if(rpc_version >= 6) { - if(!object) { + } else if (rpc_version >= 6) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -5315,11 +5396,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->acct_list) + if (object->acct_list) count = list_count(object->acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5328,11 +5409,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->associd_list) + if (object->associd_list) count = list_count(object->associd_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->associd_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5340,11 +5421,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5357,11 +5438,11 
@@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) pack32(object->cpus_min, buffer); pack16(object->duplicates, buffer); - if(object->groupid_list) + if (object->groupid_list) count = list_count(object->groupid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->groupid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5371,11 +5452,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) pack32(object->nodes_max, buffer); pack32(object->nodes_min, buffer); - if(object->partition_list) + if (object->partition_list) count = list_count(object->partition_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5384,11 +5465,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->resv_list) + if (object->resv_list) count = list_count(object->resv_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->resv_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5397,11 +5478,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->resvid_list) + if (object->resvid_list) count = list_count(object->resvid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->resvid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5410,11 +5491,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->step_list) + if (object->step_list) count = list_count(object->step_list); pack32(count, buffer); - 
if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->step_list); while((job = list_next(itr))) { slurmdb_pack_selected_step(job, rpc_version, @@ -5424,11 +5505,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->state_list) + if (object->state_list) count = list_count(object->state_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->state_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5442,11 +5523,11 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) packstr(object->used_nodes, buffer); - if(object->userid_list) + if (object->userid_list) count = list_count(object->userid_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->userid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5455,10 +5536,10 @@ extern void slurmdb_pack_job_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->wckey_list) + if (object->wckey_list) count = list_count(object->wckey_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->wckey_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -5652,7 +5733,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->without_usage_truncation, buffer); } else if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -5662,7 +5743,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + 
if (count != NO_VAL) { object_ptr->associd_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5673,7 +5754,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5690,7 +5771,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, object_ptr->exitcode = (int32_t)uint32_tmp; safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->groupid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5704,7 +5785,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, safe_unpack32(&object_ptr->nodes_min, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5716,7 +5797,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5728,7 +5809,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->resv_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5740,7 +5821,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->resvid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5752,7 +5833,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->step_list = list_create(slurmdb_destroy_selected_step); 
for(i=0; i<count; i++) { @@ -5763,7 +5844,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->state_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5782,7 +5863,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, &uint32_tmp, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->userid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5793,7 +5874,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->wckey_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5805,9 +5886,9 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->without_steps, buffer); safe_unpack16(&object_ptr->without_usage_truncation, buffer); - } else if(rpc_version >= 6) { + } else if (rpc_version >= 6) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -5817,7 +5898,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->associd_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5828,7 +5909,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5843,7 +5924,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->duplicates, buffer); safe_unpack32(&count, buffer); 
- if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->groupid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5857,7 +5938,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, safe_unpack32(&object_ptr->nodes_min, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5869,7 +5950,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->resv_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5881,7 +5962,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->resvid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5893,7 +5974,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->step_list = list_create(slurmdb_destroy_selected_step); for(i=0; i<count; i++) { @@ -5906,7 +5987,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->state_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5923,7 +6004,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, &uint32_tmp, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->userid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5934,7 +6015,7 @@ extern int slurmdb_unpack_job_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->wckey_list = 
list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -5961,8 +6042,8 @@ extern void slurmdb_pack_job_modify_cond(void *in, uint16_t rpc_version, { slurmdb_job_modify_cond_t *cond = (slurmdb_job_modify_cond_t *)in; - if(rpc_version >= 8) { - if(!cond) { + if (rpc_version >= 8) { + if (!cond) { packnull(buffer); pack32(NO_VAL, buffer); return; @@ -5981,7 +6062,7 @@ extern int slurmdb_unpack_job_modify_cond(void **object, uint16_t rpc_version, *object = object_ptr; - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpackstr_xmalloc(&object_ptr->cluster, &uint32_tmp, buffer); safe_unpack32(&object_ptr->job_id, buffer); @@ -6001,7 +6082,7 @@ extern void slurmdb_pack_job_rec(void *object, uint16_t rpc_version, Buf buffer) slurmdb_step_rec_t *step = NULL; uint32_t count = 0; - if(rpc_version >= 8) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { packstr(job->account, buffer); pack32(job->alloc_cpus, buffer); pack32(job->alloc_nodes, buffer); @@ -6026,16 +6107,17 @@ extern void slurmdb_pack_job_rec(void *object, uint16_t rpc_version, Buf buffer) pack32(job->priority, buffer); pack32(job->qosid, buffer); pack32(job->req_cpus, buffer); + pack32(job->req_mem, buffer); pack32(job->requid, buffer); pack32(job->resvid, buffer); pack32(job->show_full, buffer); pack_time(job->start, buffer); pack16((uint16_t)job->state, buffer); _pack_slurmdb_stats(&job->stats, rpc_version, buffer); - if(job->steps) + if (job->steps) count = list_count(job->steps); pack32(count, buffer); - if(count) { + if (count) { itr = list_iterator_create(job->steps); while((step = list_next(itr))) { slurmdb_pack_step_rec(step, rpc_version, @@ -6057,13 +6139,15 @@ extern void slurmdb_pack_job_rec(void *object, uint16_t rpc_version, Buf buffer) pack32(job->user_cpu_usec, buffer); packstr(job->wckey, buffer); /* added for rpc_version 4 */ pack32(job->wckeyid, buffer); /* added for rpc_version 4 */ - } else if(rpc_version >= 5) { + } else { + packstr(job->account, buffer); pack32(job->alloc_cpus, 
buffer); pack32(job->alloc_nodes, buffer); pack32(job->associd, buffer); - packstr(job->account, buffer); packstr(job->blockid, buffer); packstr(job->cluster, buffer); + pack32((uint32_t)job->derived_ec, buffer); + packstr(job->derived_es, buffer); pack32(job->elapsed, buffer); pack_time(job->eligible, buffer); pack_time(job->end, buffer); @@ -6075,21 +6159,21 @@ extern void slurmdb_pack_job_rec(void *object, uint16_t rpc_version, Buf buffer) pack32(job->jobid, buffer); packstr(job->jobname, buffer); pack32(job->lft, buffer); - packstr(job->partition, buffer); packstr(job->nodes, buffer); + packstr(job->partition, buffer); pack32(job->priority, buffer); - pack16(job->qosid, buffer); - pack32(job->resvid, buffer); + pack32(job->qosid, buffer); pack32(job->req_cpus, buffer); pack32(job->requid, buffer); - _pack_slurmdb_stats(&job->stats, rpc_version, buffer); + pack32(job->resvid, buffer); pack32(job->show_full, buffer); pack_time(job->start, buffer); pack16((uint16_t)job->state, buffer); - if(job->steps) + _pack_slurmdb_stats(&job->stats, rpc_version, buffer); + if (job->steps) count = list_count(job->steps); pack32(count, buffer); - if(count) { + if (count) { itr = list_iterator_create(job->steps); while((step = list_next(itr))) { slurmdb_pack_step_rec(step, rpc_version, @@ -6125,7 +6209,7 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) *job = job_ptr; - if(rpc_version >= 8) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { safe_unpackstr_xmalloc(&job_ptr->account, &uint32_tmp, buffer); safe_unpack32(&job_ptr->alloc_cpus, buffer); safe_unpack32(&job_ptr->alloc_nodes, buffer); @@ -6151,13 +6235,14 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) safe_unpack32(&job_ptr->priority, buffer); safe_unpack32(&job_ptr->qosid, buffer); safe_unpack32(&job_ptr->req_cpus, buffer); + safe_unpack32(&job_ptr->req_mem, buffer); safe_unpack32(&job_ptr->requid, buffer); safe_unpack32(&job_ptr->resvid, buffer); 
safe_unpack32(&job_ptr->show_full, buffer); safe_unpack_time(&job_ptr->start, buffer); safe_unpack16(&uint16_tmp, buffer); job_ptr->state = uint16_tmp; - if(_unpack_slurmdb_stats(&job_ptr->stats, rpc_version, buffer) + if (_unpack_slurmdb_stats(&job_ptr->stats, rpc_version, buffer) != SLURM_SUCCESS) goto unpack_error; @@ -6169,7 +6254,7 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) goto unpack_error; step->job_ptr = job_ptr; - if(!job_ptr->first_step_ptr) + if (!job_ptr->first_step_ptr) job_ptr->first_step_ptr = step; list_append(job_ptr->steps, step); } @@ -6188,13 +6273,17 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) safe_unpack32(&job_ptr->user_cpu_usec, buffer); safe_unpackstr_xmalloc(&job_ptr->wckey, &uint32_tmp, buffer); safe_unpack32(&job_ptr->wckeyid, buffer); - } else if(rpc_version >= 5) { + } else { + safe_unpackstr_xmalloc(&job_ptr->account, &uint32_tmp, buffer); safe_unpack32(&job_ptr->alloc_cpus, buffer); safe_unpack32(&job_ptr->alloc_nodes, buffer); safe_unpack32(&job_ptr->associd, buffer); - safe_unpackstr_xmalloc(&job_ptr->account, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job_ptr->blockid, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job_ptr->cluster, &uint32_tmp, buffer); + safe_unpack32(&uint32_tmp, buffer); + job_ptr->derived_ec = (int32_t)uint32_tmp; + safe_unpackstr_xmalloc(&job_ptr->derived_es, &uint32_tmp, + buffer); safe_unpack32(&job_ptr->elapsed, buffer); safe_unpack_time(&job_ptr->eligible, buffer); safe_unpack_time(&job_ptr->end, buffer); @@ -6204,23 +6293,23 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) safe_unpack32(&job_ptr->jobid, buffer); safe_unpackstr_xmalloc(&job_ptr->jobname, &uint32_tmp, buffer); safe_unpack32(&job_ptr->lft, buffer); + safe_unpackstr_xmalloc(&job_ptr->nodes, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job_ptr->partition, &uint32_tmp, buffer); - safe_unpackstr_xmalloc(&job_ptr->nodes, 
&uint32_tmp, buffer); safe_unpack32(&job_ptr->priority, buffer); - safe_unpack16((uint16_t *)&job_ptr->qosid, buffer); - safe_unpack32(&job_ptr->resvid, buffer); + safe_unpack32(&job_ptr->qosid, buffer); safe_unpack32(&job_ptr->req_cpus, buffer); safe_unpack32(&job_ptr->requid, buffer); - if(_unpack_slurmdb_stats(&job_ptr->stats, rpc_version, buffer) - != SLURM_SUCCESS) - goto unpack_error; + safe_unpack32(&job_ptr->resvid, buffer); safe_unpack32(&job_ptr->show_full, buffer); safe_unpack_time(&job_ptr->start, buffer); safe_unpack16(&uint16_tmp, buffer); job_ptr->state = uint16_tmp; - safe_unpack32(&count, buffer); + if (_unpack_slurmdb_stats(&job_ptr->stats, rpc_version, buffer) + != SLURM_SUCCESS) + goto unpack_error; + safe_unpack32(&count, buffer); job_ptr->steps = list_create(slurmdb_destroy_step_rec); for(i=0; i<count; i++) { if (slurmdb_unpack_step_rec(&step, rpc_version, buffer) @@ -6228,7 +6317,7 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) goto unpack_error; step->job_ptr = job_ptr; - if(!job_ptr->first_step_ptr) + if (!job_ptr->first_step_ptr) job_ptr->first_step_ptr = step; list_append(job_ptr->steps, step); } @@ -6264,8 +6353,8 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) ListIterator itr = NULL; slurmdb_qos_cond_t *object = (slurmdb_qos_cond_t *)in; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -6274,11 +6363,11 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->description_list) + if (object->description_list) count = list_count(object->description_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->description_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6287,11 +6376,11 @@ extern void 
slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6300,11 +6389,11 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->name_list) + if (object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6316,7 +6405,7 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) pack16(object->preempt_mode, buffer); pack16(object->with_deleted, buffer); } else { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -6324,11 +6413,11 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->description_list) + if (object->description_list) count = list_count(object->description_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->description_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6337,11 +6426,11 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6350,11 +6439,11 @@ extern void slurmdb_pack_qos_cond(void *in, uint16_t rpc_version, Buf buffer) } 
count = NO_VAL; - if(object->name_list) + if (object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6378,9 +6467,9 @@ extern int slurmdb_unpack_qos_cond(void **object, uint16_t rpc_version, *object = object_ptr; - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->description_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6392,7 +6481,7 @@ extern int slurmdb_unpack_qos_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -6402,7 +6491,7 @@ extern int slurmdb_unpack_qos_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->name_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -6415,7 +6504,7 @@ extern int slurmdb_unpack_qos_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->with_deleted, buffer); } else { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->description_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6427,7 +6516,7 @@ extern int slurmdb_unpack_qos_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -6437,7 +6526,7 @@ extern int slurmdb_unpack_qos_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, 
buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->name_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -6465,7 +6554,7 @@ extern void slurmdb_pack_reservation_cond(void *in, uint16_t rpc_version, ListIterator itr = NULL; char *tmp_info = NULL; - if(!object) { + if (!object) { pack32((uint32_t)NO_VAL, buffer); pack16(0, buffer); pack32((uint16_t)NO_VAL, buffer); @@ -6477,11 +6566,11 @@ extern void slurmdb_pack_reservation_cond(void *in, uint16_t rpc_version, return; } - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6492,11 +6581,11 @@ extern void slurmdb_pack_reservation_cond(void *in, uint16_t rpc_version, pack16(object->flags, buffer); - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6505,11 +6594,11 @@ extern void slurmdb_pack_reservation_cond(void *in, uint16_t rpc_version, } count = NO_VAL; - if(object->name_list) + if (object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6536,7 +6625,7 @@ extern int slurmdb_unpack_reservation_cond(void **object, uint16_t rpc_version, *object = object_ptr; safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, 
&uint32_tmp, buffer); @@ -6547,7 +6636,7 @@ extern int slurmdb_unpack_reservation_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->flags, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -6556,7 +6645,7 @@ extern int slurmdb_unpack_reservation_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->name_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -6705,7 +6794,7 @@ extern int slurmdb_unpack_step_rec(slurmdb_step_rec_t **step, safe_unpackstr_xmalloc(&step_ptr->nodes, &uint32_tmp, buffer); safe_unpack32(&step_ptr->ntasks, buffer); safe_unpack32(&step_ptr->requid, buffer); - if(_unpack_slurmdb_stats(&step_ptr->stats, rpc_version, buffer) + if (_unpack_slurmdb_stats(&step_ptr->stats, rpc_version, buffer) != SLURM_SUCCESS) goto unpack_error; safe_unpack_time(&step_ptr->start, buffer); @@ -6739,8 +6828,8 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) ListIterator itr = NULL; slurmdb_txn_cond_t *object = (slurmdb_txn_cond_t *)in; - if(rpc_version >= 5) { - if(!object) { + if (rpc_version >= 5) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -6753,11 +6842,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) pack16(0, buffer); return; } - if(object->acct_list) + if (object->acct_list) count = list_count(object->acct_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6766,11 +6855,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf 
buffer) } count = NO_VAL; - if(object->action_list) + if (object->action_list) count = list_count(object->action_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->action_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6779,11 +6868,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->actor_list) + if (object->actor_list) count = list_count(object->actor_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->actor_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6792,11 +6881,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6805,11 +6894,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6818,11 +6907,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->info_list) + if (object->info_list) count = list_count(object->info_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->info_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6831,11 +6920,11 @@ extern 
void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->name_list) + if (object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6846,11 +6935,11 @@ extern void slurmdb_pack_txn_cond(void *in, uint16_t rpc_version, Buf buffer) pack_time(object->time_end, buffer); pack_time(object->time_start, buffer); - if(object->user_list) + if (object->user_list) count = list_count(object->user_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -6875,7 +6964,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, *object = object_ptr; if (rpc_version >= 5) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6886,7 +6975,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->action_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6897,7 +6986,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->actor_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6908,7 +6997,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6919,7 +7008,7 @@ extern int slurmdb_unpack_txn_cond(void **object, 
uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, @@ -6929,7 +7018,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->info_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6940,7 +7029,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->name_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6954,7 +7043,7 @@ extern int slurmdb_unpack_txn_cond(void **object, uint16_t rpc_version, safe_unpack_time(&object_ptr->time_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -6983,8 +7072,8 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) ListIterator itr = NULL; slurmdb_wckey_cond_t *object = (slurmdb_wckey_cond_t *)in; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -7001,11 +7090,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7014,11 +7103,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->id_list) + if (object->id_list) count = 
list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7026,11 +7115,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->name_list) + if (object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7044,11 +7133,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) pack_time(object->usage_end, buffer); pack_time(object->usage_start, buffer); - if(object->user_list) + if (object->user_list) count = list_count(object->user_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7060,7 +7149,7 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) pack16(object->with_usage, buffer); pack16(object->with_deleted, buffer); } else if (rpc_version >= 5) { - if(!object) { + if (!object) { pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); pack32(NO_VAL, buffer); @@ -7075,11 +7164,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) return; } - if(object->cluster_list) + if (object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7088,11 +7177,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->id_list) + if 
(object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7100,11 +7189,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) } count = NO_VAL; - if(object->name_list) + if (object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7116,11 +7205,11 @@ extern void slurmdb_pack_wckey_cond(void *in, uint16_t rpc_version, Buf buffer) pack_time(object->usage_end, buffer); pack_time(object->usage_start, buffer); - if(object->user_list) + if (object->user_list) count = list_count(object->user_list); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -7148,7 +7237,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, if (rpc_version >= 8) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -7160,7 +7249,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -7170,7 +7259,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->name_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -7186,7 
+7275,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, safe_unpack_time(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -7200,7 +7289,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, safe_unpack16(&object_ptr->with_deleted, buffer); } else if (rpc_version >= 5) { safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -7212,7 +7301,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, @@ -7222,7 +7311,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->name_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -7236,7 +7325,7 @@ extern int slurmdb_unpack_wckey_cond(void **object, uint16_t rpc_version, safe_unpack_time(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { @@ -7262,8 +7351,8 @@ extern void slurmdb_pack_archive_cond(void *in, uint16_t rpc_version, { slurmdb_archive_cond_t *object = (slurmdb_archive_cond_t *)in; - if(rpc_version >= 8) { - if(!object) { + if (rpc_version >= 8) { + if (!object) { packnull(buffer); packnull(buffer); slurmdb_pack_job_cond(NULL, rpc_version, buffer); @@ -7282,7 +7371,7 @@ extern void slurmdb_pack_archive_cond(void *in, uint16_t rpc_version, pack32(object->purge_step, buffer); 
pack32(object->purge_suspend, buffer); } else { - if(!object) { + if (!object) { packnull(buffer); pack16((uint16_t)0, buffer); pack16((uint16_t)0, buffer); @@ -7324,12 +7413,12 @@ extern int slurmdb_unpack_archive_cond(void **object, uint16_t rpc_version, *object = object_ptr; - if(rpc_version >= 8) { + if (rpc_version >= 8) { safe_unpackstr_xmalloc(&object_ptr->archive_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->archive_script, &uint32_tmp, buffer); - if(slurmdb_unpack_job_cond((void *)&object_ptr->job_cond, + if (slurmdb_unpack_job_cond((void *)&object_ptr->job_cond, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; safe_unpack32(&object_ptr->purge_event, buffer); @@ -7347,36 +7436,36 @@ extern int slurmdb_unpack_archive_cond(void **object, uint16_t rpc_version, &uint32_tmp, buffer); safe_unpack16(&a_steps, buffer); safe_unpack16(&a_suspend, buffer); - if(slurmdb_unpack_job_cond((void *)&object_ptr->job_cond, + if (slurmdb_unpack_job_cond((void *)&object_ptr->job_cond, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; safe_unpack16(&tmp16, buffer); object_ptr->purge_event = tmp16; - if(tmp16 != (uint16_t)NO_VAL) { + if (tmp16 != (uint16_t)NO_VAL) { object_ptr->purge_event |= SLURMDB_PURGE_MONTHS; - if(a_events) + if (a_events) object_ptr->purge_event |= SLURMDB_PURGE_ARCHIVE; } safe_unpack16(&tmp16, buffer); object_ptr->purge_job = tmp16; - if(tmp16 != (uint16_t)NO_VAL) { + if (tmp16 != (uint16_t)NO_VAL) { object_ptr->purge_job |= SLURMDB_PURGE_MONTHS; - if(a_jobs) + if (a_jobs) object_ptr->purge_job |= SLURMDB_PURGE_ARCHIVE; } safe_unpack16(&tmp16, buffer); object_ptr->purge_step = tmp16; - if(tmp16 != (uint16_t)NO_VAL) { + if (tmp16 != (uint16_t)NO_VAL) { object_ptr->purge_step |= SLURMDB_PURGE_MONTHS; - if(a_steps) + if (a_steps) object_ptr->purge_step |= SLURMDB_PURGE_ARCHIVE; } safe_unpack16(&tmp16, buffer); object_ptr->purge_suspend = tmp16; - if(tmp16 != (uint16_t)NO_VAL) { + if (tmp16 != (uint16_t)NO_VAL) { 
object_ptr->purge_suspend |= SLURMDB_PURGE_MONTHS; - if(a_suspend) + if (a_suspend) object_ptr->purge_suspend |= SLURMDB_PURGE_ARCHIVE; } @@ -7435,11 +7524,11 @@ extern void slurmdb_pack_update_object(slurmdb_update_object_t *object, } pack16(object->type, buffer); - if(object->objects) + if (object->objects) count = list_count(object->objects); pack32(count, buffer); - if(count && count != NO_VAL) { + if (count && count != NO_VAL) { itr = list_iterator_create(object->objects); while((slurmdb_object = list_next(itr))) { (*(my_function))(slurmdb_object, rpc_version, buffer); @@ -7501,10 +7590,10 @@ extern int slurmdb_unpack_update_object(slurmdb_update_object_t **object, goto unpack_error; } safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { object_ptr->objects = list_create((*(my_destroy))); for(i=0; i<count; i++) { - if(((*(my_function))(&slurmdb_object, + if (((*(my_function))(&slurmdb_object, rpc_version, buffer)) == SLURM_ERROR) goto unpack_error; diff --git a/src/common/slurmdb_pack.h b/src/common/slurmdb_pack.h index a971e139748c1b1617fe8935409a028e6d49c64a..c3c0e1f5bf5dfa127cb633551f9533673a9e8c97 100644 --- a/src/common/slurmdb_pack.h +++ b/src/common/slurmdb_pack.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index c027dd0b140adebe04a9977fc48b2f8075aad77d..442883ee8f57c6a9dcb6f5a5430ac1f064f37a03 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -145,7 +145,9 @@ static int _tot_wait (struct timeval *start_time); */ extern uint16_t slurmdbd_translate_rpc(uint16_t rpc_version) { - if (rpc_version >= SLURMDBD_2_5_VERSION) + if (rpc_version >= SLURMDBD_2_6_VERSION) + return SLURM_2_6_PROTOCOL_VERSION; + else if (rpc_version >= SLURMDBD_2_5_VERSION) return SLURM_2_5_PROTOCOL_VERSION; else if (rpc_version >= SLURMDBD_2_4_VERSION) return SLURM_2_4_PROTOCOL_VERSION; @@ -195,11 +197,11 @@ extern int slurm_open_slurmdbd_conn(char *auth_info, if ((callbacks != NULL) && ((agent_tid == 0) || (agent_list == NULL))) _create_agent(); - else if(agent_list) + else if (agent_list) _load_dbd_state(); slurm_mutex_unlock(&agent_lock); - if(tmp_errno) { + if (tmp_errno) { errno = tmp_errno; return tmp_errno; } else if (slurmdbd_fd < 0) @@ -313,7 +315,7 @@ extern int slurm_send_recv_slurmdbd_msg(uint16_t rpc_version, if (slurmdbd_fd < 0) { /* Either slurm_open_slurmdbd_conn() was not executed or * the connection to Slurm DBD has been closed */ - if(req->msg_type == DBD_GET_CONFIG) + if (req->msg_type == DBD_GET_CONFIG) _open_slurmdbd_fd(0); else _open_slurmdbd_fd(1); @@ -323,7 +325,7 @@ extern int slurm_send_recv_slurmdbd_msg(uint16_t rpc_version, } } - if(!(buffer = pack_slurmdbd_msg(req, rpc_version))) { + if (!(buffer = pack_slurmdbd_msg(req, rpc_version))) { rc = SLURM_ERROR; goto end_it; } @@ -453,13 +455,14 @@ again: if (try_backup) { try_backup = false; xfree(slurmdbd_host); - if((slurmdbd_host = + if ((slurmdbd_host = slurm_get_accounting_storage_backup_host())) goto again; } } else { int rc; fd_set_nonblocking(slurmdbd_fd); + fd_set_close_on_exec(slurmdbd_fd); rc = _send_init_msg(); if (rc == SLURM_SUCCESS) { if (from_ctld) @@ -853,157 +856,157 @@ unpack_error: extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type) { - if(!msg_type) { 
+ if (!msg_type) { return NO_VAL; - } else if(!strcasecmp(msg_type, "Init")) { + } else if (!strcasecmp(msg_type, "Init")) { return DBD_INIT; - } else if(!strcasecmp(msg_type, "Fini")) { + } else if (!strcasecmp(msg_type, "Fini")) { return DBD_FINI; - } else if(!strcasecmp(msg_type, "Add Accounts")) { + } else if (!strcasecmp(msg_type, "Add Accounts")) { return DBD_ADD_ACCOUNTS; - } else if(!strcasecmp(msg_type, "Add Account Coord")) { + } else if (!strcasecmp(msg_type, "Add Account Coord")) { return DBD_ADD_ACCOUNT_COORDS; - } else if(!strcasecmp(msg_type, "Add Associations")) { + } else if (!strcasecmp(msg_type, "Add Associations")) { return DBD_ADD_ASSOCS; - } else if(!strcasecmp(msg_type, "Add Clusters")) { + } else if (!strcasecmp(msg_type, "Add Clusters")) { return DBD_ADD_CLUSTERS; - } else if(!strcasecmp(msg_type, "Add Users")) { + } else if (!strcasecmp(msg_type, "Add Users")) { return DBD_ADD_USERS; - } else if(!strcasecmp(msg_type, "Cluster Processors")) { + } else if (!strcasecmp(msg_type, "Cluster Processors")) { return DBD_CLUSTER_CPUS; - } else if(!strcasecmp(msg_type, "Flush Jobs")) { + } else if (!strcasecmp(msg_type, "Flush Jobs")) { return DBD_FLUSH_JOBS; - } else if(!strcasecmp(msg_type, "Get Accounts")) { + } else if (!strcasecmp(msg_type, "Get Accounts")) { return DBD_GET_ACCOUNTS; - } else if(!strcasecmp(msg_type, "Get Associations")) { + } else if (!strcasecmp(msg_type, "Get Associations")) { return DBD_GET_ASSOCS; - } else if(!strcasecmp(msg_type, "Get Association Usage")) { + } else if (!strcasecmp(msg_type, "Get Association Usage")) { return DBD_GET_ASSOC_USAGE; - } else if(!strcasecmp(msg_type, "Get Clusters")) { + } else if (!strcasecmp(msg_type, "Get Clusters")) { return DBD_GET_CLUSTERS; - } else if(!strcasecmp(msg_type, "Get Cluster Usage")) { + } else if (!strcasecmp(msg_type, "Get Cluster Usage")) { return DBD_GET_CLUSTER_USAGE; - } else if(!strcasecmp(msg_type, "Get Events")) { + } else if (!strcasecmp(msg_type, "Get Events")) { 
return DBD_GET_EVENTS; - } else if(!strcasecmp(msg_type, "Get Jobs")) { + } else if (!strcasecmp(msg_type, "Get Jobs")) { return DBD_GET_JOBS; - } else if(!strcasecmp(msg_type, "Get Problems")) { + } else if (!strcasecmp(msg_type, "Get Problems")) { return DBD_GET_PROBS; - } else if(!strcasecmp(msg_type, "Get Users")) { + } else if (!strcasecmp(msg_type, "Get Users")) { return DBD_GET_USERS; - } else if(!strcasecmp(msg_type, "Got Accounts")) { + } else if (!strcasecmp(msg_type, "Got Accounts")) { return DBD_GOT_ACCOUNTS; - } else if(!strcasecmp(msg_type, "Got Associations")) { + } else if (!strcasecmp(msg_type, "Got Associations")) { return DBD_GOT_ASSOCS; - } else if(!strcasecmp(msg_type, "Got Association Usage")) { + } else if (!strcasecmp(msg_type, "Got Association Usage")) { return DBD_GOT_ASSOC_USAGE; - } else if(!strcasecmp(msg_type, "Got Clusters")) { + } else if (!strcasecmp(msg_type, "Got Clusters")) { return DBD_GOT_CLUSTERS; - } else if(!strcasecmp(msg_type, "Got Cluster Usage")) { + } else if (!strcasecmp(msg_type, "Got Cluster Usage")) { return DBD_GOT_CLUSTER_USAGE; - } else if(!strcasecmp(msg_type, "Got Events")) { + } else if (!strcasecmp(msg_type, "Got Events")) { return DBD_GOT_EVENTS; - } else if(!strcasecmp(msg_type, "Got Jobs")) { + } else if (!strcasecmp(msg_type, "Got Jobs")) { return DBD_GOT_JOBS; - } else if(!strcasecmp(msg_type, "Got List")) { + } else if (!strcasecmp(msg_type, "Got List")) { return DBD_GOT_LIST; - } else if(!strcasecmp(msg_type, "Got Problems")) { + } else if (!strcasecmp(msg_type, "Got Problems")) { return DBD_GOT_PROBS; - } else if(!strcasecmp(msg_type, "Got Users")) { + } else if (!strcasecmp(msg_type, "Got Users")) { return DBD_GOT_USERS; - } else if(!strcasecmp(msg_type, "Job Complete")) { + } else if (!strcasecmp(msg_type, "Job Complete")) { return DBD_JOB_COMPLETE; - } else if(!strcasecmp(msg_type, "Job Start")) { + } else if (!strcasecmp(msg_type, "Job Start")) { return DBD_JOB_START; - } else 
if(!strcasecmp(msg_type, "ID RC")) { + } else if (!strcasecmp(msg_type, "ID RC")) { return DBD_ID_RC; - } else if(!strcasecmp(msg_type, "Job Suspend")) { + } else if (!strcasecmp(msg_type, "Job Suspend")) { return DBD_JOB_SUSPEND; - } else if(!strcasecmp(msg_type, "Modify Accounts")) { + } else if (!strcasecmp(msg_type, "Modify Accounts")) { return DBD_MODIFY_ACCOUNTS; - } else if(!strcasecmp(msg_type, "Modify Associations")) { + } else if (!strcasecmp(msg_type, "Modify Associations")) { return DBD_MODIFY_ASSOCS; - } else if(!strcasecmp(msg_type, "Modify Clusters")) { + } else if (!strcasecmp(msg_type, "Modify Clusters")) { return DBD_MODIFY_CLUSTERS; - } else if(!strcasecmp(msg_type, "Modify Job")) { + } else if (!strcasecmp(msg_type, "Modify Job")) { return DBD_MODIFY_JOB; - } else if(!strcasecmp(msg_type, "Modify QOS")) { + } else if (!strcasecmp(msg_type, "Modify QOS")) { return DBD_MODIFY_QOS; - } else if(!strcasecmp(msg_type, "Modify Users")) { + } else if (!strcasecmp(msg_type, "Modify Users")) { return DBD_MODIFY_USERS; - } else if(!strcasecmp(msg_type, "Node State")) { + } else if (!strcasecmp(msg_type, "Node State")) { return DBD_NODE_STATE; - } else if(!strcasecmp(msg_type, "RC")) { + } else if (!strcasecmp(msg_type, "RC")) { return DBD_RC; - } else if(!strcasecmp(msg_type, "Register Cluster")) { + } else if (!strcasecmp(msg_type, "Register Cluster")) { return DBD_REGISTER_CTLD; - } else if(!strcasecmp(msg_type, "Remove Accounts")) { + } else if (!strcasecmp(msg_type, "Remove Accounts")) { return DBD_REMOVE_ACCOUNTS; - } else if(!strcasecmp(msg_type, "Remove Account Coords")) { + } else if (!strcasecmp(msg_type, "Remove Account Coords")) { return DBD_REMOVE_ACCOUNT_COORDS; - } else if(!strcasecmp(msg_type, "Archive Dump")) { + } else if (!strcasecmp(msg_type, "Archive Dump")) { return DBD_ARCHIVE_DUMP; - } else if(!strcasecmp(msg_type, "Archive Load")) { + } else if (!strcasecmp(msg_type, "Archive Load")) { return DBD_ARCHIVE_LOAD; - } else 
if(!strcasecmp(msg_type, "Remove Associations")) { + } else if (!strcasecmp(msg_type, "Remove Associations")) { return DBD_REMOVE_ASSOCS; - } else if(!strcasecmp(msg_type, "Remove Clusters")) { + } else if (!strcasecmp(msg_type, "Remove Clusters")) { return DBD_REMOVE_CLUSTERS; - } else if(!strcasecmp(msg_type, "Remove Users")) { + } else if (!strcasecmp(msg_type, "Remove Users")) { return DBD_REMOVE_USERS; - } else if(!strcasecmp(msg_type, "Roll Usage")) { + } else if (!strcasecmp(msg_type, "Roll Usage")) { return DBD_ROLL_USAGE; - } else if(!strcasecmp(msg_type, "Step Complete")) { + } else if (!strcasecmp(msg_type, "Step Complete")) { return DBD_STEP_COMPLETE; - } else if(!strcasecmp(msg_type, "Step Start")) { + } else if (!strcasecmp(msg_type, "Step Start")) { return DBD_STEP_START; - } else if(!strcasecmp(msg_type, "Get Jobs Conditional")) { + } else if (!strcasecmp(msg_type, "Get Jobs Conditional")) { return DBD_GET_JOBS_COND; - } else if(!strcasecmp(msg_type, "Get Transations")) { + } else if (!strcasecmp(msg_type, "Get Transations")) { return DBD_GET_TXN; - } else if(!strcasecmp(msg_type, "Got Transations")) { + } else if (!strcasecmp(msg_type, "Got Transations")) { return DBD_GOT_TXN; - } else if(!strcasecmp(msg_type, "Add QOS")) { + } else if (!strcasecmp(msg_type, "Add QOS")) { return DBD_ADD_QOS; - } else if(!strcasecmp(msg_type, "Get QOS")) { + } else if (!strcasecmp(msg_type, "Get QOS")) { return DBD_GET_QOS; - } else if(!strcasecmp(msg_type, "Got QOS")) { + } else if (!strcasecmp(msg_type, "Got QOS")) { return DBD_GOT_QOS; - } else if(!strcasecmp(msg_type, "Remove QOS")) { + } else if (!strcasecmp(msg_type, "Remove QOS")) { return DBD_REMOVE_QOS; - } else if(!strcasecmp(msg_type, "Add WCKeys")) { + } else if (!strcasecmp(msg_type, "Add WCKeys")) { return DBD_ADD_WCKEYS; - } else if(!strcasecmp(msg_type, "Get WCKeys")) { + } else if (!strcasecmp(msg_type, "Get WCKeys")) { return DBD_GET_WCKEYS; - } else if(!strcasecmp(msg_type, "Got WCKeys")) { + } 
else if (!strcasecmp(msg_type, "Got WCKeys")) { return DBD_GOT_WCKEYS; - } else if(!strcasecmp(msg_type, "Remove WCKeys")) { + } else if (!strcasecmp(msg_type, "Remove WCKeys")) { return DBD_REMOVE_WCKEYS; - } else if(!strcasecmp(msg_type, "Get WCKey Usage")) { + } else if (!strcasecmp(msg_type, "Get WCKey Usage")) { return DBD_GET_WCKEY_USAGE; - } else if(!strcasecmp(msg_type, "Got WCKey Usage")) { + } else if (!strcasecmp(msg_type, "Got WCKey Usage")) { return DBD_GOT_WCKEY_USAGE; - } else if(!strcasecmp(msg_type, "Add Reservation")) { + } else if (!strcasecmp(msg_type, "Add Reservation")) { return DBD_ADD_RESV; - } else if(!strcasecmp(msg_type, "Remove Reservation")) { + } else if (!strcasecmp(msg_type, "Remove Reservation")) { return DBD_REMOVE_RESV; - } else if(!strcasecmp(msg_type, "Modify Reservation")) { + } else if (!strcasecmp(msg_type, "Modify Reservation")) { return DBD_MODIFY_RESV; - } else if(!strcasecmp(msg_type, "Get Reservations")) { + } else if (!strcasecmp(msg_type, "Get Reservations")) { return DBD_GET_RESVS; - } else if(!strcasecmp(msg_type, "Got Reservations")) { + } else if (!strcasecmp(msg_type, "Got Reservations")) { return DBD_GOT_RESVS; - } else if(!strcasecmp(msg_type, "Get Config")) { + } else if (!strcasecmp(msg_type, "Get Config")) { return DBD_GET_CONFIG; - } else if(!strcasecmp(msg_type, "Got Config")) { + } else if (!strcasecmp(msg_type, "Got Config")) { return DBD_GOT_CONFIG; - } else if(!strcasecmp(msg_type, "Send Multiple Job Starts")) { + } else if (!strcasecmp(msg_type, "Send Multiple Job Starts")) { return DBD_SEND_MULT_JOB_START; - } else if(!strcasecmp(msg_type, "Got Multiple Job Starts")) { + } else if (!strcasecmp(msg_type, "Got Multiple Job Starts")) { return DBD_GOT_MULT_JOB_START; - } else if(!strcasecmp(msg_type, "Send Multiple Messages")) { + } else if (!strcasecmp(msg_type, "Send Multiple Messages")) { return DBD_SEND_MULT_MSG; - } else if(!strcasecmp(msg_type, "Got Multiple Message Returns")) { + } else if 
(!strcasecmp(msg_type, "Got Multiple Message Returns")) { return DBD_GOT_MULT_MSG; } else { return NO_VAL; @@ -1016,451 +1019,451 @@ extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type, int get_enum) { switch(msg_type) { case DBD_INIT: - if(get_enum) { + if (get_enum) { return "DBD_INIT"; } else return "Init"; break; case DBD_FINI: - if(get_enum) { + if (get_enum) { return "DBD_FINI"; } else return "Fini"; break; case DBD_ADD_ACCOUNTS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_ACCOUNTS"; } else return "Add Accounts"; break; case DBD_ADD_ACCOUNT_COORDS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_ACCOUNT_COORDS"; } else return "Add Account Coord"; break; case DBD_ADD_ASSOCS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_ASSOCS"; } else return "Add Associations"; break; case DBD_ADD_CLUSTERS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_CLUSTERS"; } else return "Add Clusters"; break; case DBD_ADD_USERS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_USERS"; } else return "Add Users"; break; case DBD_CLUSTER_CPUS: - if(get_enum) { + if (get_enum) { return "DBD_CLUSTER_CPUS"; } else return "Cluster Processors"; break; case DBD_FLUSH_JOBS: - if(get_enum) { + if (get_enum) { return "DBD_FLUSH_JOBS"; } else return "Flush Jobs"; break; case DBD_GET_ACCOUNTS: - if(get_enum) { + if (get_enum) { return "DBD_GET_ACCOUNTS"; } else return "Get Accounts"; break; case DBD_GET_ASSOCS: - if(get_enum) { + if (get_enum) { return "DBD_GET_ASSOCS"; } else return "Get Associations"; break; case DBD_GET_ASSOC_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_GET_ASSOC_USAGE"; } else return "Get Association Usage"; break; case DBD_GET_CLUSTERS: - if(get_enum) { + if (get_enum) { return "DBD_GET_CLUSTERS"; } else return "Get Clusters"; break; case DBD_GET_CLUSTER_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_GET_CLUSTER_USAGE"; } else return "Get Cluster Usage"; break; case DBD_GET_EVENTS: - if(get_enum) { + if (get_enum) { return 
"DBD_GET_EVENTS"; } else return "Get Events"; break; case DBD_GET_JOBS: - if(get_enum) { + if (get_enum) { return "DBD_GET_JOBS"; } else return "Get Jobs"; break; case DBD_GET_PROBS: - if(get_enum) { + if (get_enum) { return "DBD_GET_PROBS"; } else return "Get Problems"; break; case DBD_GET_USERS: - if(get_enum) { + if (get_enum) { return "DBD_GET_USERS"; } else return "Get Users"; break; case DBD_GOT_ACCOUNTS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_ACCOUNTS"; } else return "Got Accounts"; break; case DBD_GOT_ASSOCS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_ASSOCS"; } else return "Got Associations"; break; case DBD_GOT_ASSOC_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_GOT_ASSOC_USAGE"; } else return "Got Association Usage"; break; case DBD_GOT_CLUSTERS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_CLUSTERS"; } else return "Got Clusters"; break; case DBD_GOT_CLUSTER_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_GOT_CLUSTER_USAGE"; } else return "Got Cluster Usage"; break; case DBD_GOT_EVENTS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_EVENTS"; } else return "Got Events"; break; case DBD_GOT_JOBS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_JOBS"; } else return "Got Jobs"; break; case DBD_GOT_LIST: - if(get_enum) { + if (get_enum) { return "DBD_GOT_LIST"; } else return "Got List"; break; case DBD_GOT_PROBS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_PROBS"; } else return "Got Problems"; break; case DBD_GOT_USERS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_USERS"; } else return "Got Users"; break; case DBD_JOB_COMPLETE: - if(get_enum) { + if (get_enum) { return "DBD_JOB_COMPLETE"; } else return "Job Complete"; break; case DBD_JOB_START: - if(get_enum) { + if (get_enum) { return "DBD_JOB_START"; } else return "Job Start"; break; case DBD_ID_RC: - if(get_enum) { + if (get_enum) { return "DBD_ID_RC"; } else return "ID RC"; break; case DBD_JOB_SUSPEND: - if(get_enum) { + if (get_enum) { return 
"DBD_JOB_SUSPEND"; } else return "Job Suspend"; break; case DBD_MODIFY_ACCOUNTS: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_ACCOUNTS"; } else return "Modify Accounts"; break; case DBD_MODIFY_ASSOCS: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_ASSOCS"; } else return "Modify Associations"; break; case DBD_MODIFY_CLUSTERS: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_CLUSTERS"; } else return "Modify Clusters"; break; case DBD_MODIFY_JOB: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_JOB"; } else return "Modify Job"; break; case DBD_MODIFY_QOS: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_QOS"; } else return "Modify QOS"; break; case DBD_MODIFY_USERS: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_USERS"; } else return "Modify Users"; break; case DBD_NODE_STATE: - if(get_enum) { + if (get_enum) { return "DBD_NODE_STATE"; } else return "Node State"; break; case DBD_RC: - if(get_enum) { + if (get_enum) { return "DBD_RC"; } else return "Return Code"; break; case DBD_REGISTER_CTLD: - if(get_enum) { + if (get_enum) { return "DBD_REGISTER_CTLD"; } else return "Register Cluster"; break; case DBD_REMOVE_ACCOUNTS: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_ACCOUNTS"; } else return "Remove Accounts"; break; case DBD_REMOVE_ACCOUNT_COORDS: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_ACCOUNT_COORDS"; } else return "Remove Account Coords"; break; case DBD_ARCHIVE_DUMP: - if(get_enum) { + if (get_enum) { return "DBD_ARCHIVE_DUMP"; } else return "Archive Dump"; break; case DBD_ARCHIVE_LOAD: - if(get_enum) { + if (get_enum) { return "DBD_ARCHIVE_LOAD"; } else return "Archive Load"; break; case DBD_REMOVE_ASSOCS: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_ASSOCS"; } else return "Remove Associations"; break; case DBD_REMOVE_CLUSTERS: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_CLUSTERS"; } else return "Remove Clusters"; break; case DBD_REMOVE_USERS: - if(get_enum) { + if (get_enum) { return 
"DBD_REMOVE_USERS"; } else return "Remove Users"; break; case DBD_ROLL_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_ROLL_USAGE"; } else return "Roll Usage"; break; case DBD_STEP_COMPLETE: - if(get_enum) { + if (get_enum) { return "DBD_STEP_COMPLETE"; } else return "Step Complete"; break; case DBD_STEP_START: - if(get_enum) { + if (get_enum) { return "DBD_STEP_START"; } else return "Step Start"; break; case DBD_GET_JOBS_COND: - if(get_enum) { + if (get_enum) { return "DBD_GET_JOBS_COND"; } else return "Get Jobs Conditional"; break; case DBD_GET_TXN: - if(get_enum) { + if (get_enum) { return "DBD_GET_TXN"; } else return "Get Transations"; break; case DBD_GOT_TXN: - if(get_enum) { + if (get_enum) { return "DBD_GOT_TXN"; } else return "Got Transations"; break; case DBD_ADD_QOS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_QOS"; } else return "Add QOS"; break; case DBD_GET_QOS: - if(get_enum) { + if (get_enum) { return "DBD_GET_QOS"; } else return "Get QOS"; break; case DBD_GOT_QOS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_QOS"; } else return "Got QOS"; break; case DBD_REMOVE_QOS: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_QOS"; } else return "Remove QOS"; break; case DBD_ADD_WCKEYS: - if(get_enum) { + if (get_enum) { return "DBD_ADD_WCKEYS"; } else return "Add WCKeys"; break; case DBD_GET_WCKEYS: - if(get_enum) { + if (get_enum) { return "DBD_GET_WCKEYS"; } else return "Get WCKeys"; break; case DBD_GOT_WCKEYS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_WCKEYS"; } else return "Got WCKeys"; break; case DBD_REMOVE_WCKEYS: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_WCKEYS"; } else return "Remove WCKeys"; break; case DBD_GET_WCKEY_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_GET_WCKEY_USAGE"; } else return "Get WCKey Usage"; break; case DBD_GOT_WCKEY_USAGE: - if(get_enum) { + if (get_enum) { return "DBD_GOT_WCKEY_USAGE"; } else return "Got WCKey Usage"; break; case DBD_ADD_RESV: - if(get_enum) { + if (get_enum) 
{ return "DBD_ADD_RESV"; } else return "Add Reservation"; break; case DBD_REMOVE_RESV: - if(get_enum) { + if (get_enum) { return "DBD_REMOVE_RESV"; } else return "Remove Reservation"; break; case DBD_MODIFY_RESV: - if(get_enum) { + if (get_enum) { return "DBD_MODIFY_RESV"; } else return "Modify Reservation"; break; case DBD_GET_RESVS: - if(get_enum) { + if (get_enum) { return "DBD_GET_RESVS"; } else return "Get Reservations"; break; case DBD_GOT_RESVS: - if(get_enum) { + if (get_enum) { return "DBD_GOT_RESVS"; } else return "Got Reservations"; break; case DBD_GET_CONFIG: - if(get_enum) { + if (get_enum) { return "DBD_GET_CONFIG"; } else return "Get Config"; break; case DBD_GOT_CONFIG: - if(get_enum) { + if (get_enum) { return "DBD_GOT_CONFIG"; } else return "Got Config"; break; case DBD_SEND_MULT_JOB_START: - if(get_enum) { + if (get_enum) { return "DBD_SEND_MULT_JOB_START"; } else return "Send Multiple Job Starts"; break; case DBD_GOT_MULT_JOB_START: - if(get_enum) { + if (get_enum) { return "DBD_GOT_MULT_JOB_START"; } else return "Got Multiple Job Starts"; break; case DBD_SEND_MULT_MSG: - if(get_enum) { + if (get_enum) { return "DBD_SEND_MULT_MSG"; } else return "Send Multiple Messages"; break; case DBD_GOT_MULT_MSG: - if(get_enum) { + if (get_enum) { return "DBD_GOT_MULT_MSG"; } else return "Got Multiple Message Returns"; @@ -1476,7 +1479,7 @@ extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type, int get_enum) extern void slurmdbd_free_buffer(void *x) { Buf buffer = (Buf) x; - if(buffer) + if (buffer) free_buf(buffer); } @@ -1491,6 +1494,10 @@ static int _send_init_msg() buffer = init_buf(1024); pack16((uint16_t) DBD_INIT, buffer); + if (!slurmdbd_cluster) { + debug("No ClusterName set."); + slurmdbd_cluster = slurm_get_cluster_name(); + } req.cluster_name = slurmdbd_cluster; req.rollback = rollback_started; req.version = SLURMDBD_VERSION; @@ -1509,9 +1516,9 @@ static int _send_init_msg() read_timeout = slurm_get_msg_timeout() * 1000; rc = 
_get_return_code(SLURMDBD_VERSION, read_timeout); - if(tmp_errno) + if (tmp_errno) errno = tmp_errno; - else if(rc != SLURM_SUCCESS) + else if (rc != SLURM_SUCCESS) errno = rc; return rc; } @@ -1523,7 +1530,7 @@ static int _send_fini_msg(void) /* If the connection is already gone, we don't need to send a fini. */ - if(_fd_writeable(slurmdbd_fd) == -1) + if (_fd_writeable(slurmdbd_fd) == -1) return SLURM_SUCCESS; buffer = init_buf(1024); @@ -1572,7 +1579,7 @@ static int _send_msg(Buf buffer) return EAGAIN; /* if errno is ACCESS_DENIED do not try to reopen to connection just return that */ - if(errno == ESLURM_ACCESS_DENIED) + if (errno == ESLURM_ACCESS_DENIED) return ESLURM_ACCESS_DENIED; _reopen_slurmdbd_fd(); rc = _fd_writeable(slurmdbd_fd); @@ -1812,8 +1819,6 @@ static Buf _recv_msg(int read_timeout) } buffer = create_buf(msg, msg_size); - if (buffer == NULL) - fatal("create_buf: malloc failure"); return buffer; } @@ -1949,8 +1954,6 @@ static void _create_agent(void) if (agent_list == NULL) { agent_list = list_create(slurmdbd_free_buffer); - if (agent_list == NULL) - fatal("list_create: malloc failure"); _load_dbd_state(); } @@ -2029,7 +2032,7 @@ static void *_agent(void *x) while (agent_shutdown == 0) { /* START_TIMER; */ slurm_mutex_lock(&slurmdbd_lock); - if(halt_agent) + if (halt_agent) pthread_cond_wait(&slurmdbd_cond, &slurmdbd_lock); if ((slurmdbd_fd < 0) && @@ -2086,7 +2089,7 @@ static void *_agent(void *x) slurm_mutex_unlock(&slurmdbd_lock); slurm_mutex_lock(&assoc_cache_mutex); - if(slurmdbd_fd >= 0 && running_cache) + if (slurmdbd_fd >= 0 && running_cache) pthread_cond_signal(&assoc_cache_cond); slurm_mutex_unlock(&assoc_cache_mutex); @@ -2119,7 +2122,7 @@ static void *_agent(void *x) } slurm_mutex_unlock(&slurmdbd_lock); slurm_mutex_lock(&assoc_cache_mutex); - if(slurmdbd_fd >= 0 && running_cache) + if (slurmdbd_fd >= 0 && running_cache) pthread_cond_signal(&assoc_cache_cond); slurm_mutex_unlock(&assoc_cache_mutex); @@ -2143,7 +2146,7 @@ static void 
*_agent(void *x) /* We still need to free a mult_msg even if we got a failure. */ - if(list_msg.my_list) { + if (list_msg.my_list) { if (list_msg.my_list != agent_list) list_destroy(list_msg.my_list); list_msg.my_list = NULL; @@ -2216,7 +2219,7 @@ static void _save_dbd_state(void) set_buf_offset(buffer, 0); unpack16(&msg_type, buffer); set_buf_offset(buffer, offset); - if(msg_type == DBD_REGISTER_CTLD) { + if (msg_type == DBD_REGISTER_CTLD) { free_buf(buffer); continue; } @@ -2249,7 +2252,7 @@ static void _load_dbd_state(void) fd = open(dbd_fname, O_RDONLY); if (fd < 0) { /* don't print an error message if there is no file */ - if(errno == ENOENT) + if (errno == ENOENT) debug4("slurmdbd: There is no state save file to " "open by name %s", dbd_fname); else @@ -2401,8 +2404,6 @@ static Buf _load_dbd_rec(int fd) } buffer = init_buf((int) msg_size); - if (buffer == NULL) - fatal("slurmdbd: create_buf malloc failure"); set_buf_offset(buffer, msg_size); msg = get_buf_data(buffer); size = msg_size; @@ -2470,8 +2471,8 @@ static int _purge_job_start_req(void) \****************************************************************************/ extern void slurmdbd_free_acct_coord_msg(dbd_acct_coord_msg_t *msg) { - if(msg) { - if(msg->acct_list) { + if (msg) { + if (msg->acct_list) { list_destroy(msg->acct_list); msg->acct_list = NULL; } @@ -2504,7 +2505,7 @@ extern void slurmdbd_free_rec_msg(dbd_rec_msg_t *msg, fatal("Unknown rec type"); return; } - if(msg->rec) + if (msg->rec) (*(my_destroy))(msg->rec); xfree(msg); } @@ -2561,7 +2562,7 @@ extern void slurmdbd_free_cond_msg(dbd_cond_msg_t *msg, fatal("Unknown cond type"); return; } - if(msg->cond) + if (msg->cond) (*(my_destroy))(msg->cond); xfree(msg); } @@ -2569,7 +2570,7 @@ extern void slurmdbd_free_cond_msg(dbd_cond_msg_t *msg, extern void slurmdbd_free_init_msg(dbd_init_msg_t *msg) { - if(msg) { + if (msg) { xfree(msg->cluster_name); xfree(msg); } @@ -2621,7 +2622,7 @@ extern void 
slurmdbd_free_job_suspend_msg(dbd_job_suspend_msg_t *msg) extern void slurmdbd_free_list_msg(dbd_list_msg_t *msg) { if (msg) { - if(msg->my_list) + if (msg->my_list) list_destroy(msg->my_list); xfree(msg); } @@ -2664,9 +2665,9 @@ extern void slurmdbd_free_modify_msg(dbd_modify_msg_t *msg, return; } - if(msg->cond) + if (msg->cond) (*(destroy_cond))(msg->cond); - if(msg->rec) + if (msg->rec) (*(destroy_rec))(msg->rec); xfree(msg); } @@ -2683,7 +2684,7 @@ extern void slurmdbd_free_node_state_msg(dbd_node_state_msg_t *msg) extern void slurmdbd_free_rc_msg(dbd_rc_msg_t *msg) { - if(msg) { + if (msg) { xfree(msg->comment); xfree(msg); } @@ -2740,7 +2741,7 @@ extern void slurmdbd_free_usage_msg(dbd_usage_msg_t *msg, return; } - if(msg->rec) + if (msg->rec) (*(destroy_rec))(msg->rec); xfree(msg); } @@ -2757,11 +2758,11 @@ slurmdbd_pack_acct_coord_msg(dbd_acct_coord_msg_t *msg, ListIterator itr = NULL; uint32_t count = 0; - if(msg->acct_list) + if (msg->acct_list) count = list_count(msg->acct_list); pack32(count, buffer); - if(count) { + if (count) { itr = list_iterator_create(msg->acct_list); while((acct = list_next(itr))) { packstr(acct, buffer); @@ -2785,7 +2786,7 @@ slurmdbd_unpack_acct_coord_msg(dbd_acct_coord_msg_t **msg, *msg = msg_ptr; safe_unpack32(&count, buffer); - if(count) { + if (count) { msg_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&acct, &uint32_tmp, buffer); @@ -2793,7 +2794,7 @@ slurmdbd_unpack_acct_coord_msg(dbd_acct_coord_msg_t **msg, } } - if(slurmdb_unpack_user_cond((void *)&msg_ptr->cond, rpc_version, buffer) + if (slurmdb_unpack_user_cond((void *)&msg_ptr->cond, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; return SLURM_SUCCESS; @@ -2881,7 +2882,7 @@ extern int slurmdbd_unpack_rec_msg(dbd_rec_msg_t **msg, msg_ptr = xmalloc(sizeof(dbd_rec_msg_t)); *msg = msg_ptr; - if((*(my_function))(&msg_ptr->rec, rpc_version, buffer) == SLURM_ERROR) + if ((*(my_function))(&msg_ptr->rec, 
rpc_version, buffer) == SLURM_ERROR) goto unpack_error; return SLURM_SUCCESS; @@ -3003,7 +3004,7 @@ extern int slurmdbd_unpack_cond_msg(dbd_cond_msg_t **msg, msg_ptr = xmalloc(sizeof(dbd_cond_msg_t)); *msg = msg_ptr; - if((*(my_function))(&msg_ptr->cond, rpc_version, buffer) == SLURM_ERROR) + if ((*(my_function))(&msg_ptr->cond, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; return SLURM_SUCCESS; @@ -3064,7 +3065,7 @@ slurmdbd_unpack_init_msg(dbd_init_msg_t **msg, /* We find out the version of the caller right here so use that as the rpc_version. */ - if(msg_ptr->version >= 7) { + if (msg_ptr->version >= 7) { safe_unpackstr_xmalloc(&msg_ptr->cluster_name, &tmp32, buffer); } @@ -3076,7 +3077,7 @@ slurmdbd_unpack_init_msg(dbd_init_msg_t **msg, goto unpack_error; } msg_ptr->uid = g_slurm_auth_get_uid(auth_cred, auth_info); - if(g_slurm_auth_errno(auth_cred) != SLURM_SUCCESS) { + if (g_slurm_auth_errno(auth_cred) != SLURM_SUCCESS) { error("Bad authentication: %s", g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); rc = ESLURM_ACCESS_DENIED; @@ -3088,7 +3089,7 @@ slurmdbd_unpack_init_msg(dbd_init_msg_t **msg, unpack_error: slurmdbd_free_init_msg(msg_ptr); *msg = NULL; - if(rc == SLURM_SUCCESS) + if (rc == SLURM_SUCCESS) rc = SLURM_ERROR; return rc; } @@ -3172,7 +3173,35 @@ slurmdbd_pack_job_start_msg(void *in, uint16_t rpc_version, Buf buffer) { dbd_job_start_msg_t *msg = (dbd_job_start_msg_t *)in; - if (rpc_version >= SLURMDBD_2_5_VERSION) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { + packstr(msg->account, buffer); + pack32(msg->alloc_cpus, buffer); + pack32(msg->alloc_nodes, buffer); + pack32(msg->assoc_id, buffer); + packstr(msg->block_id, buffer); + pack32(msg->db_index, buffer); + pack_time(msg->eligible_time, buffer); + pack32(msg->gid, buffer); + packstr(msg->gres_alloc, buffer); + packstr(msg->gres_req, buffer); + packstr(msg->gres_used, buffer); + pack32(msg->job_id, buffer); + pack16(msg->job_state, buffer); + packstr(msg->name, buffer); + 
packstr(msg->nodes, buffer); + packstr(msg->node_inx, buffer); + packstr(msg->partition, buffer); + pack32(msg->priority, buffer); + pack32(msg->qos_id, buffer); + pack32(msg->req_cpus, buffer); + pack32(msg->req_mem, buffer); + pack32(msg->resv_id, buffer); + pack_time(msg->start_time, buffer); + pack_time(msg->submit_time, buffer); + pack32(msg->timelimit, buffer); + pack32(msg->uid, buffer); + packstr(msg->wckey, buffer); + } else if (rpc_version >= SLURMDBD_2_5_VERSION) { packstr(msg->account, buffer); pack32(msg->alloc_cpus, buffer); pack32(msg->alloc_nodes, buffer); @@ -3234,7 +3263,39 @@ slurmdbd_unpack_job_start_msg(void **msg, dbd_job_start_msg_t *msg_ptr = xmalloc(sizeof(dbd_job_start_msg_t)); *msg = msg_ptr; - if (rpc_version >= SLURMDBD_2_5_VERSION) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { + safe_unpackstr_xmalloc(&msg_ptr->account, &uint32_tmp, buffer); + safe_unpack32(&msg_ptr->alloc_cpus, buffer); + safe_unpack32(&msg_ptr->alloc_nodes, buffer); + safe_unpack32(&msg_ptr->assoc_id, buffer); + safe_unpackstr_xmalloc(&msg_ptr->block_id, &uint32_tmp, buffer); + safe_unpack32(&msg_ptr->db_index, buffer); + safe_unpack_time(&msg_ptr->eligible_time, buffer); + safe_unpack32(&msg_ptr->gid, buffer); + safe_unpackstr_xmalloc(&msg_ptr->gres_alloc, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&msg_ptr->gres_req, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&msg_ptr->gres_used, &uint32_tmp, + buffer); + safe_unpack32(&msg_ptr->job_id, buffer); + safe_unpack16(&msg_ptr->job_state, buffer); + safe_unpackstr_xmalloc(&msg_ptr->name, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->node_inx, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->partition, + &uint32_tmp, buffer); + safe_unpack32(&msg_ptr->priority, buffer); + safe_unpack32(&msg_ptr->qos_id, buffer); + safe_unpack32(&msg_ptr->req_cpus, buffer); + safe_unpack32(&msg_ptr->req_mem, buffer); + 
safe_unpack32(&msg_ptr->resv_id, buffer); + safe_unpack_time(&msg_ptr->start_time, buffer); + safe_unpack_time(&msg_ptr->submit_time, buffer); + safe_unpack32(&msg_ptr->timelimit, buffer); + safe_unpack32(&msg_ptr->uid, buffer); + safe_unpackstr_xmalloc(&msg_ptr->wckey, &uint32_tmp, buffer); + } else if (rpc_version >= SLURMDBD_2_5_VERSION) { safe_unpackstr_xmalloc(&msg_ptr->account, &uint32_tmp, buffer); safe_unpack32(&msg_ptr->alloc_cpus, buffer); safe_unpack32(&msg_ptr->alloc_nodes, buffer); @@ -3434,14 +3495,14 @@ extern void slurmdbd_pack_list_msg(dbd_list_msg_t *msg, return; } - if(msg->my_list) { + if (msg->my_list) { count = list_count(msg->my_list); pack32(count, buffer); } else { // to let user know there wasn't a list (error) pack32((uint32_t)-1, buffer); } - if(count) { + if (count) { itr = list_iterator_create(msg->my_list); while((object = list_next(itr))) { (*(my_function))(object, rpc_version, buffer); @@ -3449,7 +3510,7 @@ extern void slurmdbd_pack_list_msg(dbd_list_msg_t *msg, list_iterator_destroy(itr); } - if(rpc_version >= 8) + if (rpc_version >= 8) pack32(msg->return_code, buffer); } @@ -3541,21 +3602,21 @@ extern int slurmdbd_unpack_list_msg(dbd_list_msg_t **msg, uint16_t rpc_version, *msg = msg_ptr; safe_unpack32(&count, buffer); - if((int)count > -1) { + if ((int)count > -1) { /* here we are looking to make the list if -1 or higher than 0. If -1 we don't want to have the list be NULL meaning an error occured. 
*/ msg_ptr->my_list = list_create((*(my_destroy))); for(i=0; i<count; i++) { - if(((*(my_function))(&object, rpc_version, buffer)) + if (((*(my_function))(&object, rpc_version, buffer)) == SLURM_ERROR) goto unpack_error; list_append(msg_ptr->my_list, object); } } - if(rpc_version >= 8) + if (rpc_version >= 8) safe_unpack32(&msg_ptr->return_code, buffer); return SLURM_SUCCESS; @@ -3649,9 +3710,9 @@ extern int slurmdbd_unpack_modify_msg(dbd_modify_msg_t **msg, return SLURM_ERROR; } - if((*(my_cond))(&msg_ptr->cond, rpc_version, buffer) == SLURM_ERROR) + if ((*(my_cond))(&msg_ptr->cond, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; - if((*(my_rec))(&msg_ptr->rec, rpc_version, buffer) == SLURM_ERROR) + if ((*(my_rec))(&msg_ptr->rec, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; return SLURM_SUCCESS; @@ -3823,6 +3884,7 @@ slurmdbd_unpack_step_complete_msg(dbd_step_comp_msg_t **msg, { dbd_step_comp_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_comp_msg_t)); *msg = msg_ptr; + safe_unpack32(&msg_ptr->assoc_id, buffer); safe_unpack32(&msg_ptr->db_index, buffer); safe_unpack_time(&msg_ptr->end_time, buffer); @@ -3835,6 +3897,7 @@ slurmdbd_unpack_step_complete_msg(dbd_step_comp_msg_t **msg, safe_unpack_time(&msg_ptr->job_submit_time, buffer); safe_unpack32(&msg_ptr->step_id, buffer); safe_unpack32(&msg_ptr->total_tasks, buffer); + return SLURM_SUCCESS; unpack_error: @@ -3850,7 +3913,22 @@ extern void slurmdbd_pack_step_start_msg(dbd_step_start_msg_t *msg, uint16_t rpc_version, Buf buffer) { - if (rpc_version >= SLURMDBD_2_3_VERSION) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { + pack32(msg->assoc_id, buffer); + pack32(msg->db_index, buffer); + pack32(msg->job_id, buffer); + packstr(msg->name, buffer); + packstr(msg->nodes, buffer); + packstr(msg->node_inx, buffer); + pack32(msg->node_cnt, buffer); + pack_time(msg->start_time, buffer); + pack_time(msg->job_submit_time, buffer); + pack32(msg->req_cpufreq, buffer); + pack32(msg->step_id, buffer); + 
pack16(msg->task_dist, buffer); + pack32(msg->total_cpus, buffer); + pack32(msg->total_tasks, buffer); + } else { pack32(msg->assoc_id, buffer); pack32(msg->db_index, buffer); pack32(msg->job_id, buffer); @@ -3874,7 +3952,23 @@ slurmdbd_unpack_step_start_msg(dbd_step_start_msg_t **msg, uint32_t uint32_tmp; dbd_step_start_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_start_msg_t)); *msg = msg_ptr; - if (rpc_version >= SLURMDBD_2_3_VERSION) { + + if (rpc_version >= SLURMDBD_2_6_VERSION) { + safe_unpack32(&msg_ptr->assoc_id, buffer); + safe_unpack32(&msg_ptr->db_index, buffer); + safe_unpack32(&msg_ptr->job_id, buffer); + safe_unpackstr_xmalloc(&msg_ptr->name, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->node_inx, &uint32_tmp, buffer); + safe_unpack32(&msg_ptr->node_cnt, buffer); + safe_unpack_time(&msg_ptr->start_time, buffer); + safe_unpack_time(&msg_ptr->job_submit_time, buffer); + safe_unpack32(&msg_ptr->req_cpufreq, buffer); + safe_unpack32(&msg_ptr->step_id, buffer); + safe_unpack16(&msg_ptr->task_dist, buffer); + safe_unpack32(&msg_ptr->total_cpus, buffer); + safe_unpack32(&msg_ptr->total_tasks, buffer); + } else { safe_unpack32(&msg_ptr->assoc_id, buffer); safe_unpack32(&msg_ptr->db_index, buffer); safe_unpack32(&msg_ptr->job_id, buffer); @@ -3960,7 +4054,7 @@ extern int slurmdbd_unpack_usage_msg(dbd_usage_msg_t **msg, return SLURM_ERROR; } - if((*(my_rec))(&msg_ptr->rec, rpc_version, buffer) == SLURM_ERROR) + if ((*(my_rec))(&msg_ptr->rec, rpc_version, buffer) == SLURM_ERROR) goto unpack_error; unpack_time(&msg_ptr->start, buffer); @@ -3993,7 +4087,7 @@ extern int slurmdbd_unpack_buffer(void **out, uint32_t uint32_tmp; safe_unpackmem_xmalloc(&msg, &uint32_tmp, buffer); - if(!(out_ptr = create_buf(msg, uint32_tmp))) + if (!(out_ptr = create_buf(msg, uint32_tmp))) goto unpack_error; *out = out_ptr; diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index 
b228ec5a16e533b9a3839e64829910104986f237..0581839262468e6d66247b1db7224035902d1193 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -76,7 +76,8 @@ * communicating with it (e.g. it will not accept messages with a * version higher than SLURMDBD_VERSION). */ -#define SLURMDBD_VERSION 11 +#define SLURMDBD_VERSION 12 +#define SLURMDBD_2_6_VERSION 12 /* 2.6 */ #define SLURMDBD_2_5_VERSION 11 /* 2.5 */ #define SLURMDBD_2_4_VERSION 10 /* 2.4 */ #define SLURMDBD_2_3_VERSION 9 /* 2.3 */ @@ -277,6 +278,7 @@ typedef struct dbd_job_start_msg { uint32_t priority; /* job priority */ uint32_t qos_id; /* qos job is running with */ uint32_t req_cpus; /* count of req processors */ + uint32_t req_mem; /* requested minimum memory */ uint32_t resv_id; /* reservation id */ time_t start_time; /* job start time */ time_t submit_time; /* job submit time */ @@ -362,7 +364,6 @@ typedef struct dbd_step_comp_msg { time_t job_submit_time;/* job submit time needed to find job record * in db */ uint32_t step_id; /* step ID */ - uint32_t total_cpus; /* count of allocated processors */ uint32_t total_tasks; /* count of tasks for step */ } dbd_step_comp_msg_t; @@ -378,6 +379,7 @@ typedef struct dbd_step_start_msg { time_t start_time; /* step start time */ time_t job_submit_time;/* job submit time needed to find job record * in db */ + uint32_t req_cpufreq; /* requested CPU frequency */ uint32_t step_id; /* step ID */ uint16_t task_dist; /* layout method of step */ uint32_t total_cpus; /* count of allocated processors */ diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index 
c675fcf9a38618fe88e4d3c588efc767de009588..df4eeeb566d4705a7757ea347a929c693d3d4aa1 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -304,7 +304,6 @@ stepd_get_info(int fd) { int req = REQUEST_INFO; slurmstepd_info_t *step_info; - uint16_t protocol_version; step_info = xmalloc(sizeof(slurmstepd_info_t)); safe_write(fd, &req, sizeof(int)); @@ -313,14 +312,14 @@ stepd_get_info(int fd) safe_read(fd, &step_info->jobid, sizeof(uint32_t)); safe_read(fd, &step_info->stepid, sizeof(uint32_t)); - safe_read(fd, &protocol_version, sizeof(uint16_t)); - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + safe_read(fd, &step_info->protocol_version, sizeof(uint16_t)); + if (step_info->protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { safe_read(fd, &step_info->nodeid, sizeof(uint32_t)); safe_read(fd, &step_info->job_mem_limit, sizeof(uint32_t)); safe_read(fd, &step_info->step_mem_limit, sizeof(uint32_t)); } else { error("stepd_get_info: protocol_version " - "%hu not supported", protocol_version); + "%hu not supported", step_info->protocol_version); goto rwfail; } return step_info; @@ -504,6 +503,7 @@ _free_step_loc_t(step_loc_t *loc) xfree(loc->directory); if (loc->nodename) xfree(loc->nodename); + xfree(loc->stepd_info); xfree(loc); } @@ -586,7 +586,7 @@ stepd_available(const char *directory, const char *nodename) } l = list_create((ListDelF) _free_step_loc_t); - if(_sockname_regex_init(&re, nodename) == -1) + if (_sockname_regex_init(&re, nodename) == -1) goto done; /* @@ -931,7 +931,8 @@ rwfail: * jobacctinfo_t must be freed after calling this function. 
*/ int -stepd_stat_jobacct(int fd, job_step_id_msg_t *sent, job_step_stat_t *resp) +stepd_stat_jobacct(int fd, job_step_id_msg_t *sent, job_step_stat_t *resp, + uint16_t protocol_version) { int req = REQUEST_STEP_STAT; int rc = SLURM_SUCCESS; @@ -949,7 +950,8 @@ stepd_stat_jobacct(int fd, job_step_id_msg_t *sent, job_step_stat_t *resp) * possible deadlock. */ if (wait_fd_readable(fd, 300)) goto rwfail; - rc = jobacctinfo_getinfo(resp->jobacct, JOBACCT_DATA_PIPE, &fd); + rc = jobacctinfo_getinfo(resp->jobacct, JOBACCT_DATA_PIPE, &fd, + protocol_version); safe_read(fd, &tasks, sizeof(int)); resp->num_tasks = tasks; diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h index f2534ef73a185043c88c653daafd47069de022a3..dab477f440e58e4f3b47b7f79e2ee0fb598f32d2 100644 --- a/src/common/stepd_api.h +++ b/src/common/stepd_api.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -48,13 +48,6 @@ #include "src/common/slurm_protocol_defs.h" #include "src/common/io_hdr.h" -typedef struct step_location { - uint32_t jobid; - uint32_t stepid; - char *nodename; - char *directory; -} step_loc_t; - typedef enum { REQUEST_CONNECT = 0, REQUEST_SIGNAL_PROCESS_GROUP, @@ -89,6 +82,7 @@ typedef struct { uint32_t jobid; uint32_t stepid; uint32_t nodeid; + uint16_t protocol_version; uint32_t job_mem_limit; /* job's memory limit, MB */ uint32_t step_mem_limit; /* step's memory limit, MB */ } slurmstepd_info_t; @@ -101,6 +95,15 @@ typedef struct { int estatus; /* exit status if exited is true*/ } slurmstepd_task_info_t; +typedef struct step_location { + uint32_t jobid; + uint32_t stepid; + char *nodename; + char *directory; + slurmstepd_info_t *stepd_info; +} step_loc_t; + + /* * Cleanup stale stepd domain sockets. */ @@ -242,7 +245,7 @@ int stepd_completion(int fd, step_complete_msg_t *sent); * resp receives a jobacctinfo_t which must be freed if SUCCESS. */ int stepd_stat_jobacct(int fd, job_step_id_msg_t *sent, - job_step_stat_t *resp); + job_step_stat_t *resp, uint16_t protocol_version); int stepd_task_info(int fd, slurmstepd_task_info_t **task_info, diff --git a/src/common/switch.c b/src/common/switch.c index 73c3e1c166b3e6a6b1fb9d874efc15570d71bd73..a023e20b89bb712d6335be58a3c59ee2f89870a8 100644 --- a/src/common/switch.c +++ b/src/common/switch.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/switch.h b/src/common/switch.h index 5835a8257daf2e241c378f75bf0041aaf6589586..6df2bc8cb4f564c649b1b3ed8f8eb729dc8c89ff 100644 --- a/src/common/switch.h +++ b/src/common/switch.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/timers.c b/src/common/timers.c index c52ddd99a97954edb9d07ec19a52db571cc424cd..01d69a37be13e81c283c92c23b813b28033424d4 100644 --- a/src/common/timers.c +++ b/src/common/timers.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -49,36 +49,27 @@ * IN len_tv_str - size of tv_str in bytes * IN from - where the function was called form */ -extern void slurm_diff_tv_str(struct timeval *tv1,struct timeval *tv2, +extern void slurm_diff_tv_str(struct timeval *tv1, struct timeval *tv2, char *tv_str, int len_tv_str, char *from, - long limit) + long limit, long *delta_t) { - long delta_t; + char p[64] = ""; + struct tm tm; - delta_t = (tv2->tv_sec - tv1->tv_sec) * 1000000; - delta_t += tv2->tv_usec - tv1->tv_usec; - snprintf(tv_str, len_tv_str, "usec=%ld", delta_t); + (*delta_t) = (tv2->tv_sec - tv1->tv_sec) * 1000000; + (*delta_t) += tv2->tv_usec - tv1->tv_usec; + snprintf(tv_str, len_tv_str, "usec=%ld", *delta_t); if (from) { if (!limit) limit = 1000000; - if (delta_t > limit) { + if (*delta_t > limit) { + if (!localtime_r(&tv1->tv_sec, &tm)) + fprintf(stderr, "localtime_r() failed\n"); + if (strftime(p, sizeof(p), "%T", &tm) == 0) + fprintf(stderr, "strftime() returned 0\n"); verbose("Warning: Note very large processing " - "time from %s: %s", - from, tv_str); + "time from %s: %s began=%s.%3.3d", + from, tv_str, p, (int)(tv1->tv_usec / 1000)); } } } - -/* - * slurm_diff_tv - return the difference between two times - * IN tv1 - start of event - * IN tv2 - end of event - * RET time in micro-seconds - */ -extern long slurm_diff_tv(struct timeval *tv1, struct timeval *tv2) -{ - long delta_t; - delta_t = (tv2->tv_sec - tv1->tv_sec) * 1000000; - delta_t += tv2->tv_usec - tv1->tv_usec; - return delta_t; -} diff --git a/src/common/timers.h b/src/common/timers.h index 8cd3dfac002ab43242db9518083dea9532e8ab40..e165830e13461a9c998fa3156c3f5f1893823d9f 100644 --- a/src/common/timers.h +++ b/src/common/timers.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -41,15 +41,15 @@ #include <sys/time.h> -#define DEF_TIMERS struct timeval tv1, tv2; char tv_str[20] = "" +#define DEF_TIMERS struct timeval tv1, tv2; char tv_str[20] = ""; long delta_t; #define START_TIMER gettimeofday(&tv1, NULL) #define END_TIMER gettimeofday(&tv2, NULL); \ - slurm_diff_tv_str(&tv1, &tv2, tv_str, 20, NULL, 0) + slurm_diff_tv_str(&tv1, &tv2, tv_str, 20, NULL, 0, &delta_t) #define END_TIMER2(from) gettimeofday(&tv2, NULL); \ - slurm_diff_tv_str(&tv1, &tv2, tv_str, 20, from, 0) + slurm_diff_tv_str(&tv1, &tv2, tv_str, 20, from, 0, &delta_t) #define END_TIMER3(from, limit) gettimeofday(&tv2, NULL); \ - slurm_diff_tv_str(&tv1, &tv2, tv_str, 20, from, limit) -#define DELTA_TIMER slurm_diff_tv(&tv1, &tv2) + slurm_diff_tv_str(&tv1, &tv2, tv_str, 20, from, limit, &delta_t) +#define DELTA_TIMER delta_t #define TIME_STR tv_str /* @@ -61,17 +61,10 @@ * IN len_tv_str - size of tv_str in bytes * IN from - Name to be printed on long diffs * IN limit - limit to wait + * OUT delta_t - raw time difference in usec */ extern void slurm_diff_tv_str(struct timeval *tv1,struct timeval *tv2, char *tv_str, int len_tv_str, char *from, - long limit); - -/* - * slurm_diff_tv - return the difference between two times - * IN tv1 - start of event - * IN tv2 - end of event - * RET time in micro-seconds - */ -extern long slurm_diff_tv(struct timeval *tv1, struct timeval *tv2); + long limit, long *delta_t); #endif diff --git a/src/common/uid.c b/src/common/uid.c index bb127c92e56e989f8d698bbb55354f925593f5a8..1ebdb64640777301b7e6d0e944d9ffafc2cd934f 100644 --- a/src/common/uid.c +++ b/src/common/uid.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/uid.h b/src/common/uid.h index 1a756474bb2522b146aa2f161e4b0ac996ae085c..67f3265f56a3ffd1ad5a4f30c99bc4958bfa3610 100644 --- a/src/common/uid.h +++ b/src/common/uid.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/unsetenv.c b/src/common/unsetenv.c index 5f7598d27b8bac65b34e64a2f8b19cfaca204fe9..fe05efbddfde0749e6529de7287984bfbbabdede 100644 --- a/src/common/unsetenv.c +++ b/src/common/unsetenv.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -37,8 +37,11 @@ \*****************************************************************************/ #include <stdlib.h> +#include <stdio.h> #include <string.h> +#include "src/common/log.h" + extern int unsetenv (const char *name) { int len, rc; @@ -49,6 +52,10 @@ extern int unsetenv (const char *name) len = strlen(name); tmp = malloc(len + 3); + if (!tmp) { + log_oom(__FILE__, __LINE__, __CURRENT_FUNC__); + abort(); + } strcpy(tmp, name); strcat(tmp, "=x"); if ((rc = putenv(tmp)) != 0) diff --git a/src/common/unsetenv.h b/src/common/unsetenv.h index b3b5159a51ae0845da0016ce51bc4a1b83c23457..65c2bb4061c04f4a1dc5994d3f9998ee877e2b29 100644 --- a/src/common/unsetenv.h +++ b/src/common/unsetenv.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/uthash/LICENSE b/src/common/uthash/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..6e41e339a59b1099861525ff75f168d182151593 --- /dev/null +++ b/src/common/uthash/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2005-2010, Troy D. Hanson http://uthash.sourceforge.net +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/src/common/uthash/README b/src/common/uthash/README new file mode 100644 index 0000000000000000000000000000000000000000..6cb90b88d64f1063517bc6465568218013eb246f --- /dev/null +++ b/src/common/uthash/README @@ -0,0 +1,3 @@ +uthash: version 1.9.6, more information and latests versions at + http://uthash.sourceforge.net +license of uthash is in the LICENSE file diff --git a/src/common/uthash/uthash.h b/src/common/uthash/uthash.h new file mode 100644 index 0000000000000000000000000000000000000000..9f83fc34f15bc5b6dd3cd8b38968b180e7d00a06 --- /dev/null +++ b/src/common/uthash/uthash.h @@ -0,0 +1,915 @@ +/* +Copyright (c) 2003-2012, Troy D. Hanson http://uthash.sourceforge.net +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#include <string.h> /* memcmp,strlen */ +#include <stddef.h> /* ptrdiff_t */ +#include <stdlib.h> /* exit() */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#ifdef _MSC_VER /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while(0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while(0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on win32 */ +#ifdef _MSC_VER +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#else +#include <inttypes.h> /* uint32_t */ +#endif + +#define UTHASH_VERSION 1.9.6 + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef 
uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhe */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_bkt,_hf_hashv; \ + out=NULL; \ + if (head) { \ + HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ + keyptr,keylen,out); \ + } \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 
1:0) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while(0) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char*)keyptr; \ + (add)->hh.keylen = (unsigned)keylen_in; \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh,head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ + (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ + HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ + HASH_FSCK(hh,head); \ +} while(0) + +#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1)); \ +} while(0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. 
+ */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + unsigned _hd_bkt; \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev) { \ + ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next) { \ + ((UT_hash_handle*)((char*)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield,strlen(add->strfield),add) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 
+ * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + unsigned _bkt_i; \ + unsigned _count, _bkt_count; \ + char *_prev; \ + struct UT_hash_handle *_thh; \ + if (head) { \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %d, actual %d\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include <unistd.h> to get the prototype for write(2). 
*/ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6 */ +#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hb_keylen=keylen; \ + char *_hb_key=(char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ + bkt = (hashv) & (num_bkts-1); \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _sx_i; \ + char *_hs_key=(char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + bkt = hashv & (num_bkts-1); \ +} while (0) + +#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _fn_i; \ + char *_hf_key=(char*)(key); \ + hashv = 2166136261UL; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) \ + hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _ho_i; \ + char *_ho_key=(char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c 
-= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + char *_hj_key=(char*)(key); \ + hashv = 0xfeedbeef; \ + _hj_i = _hj_j = 0x9e3779b9; \ + _hj_k = (unsigned)keylen; \ + while (_hj_k >= 12) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12; \ + } \ + hashv += keylen; \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ + case 5: _hj_j += _hj_key[4]; \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) 
((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ +do { \ + char *_sfh_key=(char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = keylen; \ + \ + int _sfh_rem = _sfh_len & 3; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabe; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
+ * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__)) +#define MUR_GETBLOCK(p,i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) +#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) +#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) +#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) +#else /* assume little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ + (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ + (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ + MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ +do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6b; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35l; \ + _h ^= _h >> 16; \ +} while(0) + +#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const uint8_t *_mur_data = (const uint8_t*)(key); \ + const int _mur_nblocks = (keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353; \ + uint32_t _mur_c1 = 0xcc9e2d51; \ + uint32_t _mur_c2 = 0x1b873593; \ + const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ + int _mur_i; \ + for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ + uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1,13); \ + _mur_h1 = _mur_h1*5+0xe6546b64; \ + } \ + const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ + uint32_t _mur_k1=0; \ + switch((keylen) & 3) { \ + case 3: _mur_k1 ^= _mur_tail[2] << 16; \ + case 2: _mur_k1 ^= _mur_tail[1] << 8; \ + case 1: _mur_k1 ^= _mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ + bkt = hashv & (num_bkts-1); \ +} while(0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* key comparison function; return 0 if keys equal */ +#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ +do { \ + if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ + else out=NULL; \ + while (out) { \ + if ((out)->hh.keylen == keylen_in) { \ + if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \ + } \ + if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \ + 
else out = NULL; \ + } \ +} while(0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ + && (addhh)->tbl->noexpand != 1) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while(0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). 
We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ + ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 
1 : 0); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ + _he_thh; \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1) : 0; \ + if (tbl->ineff_expands > 1) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while(0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? 
\ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) break; \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ + if (_hs_psize == 0) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ + _hs_e = _hs_p; \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail ) { \ + _hs_tail->next = ((_hs_e) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + _hs_e->prev = ((_hs_tail) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + _hs_tail->next = NULL; \ + if ( _hs_nmerges <= 1 ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. 
*/ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ + if (!dst) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while(0) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) 
HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. 
these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/src/common/working_cluster.c b/src/common/working_cluster.c index 56ea1587fd81b33fb43d1f8cd1a9fe46a6649374..78c0a009fee956b8842eb64f3ae2e31f1322b036 100644 --- a/src/common/working_cluster.c +++ b/src/common/working_cluster.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/working_cluster.h b/src/common/working_cluster.h index c747349b09365e37cbd30edfc38a09d124113701..febe975de1fb56e6d8534d36caebbdd3dd147e18 100644 --- a/src/common/working_cluster.h +++ b/src/common/working_cluster.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/write_labelled_message.c b/src/common/write_labelled_message.c index 47ecc4ced218c983a4202e5dda0f2f9048ff916b..a4797f36a91a408cdfbeac00ac8e623289eb9f81 100644 --- a/src/common/write_labelled_message.c +++ b/src/common/write_labelled_message.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/write_labelled_message.h b/src/common/write_labelled_message.h index 415955d133f287b8f58910cef9501ab85a653ba2..19128af18c5c52700f544b963e4b04c348f8189f 100644 --- a/src/common/write_labelled_message.h +++ b/src/common/write_labelled_message.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xassert.c b/src/common/xassert.c index 9a001aa84d8b2f6026a91de0c91ef03e16cc213d..bfb70f6e6cae8fc3d7c26be5461392835a7f8abe 100644 --- a/src/common/xassert.c +++ b/src/common/xassert.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xassert.h b/src/common/xassert.h index 4fcbcba2507050ec0310e395f89d826561c5600c..743452e0a28a55ce9925639ff815b9728397f966 100644 --- a/src/common/xassert.h +++ b/src/common/xassert.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xcgroup.c b/src/common/xcgroup.c index 6cd85f641aaaf29cbf175cdfeef57f6a63942e60..329415694776de5831122bdbae63b22cb318f150 100644 --- a/src/common/xcgroup.c +++ b/src/common/xcgroup.c @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xcgroup.h b/src/common/xcgroup.h index e8baeb717ad608ca551f92165a81a3d12f39ad2c..a7763a7b0ca8d9457debb38d8e3eac120d3c7169 100644 --- a/src/common/xcgroup.h +++ b/src/common/xcgroup.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xcgroup_read_config.c b/src/common/xcgroup_read_config.c index 51368c7ae2dedeb1b99acae7699cd0441bb930ac..979b51228288b7da98f2bfa7363a1da1cc19f5d9 100644 --- a/src/common/xcgroup_read_config.c +++ b/src/common/xcgroup_read_config.c @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -64,7 +64,6 @@ slurm_cgroup_conf_t *slurm_cgroup_conf = NULL; /* Local functions */ static void _clear_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf); -static char * _get_conf_path(void); /* * free_slurm_cgroup_conf - free storage associated with the global variable @@ -165,7 +164,7 @@ extern int read_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf) _clear_slurm_cgroup_conf(slurm_cgroup_conf); /* Get the cgroup.conf path and validate the file */ - conf_path = _get_conf_path(); + conf_path = get_extra_conf_path("cgroup.conf"); if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) { info("No cgroup.conf file (%s)", conf_path); } else { @@ -266,28 +265,3 @@ extern int read_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf) return SLURM_SUCCESS; } - -/* Return the pathname of the cgroup.conf file. - * xfree() the value returned */ -static char * _get_conf_path(void) -{ - char *val = getenv("SLURM_CONF"); - char *path = NULL; - int i; - - if (!val) - val = default_slurm_config_file; - - /* Replace file name on end of path */ - i = strlen(val) + 15; - path = xmalloc(i); - strcpy(path, val); - val = strrchr(path, (int)'/'); - if (val) /* absolute path */ - val++; - else /* not absolute path */ - val = path; - strcpy(val, "cgroup.conf"); - - return path; -} diff --git a/src/common/xcgroup_read_config.h b/src/common/xcgroup_read_config.h index a3d0738fa631c13e5cc202b508635ca4c57ba127..dffa8c5ac127268b99d988b48a03c7387b4563a1 100644 --- a/src/common/xcgroup_read_config.h +++ b/src/common/xcgroup_read_config.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xcpuinfo.c b/src/common/xcpuinfo.c index 388305062250996ffdd780348cc5e01d81e8672b..af048436291f2d438f5d1a6a9acc69391ca2d7bf 100644 --- a/src/common/xcpuinfo.c +++ b/src/common/xcpuinfo.c @@ -6,7 +6,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xcpuinfo.h b/src/common/xcpuinfo.h index 57e94629f76e8ba8269d1e90a73b73eacb77ebd0..c5533c1f70c8faf31fdcdfb392b69169b285d43c 100644 --- a/src/common/xcpuinfo.h +++ b/src/common/xcpuinfo.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under
diff --git a/src/common/xhash.c b/src/common/xhash.c
new file mode 100644
index 0000000000000000000000000000000000000000..f7a4da54ebf508b134b3fdcee32f6e14239a9037
--- /dev/null
+++ b/src/common/xhash.c
@@ -0,0 +1,200 @@
+/*****************************************************************************\
+ * xhash.c - functions used for hash table management
+ *****************************************************************************
+ * Copyright (C) 2012 CEA/DAM/DIF
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://slurm.schedmd.com/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#include "src/common/xhash.h"
+#include "src/common/xmalloc.h"
+#include "src/common/uthash/uthash.h"
+
+#if 0
+/* undefine default allocators */
+#undef uthash_malloc
+#undef uthash_free
+
+/* re-define them using slurm's ones */
+#define uthash_malloc(sz) xmalloc(sz)
+#define uthash_free(ptr, sz) xfree(ptr)
+#endif
+
+/*
+ * FIXME:
+ * A pre-malloced array of xhash_item_t could be associated to
+ * the xhash_table in order to speed up the xhash_add function.
+ * The default array size could be something like 10% of the
+ * provided table_size (hash table are commonly defined larger
+ * than necessary to avoid shared keys and usage of linked list)
+ */
+
+typedef struct xhash_item_st {
+	void* item; /* user item */
+	const char* key; /* cached key calculated by user function, */
+	/* needed by uthash */
+	uint32_t keysize; /* cached key size */
+	UT_hash_handle hh; /* make this structure hashable by uthash */
+} xhash_item_t;
+
+struct xhash_st {
+	xhash_item_t* ht; /* hash table */
+	uint32_t count; /* user items count */
+	xhash_idfunc_t identify; /* function returning a unique str key */
+};
+
+/*
+ * Look up the internal entry stored under key.
+ * Returns NULL when table or key is NULL or when nothing matches.
+ */
+static xhash_item_t* xhash_find(xhash_t* table, const char* key)
+{
+	xhash_item_t* hash_item = NULL;
+	uint32_t key_size = 0;
+
+	if (!table || !key)
+		return NULL;
+	key_size = strlen(key);
+	HASH_FIND(hh, table->ht, key, key_size, hash_item);
+	return hash_item;
+}
+
+/* Return the user item stored under key, or NULL if there is none. */
+void* xhash_get(xhash_t* table, const char* key)
+{
+	xhash_item_t* item = xhash_find(table, key);
+	if (!item)
+		return NULL;
+	return item->item;
+}
+
+/*
+ * Allocate an empty table whose item keys are produced by idfunc.
+ * hashfunc and table_size are accepted but not used by this
+ * implementation.
+ * Returns NULL when idfunc is NULL.
+ */
+xhash_t* xhash_init(xhash_idfunc_t idfunc,
+		    xhash_hashfunc_t hashfunc,
+		    uint32_t table_size)
+{
+	xhash_t* table = NULL;
+	if (!idfunc)
+		return NULL;
+	table = (xhash_t*)xmalloc(sizeof(xhash_t));
+	table->ht = NULL; /* required by uthash */
+	table->count = 0;
+	table->identify = idfunc;
+	return table;
+}
+
+/*
+ * Wrap item in a new entry keyed by table->identify(item) and insert it.
+ * The key pointer is stored as returned, not copied, so the string must
+ * stay valid while the item is in the table.
+ * Returns item, or NULL when table or item is NULL or identify() yields
+ * no key.
+ */
+void* xhash_add(xhash_t* table, void* item)
+{
+	xhash_item_t* hash_item = NULL;
+	const char* key = NULL;
+
+	if (!table || !item)
+		return NULL;
+	/* reject a NULL key before strlen() and before allocating the entry */
+	key = table->identify(item);
+	if (!key)
+		return NULL;
+	hash_item = (xhash_item_t*)xmalloc(sizeof(xhash_item_t));
+	hash_item->item = item;
+	hash_item->key = key;
+	hash_item->keysize = strlen(key);
+	HASH_ADD_KEYPTR(hh, table->ht, hash_item->key,
+			hash_item->keysize, hash_item);
+	++table->count;
+	return hash_item->item;
+}
+
+/*
+ * Unlink the entry stored under key and free the entry only; the user
+ * item is left untouched. Does nothing when the key is not found.
+ */
+void xhash_delete(xhash_t* table, const char* key)
+{
+	xhash_item_t* item = xhash_find(table, key);
+	if (!item)
+		return;
+	HASH_DELETE(hh, table->ht, item);
+	xfree(item);
+	--table->count;
+}
+
+/* Return the number of stored items, or 0 when table is NULL. */
+uint32_t xhash_count(xhash_t* table)
+{
+	if (!table)
+		return 0;
+	return table->count;
+}
+
+/* Call callback(item, arg) on every stored item; no-op when table or callback is NULL. */
+void xhash_walk(xhash_t* table,
+		void (*callback)(void* item, void* arg),
+		void* arg)
+{
+	xhash_item_t* current_item = NULL;
+	xhash_item_t* tmp = NULL;
+	if (!table || !callback)
+		return;
+	HASH_ITER(hh, table->ht, current_item, tmp) {
+		callback(current_item->item, arg);
+	}
+}
+
+/*
+ * Release every entry and then the table itself; stored user items are
+ * not freed. HASH_ITER keeps the successor in tmp, so deleting the
+ * current entry inside the loop is safe.
+ */
+void xhash_free(xhash_t* table)
+{
+	xhash_item_t* current_item = NULL;
+	xhash_item_t* tmp = NULL;
+
+	if (!table)
+		return;
+	HASH_ITER(hh, table->ht, current_item, tmp) {
+		HASH_DEL(table->ht, current_item);
+		xfree(current_item);
+	}
+	xfree(table);
+}
+
diff --git a/src/common/xhash.h b/src/common/xhash.h new file mode 100644 index 0000000000000000000000000000000000000000..2f4b137a0e776f402000d50825ae96e1060fc5fd --- /dev/null +++ b/src/common/xhash.h @@ -0,0 +1,116 @@ +/*****************************************************************************\ + * xhash.h - functions used for hash table management + ***************************************************************************** + * Copyright (C) 2012 CEA/DAM/DIF + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER.
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef __XHASH_EJ2ORE_INC__ +#define __XHASH_EJ2ORE_INC__ + +#include <stdint.h> + +/** Opaque definition of the hash table */ +struct xhash_st; +typedef struct xhash_st xhash_t; + +/** + * This function will be used to generate unique identifier from a + * stored item by returning a string. + * Beware that string conflict cause an item to be unfindable by the + * hash table. 
+ * + * @param item takes one of the items stored by the lists in the hash + * table. + * @returns a unique identifier used for making hashes. + */ +typedef const char* (*xhash_idfunc_t)(void* item); + +/** + * @param id is the unique identifier an item can be identified with. + * @param hashes_count is the number of hashes contained in the hash + * table and the function must return an index in + * range [0 to hashes_count-1]. + * @returns a hash used as an index for storing the item identified by + * the given id. + */ + +/* Currently not implementable with uthash */ +typedef unsigned (*xhash_hashfunc_t)(unsigned hashes_count, const char* id); + +/** @returns an item from a key searching through the hash table. NULL if not + * found. + */ +void* xhash_get(xhash_t* table, const char* key); + +/** Initialize the hash table. + * + * @param idfunc is used to calculate a string unique identifier from a user + * item. + * + * @returns the newly allocated hash table. Must be freed with xhash_free. + */ +xhash_t* xhash_init(xhash_idfunc_t idfunc, + xhash_hashfunc_t hashfunc, /* Currently: should be NULL */ + uint32_t table_size); /* Currently: unused */ + +/** Add an item to the hash table. + * @param table is the hash table you want to add the item to. + * @param item is the user item to add. It has to be initialized in order for + * the idfunc function to be able to calculate the final unique + * key string associated with it. + * @returns item or NULL in case of error. + */ +void* xhash_add(xhash_t* table, void* item); + +/** Remove the item associated with key from the hash table, if found, + * but do not free the item memory itself (the user is responsible for + * managing item's memory).
+ */ +void xhash_delete(xhash_t* table, const char* key); + +/** @returns the number of items stored in the hash table */ +uint32_t xhash_count(xhash_t* table); + +/** apply callback to each item contained in the hash table */ +void xhash_walk(xhash_t* table, + void (*callback)(void* item, void* arg), + void* arg); + +/** This function frees the hash table, but does not free its stored items, + * you can use xhash_walk to perform a free operation on all items if wanted. + * @param table is the hash table to free. The table pointer is invalid + * after this call. + */ +void xhash_free(xhash_t* table); + +#endif + diff --git a/src/common/xmalloc.c b/src/common/xmalloc.c index 38b6d85f4266884777d2df0275a98c3f6386a6c4..f868a7b226e27775b57a4214b664234ae4193265 100644 --- a/src/common/xmalloc.c +++ b/src/common/xmalloc.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER.
* * SLURM is free software; you can redistribute it and/or modify it under @@ -94,11 +94,9 @@ void *slurm_xmalloc(size_t size, const char *file, int line, const char *func) p = (int *)malloc(size + 2*sizeof(int)); MALLOC_UNLOCK(); if (!p) { - /* don't call log functions here, we're probably OOM - */ - fprintf(log_fp(), "%s:%d: %s: xmalloc(%d) failed\n", - file, line, func, (int)size); - exit(1); + /* out of memory */ + log_oom(file, line, func); + abort(); } p[0] = XMALLOC_MAGIC; /* add "secret" magic cookie */ p[1] = (int)size; /* store size in buffer */ @@ -185,8 +183,7 @@ void * slurm_xrealloc(void **item, size_t newsize, return *item; error: - fprintf(log_fp(), "%s:%d: %s: xrealloc(%d) failed\n", - file, line, func, (int)newsize); + log_oom(file, line, func); abort(); } diff --git a/src/common/xmalloc.h b/src/common/xmalloc.h index de0d00751aab93c4e03f771c1bdbce1ce8573a81..93b08127d0210fb054c39194aa8ed802f7afa580 100644 --- a/src/common/xmalloc.h +++ b/src/common/xmalloc.h @@ -12,7 +12,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xsignal.c b/src/common/xsignal.c index c665bb74371186a8210d104cf4f97a3714b892b5..e40fa882d9abc2e915724bb112486200167c3990 100644 --- a/src/common/xsignal.c +++ b/src/common/xsignal.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xsignal.h b/src/common/xsignal.h index 37d4a88df7831c079df268a7bccdc2083ef2b11e..4d8eaa93aaca77af5e54cfeaeb6e768e5c961956 100644 --- a/src/common/xsignal.h +++ b/src/common/xsignal.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/common/xstring.c b/src/common/xstring.c index 10a27fcb89b8966064b9f5c20325e7effadbdf03..c9f680b68e0ad7ec6862811753ba9a9c8402ff79 100644 --- a/src/common/xstring.c +++ b/src/common/xstring.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -57,6 +57,7 @@ #include <stdarg.h> #include <ctype.h> +#include <sys/time.h> #include <time.h> #include "slurm/slurm_errno.h" @@ -217,36 +218,65 @@ void _xstrftimecat(char **buf, const char *fmt) } /* - * Append a RFC 5424 formatted timestamp to buffer buf, expand as - * needed + * Append a ISO 8601 formatted timestamp to buffer buf, expand as needed + */ +void _xiso8601timecat(char **buf) +{ + char p[64] = ""; + struct timeval tv; + struct tm tm; + + if (gettimeofday(&tv, NULL) == -1) + fprintf(stderr, "gettimeofday() failed\n"); + + if (!localtime_r(&tv.tv_sec, &tm)) + fprintf(stderr, "localtime_r() failed\n"); + + if (strftime(p, sizeof(p), "%Y-%m-%dT%T", &tm) == 0) + fprintf(stderr, "strftime() returned 0\n"); + +#if defined LOG_TIME_MSEC /* Add millisecond data */ + _xstrfmtcat(buf, "%s.%3.3d", p, (int)(tv.tv_usec / 1000)); +#else + _xstrfmtcat(buf, "%s", p); +#endif +} + +/* + * Append a RFC 5424 formatted timestamp to buffer buf, expand as needed * */ void _xrfc5424timecat(char **buf) { - char p[26]; - time_t t; + char p[64] = ""; + char z[12] = ""; + struct timeval tv; struct tm tm; - const char fmt[] = "%Y-%m-%dT%T%z"; - - if (time(&t) == (time_t) -1) - fprintf(stderr, "time() failed\n"); + if (gettimeofday(&tv, NULL) == -1) + fprintf(stderr, "gettimeofday() failed\n"); - if (!localtime_r(&t, &tm)) + if (!localtime_r(&tv.tv_sec, &tm)) fprintf(stderr, "localtime_r() failed\n"); - if (strftime(p, sizeof(p), fmt, &tm) == 0) + if (strftime(p, sizeof(p), "%Y-%m-%dT%T", &tm) == 0) fprintf(stderr, "strftime() returned 0\n"); /* The strftime %z format creates timezone offsets of the form * (+/-)hhmm, whereas the RFC 5424 format is (+/-)hh:mm. So - * shift the minutes one step back and insert the semicolon. */ - p[25] = '\0'; - p[24] = p[23]; - p[23] = p[22]; - p[22] = ':'; - - _xstrcat(buf, p); + * shift the minutes one step back and insert the semicolon. 
+ */ + if (strftime(z, sizeof(z), "%z", &tm) == 0) + fprintf(stderr, "strftime() returned 0\n"); + z[5] = z[4]; + z[4] = z[3]; + z[3] = ':'; + +#if defined LOG_TIME_MSEC /* Add millisecond data */ + _xstrfmtcat(buf, "%s.%3.3d%s", p, (int)(tv.tv_usec / 1000), z); +#else + _xstrfmtcat(buf, "%s%s", p, z); +#endif } /* @@ -440,7 +470,7 @@ char *xstrstrip(char *str) char quote_c = '\0'; int quote = 0; - if(!str) + if (!str) return NULL; /* first strip off the ("|')'s */ @@ -452,13 +482,13 @@ char *xstrstrip(char *str) start = i; while(str[i]) { - if(quote && str[i] == quote_c) { + if (quote && str[i] == quote_c) { found = 1; break; } i++; } - if(found) { + if (found) { meat = xmalloc((i-start)+1); memcpy(meat, str+start, (i-start)); } else @@ -515,7 +545,7 @@ bool xstring_is_whitespace(const char *str) */ char *xstrtolower(char *str) { - if(str) { + if (str) { int j = 0; while(str[j]) { str[j] = tolower((int)str[j]); @@ -540,9 +570,9 @@ static char *_xstrdup_vprintf(const char *fmt, va_list ap) char *p = NULL; va_list our_ap; - if((p = xmalloc(size)) == NULL) + if ((p = xmalloc(size)) == NULL) return NULL; - while(1) { + while (1) { /* Try to print in the allocated space. */ va_copy(our_ap, ap); n = vsnprintf(p, size, fmt, our_ap); diff --git a/src/common/xstring.h b/src/common/xstring.h index b96589a6dfba9fcdfc607b55c1d2fa8ccf89dec5..06dc742361798c46f028587d0a4fc15b03961bb4 100644 --- a/src/common/xstring.h +++ b/src/common/xstring.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -47,6 +47,7 @@ #define xstrcatchar(__p, __c) _xstrcatchar(&(__p), __c) #define xslurm_strerrorcat(__p) _xslurm_strerrorcat(&(__p)) #define xstrftimecat(__p, __fmt) _xstrftimecat(&(__p), __fmt) +#define xiso8601timecat(__p) _xiso8601timecat(&(__p)) #define xrfc5424timecat(__p) _xrfc5424timecat(&(__p)) #define xstrfmtcat(__p, __fmt, args...) _xstrfmtcat(&(__p), __fmt, ## args) #define xmemcat(__p, __s, __e) _xmemcat(&(__p), __s, __e) @@ -89,6 +90,11 @@ void _xslurm_strerrorcat(char **str); */ void _xstrftimecat(char **str, const char *fmt); +/* +** Concatenate a ISO 8601 timestamp onto str. +*/ +void _xiso8601timecat(char **str); + /* ** Concatenate a RFC 5424 timestamp onto str. */ diff --git a/src/common/xtree.c b/src/common/xtree.c new file mode 100644 index 0000000000000000000000000000000000000000..ebf0db2eb2a74ad8dd1a93a437535f91dfb58c56 --- /dev/null +++ b/src/common/xtree.c @@ -0,0 +1,572 @@ +/*****************************************************************************\ + * xtree.c - functions used for tree data structure manament + ***************************************************************************** + * Copyright (C) 2012 CEA/DAM/DIF + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. 
You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include "src/common/xassert.h" +#include "src/common/xmalloc.h" +#include "src/common/xtree.h" + +/* free the node childs */ +static void xtree_free_childs(xtree_t* tree, xtree_node_t* node) +{ + xtree_node_t* current_node = node; + xtree_node_t* free_later = NULL; + + /* if (!tree || !tree->root || !node) return; comm: not a user func */ + if (current_node && current_node->start) { + /* tree has childs, depth may have changed */ + tree->state &= ~XTREE_STATE_DEPTHCACHED; + } + + while (current_node) { + if (current_node->start) { + current_node = current_node->start; + continue; + } + if (current_node == node) { + current_node->start = current_node->end = NULL; + return; + } + free_later = current_node; + if (current_node->parent) { + current_node->parent->start = current_node->next; + } + current_node = current_node->parent; + if (tree->free) + tree->free(free_later->data); + xfree(free_later); + --tree->count; + } +} + +/* tries to free the leftest leaf each time, and remove it from 
the tree, + * then go above, since node above finish to be a leaf itself. + */ +void xtree_free(xtree_t* tree) +{ + if (!tree || !tree->root) + return; + xtree_free_childs(tree, tree->root); + + if (tree->free) + tree->free(tree->root->data); + xfree(tree->root); + + xtree_init(tree, tree->free); +} + +void xtree_init(xtree_t* tree, xtree_free_data_function_t freefunc) +{ + tree->root = NULL; + tree->free = freefunc; + tree->count = 0; + tree->depth = 0; + tree->state = XTREE_STATE_DEPTHCACHED; +} + +void xtree_set_freefunc(xtree_t* tree, xtree_free_data_function_t freefunc) +{ + tree->free = freefunc; +} + +xtree_node_t* xtree_get_parent(xtree_t* tree, xtree_node_t* node) +{ + if (!node || !tree || !tree->root) + return NULL; + return node->parent; +} + +uint32_t xtree_get_count(xtree_t* tree) +{ + if (!tree) + return UINT32_MAX; + return tree->count; +} + +xtree_node_t* xtree_add_child(xtree_t* tree, + xtree_node_t* parent, + void* data, + uint8_t flags) +{ + xtree_node_t* newnode = NULL; + + if (!tree || (!parent && tree->root) || (parent && !tree->root)) { + return NULL; + } + + xassert(flags & (XTREE_APPEND | XTREE_PREPEND)); + + newnode = (xtree_node_t*)xmalloc(sizeof(xtree_node_t)); + newnode->data = data; + newnode->parent = parent; + newnode->start = NULL; + newnode->end = NULL; + newnode->next = NULL; + newnode->previous = NULL; + + if (!parent) { + newnode->next = NULL; + newnode->previous = NULL; + tree->root = newnode; + tree->count = 1; + tree->depth = 1; + tree->state = XTREE_STATE_DEPTHCACHED; + return newnode; + } + + if (flags & XTREE_APPEND) { + newnode->previous = parent->end; + newnode->next = NULL; + if (parent->end) { + parent->end->next = newnode; + } else { + parent->start = newnode; + } + parent->end = newnode; + } else { + newnode->next = parent->start; + newnode->previous = NULL; + if (parent->start) { + parent->start->previous = newnode; + } else { + parent->end = newnode; + } + parent->start = newnode; + } + + ++tree->count; + 
tree->state &= ~XTREE_STATE_DEPTHCACHED; + if (flags & XTREE_REFRESH_DEPTH) + xtree_refresh_depth(tree); + + return newnode; +} + +xtree_node_t* xtree_add_sibling(xtree_t* tree, + xtree_node_t* node, + void* data, + uint8_t flags) +{ + xtree_node_t* newnode = NULL; + + xassert(flags & (XTREE_APPEND | XTREE_PREPEND)); + + if (!tree) + return NULL; + + /* no node, same behaviour as add_child */ + if (!node) return xtree_add_child(tree, node, data, flags); + + /* root node has only childs */ + /* FIXME: better to call add_child instead here, or can be too + * confusing ? + */ + if (!node->parent) + return NULL; + + newnode = (xtree_node_t*)xmalloc(sizeof(xtree_node_t)); + newnode->data = data; + newnode->parent = node->parent; + newnode->start = NULL; + newnode->end = NULL; + newnode->next = NULL; + newnode->previous = NULL; + + if (flags & XTREE_APPEND) { + newnode->previous = node; + newnode->next = node->next; + node->next = newnode; + if (newnode->next) { + newnode->next->previous = newnode; + } else { + node->parent->end = newnode; + } + } else { + newnode->next = node; + newnode->previous = node->previous; + node->previous = newnode; + if (newnode->previous) { + newnode->previous->next = newnode; + } else { + node->parent->start = newnode; + } + } + + ++tree->count; + tree->state &= ~XTREE_STATE_DEPTHCACHED; + + if (flags & XTREE_REFRESH_DEPTH) + xtree_refresh_depth(tree); + + return newnode; +} + +/* NOTE: 0 = no node since no depth so implies no root */ +uint32_t xtree_depth_const(const xtree_t* tree) +{ + if (tree->state & XTREE_STATE_DEPTHCACHED) + return tree->depth; + + return xtree_depth_const_node(tree, tree->root); +} + +static uint8_t xtree_depth_helper(xtree_node_t* node, + uint8_t which, + uint32_t level, + void* arg) +{ + uint32_t* max_level = (uint32_t*)arg; + + if (level >= *max_level) { + *max_level = level; + } + + return 1; +} + +uint32_t xtree_depth_const_node(const xtree_t* tree, const xtree_node_t* node) +{ + uint32_t max_level = 0; + + if 
(!tree->root) + return 0; + xtree_walk((xtree_t*)tree, + NULL, + 0, + UINT32_MAX, + xtree_depth_helper, + &max_level); + return max_level + 1; +} + +uint32_t xtree_depth(xtree_t* tree) +{ + xtree_refresh_depth(tree); + return tree->depth; +} + +void xtree_refresh_depth(xtree_t* tree) +{ + if (tree->state & XTREE_STATE_DEPTHCACHED) + return; + tree->depth = xtree_depth_const_node(tree, tree->root); + tree->state |= XTREE_STATE_DEPTHCACHED; +} + +uint32_t xtree_node_depth(const xtree_node_t* node) +{ + uint32_t depth = 0; + while (node) { + ++depth; + node = node->parent; + } + return depth; +} + +/* always tries to browse the tree in this order : most left child, if no + * child, go to next sibling, then if no sibling, go up until a sibling is + * found. + */ +xtree_node_t* xtree_walk(xtree_t* tree, + xtree_node_t* node, + uint32_t min_level, + uint32_t max_level, + xtree_walk_function_t action, + void* arg) +{ + xtree_node_t* current_node = NULL; + uint32_t level = 0; + + if (!tree || !action) + return NULL; + if (!node) + node = tree->root; + + current_node = node; + while (current_node) { + /* go down and continue counting */ + if (current_node->start) { + if (level >= min_level && !action(current_node, + XTREE_PREORDER, + level, + arg)) { + return current_node; + } + if (level < max_level) { + current_node = current_node->start; + ++level; + continue; + } + } else if (level >= min_level && + !action(current_node, XTREE_LEAF, level, arg)) { + return current_node; + } + + /* while no next member go up */ + while (!current_node->next) { + current_node = current_node->parent; + --level; + if (!current_node) { + return NULL; + } else if (current_node == node) { + if (level >= min_level && + !action(current_node, + XTREE_ENDORDER, + level, + arg)) { + return current_node; + } + return NULL; + } else if (level >= min_level && !action(current_node, + XTREE_ENDORDER, + level, + arg)) { + return current_node; + } + } + + /* go to next sibling */ + if (current_node->next) 
{ + if ((level >= min_level) && + !action(current_node->parent, + XTREE_INORDER, + level - 1, + arg)) { + return current_node; + } + current_node = current_node->next; + } + } + return NULL; +} + +struct xtree_find_st { + xtree_find_compare_t compare; + const void* arg; +}; + +static uint8_t xtree_find_helper(xtree_node_t* node, + uint8_t which, + uint32_t level, + void* arg) +{ + struct xtree_find_st* st = (struct xtree_find_st*)arg; + return st->compare(node->data, st->arg); +} + +xtree_node_t* xtree_find(xtree_t* tree, + xtree_find_compare_t compare, + const void* arg) +{ + + struct xtree_find_st st; + if (!tree || !compare) + return NULL; + st.compare = compare; + st.arg = arg; + return xtree_walk(tree, NULL, 0, UINT32_MAX, xtree_find_helper, &st); +} + +xtree_node_t* xtree_delete(xtree_t* tree, xtree_node_t* node) +{ + xtree_node_t* parent = NULL; + + if (!tree || !tree->root || !node) + return NULL; + if (node == tree->root) { + xtree_free(tree); + return NULL; + } + + parent = node->parent; + if (parent->start == node && parent->end == node) { + parent->start = parent->end = NULL; + tree->state &= ~XTREE_STATE_DEPTHCACHED; + } else if (parent->start == node) { + parent->start = node->next; + node->next->previous = NULL; + } else if (parent->end == node) { + parent->end = node->previous; + node->previous->next = NULL; + } else { + node->previous->next = node->next; + node->next->previous = node->previous; + } + + xtree_free_childs(tree, node); + if (tree->free) + tree->free(node->data); + xfree(node); + --tree->count; + + return parent; +} + +#define XTREE_GET_PARENTS_FIRST_SIZE 64 + +xtree_node_t** xtree_get_parents(xtree_t* tree, + xtree_node_t* node, + uint32_t* size) +{ + xtree_node_t* current_node = NULL; + xtree_node_t** parents_list = NULL; + uint32_t parents_size = 0; + uint32_t parents_count = 0; + if (!tree || !tree->root || !node || !size) + return NULL; + + parents_size = XTREE_GET_PARENTS_FIRST_SIZE; + parents_list = (xtree_node_t**)xmalloc( + 
sizeof(xtree_node_t*)*parents_size); + + current_node = node->parent; + while (current_node) { + if (parents_count >= parents_size) { + parents_size = parents_count*2; + parents_list = (xtree_node_t**)xrealloc(parents_list, + sizeof(xtree_node_t*)*parents_size); + } + parents_list[parents_count] = current_node; + ++parents_count; + current_node = current_node->parent; + } + + if (parents_count != 0) { + parents_list = (xtree_node_t**)xrealloc(parents_list, + sizeof(xtree_node_t*)*(parents_count+1)); + /* safety mesure, can be used as strlen if users assumes it */ + parents_list[parents_count] = NULL; + } + else { + xfree(parents_list); + parents_list = NULL; + } + *size = parents_count; + return parents_list; +} + +xtree_node_t* xtree_common(xtree_t* tree, + const xtree_node_t* const* nodes, + uint32_t size) +{ + xtree_node_t* common_ancestor = NULL; + xtree_node_t* current_node = NULL; + uint32_t i; + uint8_t found_common_ancestor; + + if (!tree || !tree->root || !nodes || !nodes[0] || !size || + !nodes[0]->parent) + return NULL; + + common_ancestor = nodes[0]->parent; + for (i = 1; i < size && common_ancestor; ++i) { + found_common_ancestor = 0; + while (common_ancestor && !found_common_ancestor) { + if (!nodes[i]) return common_ancestor; + current_node = nodes[i]->parent; + while (current_node && + current_node != common_ancestor) { + current_node = current_node->parent; + } + if (current_node != common_ancestor) { + common_ancestor = common_ancestor->parent; + } else { + found_common_ancestor = 1; + } + } + } + + return common_ancestor; +} + +#define XTREE_GET_LEAVES_FIRST_SIZE 64 +struct xtree_get_leaves_st { + xtree_node_t** list; + uint32_t list_count; + uint32_t size; +}; + +static uint8_t xtree_get_leaves_helper(xtree_node_t* node, + uint8_t which, + uint32_t level, + void* arg) +{ + struct xtree_get_leaves_st* st = (struct xtree_get_leaves_st*)arg; + if (which == XTREE_LEAF) { + if (st->list_count >= st->size) { + st->size = st->list_count * 2; + st->list 
= (xtree_node_t**)xrealloc(st->list, + sizeof(xtree_node_t*)*st->size); + } + st->list[st->list_count] = node; + ++st->list_count; + } + return 1; +} + +xtree_node_t** xtree_get_leaves(xtree_t* tree, + xtree_node_t* node, + uint32_t* size) +{ + struct xtree_get_leaves_st st; + if (!tree || !size || !node) { + /* testing node nulliness to return NULL since xtree_walk will + * be run for root node if node == NULL and return an + * unattended non null value. */ + return NULL; + } + if (!node->start) { + /* if the node is a leave itself there is no leaves descending + * it, but tree walk will return the leave itself, so + * returning null before. */ + return NULL; + } + st.list_count = 0; + st.size = XTREE_GET_LEAVES_FIRST_SIZE; + st.list = (xtree_node_t**)xmalloc(sizeof(xtree_node_t*)*st.size); + xtree_walk(tree, node, 0, UINT32_MAX, xtree_get_leaves_helper, &st); + if (st.list_count != 0) { + st.list = (xtree_node_t**)xrealloc(st.list, + sizeof(xtree_node_t*)*(st.list_count+1)); + /* safety mesure, can be used as strlen if users assumes it */ + st.list[st.list_count] = NULL; + } + else { + xfree(st.list); + st.list = NULL; + } + *size = st.list_count; + return st.list; +} + diff --git a/src/common/xtree.h b/src/common/xtree.h new file mode 100644 index 0000000000000000000000000000000000000000..4a817cf1f0268787c0b6951c779b5f16dc35d283 --- /dev/null +++ b/src/common/xtree.h @@ -0,0 +1,312 @@ +/*****************************************************************************\ + * xtree.h - functions used for tree data structure manament + ***************************************************************************** + * Copyright (C) 2012 CEA/DAM/DIF + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef __XTREE_O7S7VY_INC__ +#define __XTREE_O7S7VY_INC__ + +#include <stdint.h> + +/* function prototype to deallocate data stored in tree nodes */ +typedef void (*xtree_free_data_function_t)(void* data); + +/** + * The root node's parent must always be NULL (or browsing algorithm which + * stops at root when going up, will either crash or go to an infinite loop). 
+ */ +typedef struct xtree_node_st { + void* data; /* data of the node */ + struct xtree_node_st* parent; /* parent node, level up */ + struct xtree_node_st* start; /* first node, level below */ + struct xtree_node_st* end; /* last node, level below */ + struct xtree_node_st* next; /* next node, same level */ + struct xtree_node_st* previous; /* previous node, same level */ +} xtree_node_t; + +typedef struct xtree_st { + xtree_node_t* root; /* root node of the tree */ + xtree_free_data_function_t free; /* frees nodes data or null */ + uint32_t count; /* always: number of nodes */ + uint32_t depth; /* cached depth */ + uint32_t state; /* see XTREE_STATE_* */ +} xtree_t; + +/* free a complete tree whatever the `tree` entry point, it can be a leaf for + * example. + * + * The tree itself is freed but the `xtree_t` structure is not free (since it + * can be stored on the stack). + * + * During freeing operation the tree is in a invalid state. + * + * @param tree is the tree entry point. + */ +void xtree_free(xtree_t* tree); + +/** + * initialize a xtree_t structure with an empty tree + * + * @param freefunc is the function which will be used to free data associated + * with tree's nodes or NULL + * @param tree is the structure to initialize + * + */ +void xtree_init(xtree_t* tree, xtree_free_data_function_t freefunc); + +/** + * Sets the functions which will be used to free data member for each node or + * NULL to disable freeing. This function should not be used if nodes have + * already been added to the tree. + * @param freefunc is the freeing function. 
+ */ +void xtree_set_freefunc(xtree_t* tree, xtree_free_data_function_t freefunc); + +#define XTREE_STATE_DEPTHCACHED 1 + +#define XTREE_PREPEND 1 /** prepend to child list */ +#define XTREE_APPEND 2 /** append to child list */ +#define XTREE_REFRESH_DEPTH 4 /** default: don't refresh at insertion */ + +/** convenience function to get the parent of a node */ +xtree_node_t* xtree_get_parent(xtree_t* tree, xtree_node_t* node); + +/** convenience function to get node count + * @returns the number of nodes in the tree (constant time), or UINT32_MAX + * if tree is NULL. + */ +uint32_t xtree_get_count(xtree_t* tree); + +/** Add a child to a node of a tree. + * @param tree the tree to manage, `parent` belongs to it. + * @param parent is the node to add a child to (NULL: create the root node). + * @param data is the data member associated with the new child. + * @param flags is a combination of the following: + * XTREE_APPEND: add the new node as the last child of `parent` (mutually + * exclusive with PREPEND); + * XTREE_PREPEND: add the new node as the first child of `parent` (mutually + * exclusive with APPEND); + * XTREE_REFRESH_DEPTH: refresh the cached depth of the tree. + * @returns the new child node, or NULL if tree is NULL or if exactly one of + * `parent` and the tree's root is NULL. Asserts APPEND or PREPEND is set. + */ +xtree_node_t* xtree_add_child(xtree_t* tree, + xtree_node_t* parent, + void* data, + uint8_t flags); + +/** Add a sibling to a node. + * @param tree is the tree to manage, NULL tree is illegal. + * @param node is the node next to which the new node should be added. When + * node is null, the function has the same behaviour as xtree_add_child. + * @param data is the data associated with the new node being added. + * @param flags is a combination of the following: + * XTREE_APPEND: add the new node after `node` (mutually exclusive with + * PREPEND); + * XTREE_PREPEND: add the new node before `node` (mutually exclusive with + * APPEND); + * XTREE_REFRESH_DEPTH: refresh the cached depth of the tree.
+ * @returns the new node, or NULL for an illegal parameter (`node` is root). + */ +xtree_node_t* xtree_add_sibling(xtree_t* tree, + xtree_node_t* node, + void* data, + uint8_t flags); + +/** Get the depth of a tree without updating the cache. The cached depth is + * returned when it is valid; otherwise the complete tree is browsed with + * xtree_walk to determine its greatest depth. + * @param tree the tree to calculate depth from. + * @returns the depth of the given tree; a return value of 0 means the tree is + * empty (it has no nodes, not even a root node). + */ +uint32_t xtree_depth_const(const xtree_t* tree); + +/** Calculate depth with xtree_walk. NOTE: xtree.c currently walks from root. + * @param tree the tree to calculate depth from. + * @param node the intended starting point (currently not used by xtree.c). + */ +uint32_t xtree_depth_const_node(const xtree_t* tree, const xtree_node_t* node); + +/** Calculate a tree depth, caching its result inside the tree (call + * xtree_walk). + * @see xtree_depth_const + */ +uint32_t xtree_depth(xtree_t* tree); + +/** Recalculate the tree depth and cache it, unless the cache is still valid. + * @param tree is the tree to refresh. + */ +void xtree_refresh_depth(xtree_t* tree); + +/** Convenience function which goes upward to the root node to calculate the + * depth of the node passed in argument (1 for the root, 0 for NULL). + */ +uint32_t xtree_node_depth(const xtree_node_t* node); + +/** see function prototype for xtree_walk for description */ +#define XTREE_PREORDER 1 +#define XTREE_INORDER 2 +#define XTREE_ENDORDER 4 +#define XTREE_LEAF 8 + +#define XTREE_LEVEL_MAX UINT32_MAX + +/** function prototype for walking through tree. + * + * @param node is the current node being parsed. + * @param which informs which visit is being done on the node, + * XTREE_PREORDER, XTREE_INORDER, XTREE_ENDORDER indicate this node + * is being visited before visiting the children, between visits of + * each child (if more than one), and after visiting the children. + * XTREE_LEAF indicates the node being visited is a leaf and receives + * consequently only one visit.
+ * @param level is the current level, 0 being the walk's starting node. + * @param arg is the data passed to xtree_walk when calling it. + * @returns 0 to indicate that xtree_walk does not need to continue to go + * through the tree; a nonzero value continues the browsing. + */ +typedef uint8_t (*xtree_walk_function_t)(xtree_node_t* node, + uint8_t which, + uint32_t level, + void* arg); + +/** Browse the tree depth-first, left-to-right. It mimics the C twalk + * function. + * + * You should not modify tree structure during traversal, since it can cause + * browsing errors or crash. + * + * @param tree is the tree you want to browse. + * @param node is the starting point or NULL (same as tree->root) to begin + * the traversal. + * @param min_level is the minimum level required to execute the action + * function, minimum being the starting node (=0). + * @param max_level is the maximum level to browse, the traversal goes up + * again on reaching this point, maximum being UINT32_MAX for + * all the tree's depth. + * @param action is the user function to execute for each node. See the + * typedef documentation. + * @param arg is the user data to pass unmodified to the user function. + * @returns the node where the walk was stopped by action (returned 0) or NULL. + */ +xtree_node_t* xtree_walk(xtree_t* tree, + xtree_node_t* node, + uint32_t min_level, + uint32_t max_level, + xtree_walk_function_t action, + void* arg); + +/** @see xtree_find */ +typedef uint8_t (*xtree_find_compare_t)(const void* node_data, + const void* arg); + +/** Convenience function which calls xtree_walk to find a node according to + * a compare function. + * + * @param tree is the tree to search through. + * @param compare is a function returning 0 when the element corresponds to + * the search criteria; this function takes node_data for each + * node as first argument and arg as its second argument. + * @param arg is a function argument which can be the key or whatever data + * the user function needs to find the searched element.
+ * @returns the found node or NULL. + */ +xtree_node_t* xtree_find(xtree_t* tree, + xtree_find_compare_t compare, + const void* arg); + +/** Deletes a node from the tree. You can use xtree_find or xtree_walk to + * find the wanted node. This function frees the node data using the tree's + * freefunc function (if one is set), and recursively frees the node's + * children too. + * + * @param tree is the tree to manage. + * @param node is the node to remove. + * @returns the parent node of the deleted node or NULL if bad argument/tree + * or the node was the tree's root node. + */ +xtree_node_t* xtree_delete(xtree_t* tree, xtree_node_t* node); + +/** Gets recursive parents list from a node or NULL for bad tree/parameters + * or root node. + * User is responsible for `xfree`'ing the returned list. + * The parents list goes from node's parent up to the root. + * + * @param tree the managed tree. + * @param node the node whose parents are listed (itself not included in the + * list). + * @param size will be modified according to the number of parents in the + * returned list if the return value is not null. + * @returns the `xmalloc`ed parents array or NULL. Although size contains the + * number of array elements, the array is also null terminated. + */ +xtree_node_t** xtree_get_parents(xtree_t* tree, + xtree_node_t* node, + uint32_t* size); + +/** Get common ancestor of all given nodes. + * Example: 1 -> 2 -> 7, common ancestor of 2 and 7 is 1. + * + * @param tree is the managed tree. + * @param nodes is a node table which should have a common ancestor, an + * optional null node ends the list, else list stops at the + * (size - 1)th element. + * @param size is the number of elements the node table has, can be greater + * than the number of actual elements if list is null terminated + * (such as the UINT32_MAX value). + * @returns the common ancestor of all nodes or NULL if no such ancestor + * exists (if root node is listed, returns null since root node has + * no ancestor).
+ */ +xtree_node_t* xtree_common(xtree_t* tree, + const xtree_node_t* const* nodes, + uint32_t size); + +/** Get recursive list of leaves starting from node. + * User is responsible for `xfree`'ing the returned list. + * + * @param tree the managed tree. + * @param node the node to start from. + * @param size will be modified according to the number of leaves in the + * retured list if the return value is not null. + * @returns the `xmalloc`ed leaves array starting from node or NULL if bogus + * tree or bad parameters. Although size contains the + * array number of elements, the array is null terminated. + */ +xtree_node_t** xtree_get_leaves(xtree_t* tree, + xtree_node_t* node, + uint32_t* size); + +#endif + diff --git a/src/database/Makefile.in b/src/database/Makefile.in index f507fd20d56b81e9ddda96760f474d5c97325859..cbb617d2eadfbe2eaf72016a0e259df9ac9043e6 100644 --- a/src/database/Makefile.in +++ b/src/database/Makefile.in @@ -58,6 +58,7 @@ subdir = src/database DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ 
$(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -179,6 +183,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -199,6 +205,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -208,6 +217,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -215,6 +226,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -249,6 +269,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -276,6 +299,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ 
+RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/database/mysql_common.c b/src/database/mysql_common.c index d165e8230dd6297e0c77642f12e11bb8c7a671e8..acf0739e28187db43fb75c74db8f39a736430609 100644 --- a/src/database/mysql_common.c +++ b/src/database/mysql_common.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h index eb600aaddf6ebd9ea85f12787568aba0b758f961..054bfd21a5bb92bf543269d939377877196c7fa7 100644 --- a/src/database/mysql_common.h +++ b/src/database/mysql_common.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/database/pgsql_common.c b/src/database/pgsql_common.c index f46bf86f30a4ad95ad7320c26bd2adf1f15dc353..9e6b40d5984a01048946febaacfc4aa286a11f6a 100644 --- a/src/database/pgsql_common.c +++ b/src/database/pgsql_common.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/database/pgsql_common.h b/src/database/pgsql_common.h index e21500af3bffe82b9ca2397bc8ae7784641134f7..fdbdf1729ee451b358300680f806297cdf54b7db 100644 --- a/src/database/pgsql_common.h +++ b/src/database/pgsql_common.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/Makefile.in b/src/db_api/Makefile.in index 696e6961d66aefc4e77678716ffa0d0ef9a6fa6f..6fdb9e538dcf7550a795f65dee9bf1c0b8df45e0 100644 --- a/src/db_api/Makefile.in +++ b/src/db_api/Makefile.in @@ -61,6 +61,7 @@ subdir = src/db_api DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -78,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -86,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + 
$(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -194,6 +198,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -215,6 +221,9 @@ EGREP = @EGREP@ # This is needed if compiling on windows EXEEXT = FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -224,6 +233,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -231,6 +242,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -265,6 +285,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -292,6 +315,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ 
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/db_api/account_functions.c b/src/db_api/account_functions.c index 6f7037b72e89ee2c6c71b66746056ff7d9ba494e..e666e62ccd3de421c22efe8f058d86d8f104a540 100644 --- a/src/db_api/account_functions.c +++ b/src/db_api/account_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/archive_functions.c b/src/db_api/archive_functions.c index 775662318fcf76462bf6a721644ca907e396d440..592d8a4c7dc0be28bec44f376ac5130325f99ec6 100644 --- a/src/db_api/archive_functions.c +++ b/src/db_api/archive_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/assoc_functions.c b/src/db_api/assoc_functions.c index fac2588f4ae5cd92445af313581415f8177b85da..87d19ac4471044209ea584d965f6c42fc3dff2cb 100644 --- a/src/db_api/assoc_functions.c +++ b/src/db_api/assoc_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/cluster_functions.c b/src/db_api/cluster_functions.c index fce9d0310a7d9b953514f6fe7404b42a53127617..32294db6e984ccf96d6355149302cc99ad10deb6 100644 --- a/src/db_api/cluster_functions.c +++ b/src/db_api/cluster_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/cluster_report_functions.c b/src/db_api/cluster_report_functions.c index 04c333641c8a08734e4f17e2d790e06a8c7b23e7..77074ca1890855526941c59befe599759f1011e0 100644 --- a/src/db_api/cluster_report_functions.c +++ b/src/db_api/cluster_report_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -68,17 +68,17 @@ static void _process_ua(List user_list, slurmdb_association_rec_t *assoc) */ itr = list_iterator_create(user_list); while((slurmdb_report_user = list_next(itr))) { - if(!strcmp(slurmdb_report_user->name, assoc->user) + if (!strcmp(slurmdb_report_user->name, assoc->user) && !strcmp(slurmdb_report_user->acct, assoc->acct)) break; } list_iterator_destroy(itr); - if(!slurmdb_report_user) { + if (!slurmdb_report_user) { struct passwd *passwd_ptr = NULL; uid_t uid = NO_VAL; passwd_ptr = getpwnam(assoc->user); - if(passwd_ptr) + if (passwd_ptr) uid = passwd_ptr->pw_uid; /* In this report we are using the slurmdb_report user structure to store the information we want @@ -137,7 +137,7 @@ static void _process_uw(List user_list, slurmdb_wckey_rec_t *wckey) uid_t uid = NO_VAL; passwd_ptr = getpwnam(wckey->user); - if(passwd_ptr) + if (passwd_ptr) uid = passwd_ptr->pw_uid; /* In this report we are using the slurmdb_report user structure to store the information we want @@ -172,12 +172,12 @@ static void _process_wu(List assoc_list, slurmdb_wckey_rec_t *wckey) /* find the parent */ itr = list_iterator_create(assoc_list); while((parent_assoc = list_next(itr))) { - if(!parent_assoc->user + if (!parent_assoc->user && !strcmp(parent_assoc->acct, wckey->name)) break; } list_iterator_destroy(itr); - if(!parent_assoc) { + if (!parent_assoc) { parent_assoc = xmalloc(sizeof(slurmdb_report_assoc_rec_t)); list_append(assoc_list, @@ -214,20 +214,20 @@ static void _process_assoc_type( /* now add the associations of interest here by user */ while((assoc = list_next(itr))) { - if(!assoc->accounting_list + if (!assoc->accounting_list || !list_count(assoc->accounting_list) || ((type == CLUSTER_REPORT_UA) && !assoc->user)) { list_delete_item(itr); continue; } - if(strcmp(cluster_name, assoc->cluster)) + if (strcmp(cluster_name, assoc->cluster)) continue; - if(type == CLUSTER_REPORT_UA) + if (type == 
CLUSTER_REPORT_UA) _process_ua(slurmdb_report_cluster->user_list, assoc); - else if(type == CLUSTER_REPORT_AU) + else if (type == CLUSTER_REPORT_AU) _process_au(slurmdb_report_cluster->assoc_list, assoc); @@ -245,20 +245,20 @@ static void _process_wckey_type( /* now add the wckeyiations of interest here by user */ while((wckey = list_next(itr))) { - if(!wckey->accounting_list + if (!wckey->accounting_list || !list_count(wckey->accounting_list) || ((type == CLUSTER_REPORT_UW) && !wckey->user)) { list_delete_item(itr); continue; } - if(strcmp(cluster_name, wckey->cluster)) + if (strcmp(cluster_name, wckey->cluster)) continue; - if(type == CLUSTER_REPORT_UW) + if (type == CLUSTER_REPORT_UW) _process_uw(slurmdb_report_cluster->user_list, wckey); - else if(type == CLUSTER_REPORT_WU) + else if (type == CLUSTER_REPORT_WU) _process_wu(slurmdb_report_cluster->assoc_list, wckey); @@ -287,13 +287,13 @@ static List _process_util_by_report(void *db_conn, char *calling_name, cluster_cond.with_deleted = 1; cluster_cond.with_usage = 1; - if((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) { + if ((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) { start_time = ((slurmdb_association_cond_t *)cond)->usage_start; end_time = ((slurmdb_association_cond_t *)cond)->usage_end; cluster_cond.cluster_list = ((slurmdb_association_cond_t *)cond)->cluster_list; - } else if((type == CLUSTER_REPORT_UW) || (type == CLUSTER_REPORT_WU)) { + } else if ((type == CLUSTER_REPORT_UW) || (type == CLUSTER_REPORT_WU)) { start_time = ((slurmdb_wckey_cond_t *)cond)->usage_start; end_time = ((slurmdb_wckey_cond_t *)cond)->usage_end; @@ -316,32 +316,32 @@ static List _process_util_by_report(void *db_conn, char *calling_name, cluster_list = acct_storage_g_get_clusters( db_conn, my_uid, &cluster_cond); - if(!cluster_list) { + if (!cluster_list) { exit_code=1; fprintf(stderr, "%s: Problem with cluster query.\n", calling_name); goto end_it; } - if((type == CLUSTER_REPORT_UA) || (type == 
CLUSTER_REPORT_AU)) { + if ((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) { ((slurmdb_association_cond_t *)cond)->usage_start = start_time; ((slurmdb_association_cond_t *)cond)->usage_end = end_time; type_list = acct_storage_g_get_associations( db_conn, my_uid, cond); - } else if((type == CLUSTER_REPORT_UW) || (type == CLUSTER_REPORT_WU)) { + } else if ((type == CLUSTER_REPORT_UW) || (type == CLUSTER_REPORT_WU)) { ((slurmdb_wckey_cond_t *)cond)->usage_start = start_time; ((slurmdb_wckey_cond_t *)cond)->usage_end = end_time; type_list = acct_storage_g_get_wckeys( db_conn, my_uid, cond); } - if(!type_list) { + if (!type_list) { exit_code=1; fprintf(stderr, "%s: Problem with get query.\n", calling_name); goto end_it; } - if((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) { + if ((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) { first_list = type_list; type_list = slurmdb_get_hierarchical_sorted_assoc_list( first_list); @@ -353,7 +353,7 @@ static List _process_util_by_report(void *db_conn, char *calling_name, while((cluster = list_next(itr))) { /* check to see if this cluster is around during the time we are looking at */ - if(!cluster->accounting_list + if (!cluster->accounting_list || !list_count(cluster->accounting_list)) continue; @@ -361,18 +361,18 @@ static List _process_util_by_report(void *db_conn, char *calling_name, list_append(ret_list, slurmdb_report_cluster); - if((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_UW)) + if ((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_UW)) slurmdb_report_cluster->user_list = list_create(slurmdb_destroy_report_user_rec); - else if((type == CLUSTER_REPORT_AU) + else if ((type == CLUSTER_REPORT_AU) || (type == CLUSTER_REPORT_WU)) slurmdb_report_cluster->assoc_list = list_create(slurmdb_destroy_report_assoc_rec); - if((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) + if ((type == CLUSTER_REPORT_UA) || (type == CLUSTER_REPORT_AU)) _process_assoc_type(type_itr, 
slurmdb_report_cluster, cluster->name, type); - else if((type == CLUSTER_REPORT_UW) + else if ((type == CLUSTER_REPORT_UW) || (type == CLUSTER_REPORT_WU)) _process_wckey_type(type_itr, slurmdb_report_cluster, cluster->name, type); @@ -383,23 +383,23 @@ static List _process_util_by_report(void *db_conn, char *calling_name, end_it: - if(type_list) { + if (type_list) { list_destroy(type_list); type_list = NULL; } - if(first_list) { + if (first_list) { list_destroy(first_list); first_list = NULL; } - if(cluster_list) { + if (cluster_list) { list_destroy(cluster_list); cluster_list = NULL; } - if(exit_code) { - if(ret_list) { + if (exit_code) { + if (ret_list) { list_destroy(ret_list); ret_list = NULL; } diff --git a/src/db_api/connection_functions.c b/src/db_api/connection_functions.c index c72edce0ee2d8cc69feed6760e3f58ba569d2fe1..19324b9ec1ca57c3862680d76a4d50de1bc4fa7e 100644 --- a/src/db_api/connection_functions.c +++ b/src/db_api/connection_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/coord_functions.c b/src/db_api/coord_functions.c index bc0544f0656fd3004691acb7f42599a8299959f7..df5c07ed083b4ce90e528f95c00f07675d718c79 100644 --- a/src/db_api/coord_functions.c +++ b/src/db_api/coord_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/extra_get_functions.c b/src/db_api/extra_get_functions.c index e63ecb5a47c1b2a3d3e46587eee6fd8b7c82ff43..2a19b847d227220c8073476c71d9c9446338a326 100644 --- a/src/db_api/extra_get_functions.c +++ b/src/db_api/extra_get_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/job_report_functions.c b/src/db_api/job_report_functions.c index aa540f45d259268cff9c5c3e39b44054e41b1d7b..af92f7aff24427a74069b01829b987c6538bc953 100644 --- a/src/db_api/job_report_functions.c +++ b/src/db_api/job_report_functions.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -75,12 +75,12 @@ static void _check_create_grouping( itr = list_iterator_create(cluster_list); while((cluster_group = list_next(itr))) { - if(!strcmp(cluster, cluster_group->cluster)) + if (!strcmp(cluster, cluster_group->cluster)) break; } list_iterator_destroy(itr); - if(!cluster_group) { + if (!cluster_group) { cluster_group = xmalloc( sizeof(slurmdb_report_cluster_grouping_t)); cluster_group->cluster = xstrdup(cluster); @@ -91,12 +91,12 @@ static void _check_create_grouping( itr = list_iterator_create(cluster_group->acct_list); while((acct_group = list_next(itr))) { - if(!strcmp(name, acct_group->acct)) + if (!strcmp(name, acct_group->acct)) break; } list_iterator_destroy(itr); - if(!acct_group) { + if (!acct_group) { uint32_t last_size = 0; char *group = NULL; acct_group = xmalloc(sizeof(slurmdb_report_acct_grouping_t)); @@ -115,22 +115,22 @@ static void _check_create_grouping( job_group = xmalloc( sizeof(slurmdb_report_job_grouping_t)); job_group->jobs = list_create(NULL); - if(!individual) + if (!individual) job_group->min_size = last_size; last_size = atoi(group); - if(!individual) + if (!individual) job_group->max_size = last_size-1; else job_group->min_size = job_group->max_size = last_size; list_append(acct_group->groups, job_group); } - if(last_size && !individual) { + if (last_size && !individual) { job_group = xmalloc( sizeof(slurmdb_report_job_grouping_t)); job_group->jobs = list_create(NULL); job_group->min_size = last_size; - if(individual) + if (individual) job_group->max_size = job_group->min_size; else @@ -174,11 +174,11 @@ static List _process_grouped_report( here since we may be looking for sub accounts of a specific account. 
*/ - if(!job_cond) { + if (!job_cond) { destroy_job_cond = 1; job_cond = xmalloc(sizeof(slurmdb_job_cond_t)); } - if(!grouping_list) { + if (!grouping_list) { destroy_grouping_list = 1; grouping_list = list_create(slurm_destroy_char); slurm_addto_char_list(grouping_list, "50,250,500,1000"); @@ -191,7 +191,7 @@ static List _process_grouped_report( job_cond->acct_list = tmp_acct_list; tmp_acct_list = NULL; - if(!job_list) { + if (!job_list) { exit_code=1; fprintf(stderr, " Problem with job query.\n"); goto end_it; @@ -199,21 +199,21 @@ static List _process_grouped_report( group_itr = list_iterator_create(grouping_list); /* make a group for each job size we find. */ - if(!list_count(grouping_list)) { + if (!list_count(grouping_list)) { char *group = NULL; char *tmp = NULL; individual = 1; itr = list_iterator_create(job_list); while((job = list_next(itr))) { - if(!job->elapsed || !job->alloc_cpus) + if (!job->elapsed || !job->alloc_cpus) continue; tmp = xstrdup_printf("%u", job->alloc_cpus); while((group = list_next(group_itr))) { - if(!strcmp(group, tmp)) { + if (!strcmp(group, tmp)) { break; } } - if(!group) + if (!group) list_append(grouping_list, tmp); else xfree(tmp); @@ -227,7 +227,7 @@ static List _process_grouped_report( cluster_itr = list_iterator_create(cluster_list); - if(flat_view) + if (flat_view) goto no_objects; if (!wckey_type || both) { @@ -237,8 +237,8 @@ static List _process_grouped_report( assoc_cond.cluster_list = job_cond->cluster_list; /* don't limit associations to having the partition_list */ //assoc_cond.partition_list = job_cond->partition_list; - if(!job_cond->acct_list || !list_count(job_cond->acct_list)) { - if(job_cond->acct_list) + if (!job_cond->acct_list || !list_count(job_cond->acct_list)) { + if (job_cond->acct_list) list_destroy(job_cond->acct_list); job_cond->acct_list = list_create(NULL); list_append(job_cond->acct_list, "root"); @@ -262,7 +262,7 @@ static List _process_grouped_report( } } - if(!object_list) { + if (!object_list) 
{ debug2(" No join list given.\n"); goto no_objects; } @@ -277,7 +277,7 @@ static List _process_grouped_report( (slurmdb_association_rec_t *)object; if (!itr2) { char *name = NULL; - if(wckey_type) { + if (wckey_type) { cluster = wckey->cluster; name = wckey->name; } else { @@ -326,12 +326,12 @@ no_objects: char *local_cluster = "UNKNOWN"; char tmp_acct[200]; - if(!job->elapsed) { + if (!job->elapsed) { /* here we don't care about jobs that didn't * really run here */ continue; } - if(job->cluster) + if (job->cluster) local_cluster = job->cluster; if (!wckey_type) { @@ -358,14 +358,14 @@ no_objects: list_iterator_reset(cluster_itr); while((cluster_group = list_next(cluster_itr))) { - if(!strcmp(local_cluster, cluster_group->cluster)) + if (!strcmp(local_cluster, cluster_group->cluster)) break; } - if(!cluster_group) { + if (!cluster_group) { /* here we are only looking for groups that * were added with the associations above */ - if(!flat_view) + if (!flat_view) continue; cluster_group = xmalloc( sizeof(slurmdb_report_cluster_grouping_t)); @@ -378,12 +378,12 @@ no_objects: acct_itr = list_iterator_create(cluster_group->acct_list); while((acct_group = list_next(acct_itr))) { if (wckey_type) { - if(!strcmp(tmp_acct, acct_group->acct)) + if (!strcmp(tmp_acct, acct_group->acct)) break; continue; } - if(!flat_view + if (!flat_view && (acct_group->lft != (uint32_t)NO_VAL) && (job->lft != (uint32_t)NO_VAL)) { /* keep separate since we don't want @@ -408,13 +408,13 @@ no_objects: } list_iterator_destroy(acct_itr); - if(!acct_group) { + if (!acct_group) { char *group = NULL; uint32_t last_size = 0; /* here we are only looking for groups that * were added with the associations above */ - if(!flat_view) + if (!flat_view) continue; acct_group = xmalloc( @@ -428,22 +428,22 @@ no_objects: job_group = xmalloc( sizeof(slurmdb_report_job_grouping_t)); job_group->jobs = list_create(NULL); - if(!individual) + if (!individual) job_group->min_size = last_size; last_size = atoi(group); 
- if(!individual) + if (!individual) job_group->max_size = last_size-1; else job_group->min_size = job_group->max_size = last_size; list_append(acct_group->groups, job_group); } - if(last_size && !individual) { + if (last_size && !individual) { job_group = xmalloc( sizeof(slurmdb_report_job_grouping_t)); job_group->jobs = list_create(NULL); job_group->min_size = last_size; - if(individual) + if (individual) job_group->max_size = job_group->min_size; else @@ -456,7 +456,7 @@ no_objects: local_itr = list_iterator_create(acct_group->groups); while((job_group = list_next(local_itr))) { uint64_t total_secs = 0; - if((job->alloc_cpus < job_group->min_size) + if ((job->alloc_cpus < job_group->min_size) || (job->alloc_cpus > job_group->max_size)) continue; list_append(job_group->jobs, job); @@ -492,20 +492,20 @@ no_objects: list_iterator_destroy(cluster_itr); end_it: - if(object_list) + if (object_list) list_destroy(object_list); - if(object2_list) + if (object2_list) list_destroy(object2_list); - if(destroy_job_cond) + if (destroy_job_cond) slurmdb_destroy_job_cond(job_cond); - if(destroy_grouping_list && grouping_list) + if (destroy_grouping_list && grouping_list) list_destroy(grouping_list); - if(exit_code) { - if(cluster_list) { + if (exit_code) { + if (cluster_list) { list_destroy(cluster_list); cluster_list = NULL; } diff --git a/src/db_api/qos_functions.c b/src/db_api/qos_functions.c index fcea72de15576363074efa9ac12d13af6e210274..9a565a145c9333fc72d5371c04b7eb7c20f54d0f 100644 --- a/src/db_api/qos_functions.c +++ b/src/db_api/qos_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/resv_report_functions.c b/src/db_api/resv_report_functions.c index 7e76cb02a5f3ba0dcaf67a75784debb8ab64d87b..345e172d4367ef5a7a0333644b857f6e38e36b5a 100644 --- a/src/db_api/resv_report_functions.c +++ b/src/db_api/resv_report_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/usage_functions.c b/src/db_api/usage_functions.c index 7b122c503b83c6a1ea044cfdbcbd7c91c42d7550..5ae9a9eeebbcdff0e5eeaa6439eaad546165891f 100644 --- a/src/db_api/usage_functions.c +++ b/src/db_api/usage_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/user_functions.c b/src/db_api/user_functions.c index 75f1702a24b111b3cd277b63707405b03a4dff54..4b9d863242e6126a0ee5a8915cdab6d9068f7bf5 100644 --- a/src/db_api/user_functions.c +++ b/src/db_api/user_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/db_api/user_report_functions.c b/src/db_api/user_report_functions.c index 597e8433767572dd9fad3e8bc803dce0c46510f3..453418e55a9d8fceaa9dbd3bfcf5df4e62e6a7eb 100644 --- a/src/db_api/user_report_functions.c +++ b/src/db_api/user_report_functions.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -73,18 +73,18 @@ extern List slurmdb_report_user_top_usage(void *db_conn, delete_cluster_list = 0; time_t start_time, end_time; - if(!user_cond) { + if (!user_cond) { delete_user_cond = 1; user_cond = xmalloc(sizeof(slurmdb_user_cond_t)); } - if(!user_cond->assoc_cond) { + if (!user_cond->assoc_cond) { delete_assoc_cond = 1; user_cond->assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); } - if(!user_cond->assoc_cond->cluster_list) { + if (!user_cond->assoc_cond->cluster_list) { delete_cluster_list = 1; user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); @@ -106,7 +106,7 @@ extern List slurmdb_report_user_top_usage(void *db_conn, user_cond->assoc_cond->usage_end = end_time; user_list = acct_storage_g_get_users(db_conn, my_uid, user_cond); - if(!user_list) { + if (!user_list) { exit_code=1; fprintf(stderr, " Problem with user query.\n"); goto end_it; @@ -125,7 +125,7 @@ extern List slurmdb_report_user_top_usage(void *db_conn, usage_cluster_list = acct_storage_g_get_clusters( db_conn, my_uid, &cluster_cond); - if(!usage_cluster_list) { + if (!usage_cluster_list) { exit_code=1; fprintf(stderr, " Problem with cluster query.\n"); goto end_it; @@ -137,7 +137,7 @@ extern List slurmdb_report_user_top_usage(void *db_conn, while((cluster = list_next(itr))) { /* check to 
see if this cluster is around during the time we are looking at */ - if(!cluster->accounting_list + if (!cluster->accounting_list || !list_count(cluster->accounting_list)) continue; @@ -155,11 +155,11 @@ extern List slurmdb_report_user_top_usage(void *db_conn, cluster_itr = list_iterator_create(cluster_list); while((user = list_next(itr))) { struct passwd *passwd_ptr = NULL; - if(!user->assoc_list || !list_count(user->assoc_list)) + if (!user->assoc_list || !list_count(user->assoc_list)) continue; passwd_ptr = getpwnam(user->name); - if(passwd_ptr) + if (passwd_ptr) user->uid = passwd_ptr->pw_uid; else user->uid = (uint32_t)NO_VAL; @@ -167,16 +167,16 @@ extern List slurmdb_report_user_top_usage(void *db_conn, itr2 = list_iterator_create(user->assoc_list); while((assoc = list_next(itr2))) { - if(!assoc->accounting_list + if (!assoc->accounting_list || !list_count(assoc->accounting_list)) continue; while((slurmdb_report_cluster = list_next(cluster_itr))) { - if(!strcmp(slurmdb_report_cluster->name, + if (!strcmp(slurmdb_report_cluster->name, assoc->cluster)) { ListIterator user_itr = NULL; - if(!group_accounts) { + if (!group_accounts) { slurmdb_report_user = NULL; goto new_user; } @@ -185,13 +185,13 @@ extern List slurmdb_report_user_top_usage(void *db_conn, user_list); while((slurmdb_report_user = list_next(user_itr))) { - if(slurmdb_report_user->uid + if (slurmdb_report_user->uid != NO_VAL) { - if(slurmdb_report_user-> + if (slurmdb_report_user-> uid == user->uid) break; - } else if(slurmdb_report_user-> + } else if (slurmdb_report_user-> name && !strcasecmp( slurmdb_report_user-> @@ -201,7 +201,7 @@ extern List slurmdb_report_user_top_usage(void *db_conn, } list_iterator_destroy(user_itr); new_user: - if(!slurmdb_report_user) { + if (!slurmdb_report_user) { slurmdb_report_user = xmalloc( sizeof (slurmdb_report_user_rec_t)); @@ -219,7 +219,7 @@ extern List slurmdb_report_user_top_usage(void *db_conn, break; } } - if(!slurmdb_report_cluster) { + if 
(!slurmdb_report_cluster) { error("This cluster '%s' hasn't " "registered yet, but we have jobs " "that ran?", assoc->cluster); @@ -244,12 +244,12 @@ extern List slurmdb_report_user_top_usage(void *db_conn, itr3 = list_iterator_create( slurmdb_report_user->acct_list); while((object = list_next(itr3))) { - if(!strcmp(object, assoc->acct)) + if (!strcmp(object, assoc->acct)) break; } list_iterator_destroy(itr3); - if(!object) + if (!object) list_append(slurmdb_report_user->acct_list, xstrdup(assoc->acct)); itr3 = list_iterator_create(assoc->accounting_list); @@ -267,28 +267,28 @@ extern List slurmdb_report_user_top_usage(void *db_conn, list_iterator_destroy(cluster_itr); end_it: - if(delete_cluster_list) { + if (delete_cluster_list) { list_destroy(user_cond->assoc_cond->cluster_list); user_cond->assoc_cond->cluster_list = NULL; } - if(delete_assoc_cond) { + if (delete_assoc_cond) { slurmdb_destroy_association_cond(user_cond->assoc_cond); user_cond->assoc_cond = NULL; } - if(delete_user_cond) { + if (delete_user_cond) { slurmdb_destroy_user_cond(user_cond); user_cond = NULL; } - if(user_list) { + if (user_list) { list_destroy(user_list); user_list = NULL; } - if(exit_code) { - if(cluster_list) { + if (exit_code) { + if (cluster_list) { list_destroy(cluster_list); cluster_list = NULL; } diff --git a/src/db_api/wckey_functions.c b/src/db_api/wckey_functions.c index e02658d66f3d5c6948080919f3f6478c4a3f9d3b..acbce8a493ac42b6b4d625768d117010b61cddb2 100644 --- a/src/db_api/wckey_functions.c +++ b/src/db_api/wckey_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index fdd851fff3a906f03d5df97be82e7958f5bbfac4..3e73f3452b0a7cbbce6d90a388a712ded9e9aa04 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -1,9 +1,13 @@ SUBDIRS = \ accounting_storage \ acct_gather_energy \ + acct_gather_profile \ + acct_gather_infiniband \ + acct_gather_filesystem \ auth \ checkpoint \ crypto \ + ext_sensors \ gres \ jobacct_gather \ jobcomp \ @@ -14,6 +18,7 @@ SUBDIRS = \ priority \ proctrack \ sched \ + slurmctld \ select \ switch \ task \ diff --git a/src/plugins/Makefile.in b/src/plugins/Makefile.in index 0722b9043e0dca5b56be9fd5052bb1cc55061e73..e330bb46f90a8b62b2bde821bc1c4cb98a58ab7f 100644 --- a/src/plugins/Makefile.in +++ b/src/plugins/Makefile.in @@ -55,6 +55,7 @@ subdir = src/plugins DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ 
$(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -355,9 +381,13 @@ 
top_srcdir = @top_srcdir@ SUBDIRS = \ accounting_storage \ acct_gather_energy \ + acct_gather_profile \ + acct_gather_infiniband \ + acct_gather_filesystem \ auth \ checkpoint \ crypto \ + ext_sensors \ gres \ jobacct_gather \ jobcomp \ @@ -368,6 +398,7 @@ SUBDIRS = \ priority \ proctrack \ sched \ + slurmctld \ select \ switch \ task \ diff --git a/src/plugins/accounting_storage/Makefile.in b/src/plugins/accounting_storage/Makefile.in index 6fc55d7311231e0cf0a6ed13b0fe98336f01faa6..a51b2c7df7a7910fa39b7c3d93b9cfe90637d2fd 100644 --- a/src/plugins/accounting_storage/Makefile.in +++ b/src/plugins/accounting_storage/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/accounting_storage DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ 
CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/accounting_storage/common/Makefile.in b/src/plugins/accounting_storage/common/Makefile.in index 16cfb6d4dfdb99ea23f5db2a4700f7b0f1f4ad3c..19879eccfe134792769909d0e90dfc2f66d08189 100644 --- 
a/src/plugins/accounting_storage/common/Makefile.in +++ b/src/plugins/accounting_storage/common/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/accounting_storage/common DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -150,6 +154,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -170,6 +176,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -179,6 +188,8 @@ GOBJECT_QUERY = 
@GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -186,6 +197,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -220,6 +240,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -247,6 +270,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/accounting_storage/common/common_as.c b/src/plugins/accounting_storage/common/common_as.c index 5d2245e99bd466b4fb27641ab72d14954faebd9e..c5961a9729b8810e0b58571bb678a26a392d020c 100644 --- a/src/plugins/accounting_storage/common/common_as.c +++ b/src/plugins/accounting_storage/common/common_as.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -71,7 +71,7 @@ static int _sort_update_object_dec(slurmdb_update_object_t *object_a, if ((object_a->type == SLURMDB_MODIFY_ASSOC) && (object_b->type != SLURMDB_MODIFY_ASSOC)) return 1; - else if((object_b->type == SLURMDB_MODIFY_ASSOC) + else if ((object_b->type == SLURMDB_MODIFY_ASSOC) && (object_a->type != SLURMDB_MODIFY_ASSOC)) return -1; return 0; @@ -106,19 +106,19 @@ extern int addto_update_list(List update_list, slurmdb_update_type_t type, slurmdb_association_rec_t *assoc = object; slurmdb_qos_rec_t *qos = object; ListIterator itr = NULL; - if(!update_list) { + if (!update_list) { error("no update list given"); return SLURM_ERROR; } itr = list_iterator_create(update_list); while((update_object = list_next(itr))) { - if(update_object->type == type) + if (update_object->type == type) break; } list_iterator_destroy(itr); - if(update_object) { + if (update_object) { /* here we prepend primarly for remove association since parents need to be removed last, and they are removed first in the calling code */ @@ -145,36 +145,36 @@ extern int addto_update_list(List update_list, slurmdb_update_type_t type, /* We are going to send these to the slurmctld's so lets set up the correct limits to INIFINITE instead of NO_VAL */ - if(assoc->grp_cpu_mins == (uint64_t)NO_VAL) + if (assoc->grp_cpu_mins == (uint64_t)NO_VAL) assoc->grp_cpu_mins = (uint64_t)INFINITE; - if(assoc->grp_cpu_run_mins == (uint64_t)NO_VAL) + if (assoc->grp_cpu_run_mins == (uint64_t)NO_VAL) assoc->grp_cpu_run_mins = (uint64_t)INFINITE; - if(assoc->grp_cpus == NO_VAL) + if (assoc->grp_cpus == NO_VAL) assoc->grp_cpus = INFINITE; - if(assoc->grp_jobs == NO_VAL) + if (assoc->grp_jobs == NO_VAL) assoc->grp_jobs = INFINITE; - if(assoc->grp_mem == NO_VAL) + if (assoc->grp_mem == NO_VAL) assoc->grp_mem = INFINITE; - if(assoc->grp_nodes == NO_VAL) + if (assoc->grp_nodes == NO_VAL) assoc->grp_nodes = INFINITE; - if(assoc->grp_submit_jobs == 
NO_VAL) + if (assoc->grp_submit_jobs == NO_VAL) assoc->grp_submit_jobs = INFINITE; - if(assoc->grp_wall == NO_VAL) + if (assoc->grp_wall == NO_VAL) assoc->grp_wall = INFINITE; - if(assoc->max_cpu_mins_pj == (uint64_t)NO_VAL) + if (assoc->max_cpu_mins_pj == (uint64_t)NO_VAL) assoc->max_cpu_mins_pj = (uint64_t)INFINITE; - if(assoc->max_cpu_run_mins == (uint64_t)NO_VAL) + if (assoc->max_cpu_run_mins == (uint64_t)NO_VAL) assoc->max_cpu_run_mins = (uint64_t)INFINITE; - if(assoc->max_cpus_pj == NO_VAL) + if (assoc->max_cpus_pj == NO_VAL) assoc->max_cpus_pj = INFINITE; - if(assoc->max_jobs == NO_VAL) + if (assoc->max_jobs == NO_VAL) assoc->max_jobs = INFINITE; - if(assoc->max_nodes_pj == NO_VAL) + if (assoc->max_nodes_pj == NO_VAL) assoc->max_nodes_pj = INFINITE; - if(assoc->max_submit_jobs == NO_VAL) + if (assoc->max_submit_jobs == NO_VAL) assoc->max_submit_jobs = INFINITE; - if(assoc->max_wall_pj == NO_VAL) + if (assoc->max_wall_pj == NO_VAL) assoc->max_wall_pj = INFINITE; case SLURMDB_MODIFY_ASSOC: case SLURMDB_REMOVE_ASSOC: @@ -186,40 +186,40 @@ extern int addto_update_list(List update_list, slurmdb_update_type_t type, /* We are going to send these to the slurmctld's so lets set up the correct limits to INIFINITE instead of NO_VAL */ - if(qos->grp_cpu_mins == (uint64_t)NO_VAL) + if (qos->grp_cpu_mins == (uint64_t)NO_VAL) qos->grp_cpu_mins = (uint64_t)INFINITE; - if(qos->grp_cpu_run_mins == (uint64_t)NO_VAL) + if (qos->grp_cpu_run_mins == (uint64_t)NO_VAL) qos->grp_cpu_run_mins = (uint64_t)INFINITE; - if(qos->grp_cpus == NO_VAL) + if (qos->grp_cpus == NO_VAL) qos->grp_cpus = INFINITE; - if(qos->grp_jobs == NO_VAL) + if (qos->grp_jobs == NO_VAL) qos->grp_jobs = INFINITE; - if(qos->grp_mem == NO_VAL) + if (qos->grp_mem == NO_VAL) qos->grp_mem = INFINITE; - if(qos->grp_nodes == NO_VAL) + if (qos->grp_nodes == NO_VAL) qos->grp_nodes = INFINITE; - if(qos->grp_submit_jobs == NO_VAL) + if (qos->grp_submit_jobs == NO_VAL) qos->grp_submit_jobs = INFINITE; - if(qos->grp_wall == 
NO_VAL) + if (qos->grp_wall == NO_VAL) qos->grp_wall = INFINITE; - if(qos->max_cpu_mins_pj == (uint64_t)NO_VAL) + if (qos->max_cpu_mins_pj == (uint64_t)NO_VAL) qos->max_cpu_mins_pj = (uint64_t)INFINITE; - if(qos->max_cpu_run_mins_pu == (uint64_t)NO_VAL) + if (qos->max_cpu_run_mins_pu == (uint64_t)NO_VAL) qos->max_cpu_run_mins_pu = (uint64_t)INFINITE; - if(qos->max_cpus_pj == NO_VAL) + if (qos->max_cpus_pj == NO_VAL) qos->max_cpus_pj = INFINITE; - if(qos->max_cpus_pu == NO_VAL) + if (qos->max_cpus_pu == NO_VAL) qos->max_cpus_pu = INFINITE; - if(qos->max_jobs_pu == NO_VAL) + if (qos->max_jobs_pu == NO_VAL) qos->max_jobs_pu = INFINITE; - if(qos->max_nodes_pj == NO_VAL) + if (qos->max_nodes_pj == NO_VAL) qos->max_nodes_pj = INFINITE; - if(qos->max_nodes_pu == NO_VAL) + if (qos->max_nodes_pu == NO_VAL) qos->max_nodes_pu = INFINITE; - if(qos->max_submit_jobs_pu == NO_VAL) + if (qos->max_submit_jobs_pu == NO_VAL) qos->max_submit_jobs_pu = INFINITE; - if(qos->max_wall_pj == NO_VAL) + if (qos->max_wall_pj == NO_VAL) qos->max_wall_pj = INFINITE; case SLURMDB_MODIFY_QOS: case SLURMDB_REMOVE_QOS: @@ -262,7 +262,7 @@ extern void dump_update_list(List update_list) debug3("========== DUMP UPDATE LIST =========="); itr = list_iterator_create(update_list); while((object = list_next(itr))) { - if(!object->objects || !list_count(object->objects)) { + if (!object->objects || !list_count(object->objects)) { debug3("\tUPDATE OBJECT WITH NO RECORDS, type: %d", object->type); continue; @@ -369,15 +369,15 @@ extern int set_usage_information(char **usage_table, slurmdbd_msg_type_t type, char *my_usage_table = (*usage_table); /* Default is going to be the last day */ - if(!end) { - if(!localtime_r(&my_time, &end_tm)) { + if (!end) { + if (!localtime_r(&my_time, &end_tm)) { error("Couldn't get localtime from end %ld", my_time); return SLURM_ERROR; } end_tm.tm_hour = 0; } else { - if(!localtime_r(&end, &end_tm)) { + if (!localtime_r(&end, &end_tm)) { error("Couldn't get localtime from user end 
%ld", end); return SLURM_ERROR; @@ -388,8 +388,8 @@ extern int set_usage_information(char **usage_table, slurmdbd_msg_type_t type, end_tm.tm_isdst = -1; end = mktime(&end_tm); - if(!start) { - if(!localtime_r(&my_time, &start_tm)) { + if (!start) { + if (!localtime_r(&my_time, &start_tm)) { error("Couldn't get localtime from start %ld", my_time); return SLURM_ERROR; @@ -397,7 +397,7 @@ extern int set_usage_information(char **usage_table, slurmdbd_msg_type_t type, start_tm.tm_hour = 0; start_tm.tm_mday--; } else { - if(!localtime_r(&start, &start_tm)) { + if (!localtime_r(&start, &start_tm)) { error("Couldn't get localtime from user start %ld", start); return SLURM_ERROR; @@ -408,9 +408,9 @@ extern int set_usage_information(char **usage_table, slurmdbd_msg_type_t type, start_tm.tm_isdst = -1; start = mktime(&start_tm); - if(end-start < 3600) { + if (end-start < 3600) { end = start + 3600; - if(!localtime_r(&end, &end_tm)) { + if (!localtime_r(&end, &end_tm)) { error("2 Couldn't get localtime from user end %ld", end); return SLURM_ERROR; @@ -420,7 +420,7 @@ extern int set_usage_information(char **usage_table, slurmdbd_msg_type_t type, * boundaries other wise use the day table. 
*/ //info("%d %d %d", start_tm.tm_hour, end_tm.tm_hour, end-start); - if(start_tm.tm_hour || end_tm.tm_hour || (end-start < 86400) + if (start_tm.tm_hour || end_tm.tm_hour || (end-start < 86400) || (end > my_time)) { switch (type) { case DBD_GET_ASSOC_USAGE: @@ -437,7 +437,7 @@ extern int set_usage_information(char **usage_table, slurmdbd_msg_type_t type, slurmdbd_msg_type_2_str(type, 1)); break; } - } else if(start_tm.tm_mday == 0 && end_tm.tm_mday == 0 + } else if (start_tm.tm_mday == 0 && end_tm.tm_mday == 0 && (end-start > 86400)) { switch (type) { case DBD_GET_ASSOC_USAGE: @@ -476,21 +476,21 @@ extern void merge_delta_qos_list(List qos_list, List delta_qos_list) char *new_qos = NULL, *curr_qos = NULL; while((new_qos = list_next(new_itr))) { - if(new_qos[0] == '-') { + if (new_qos[0] == '-') { while((curr_qos = list_next(curr_itr))) { - if(!strcmp(curr_qos, new_qos+1)) { + if (!strcmp(curr_qos, new_qos+1)) { list_delete_item(curr_itr); break; } } list_iterator_reset(curr_itr); - } else if(new_qos[0] == '+') { + } else if (new_qos[0] == '+') { while((curr_qos = list_next(curr_itr))) { - if(!strcmp(curr_qos, new_qos+1)) { + if (!strcmp(curr_qos, new_qos+1)) { break; } } - if(!curr_qos) { + if (!curr_qos) { list_append(qos_list, xstrdup(new_qos+1)); } list_iterator_reset(curr_itr); @@ -508,15 +508,17 @@ extern bool is_user_min_admin_level(void *db_conn, uid_t uid, * THERE IS NO AUTHENTICATION WHEN RUNNNING OUT OF THE * SLURMDBD! */ - if(slurmdbd_conf) { + if (slurmdbd_conf) { /* We have to check the authentication here in the * plugin since we don't know what accounts are being * referenced until after the query. 
*/ - if((uid != slurmdbd_conf->slurm_user_id && uid != 0) + if ((uid != slurmdbd_conf->slurm_user_id && uid != 0) && assoc_mgr_get_admin_level(db_conn, uid) < min_level) is_admin = 0; - } + } else if (uid != 0) + is_admin = 0; + return is_admin; } @@ -533,7 +535,7 @@ extern bool is_user_coord(slurmdb_user_rec_t *user, char *account) itr = list_iterator_create(user->coord_accts); while((coord = list_next(itr))) { - if(!strcasecmp(coord->name, account)) + if (!strcasecmp(coord->name, account)) break; } list_iterator_destroy(itr); @@ -543,7 +545,7 @@ extern bool is_user_coord(slurmdb_user_rec_t *user, char *account) extern bool is_user_any_coord(void *db_conn, slurmdb_user_rec_t *user) { xassert(user); - if(assoc_mgr_fill_in_user(db_conn, user, 1, NULL) != SLURM_SUCCESS) { + if (assoc_mgr_fill_in_user(db_conn, user, 1, NULL) != SLURM_SUCCESS) { error("couldn't get information for this user %s(%d)", user->name, user->uid); return 0; @@ -560,12 +562,12 @@ extern char *acct_get_db_name(void) char *db_name = NULL; char *location = slurm_get_accounting_storage_loc(); - if(!location) + if (!location) db_name = xstrdup(DEFAULT_ACCOUNTING_DB); else { int i = 0; while(location[i]) { - if(location[i] == '.' || location[i] == '/') { + if (location[i] == '.' 
|| location[i] == '/') { debug("%s doesn't look like a database " "name using %s", location, DEFAULT_ACCOUNTING_DB); @@ -573,7 +575,7 @@ extern char *acct_get_db_name(void) } i++; } - if(location[i]) { + if (location[i]) { db_name = xstrdup(DEFAULT_ACCOUNTING_DB); xfree(location); } else @@ -587,19 +589,19 @@ extern time_t archive_setup_end_time(time_t last_submit, uint32_t purge) struct tm time_tm; int16_t units; - if(purge == NO_VAL) { + if (purge == NO_VAL) { error("Invalid purge set"); return 0; } units = SLURMDB_PURGE_GET_UNITS(purge); - if(units < 0) { + if (units < 0) { error("invalid units from purge '%d'", units); return 0; } /* use localtime to avoid any daylight savings issues */ - if(!localtime_r(&last_submit, &time_tm)) { + if (!localtime_r(&last_submit, &time_tm)) { error("Couldn't get localtime from first " "suspend start %ld", (long)last_submit); return 0; @@ -608,12 +610,12 @@ extern time_t archive_setup_end_time(time_t last_submit, uint32_t purge) time_tm.tm_sec = 0; time_tm.tm_min = 0; - if(SLURMDB_PURGE_IN_HOURS(purge)) + if (SLURMDB_PURGE_IN_HOURS(purge)) time_tm.tm_hour -= units; - else if(SLURMDB_PURGE_IN_DAYS(purge)) { + else if (SLURMDB_PURGE_IN_DAYS(purge)) { time_tm.tm_hour = 0; time_tm.tm_mday -= units; - } else if(SLURMDB_PURGE_IN_MONTHS(purge)) { + } else if (SLURMDB_PURGE_IN_MONTHS(purge)) { time_tm.tm_hour = 0; time_tm.tm_mday = 1; time_tm.tm_mon -= units; @@ -663,8 +665,8 @@ extern int archive_run_script(slurmdb_archive_cond_t *arch_cond, env_array_append_fmt(&env, "SLURM_ARCHIVE_CLUSTER", "%s", cluster_name); - if(arch_cond->purge_event != NO_VAL) { - if(!(curr_end = archive_setup_end_time( + if (arch_cond->purge_event != NO_VAL) { + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_event))) { error("Parsing purge events failed"); return SLURM_ERROR; @@ -677,8 +679,8 @@ extern int archive_run_script(slurmdb_archive_cond_t *arch_cond, (long)curr_end); } - if(arch_cond->purge_job != NO_VAL) { - if(!(curr_end = 
archive_setup_end_time( + if (arch_cond->purge_job != NO_VAL) { + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_job))) { error("Parsing purge job failed"); return SLURM_ERROR; @@ -691,8 +693,22 @@ extern int archive_run_script(slurmdb_archive_cond_t *arch_cond, (long)curr_end); } - if(arch_cond->purge_step != NO_VAL) { - if(!(curr_end = archive_setup_end_time( + if (arch_cond->purge_resv != NO_VAL) { + if (!(curr_end = archive_setup_end_time( + last_submit, arch_cond->purge_resv))) { + error("Parsing purge resv failed"); + return SLURM_ERROR; + } + + env_array_append_fmt(&env, "SLURM_ARCHIVE_RESV", "%u", + SLURMDB_PURGE_ARCHIVE_SET( + arch_cond->purge_resv)); + env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_RESV", "%ld", + (long)curr_end); + } + + if (arch_cond->purge_step != NO_VAL) { + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_step))) { error("Parsing purge step"); return SLURM_ERROR; @@ -705,8 +721,8 @@ extern int archive_run_script(slurmdb_archive_cond_t *arch_cond, (long)curr_end); } - if(arch_cond->purge_suspend != NO_VAL) { - if(!(curr_end = archive_setup_end_time( + if (arch_cond->purge_suspend != NO_VAL) { + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_suspend))) { error("Parsing purge suspend"); return SLURM_ERROR; @@ -744,8 +760,8 @@ static char *_make_archive_name(time_t period_start, time_t period_end, time_tm.tm_min = 0; /* set up the start time based off the period we are purging */ - if(SLURMDB_PURGE_IN_HOURS(archive_period)) { - } else if(SLURMDB_PURGE_IN_DAYS(archive_period)) { + if (SLURMDB_PURGE_IN_HOURS(archive_period)) { + } else if (SLURMDB_PURGE_IN_DAYS(archive_period)) { time_tm.tm_hour = 0; } else { time_tm.tm_hour = 0; diff --git a/src/plugins/accounting_storage/common/common_as.h b/src/plugins/accounting_storage/common/common_as.h index b2f26bdb56d3777ce867f64dab87632fbe73e569..cedaf34e9155c2394fb735b451e75130f0563c26 100644 ---
a/src/plugins/accounting_storage/common/common_as.h +++ b/src/plugins/accounting_storage/common/common_as.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/filetxt/Makefile.in b/src/plugins/accounting_storage/filetxt/Makefile.in index c2c2420f68d60dd6af3bb97d58800a3d4fef00d1..8440f164e4c3fad72e6cb9d7d768c796b52de768 100644 --- a/src/plugins/accounting_storage/filetxt/Makefile.in +++ b/src/plugins/accounting_storage/filetxt/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/accounting_storage/filetxt DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ 
$(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -183,6 +187,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -203,6 +209,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -212,6 +221,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -219,6 +230,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -253,6 +273,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -280,6 +303,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git 
a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index bbd421a390805bae168c922355a03c609adae77a..8e929dcf6d096465b1618aaa97ccafa8d5e75546 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -138,7 +138,7 @@ static int _print_record(struct job_record *job_ptr, { static int rc=SLURM_SUCCESS; char *block_id = NULL; - if(!job_ptr->details) { + if (!job_ptr->details) { error("job_acct: job=%u doesn't exist", job_ptr->job_id); return SLURM_ERROR; } @@ -150,7 +150,7 @@ static int _print_record(struct job_record *job_ptr, &block_id); #endif - if(!block_id) + if (!block_id) block_id = xstrdup("-"); slurm_mutex_lock( &logfile_lock ); @@ -203,7 +203,7 @@ extern int init ( void ) mode_t prot = 0600; struct stat statbuf; - if(slurmdbd_conf) { + if (slurmdbd_conf) { fatal("The filetxt plugin should not " "be run from the slurmdbd. " "Please use a database plugin"); @@ -214,17 +214,17 @@ extern int init ( void ) * file in append mode stats could fail on it if the file * isn't world writable. 
*/ - if(first && (getuid() == slurm_get_slurm_user_id())) { + if (first && (getuid() == slurm_get_slurm_user_id())) { debug2("slurmdb_init() called"); log_file = slurm_get_accounting_storage_loc(); - if(!log_file) + if (!log_file) log_file = xstrdup(DEFAULT_STORAGE_LOC); slurm_mutex_lock( &logfile_lock ); if (LOGFILE) fclose(LOGFILE); if (*log_file != '/') - fatal("JobAcctLogfile must specify an " + fatal("AccountingStorageLoc must specify an " "absolute pathname"); if (stat(log_file, &statbuf)==0)/* preserve current file mode */ prot = statbuf.st_mode; @@ -567,7 +567,7 @@ extern int jobacct_storage_p_job_start(void *db_conn, long priority; int track_steps = 0; - if(!storage_init) { + if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } @@ -626,7 +626,7 @@ extern int jobacct_storage_p_job_complete(void *db_conn, uint16_t job_state; int duration; - if(!storage_init) { + if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } @@ -674,20 +674,20 @@ extern int jobacct_storage_p_step_start(void *db_conn, float float_tmp = 0; char *account, *step_name; - if(!storage_init) { + if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } #ifdef HAVE_BG - if(step_ptr->job_ptr->details) + if (step_ptr->job_ptr->details) cpus = step_ptr->job_ptr->details->min_cpus; else cpus = step_ptr->job_ptr->cpu_cnt; select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); - if(ionodes) { + if (ionodes) { snprintf(node_list, BUFFER_SIZE, "%s[%s]", step_ptr->job_ptr->nodes, ionodes); xfree(ionodes); @@ -696,7 +696,7 @@ extern int jobacct_storage_p_step_start(void *db_conn, step_ptr->job_ptr->nodes); #else - if(!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { + if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { cpus = step_ptr->job_ptr->total_cpus; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); 
} else { @@ -789,7 +789,7 @@ extern int jobacct_storage_p_step_complete(void *db_conn, char *account, *step_name; uint32_t exit_code; - if(!storage_init) { + if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } @@ -815,14 +815,14 @@ extern int jobacct_storage_p_step_complete(void *db_conn, comp_status = JOB_COMPLETE; #ifdef HAVE_BG - if(step_ptr->job_ptr->details) + if (step_ptr->job_ptr->details) cpus = step_ptr->job_ptr->details->min_cpus; else cpus = step_ptr->job_ptr->cpu_cnt; select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); - if(ionodes) { + if (ionodes) { snprintf(node_list, BUFFER_SIZE, "%s[%s]", step_ptr->job_ptr->nodes, ionodes); xfree(ionodes); @@ -831,7 +831,7 @@ extern int jobacct_storage_p_step_complete(void *db_conn, step_ptr->job_ptr->nodes); #else - if(!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { + if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { cpus = step_ptr->job_ptr->total_cpus; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); @@ -842,7 +842,7 @@ extern int jobacct_storage_p_step_complete(void *db_conn, } #endif /* figure out the ave of the totals sent */ - if(cpus > 0) { + if (cpus > 0) { ave_vsize = jobacct->tot_vsize; ave_vsize /= cpus; ave_rss = jobacct->tot_rss; @@ -853,7 +853,7 @@ extern int jobacct_storage_p_step_complete(void *db_conn, ave_cpu /= cpus; } - if(jobacct->min_cpu != (uint32_t)NO_VAL) { + if (jobacct->min_cpu != (uint32_t)NO_VAL) { ave_cpu2 = jobacct->min_cpu; } @@ -929,13 +929,13 @@ extern int jobacct_storage_p_suspend(void *db_conn, static time_t now = 0; static time_t temp = 0; int elapsed; - if(!storage_init) { + if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } /* tell what time has passed */ - if(!now) + if (!now) now = job_ptr->start_time; temp = now; now = time(NULL); diff --git 
a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index 4a98567513f1895fcd17efa14f03365e1ba696e0..9eb3f5dd3a10c0549dc81e12cc2a9de1077c0f74 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -74,7 +74,6 @@ typedef struct header { time_t timestamp; uint32_t uid; uint32_t gid; - uint16_t rec_type; } filetxt_header_t; typedef struct { @@ -206,7 +205,7 @@ enum { F_TOT_ELAPSED = HEADER_LENGTH, static void _destroy_exp(void *object) { expired_rec_t *exp_rec = (expired_rec_t *)object; - if(exp_rec) { + if (exp_rec) { xfree(exp_rec->line); xfree(exp_rec); } @@ -215,7 +214,7 @@ static void _destroy_exp(void *object) static void _free_filetxt_header(void *object) { filetxt_header_t *header = (filetxt_header_t *)object; - if(header) { + if (header) { xfree(header->partition); #ifdef HAVE_BG xfree(header->blockid); @@ -227,7 +226,7 @@ static void _destroy_filetxt_job_rec(void *object) { filetxt_job_rec_t *job = (filetxt_job_rec_t *)object; if (job) { - if(job->steps) + if (job->steps) list_destroy(job->steps); _free_filetxt_header(&job->header); xfree(job->jobname); @@ -255,10 +254,10 @@ static slurmdb_step_rec_t *_slurmdb_create_step_rec( slurmdb_step_rec_t *slurmdb_step = slurmdb_create_step_rec(); slurmdb_step->elapsed = filetxt_step->elapsed; - slurmdb_step->end = filetxt_step->header.timestamp; + slurmdb_step->end = filetxt_step->end; slurmdb_step->exitcode = filetxt_step->exitcode; slurmdb_step->ncpus = filetxt_step->ncpus; - 
if(filetxt_step->nodes) { + if (filetxt_step->nodes) { hostlist_t hl = hostlist_create(filetxt_step->nodes); slurmdb_step->nnodes = hostlist_count(hl); hostlist_destroy(hl); @@ -267,8 +266,7 @@ static slurmdb_step_rec_t *_slurmdb_create_step_rec( slurmdb_step->requid = filetxt_step->requid; memcpy(&slurmdb_step->stats, &filetxt_step->stats, sizeof(slurmdb_stats_t)); - slurmdb_step->start = filetxt_step->header.timestamp - - slurmdb_step->elapsed; + slurmdb_step->start = slurmdb_step->end - slurmdb_step->elapsed; slurmdb_step->state = filetxt_step->status; slurmdb_step->stepid = filetxt_step->stepnum; slurmdb_step->stepname = xstrdup(filetxt_step->stepname); @@ -289,7 +287,7 @@ static slurmdb_job_rec_t *_slurmdb_create_job_rec( ListIterator itr = NULL; filetxt_step_rec_t *filetxt_step = NULL; - if(!job_cond) + if (!job_cond) goto no_cond; if (job_cond->state_list @@ -316,7 +314,7 @@ no_cond: slurmdb_job->cluster = NULL; slurmdb_job->elapsed = filetxt_job->elapsed; slurmdb_job->eligible = filetxt_job->header.job_submit; - slurmdb_job->end = filetxt_job->header.timestamp; + slurmdb_job->end = filetxt_job->end; slurmdb_job->exitcode = filetxt_job->exitcode; slurmdb_job->gid = filetxt_job->header.gid; slurmdb_job->jobid = filetxt_job->header.jobnum; @@ -324,7 +322,7 @@ no_cond: slurmdb_job->partition = xstrdup(filetxt_job->header.partition); slurmdb_job->req_cpus = filetxt_job->ncpus; slurmdb_job->alloc_cpus = filetxt_job->ncpus; - if(filetxt_job->nodes) { + if (filetxt_job->nodes) { hostlist_t hl = hostlist_create(filetxt_job->nodes); slurmdb_job->alloc_nodes = hostlist_count(hl); hostlist_destroy(hl); @@ -335,19 +333,18 @@ no_cond: memcpy(&slurmdb_job->stats, &filetxt_job->stats, sizeof(slurmdb_stats_t)); slurmdb_job->show_full = filetxt_job->show_full; - slurmdb_job->start = filetxt_job->header.timestamp - - slurmdb_job->elapsed; + slurmdb_job->start = slurmdb_job->end - slurmdb_job->elapsed; slurmdb_job->state = filetxt_job->status; slurmdb_job->steps = 
list_create(slurmdb_destroy_step_rec); - if(filetxt_job->steps) { + if (filetxt_job->steps) { itr = list_iterator_create(filetxt_job->steps); while((filetxt_step = list_next(itr))) { slurmdb_step_rec_t *step = _slurmdb_create_step_rec(filetxt_step); - if(step) { + if (step) { step->job_ptr = slurmdb_job; - if(!slurmdb_job->first_step_ptr) + if (!slurmdb_job->first_step_ptr) slurmdb_job->first_step_ptr = step; list_append(slurmdb_job->steps, step); } @@ -469,20 +466,6 @@ static FILE *_open_log_file(char *logfile) return fd; } -static char *_convert_type(int rec_type) -{ - switch(rec_type) { - case JOB_START: - return "JOB_START"; - case JOB_STEP: - return "JOB_STEP"; - case JOB_TERMINATED: - return "JOB_TERMINATED"; - default: - return "UNKNOWN"; - } -} - static int _cmp_jrec(const void *a1, const void *a2) { expired_rec_t *j1 = (expired_rec_t *) a1; expired_rec_t *j2 = (expired_rec_t *) a2; @@ -490,7 +473,7 @@ static int _cmp_jrec(const void *a1, const void *a2) { if (j1->job < j2->job) return -1; else if (j1->job == j2->job) { - if(j1->job_submit == j2->job_submit) + if (j1->job_submit == j2->job_submit) return 0; else return 1; @@ -508,120 +491,6 @@ static void _show_rec(char *f[]) return; } -static void _do_fdump(char* f[], int lc) -{ - int i=0, j=0; - char **type; - char *header[] = {"job", /* F_JOB */ - "partition", /* F_PARTITION */ - "job_submit", /* F_JOB_SUBMIT */ - "timestamp", /* F_TIMESTAMP */ - "uid", /* F_UIDGID */ - "gid", /* F_UIDGID */ - "BlockID", /* F_BLOCKID */ - "reserved-2",/* F_RESERVED1 */ - "recordType",/* F_RECTYPE */ - NULL}; - - char *start[] = {"jobName", /* F_JOBNAME */ - "TrackSteps", /* F_TRACK_STEPS */ - "priority", /* F_PRIORITY */ - "ncpus", /* F_NCPUS */ - "nodeList", /* F_NODES */ - "account", /* F_JOB_ACCOUNT */ - NULL}; - - char *step[] = {"jobStep", /* F_JOBSTEP */ - "status", /* F_STATUS */ - "exitcode", /* F_EXITCODE */ - "ntasks", /* F_NTASKS */ - "ncpus", /* F_STEPNCPUS */ - "elapsed", /* F_ELAPSED */ - "cpu_sec", /* 
F_CPU_SEC */ - "cpu_usec", /* F_CPU_USEC */ - "user_sec", /* F_USER_SEC */ - "user_usec", /* F_USER_USEC */ - "sys_sec", /* F_SYS_SEC */ - "sys_usec", /* F_SYS_USEC */ - "rss", /* F_RSS */ - "ixrss", /* F_IXRSS */ - "idrss", /* F_IDRSS */ - "isrss", /* F_ISRSS */ - "minflt", /* F_MINFLT */ - "majflt", /* F_MAJFLT */ - "nswap", /* F_NSWAP */ - "inblocks", /* F_INBLOCKS */ - "oublocks", /* F_OUTBLOCKS */ - "msgsnd", /* F_MSGSND */ - "msgrcv", /* F_MSGRCV */ - "nsignals", /* F_NSIGNALS */ - "nvcsw", /* F_VCSW */ - "nivcsw", /* F_NIVCSW */ - "max_vsize", /* F_MAX_VSIZE */ - "max_vsize_task", /* F_MAX_VSIZE_TASK */ - "ave_vsize", /* F_AVE_VSIZE */ - "max_rss", /* F_MAX_RSS */ - "max_rss_task", /* F_MAX_RSS_TASK */ - "ave_rss", /* F_AVE_RSS */ - "max_pages", /* F_MAX_PAGES */ - "max_pages_task", /* F_MAX_PAGES_TASK */ - "ave_pages", /* F_AVE_PAGES */ - "min_cputime", /* F_MIN_CPU */ - "min_cputime_task", /* F_MIN_CPU_TASK */ - "ave_cputime", /* F_AVE_RSS */ - "StepName", /* F_STEPNAME */ - "StepNodes", /* F_STEPNODES */ - "max_vsize_node", /* F_MAX_VSIZE_NODE */ - "max_rss_node", /* F_MAX_RSS_NODE */ - "max_pages_node", /* F_MAX_PAGES_NODE */ - "min_cputime_node", /* F_MIN_CPU_NODE */ - "account", /* F_STEP_ACCOUNT */ - "requid", /* F_STEP_REQUID */ - NULL}; - - char *suspend[] = {"Suspend/Run time", /* F_TOT_ELAPSED */ - "status", /* F_STATUS */ - NULL}; - - char *term[] = {"totElapsed", /* F_TOT_ELAPSED */ - "status", /* F_STATUS */ - "requid", /* F_JOB_REQUID */ - "exitcode", /* F_EXITCODE */ - NULL}; - - i = atoi(f[F_RECTYPE]); - printf("\n------- Line %d %s -------\n", lc, _convert_type(i)); - - for(j=0; j < HEADER_LENGTH; j++) - printf("%12s: %s\n", header[j], f[j]); - switch(i) { - case JOB_START: - type = start; - j = JOB_START_LENGTH; - break; - case JOB_STEP: - type = step; - j = JOB_STEP_LENGTH; - break; - case JOB_SUSPEND: - type = suspend; - j = JOB_TERM_LENGTH; - case JOB_TERMINATED: - type = term; - j = JOB_TERM_LENGTH; - break; - default: - while(f[j]) { 
- printf(" Field[%02d]: %s\n", j, f[j]); - j++; - } - return; - } - - for(i=HEADER_LENGTH; i < j; i++) - printf("%12s: %s\n", type[i-HEADER_LENGTH], f[i]); -} - static filetxt_job_rec_t *_find_job_record(List job_list, filetxt_header_t header, int type) @@ -631,19 +500,19 @@ static filetxt_job_rec_t *_find_job_record(List job_list, while((job = (filetxt_job_rec_t *)list_next(itr)) != NULL) { if (job->header.jobnum == header.jobnum) { - if(job->header.job_submit == 0 && type == JOB_START) { + if (job->header.job_submit == 0 && type == JOB_START) { list_remove(itr); _destroy_filetxt_job_rec(job); job = NULL; break; } - if(job->header.job_submit == BATCH_JOB_TIMESTAMP) { + if (job->header.job_submit == BATCH_JOB_TIMESTAMP) { job->header.job_submit = header.job_submit; break; } - if(job->header.job_submit == header.job_submit) + if (job->header.job_submit == header.job_submit) break; else { /* If we're looking for a later @@ -667,7 +536,7 @@ static filetxt_step_rec_t *_find_step_record(filetxt_job_rec_t *job, filetxt_step_rec_t *step = NULL; ListIterator itr = NULL; - if(!list_count(job->steps)) + if (!list_count(job->steps)) return step; itr = list_iterator_create(job->steps); @@ -755,7 +624,7 @@ static int _parse_line(char *f[], void **data, int len) (*step)->rusage.ru_nvcsw = atoi(f[F_NVCSW]); (*step)->rusage.ru_nivcsw = atoi(f[F_NIVCSW]); (*step)->stats.vsize_max = atoi(f[F_MAX_VSIZE]); - if(len > F_STEPNODES) { + if (len > F_STEPNODES) { (*step)->stats.vsize_max_taskid = atoi(f[F_MAX_VSIZE_TASK]); (*step)->stats.vsize_ave = atof(f[F_AVE_VSIZE]); @@ -788,7 +657,7 @@ static int _parse_line(char *f[], void **data, int len) (*step)->stepname = NULL; (*step)->nodes = NULL; } - if(len > F_MIN_CPU_NODE) { + if (len > F_MIN_CPU_NODE) { (*step)->stats.vsize_max_nodeid = atoi(f[F_MAX_VSIZE_NODE]); (*step)->stats.rss_max_nodeid = @@ -803,9 +672,9 @@ static int _parse_line(char *f[], void **data, int len) (*step)->stats.pages_max_nodeid = NO_VAL; (*step)->stats.cpu_min_nodeid 
= NO_VAL; } - if(len > F_STEP_ACCOUNT) + if (len > F_STEP_ACCOUNT) (*step)->account = xstrdup(f[F_STEP_ACCOUNT]); - if(len > F_STEP_REQUID) + if (len > F_STEP_REQUID) (*step)->requid = atoi(f[F_STEP_REQUID]); break; case JOB_SUSPEND: @@ -813,9 +682,9 @@ static int _parse_line(char *f[], void **data, int len) *job = _create_filetxt_job_rec(header); (*job)->elapsed = atoi(f[F_TOT_ELAPSED]); (*job)->status = atoi(f[F_STATUS]); - if(len > F_JOB_REQUID) + if (len > F_JOB_REQUID) (*job)->requid = atoi(f[F_JOB_REQUID]); - if(len > F_JOB_EXITCODE) + if (len > F_JOB_EXITCODE) (*job)->exitcode = atoi(f[F_JOB_EXITCODE]); break; default: @@ -918,7 +787,7 @@ static void _process_step(List job_list, char *f[], int lc, step = temp; temp = NULL; list_append(job->steps, step); - if(!job->track_steps) { + if (!job->track_steps) { /* If we don't have track_steps we want to see if we have multiple steps. If we only have 1 step check the job name against the step @@ -926,19 +795,19 @@ static void _process_step(List job_list, char *f[], int lc, different. If it is different print out the step separate. 
*/ - if(list_count(job->steps) > 1) + if (list_count(job->steps) > 1) job->track_steps = 1; - else if(step && step->stepname && job->jobname) { - if(strcmp(step->stepname, job->jobname)) + else if (step && step->stepname && job->jobname) { + if (strcmp(step->stepname, job->jobname)) job->track_steps = 1; } } - if(job->header.timestamp == 0) + if (job->header.timestamp == 0) job->header.timestamp = step->header.timestamp; job->job_step_seen = 1; job->ntasks += step->ntasks; - if(!job->nodes || !strcmp(job->nodes, "(unknown)")) { + if (!job->nodes || !strcmp(job->nodes, "(unknown)")) { xfree(job->nodes); job->nodes = xstrdup(step->nodes); } @@ -1020,7 +889,7 @@ static void _process_terminated(List job_list, char *f[], int lc, job->status = temp->status; job->requid = temp->requid; job->exitcode = temp->exitcode; - if(list_count(job->steps) > 1) + if (list_count(job->steps) > 1) job->track_steps = 1; job->show_full = show_full; @@ -1045,21 +914,13 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) char *object = NULL; ListIterator itr = NULL, itr2 = NULL; int show_full = 0; - int fdump_flag = 0; List ret_job_list = list_create(slurmdb_destroy_job_rec); List job_list = list_create(_destroy_filetxt_job_rec); filein = slurm_get_accounting_storage_loc(); - /* we grab the fdump only for the filetxt plug through the - FDUMP_FLAG on the job_cond->duplicates variable. We didn't - add this extra field to the structure since it only applies - to this plugin. 
- */ - if(job_cond) { - fdump_flag = job_cond->duplicates & FDUMP_FLAG; - job_cond->duplicates &= (~FDUMP_FLAG); - if(!job_cond->duplicates) + if (job_cond) { + if (!job_cond->duplicates) itr2 = list_iterator_create(ret_job_list); } @@ -1094,12 +955,12 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) uid = atoi(f[F_UID]); gid = atoi(f[F_GID]); - if(rec_type == JOB_STEP) + if (rec_type == JOB_STEP) step_id = atoi(f[F_JOBSTEP]); else step_id = NO_VAL; - if(!job_cond) { + if (!job_cond) { show_full = 1; goto no_cond; } @@ -1132,7 +993,7 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) } foundgid: - if (job_cond->jobname_list + if ((rec_type == JOB_START) && job_cond->jobname_list && list_count(job_cond->jobname_list)) { itr = list_iterator_create(job_cond->jobname_list); while((object = list_next(itr))) { @@ -1153,7 +1014,7 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) if (selected_step->jobid != job_id) continue; /* job matches; does the step? 
*/ - if(selected_step->stepid == NO_VAL) { + if (selected_step->stepid == NO_VAL) { show_full = 1; list_iterator_destroy(itr); goto foundjob; @@ -1171,7 +1032,7 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) } foundjob: - if (job_cond->partition_list + if ((rec_type == JOB_START) && job_cond->partition_list && list_count(job_cond->partition_list)) { itr = list_iterator_create(job_cond->partition_list); while((object = list_next(itr))) @@ -1183,31 +1044,27 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) continue; /* no match */ } foundp: - if (fdump_flag) { - _do_fdump(f, lc); - continue; - } no_cond: /* Build suitable tables with all the data */ switch(rec_type) { case JOB_START: - if(i < F_JOB_ACCOUNT) { + if (i < F_JOB_ACCOUNT) { error("Bad data on a Job Start"); _show_rec(f); } else _process_start(job_list, f, lc, show_full, i); break; case JOB_STEP: - if(i < F_MAX_VSIZE) { + if (i < F_MAX_VSIZE) { error("Bad data on a Step entry"); _show_rec(f); } else _process_step(job_list, f, lc, show_full, i); break; case JOB_SUSPEND: - if(i < F_JOB_REQUID) { + if (i < F_JOB_REQUID) { error("Bad data on a Suspend entry"); _show_rec(f); } else @@ -1215,7 +1072,7 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) show_full, i); break; case JOB_TERMINATED: - if(i < F_JOB_REQUID) { + if (i < F_JOB_REQUID) { error("Bad data on a Job Term"); _show_rec(f); } else @@ -1240,9 +1097,9 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) while((filetxt_job = list_next(itr))) { slurmdb_job_rec_t *slurmdb_job = _slurmdb_create_job_rec(filetxt_job, job_cond); - if(slurmdb_job) { + if (slurmdb_job) { slurmdb_job_rec_t *curr_job = NULL; - if(itr2) { + if (itr2) { list_iterator_reset(itr2); while((curr_job = list_next(itr2))) { if (curr_job->jobid == @@ -1258,7 +1115,7 @@ extern List filetxt_jobacct_process_get_jobs(slurmdb_job_cond_t *job_cond) } } - if(itr2) + if (itr2) 
list_iterator_destroy(itr2); list_iterator_destroy(itr); @@ -1303,14 +1160,14 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) /* Figure out our expiration date */ time_t expiry; - if(!arch_cond || !arch_cond->job_cond) { + if (!arch_cond || !arch_cond->job_cond) { error("no job_cond was given for archive"); return SLURM_ERROR; } job_cond = arch_cond->job_cond; - if(!arch_cond->archive_script) + if (!arch_cond->archive_script) filein = slurm_get_accounting_storage_loc(); else filein = arch_cond->archive_script; @@ -1362,6 +1219,8 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) exp_rec = xmalloc(sizeof(expired_rec_t)); exp_rec->line = xstrdup(line); + for (i = 0; i < EXPIRE_READ_LENGTH; i++) + f[i] = fptr; /* Initialization for bad data read */ for (i = 0; i < EXPIRE_READ_LENGTH; i++) { f[i] = fptr; fptr = strstr(fptr, " "); @@ -1381,7 +1240,7 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) list_append(keep_list, exp_rec); continue; } - if (job_cond->partition_list + if ((rec_type == JOB_START) && job_cond->partition_list && list_count(job_cond->partition_list)) { itr = list_iterator_create( job_cond->partition_list); @@ -1390,7 +1249,7 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) break; list_iterator_destroy(itr); - if(!object) + if (!object) continue; /* no match */ } @@ -1419,7 +1278,7 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) if (new_file) { /* By default, the expired file looks like the log */ chmod(logfile_name, prot); - if(chown(logfile_name, uid, gid) == -1) + if (chown(logfile_name, uid, gid) == -1) error("Couldn't change ownership of %s to %u:%u", logfile_name, uid, gid); } @@ -1434,7 +1293,7 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) goto finished; } chmod(logfile_name, prot); /* preserve file protection */ - if(chown(logfile_name, uid, gid) == 
-1)/* and ownership */ + if (chown(logfile_name, uid, gid) == -1)/* and ownership */ error("2 Couldn't change ownership of %s to %u:%u", logfile_name, uid, gid); /* Use line buffering to allow us to safely write @@ -1461,7 +1320,7 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) while((exp_rec = list_next(itr))) { itr2 = list_iterator_create(other_list); while((exp_rec2 = list_next(itr2))) { - if((exp_rec2->job != exp_rec->job) + if ((exp_rec2->job != exp_rec->job) || (exp_rec2->job_submit != exp_rec->job_submit)) continue; if (fputs(exp_rec2->line, expired_logfile)<0) { @@ -1490,7 +1349,7 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) while((exp_rec = list_next(itr))) { itr2 = list_iterator_create(other_list); while((exp_rec2 = list_next(itr2))) { - if(exp_rec2->job != exp_rec->job) + if (exp_rec2->job != exp_rec->job) continue; if (fputs(exp_rec2->line, new_logfile)<0) { perror("writing keep_logfile"); diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h index a6eff13191e32bb59e3e64b92d83f4f469ab0afd..1785e051a3eb2fe4a7bda360639ed690fd810e4f 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/Makefile.in b/src/plugins/accounting_storage/mysql/Makefile.in index e464ba1ba0466804e40cbcbe5fe8f42eaf0cd0f3..3dc90228494ef6aa7d48111dff978c8f69e2617b 100644 --- a/src/plugins/accounting_storage/mysql/Makefile.in +++ b/src/plugins/accounting_storage/mysql/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/accounting_storage/mysql DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -233,6 +237,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -253,6 +259,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ 
FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -262,6 +271,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -269,6 +280,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -303,6 +323,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -330,6 +353,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 2d26060aac48c1edd7f3afef2e6c24aad9c5bb46..a8e65bcc8e35b4af8ae5ed75ead111c17197c63d 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@schedmd.com, 
da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -935,6 +935,7 @@ extern int create_cluster_tables(mysql_conn_t *mysql_conn, char *cluster_name) { "id_user", "int unsigned not null" }, { "id_group", "int unsigned not null" }, { "kill_requid", "int default -1 not null" }, + { "mem_req", "int unsigned default 0 not null" }, { "nodelist", "text" }, { "nodes_alloc", "int unsigned not null" }, { "node_inx", "text" }, @@ -998,23 +999,32 @@ extern int create_cluster_tables(mysql_conn_t *mysql_conn, char *cluster_name) { "sys_sec", "int unsigned default 0 not null" }, { "sys_usec", "int unsigned default 0 not null" }, { "max_pages", "int unsigned default 0 not null" }, - { "max_pages_task", "smallint unsigned default 0 not null" }, + { "max_pages_task", "int unsigned default 0 not null" }, { "max_pages_node", "int unsigned default 0 not null" }, { "ave_pages", "double unsigned default 0.0 not null" }, { "max_rss", "bigint unsigned default 0 not null" }, - { "max_rss_task", "smallint unsigned default 0 not null" }, + { "max_rss_task", "int unsigned default 0 not null" }, { "max_rss_node", "int unsigned default 0 not null" }, { "ave_rss", "double unsigned default 0.0 not null" }, { "max_vsize", "bigint unsigned default 0 not null" }, - { "max_vsize_task", "smallint unsigned default 0 not null" }, + { "max_vsize_task", "int unsigned default 0 not null" }, { "max_vsize_node", "int unsigned default 0 not null" }, { "ave_vsize", "double unsigned default 0.0 not null" }, { "min_cpu", "int unsigned default 0 not null" }, - { "min_cpu_task", "smallint unsigned default 0 not null" }, + { "min_cpu_task", "int unsigned default 0 not null" }, { "min_cpu_node", "int unsigned default 0 not null" }, { "ave_cpu", "double unsigned 
default 0.0 not null" }, { "act_cpufreq", "double unsigned default 0.0 not null" }, { "consumed_energy", "double unsigned default 0.0 not null" }, + { "req_cpufreq", "int unsigned default 0 not null" }, + { "max_disk_read", "double unsigned default 0.0 not null" }, + { "max_disk_read_task", "int unsigned default 0 not null" }, + { "max_disk_read_node", "int unsigned default 0 not null" }, + { "ave_disk_read", "double unsigned default 0.0 not null" }, + { "max_disk_write", "double unsigned default 0.0 not null" }, + { "max_disk_write_task", "int unsigned default 0 not null" }, + { "max_disk_write_node", "int unsigned default 0 not null" }, + { "ave_disk_write", "double unsigned default 0.0 not null" }, { NULL, NULL} }; diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.h b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.h index 0e0fa9c4fcc6d7552235b5448fcb6aba53c7bf19..19120025bb1e32fe7b8a2437dd0dab4bc8fba08f 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.h +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_acct.c b/src/plugins/accounting_storage/mysql/as_mysql_acct.c index 21e82a4a2c7b8e67f3e27f6ff4c04d66e785c87d..6d188e742dc2c2f88515cecf15731cda9b688257 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_acct.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_acct.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -667,7 +667,7 @@ empty: acct_list = list_create(slurmdb_destroy_account_rec); - if (acct_cond && acct_cond->with_assocs) { + if (acct_cond && acct_cond->with_assocs && acct_cond->assoc_cond) { /* We are going to be freeing the inners of this list in the acct->name so we don't free it here diff --git a/src/plugins/accounting_storage/mysql/as_mysql_acct.h b/src/plugins/accounting_storage/mysql/as_mysql_acct.h index d4c27037d8c147a7fbef3ee0a51db2e2c52e7b92..a15714bf22dceca9f6f644eb43541edc7e8c58f7 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_acct.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_acct.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_archive.c b/src/plugins/accounting_storage/mysql/as_mysql_archive.c index ec9076359f54345f18829cc3668eda873c2afa16..8da0adcaecc5b1f1d0aca9f979893a96c4de5282 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_archive.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_archive.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -79,6 +79,7 @@ typedef struct { char *priority; char *qos; char *req_cpus; + char *req_mem; char *resvid; char *start; char *state; @@ -92,14 +93,36 @@ typedef struct { } local_job_t; typedef struct { + char *assocs; + char *cpus; + char *flags; + char *id; + char *name; + char *nodes; + char *node_inx; + char *time_end; + char *time_start; +} local_resv_t; + +typedef struct { + char *act_cpufreq; char *ave_cpu; + char *ave_disk_read; + char *ave_disk_write; char *ave_pages; char *ave_rss; char *ave_vsize; char *exit_code; + char *consumed_energy; char *cpus; char *id; char *kill_requid; + char *max_disk_read; + char *max_disk_read_node; + char *max_disk_read_task; + char *max_disk_write; + char *max_disk_write_node; + char *max_disk_write_task; char *max_pages; char *max_pages_node; char *max_pages_task; @@ -119,6 +142,7 @@ typedef struct { char *period_end; char *period_start; char *period_suspended; + char *req_cpufreq; char *state; char *stepid; char *sys_sec; @@ -127,8 +151,6 @@ typedef struct { char *task_dist; char *user_sec; char *user_usec; - char *act_cpufreq; - char *consumed_energy; } local_step_t; typedef struct { @@ -188,6 +210,7 @@ static char *job_req_inx[] = { "priority", "id_qos", "cpus_req", + "mem_req", "id_resv", "time_start", "state", @@ -223,6 +246,7 @@ enum { JOB_REQ_PRIORITY, JOB_REQ_QOS, JOB_REQ_REQ_CPUS, + JOB_REQ_REQ_MEM, JOB_REQ_START, JOB_REQ_STATE, JOB_REQ_SUBMIT, @@ -234,6 +258,32 @@ enum { JOB_REQ_COUNT }; +/* if this changes you will need to edit the corresponding enum */ +char *resv_req_inx[] = { + "id_resv", + "assoclist", + "cpus", + "flags", + "nodelist", + "node_inx", + "resv_name", + "time_start", + "time_end", +}; + +enum { + RESV_REQ_ID, + RESV_REQ_ASSOCS, + RESV_REQ_CPUS, + RESV_REQ_FLAGS, + RESV_REQ_NODES, + RESV_REQ_NODE_INX, + RESV_REQ_NAME, + RESV_REQ_START, + RESV_REQ_END, + RESV_REQ_COUNT +}; + /* if this changes you will need to edit the 
corresponding * enum below */ static char *step_req_inx[] = { @@ -273,7 +323,16 @@ static char *step_req_inx[] = { "min_cpu_node", "ave_cpu", "act_cpufreq", - "consumed_energy" + "consumed_energy", + "req_cpufreq", + "max_disk_read", + "max_disk_read_task", + "max_disk_read_node", + "ave_disk_read", + "max_disk_write", + "max_disk_write_task", + "max_disk_write_node", + "ave_disk_write" }; @@ -315,7 +374,16 @@ enum { STEP_REQ_AVE_CPU, STEP_REQ_ACT_CPUFREQ, STEP_REQ_CONSUMED_ENERGY, - STEP_REQ_COUNT + STEP_REQ_REQ_CPUFREQ, + STEP_REQ_MAX_DISK_READ, + STEP_REQ_MAX_DISK_READ_TASK, + STEP_REQ_MAX_DISK_READ_NODE, + STEP_REQ_AVE_DISK_READ, + STEP_REQ_MAX_DISK_WRITE, + STEP_REQ_MAX_DISK_WRITE_TASK, + STEP_REQ_MAX_DISK_WRITE_NODE, + STEP_REQ_AVE_DISK_WRITE, + STEP_REQ_COUNT, }; /* if this changes you will need to edit the corresponding @@ -394,6 +462,7 @@ static void _pack_local_job(local_job_t *object, packstr(object->priority, buffer); packstr(object->qos, buffer); packstr(object->req_cpus, buffer); + packstr(object->req_mem, buffer); packstr(object->resvid, buffer); packstr(object->start, buffer); packstr(object->state, buffer); @@ -412,37 +481,105 @@ static int _unpack_local_job(local_job_t *object, { uint32_t tmp32; - unpackstr_ptr(&object->account, &tmp32, buffer); - unpackstr_ptr(&object->alloc_cpus, &tmp32, buffer); - unpackstr_ptr(&object->alloc_nodes, &tmp32, buffer); - unpackstr_ptr(&object->associd, &tmp32, buffer); - unpackstr_ptr(&object->blockid, &tmp32, buffer); - unpackstr_ptr(&object->derived_ec, &tmp32, buffer); - unpackstr_ptr(&object->derived_es, &tmp32, buffer); - unpackstr_ptr(&object->exit_code, &tmp32, buffer); - unpackstr_ptr(&object->timelimit, &tmp32, buffer); - unpackstr_ptr(&object->eligible, &tmp32, buffer); - unpackstr_ptr(&object->end, &tmp32, buffer); - unpackstr_ptr(&object->gid, &tmp32, buffer); + if (rpc_version >= SLURMDBD_2_6_VERSION) { + unpackstr_ptr(&object->account, &tmp32, buffer); + unpackstr_ptr(&object->alloc_cpus, &tmp32, 
buffer); + unpackstr_ptr(&object->alloc_nodes, &tmp32, buffer); + unpackstr_ptr(&object->associd, &tmp32, buffer); + unpackstr_ptr(&object->blockid, &tmp32, buffer); + unpackstr_ptr(&object->derived_ec, &tmp32, buffer); + unpackstr_ptr(&object->derived_es, &tmp32, buffer); + unpackstr_ptr(&object->exit_code, &tmp32, buffer); + unpackstr_ptr(&object->timelimit, &tmp32, buffer); + unpackstr_ptr(&object->eligible, &tmp32, buffer); + unpackstr_ptr(&object->end, &tmp32, buffer); + unpackstr_ptr(&object->gid, &tmp32, buffer); + unpackstr_ptr(&object->id, &tmp32, buffer); + unpackstr_ptr(&object->jobid, &tmp32, buffer); + unpackstr_ptr(&object->kill_requid, &tmp32, buffer); + unpackstr_ptr(&object->name, &tmp32, buffer); + unpackstr_ptr(&object->nodelist, &tmp32, buffer); + unpackstr_ptr(&object->node_inx, &tmp32, buffer); + unpackstr_ptr(&object->partition, &tmp32, buffer); + unpackstr_ptr(&object->priority, &tmp32, buffer); + unpackstr_ptr(&object->qos, &tmp32, buffer); + unpackstr_ptr(&object->req_cpus, &tmp32, buffer); + unpackstr_ptr(&object->req_mem, &tmp32, buffer); + unpackstr_ptr(&object->resvid, &tmp32, buffer); + unpackstr_ptr(&object->start, &tmp32, buffer); + unpackstr_ptr(&object->state, &tmp32, buffer); + unpackstr_ptr(&object->submit, &tmp32, buffer); + unpackstr_ptr(&object->suspended, &tmp32, buffer); + unpackstr_ptr(&object->track_steps, &tmp32, buffer); + unpackstr_ptr(&object->uid, &tmp32, buffer); + unpackstr_ptr(&object->wckey, &tmp32, buffer); + unpackstr_ptr(&object->wckey_id, &tmp32, buffer); + } else { + unpackstr_ptr(&object->account, &tmp32, buffer); + unpackstr_ptr(&object->alloc_cpus, &tmp32, buffer); + unpackstr_ptr(&object->alloc_nodes, &tmp32, buffer); + unpackstr_ptr(&object->associd, &tmp32, buffer); + unpackstr_ptr(&object->blockid, &tmp32, buffer); + unpackstr_ptr(&object->derived_ec, &tmp32, buffer); + unpackstr_ptr(&object->derived_es, &tmp32, buffer); + unpackstr_ptr(&object->exit_code, &tmp32, buffer); + 
unpackstr_ptr(&object->timelimit, &tmp32, buffer); + unpackstr_ptr(&object->eligible, &tmp32, buffer); + unpackstr_ptr(&object->end, &tmp32, buffer); + unpackstr_ptr(&object->gid, &tmp32, buffer); + unpackstr_ptr(&object->id, &tmp32, buffer); + unpackstr_ptr(&object->jobid, &tmp32, buffer); + unpackstr_ptr(&object->kill_requid, &tmp32, buffer); + unpackstr_ptr(&object->name, &tmp32, buffer); + unpackstr_ptr(&object->nodelist, &tmp32, buffer); + unpackstr_ptr(&object->node_inx, &tmp32, buffer); + unpackstr_ptr(&object->partition, &tmp32, buffer); + unpackstr_ptr(&object->priority, &tmp32, buffer); + unpackstr_ptr(&object->qos, &tmp32, buffer); + unpackstr_ptr(&object->req_cpus, &tmp32, buffer); + unpackstr_ptr(&object->resvid, &tmp32, buffer); + unpackstr_ptr(&object->start, &tmp32, buffer); + unpackstr_ptr(&object->state, &tmp32, buffer); + unpackstr_ptr(&object->submit, &tmp32, buffer); + unpackstr_ptr(&object->suspended, &tmp32, buffer); + unpackstr_ptr(&object->track_steps, &tmp32, buffer); + unpackstr_ptr(&object->uid, &tmp32, buffer); + unpackstr_ptr(&object->wckey, &tmp32, buffer); + unpackstr_ptr(&object->wckey_id, &tmp32, buffer); + } + return SLURM_SUCCESS; +} + +static void _pack_local_resv(local_resv_t *object, + uint16_t rpc_version, Buf buffer) +{ + packstr(object->assocs, buffer); + packstr(object->cpus, buffer); + packstr(object->flags, buffer); + packstr(object->id, buffer); + packstr(object->name, buffer); + packstr(object->nodes, buffer); + packstr(object->node_inx, buffer); + packstr(object->time_end, buffer); + packstr(object->time_start, buffer); +} + +/* this needs to be allocated before calling, and since we aren't + * doing any copying it needs to be used before destroying buffer */ +static int _unpack_local_resv(local_resv_t *object, + uint16_t rpc_version, Buf buffer) +{ + uint32_t tmp32; + + unpackstr_ptr(&object->assocs, &tmp32, buffer); + unpackstr_ptr(&object->cpus, &tmp32, buffer); + unpackstr_ptr(&object->flags, &tmp32, buffer); 
unpackstr_ptr(&object->id, &tmp32, buffer); - unpackstr_ptr(&object->jobid, &tmp32, buffer); - unpackstr_ptr(&object->kill_requid, &tmp32, buffer); unpackstr_ptr(&object->name, &tmp32, buffer); - unpackstr_ptr(&object->nodelist, &tmp32, buffer); + unpackstr_ptr(&object->nodes, &tmp32, buffer); unpackstr_ptr(&object->node_inx, &tmp32, buffer); - unpackstr_ptr(&object->partition, &tmp32, buffer); - unpackstr_ptr(&object->priority, &tmp32, buffer); - unpackstr_ptr(&object->qos, &tmp32, buffer); - unpackstr_ptr(&object->req_cpus, &tmp32, buffer); - unpackstr_ptr(&object->resvid, &tmp32, buffer); - unpackstr_ptr(&object->start, &tmp32, buffer); - unpackstr_ptr(&object->state, &tmp32, buffer); - unpackstr_ptr(&object->submit, &tmp32, buffer); - unpackstr_ptr(&object->suspended, &tmp32, buffer); - unpackstr_ptr(&object->track_steps, &tmp32, buffer); - unpackstr_ptr(&object->uid, &tmp32, buffer); - unpackstr_ptr(&object->wckey, &tmp32, buffer); - unpackstr_ptr(&object->wckey_id, &tmp32, buffer); + unpackstr_ptr(&object->time_end, &tmp32, buffer); + unpackstr_ptr(&object->time_start, &tmp32, buffer); return SLURM_SUCCESS; } @@ -450,7 +587,54 @@ static int _unpack_local_job(local_job_t *object, static void _pack_local_step(local_step_t *object, uint16_t rpc_version, Buf buffer) { - if (rpc_version >= SLURMDBD_2_5_VERSION) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { + packstr(object->act_cpufreq, buffer); + packstr(object->ave_cpu, buffer); + packstr(object->ave_disk_read, buffer); + packstr(object->ave_disk_write, buffer); + packstr(object->ave_pages, buffer); + packstr(object->ave_rss, buffer); + packstr(object->ave_vsize, buffer); + packstr(object->exit_code, buffer); + packstr(object->consumed_energy, buffer); + packstr(object->cpus, buffer); + packstr(object->id, buffer); + packstr(object->kill_requid, buffer); + packstr(object->max_disk_read, buffer); + packstr(object->max_disk_read_node, buffer); + packstr(object->max_disk_read_task, buffer); + 
packstr(object->max_disk_write, buffer); + packstr(object->max_disk_write_node, buffer); + packstr(object->max_disk_write_task, buffer); + packstr(object->max_pages, buffer); + packstr(object->max_pages_node, buffer); + packstr(object->max_pages_task, buffer); + packstr(object->max_rss, buffer); + packstr(object->max_rss_node, buffer); + packstr(object->max_rss_task, buffer); + packstr(object->max_vsize, buffer); + packstr(object->max_vsize_node, buffer); + packstr(object->max_vsize_task, buffer); + packstr(object->min_cpu, buffer); + packstr(object->min_cpu_node, buffer); + packstr(object->min_cpu_task, buffer); + packstr(object->name, buffer); + packstr(object->nodelist, buffer); + packstr(object->nodes, buffer); + packstr(object->node_inx, buffer); + packstr(object->period_end, buffer); + packstr(object->period_start, buffer); + packstr(object->period_suspended, buffer); + packstr(object->req_cpufreq, buffer); + packstr(object->state, buffer); + packstr(object->stepid, buffer); + packstr(object->sys_sec, buffer); + packstr(object->sys_usec, buffer); + packstr(object->tasks, buffer); + packstr(object->task_dist, buffer); + packstr(object->user_sec, buffer); + packstr(object->user_usec, buffer); + } else if (rpc_version >= SLURMDBD_2_5_VERSION) { packstr(object->act_cpufreq, buffer); packstr(object->ave_cpu, buffer); packstr(object->ave_pages, buffer); @@ -534,7 +718,54 @@ static int _unpack_local_step(local_step_t *object, { uint32_t tmp32; - if (rpc_version >= SLURMDBD_2_5_VERSION) { + if (rpc_version >= SLURMDBD_2_6_VERSION) { + unpackstr_ptr(&object->act_cpufreq, &tmp32, buffer); + unpackstr_ptr(&object->ave_cpu, &tmp32, buffer); + unpackstr_ptr(&object->ave_disk_read, &tmp32, buffer); + unpackstr_ptr(&object->ave_disk_write, &tmp32, buffer); + unpackstr_ptr(&object->ave_pages, &tmp32, buffer); + unpackstr_ptr(&object->ave_rss, &tmp32, buffer); + unpackstr_ptr(&object->ave_vsize, &tmp32, buffer); + unpackstr_ptr(&object->exit_code, &tmp32, buffer); + 
unpackstr_ptr(&object->consumed_energy, &tmp32, buffer); + unpackstr_ptr(&object->cpus, &tmp32, buffer); + unpackstr_ptr(&object->id, &tmp32, buffer); + unpackstr_ptr(&object->kill_requid, &tmp32, buffer); + unpackstr_ptr(&object->max_disk_read, &tmp32, buffer); + unpackstr_ptr(&object->max_disk_read_node, &tmp32, buffer); + unpackstr_ptr(&object->max_disk_read_task, &tmp32, buffer); + unpackstr_ptr(&object->max_disk_write, &tmp32, buffer); + unpackstr_ptr(&object->max_disk_write_node, &tmp32, buffer); + unpackstr_ptr(&object->max_disk_write_task, &tmp32, buffer); + unpackstr_ptr(&object->max_pages, &tmp32, buffer); + unpackstr_ptr(&object->max_pages_node, &tmp32, buffer); + unpackstr_ptr(&object->max_pages_task, &tmp32, buffer); + unpackstr_ptr(&object->max_rss, &tmp32, buffer); + unpackstr_ptr(&object->max_rss_node, &tmp32, buffer); + unpackstr_ptr(&object->max_rss_task, &tmp32, buffer); + unpackstr_ptr(&object->max_vsize, &tmp32, buffer); + unpackstr_ptr(&object->max_vsize_node, &tmp32, buffer); + unpackstr_ptr(&object->max_vsize_task, &tmp32, buffer); + unpackstr_ptr(&object->min_cpu, &tmp32, buffer); + unpackstr_ptr(&object->min_cpu_node, &tmp32, buffer); + unpackstr_ptr(&object->min_cpu_task, &tmp32, buffer); + unpackstr_ptr(&object->name, &tmp32, buffer); + unpackstr_ptr(&object->nodelist, &tmp32, buffer); + unpackstr_ptr(&object->nodes, &tmp32, buffer); + unpackstr_ptr(&object->node_inx, &tmp32, buffer); + unpackstr_ptr(&object->period_end, &tmp32, buffer); + unpackstr_ptr(&object->period_start, &tmp32, buffer); + unpackstr_ptr(&object->period_suspended, &tmp32, buffer); + unpackstr_ptr(&object->req_cpufreq, &tmp32, buffer); + unpackstr_ptr(&object->state, &tmp32, buffer); + unpackstr_ptr(&object->stepid, &tmp32, buffer); + unpackstr_ptr(&object->sys_sec, &tmp32, buffer); + unpackstr_ptr(&object->sys_usec, &tmp32, buffer); + unpackstr_ptr(&object->tasks, &tmp32, buffer); + unpackstr_ptr(&object->task_dist, &tmp32, buffer); + unpackstr_ptr(&object->user_sec, 
&tmp32, buffer); + unpackstr_ptr(&object->user_usec, &tmp32, buffer); + } else if (rpc_version >= SLURMDBD_2_5_VERSION) { unpackstr_ptr(&object->act_cpufreq, &tmp32, buffer); unpackstr_ptr(&object->ave_cpu, &tmp32, buffer); unpackstr_ptr(&object->ave_pages, &tmp32, buffer); @@ -1353,6 +1584,7 @@ static uint32_t _archive_jobs(mysql_conn_t *mysql_conn, char *cluster_name, job.priority = row[JOB_REQ_PRIORITY]; job.qos = row[JOB_REQ_QOS]; job.req_cpus = row[JOB_REQ_REQ_CPUS]; + job.req_mem = row[JOB_REQ_REQ_MEM]; job.resvid = row[JOB_REQ_RESVID]; job.start = row[JOB_REQ_START]; job.state = row[JOB_REQ_STATE]; @@ -1433,6 +1665,7 @@ static char *_load_jobs(uint16_t rpc_version, Buf buffer, object.priority, object.qos, object.req_cpus, + object.req_mem, object.resvid, object.start, object.state, @@ -1451,6 +1684,135 @@ static char *_load_jobs(uint16_t rpc_version, Buf buffer, return insert; } +/* returns count of resvations archived or SLURM_ERROR on error */ +static uint32_t _archive_resvs(mysql_conn_t *mysql_conn, char *cluster_name, + time_t period_end, char *arch_dir, + uint32_t archive_period) +{ + MYSQL_RES *result = NULL; + MYSQL_ROW row; + char *tmp = NULL, *query = NULL; + time_t period_start = 0; + uint32_t cnt = 0; + local_resv_t resv; + Buf buffer; + int error_code = 0, i = 0; + + xfree(tmp); + xstrfmtcat(tmp, "%s", resv_req_inx[0]); + for(i=1; i<RESV_REQ_COUNT; i++) { + xstrfmtcat(tmp, ", %s", resv_req_inx[i]); + } + + /* get all the events started before this time listed */ + query = xstrdup_printf("select %s from \"%s_%s\" where " + "time_start <= %ld " + "&& time_end != 0 order by time_start asc", + tmp, cluster_name, resv_table, period_end); + xfree(tmp); + +// START_TIMER; + debug3("%d(%s:%d) query\n%s", + mysql_conn->conn, THIS_FILE, __LINE__, query); + if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) { + xfree(query); + return SLURM_ERROR; + } + xfree(query); + + if (!(cnt = mysql_num_rows(result))) { + mysql_free_result(result); + return 0; + 
} + + buffer = init_buf(high_buffer_size); + pack16(SLURMDBD_VERSION, buffer); + pack_time(time(NULL), buffer); + pack16(DBD_GOT_RESVS, buffer); + packstr(cluster_name, buffer); + pack32(cnt, buffer); + + while ((row = mysql_fetch_row(result))) { + if (!period_start) + period_start = slurm_atoul(row[RESV_REQ_START]); + + memset(&resv, 0, sizeof(local_resv_t)); + + resv.assocs = row[RESV_REQ_ASSOCS]; + resv.cpus = row[RESV_REQ_CPUS]; + resv.flags = row[RESV_REQ_FLAGS]; + resv.id = row[RESV_REQ_ID]; + resv.name = row[RESV_REQ_NAME]; + resv.nodes = row[RESV_REQ_NODES]; + resv.node_inx = row[RESV_REQ_NODE_INX]; + resv.time_end = row[RESV_REQ_END]; + resv.time_start = row[RESV_REQ_START]; + + _pack_local_resv(&resv, SLURMDBD_VERSION, buffer); + } + mysql_free_result(result); + +// END_TIMER2("step query"); +// info("event query took %s", TIME_STR); + + error_code = archive_write_file(buffer, cluster_name, + period_start, period_end, + arch_dir, "resv", archive_period); + free_buf(buffer); + + if (error_code != SLURM_SUCCESS) + return error_code; + + return cnt; +} + +/* returns sql statement from archived data or NULL on error */ +static char *_load_resvs(uint16_t rpc_version, Buf buffer, + char *cluster_name, uint32_t rec_cnt) +{ + char *insert = NULL, *format = NULL; + local_resv_t object; + int i = 0; + + xstrfmtcat(insert, "insert into \"%s_%s\" (%s", + cluster_name, resv_table, resv_req_inx[0]); + xstrcat(format, "('%s'"); + for(i=1; i<RESV_REQ_COUNT; i++) { + xstrfmtcat(insert, ", %s", resv_req_inx[i]); + xstrcat(format, ", '%s'"); + } + xstrcat(insert, ") values "); + xstrcat(format, ")"); + for(i=0; i<rec_cnt; i++) { + memset(&object, 0, sizeof(local_resv_t)); + if (_unpack_local_resv(&object, rpc_version, buffer) + != SLURM_SUCCESS) { + error("issue unpacking"); + xfree(format); + xfree(insert); + break; + } + if (i) + xstrcat(insert, ", "); + + xstrfmtcat(insert, format, + object.assocs, + object.cpus, + object.flags, + object.id, + object.name, + 
object.nodes, + object.node_inx, + object.time_end, + object.time_start); + } +// END_TIMER2("step query"); +// info("resv query took %s", TIME_STR); + xfree(format); + + return insert; +} + /* returns count of steps archived or SLURM_ERROR on error */ static uint32_t _archive_steps(mysql_conn_t *mysql_conn, char *cluster_name, time_t period_end, char *arch_dir, @@ -1508,6 +1870,8 @@ static uint32_t _archive_steps(mysql_conn_t *mysql_conn, char *cluster_name, step.ave_cpu = row[STEP_REQ_AVE_CPU]; step.act_cpufreq = row[STEP_REQ_ACT_CPUFREQ]; step.consumed_energy = row[STEP_REQ_CONSUMED_ENERGY]; + step.ave_disk_read = row[STEP_REQ_AVE_DISK_READ]; + step.ave_disk_write = row[STEP_REQ_AVE_DISK_WRITE]; step.ave_pages = row[STEP_REQ_AVE_PAGES]; step.ave_rss = row[STEP_REQ_AVE_RSS]; step.ave_vsize = row[STEP_REQ_AVE_VSIZE]; @@ -1515,6 +1879,12 @@ static uint32_t _archive_steps(mysql_conn_t *mysql_conn, char *cluster_name, step.cpus = row[STEP_REQ_CPUS]; step.id = row[STEP_REQ_ID]; step.kill_requid = row[STEP_REQ_KILL_REQUID]; + step.max_disk_read = row[STEP_REQ_MAX_DISK_READ]; + step.max_disk_read_node = row[STEP_REQ_MAX_DISK_READ_NODE]; + step.max_disk_read_task = row[STEP_REQ_MAX_DISK_READ_TASK]; + step.max_disk_write = row[STEP_REQ_MAX_DISK_WRITE]; + step.max_disk_write_node = row[STEP_REQ_MAX_DISK_WRITE_NODE]; + step.max_disk_write_task = row[STEP_REQ_MAX_DISK_WRITE_TASK]; step.max_pages = row[STEP_REQ_MAX_PAGES]; step.max_pages_node = row[STEP_REQ_MAX_PAGES_NODE]; step.max_pages_task = row[STEP_REQ_MAX_PAGES_TASK]; @@ -1534,6 +1904,7 @@ static uint32_t _archive_steps(mysql_conn_t *mysql_conn, char *cluster_name, step.period_end = row[STEP_REQ_END]; step.period_start = row[STEP_REQ_START]; step.period_suspended = row[STEP_REQ_SUSPENDED]; + step.req_cpufreq = row[STEP_REQ_REQ_CPUFREQ]; step.state = row[STEP_REQ_STATE]; step.stepid = row[STEP_REQ_STEPID]; step.sys_sec = row[STEP_REQ_SYS_SEC]; @@ -1594,6 +1965,8 @@ static char *_load_steps(uint16_t rpc_version, Buf 
buffer, object.ave_cpu, object.act_cpufreq, object.consumed_energy, + object.ave_disk_read, + object.ave_disk_write, object.ave_pages, object.ave_rss, object.ave_vsize, @@ -1601,6 +1974,12 @@ static char *_load_steps(uint16_t rpc_version, Buf buffer, object.cpus, object.id, object.kill_requid, + object.max_disk_read, + object.max_disk_read_node, + object.max_disk_read_task, + object.max_disk_write, + object.max_disk_write_node, + object.max_disk_write_task, object.max_pages, object.max_pages_node, object.max_pages_task, @@ -1620,6 +1999,7 @@ static char *_load_steps(uint16_t rpc_version, Buf buffer, object.period_end, object.period_start, object.period_suspended, + object.req_cpufreq, object.state, object.stepid, object.sys_sec, @@ -1921,6 +2301,44 @@ exit_steps: } } exit_jobs: + + if (arch_cond->purge_resv != NO_VAL) { + /* remove all data from resv table that was older than + * last_submit * arch_cond->purge_resv. + */ + if (!(curr_end = archive_setup_end_time( + last_submit, arch_cond->purge_resv))) { + error("Parsing purge resv"); + return SLURM_ERROR; + } + + debug4("Purging resv entires before %ld for %s", + curr_end, cluster_name); + + if (SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_resv)) { + rc = _archive_resvs(mysql_conn, cluster_name, + curr_end, arch_cond->archive_dir, + arch_cond->purge_resv); + if (!rc) + goto exit_resvs; + else if (rc == SLURM_ERROR) + return rc; + } + + query = xstrdup_printf("delete from \"%s_%s\" " + "where time_submit <= %ld " + "&& time_end != 0", + cluster_name, resv_table, curr_end); + debug3("%d(%s:%d) query\n%s", + mysql_conn->conn, THIS_FILE, __LINE__, query); + rc = mysql_db_query(mysql_conn, query); + xfree(query); + if (rc != SLURM_SUCCESS) { + error("Couldn't remove old resv data"); + return SLURM_ERROR; + } + } +exit_resvs: return SLURM_SUCCESS; } @@ -2062,6 +2480,9 @@ extern int as_mysql_jobacct_process_archive_load( case DBD_GOT_JOBS: data = _load_jobs(ver, buffer, cluster_name, rec_cnt); break; + case DBD_GOT_RESVS: 
+ data = _load_resvs(ver, buffer, cluster_name, rec_cnt); + break; case DBD_STEP_START: data = _load_steps(ver, buffer, cluster_name, rec_cnt); break; diff --git a/src/plugins/accounting_storage/mysql/as_mysql_archive.h b/src/plugins/accounting_storage/mysql/as_mysql_archive.h index 03de8d49633b45de7876112033fbacceeee088c1..fc07295d3ac74ab6227907448e2921b363285dd4 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_archive.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_archive.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c index 22156dbd9b21fc42940cc33ed5200fcc731d0fd6..5827aaba3d04ff8f042a22ea55191a0775a420c8 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -1975,8 +1975,8 @@ static int _cluster_get_assocs(mysql_conn_t *mysql_conn, */ if (!is_admin && (private_data & PRIVATE_DATA_USERS)) { int set = 0; - query = xstrdup_printf("select lft from %s where user='%s'", - assoc_table, user->name); + query = xstrdup_printf("select lft from %s_%s where user='%s'", + cluster_name, assoc_table, user->name); if (user->coord_accts) { slurmdb_coord_rec_t *coord = NULL; itr = list_iterator_create(user->coord_accts); @@ -2008,9 +2008,19 @@ static int _cluster_get_assocs(mysql_conn_t *mysql_conn, row[0]); } } - if (set) - xstrcat(extra,")"); + mysql_free_result(result); + + if (set) + xstrcat(extra, ")"); + else { + xfree(extra); + debug("User %s has no assocations, and is not admin, " + "so not returning any.", user->name); + /* This user has no valid associations, so + * end. */ + return SLURM_SUCCESS; + } } qos_extra = _setup_association_cond_qos(assoc_cond, cluster_name); @@ -3012,8 +3022,10 @@ extern List as_mysql_modify_assocs(mysql_conn_t *mysql_conn, uint32_t uid, if (!(is_admin = is_user_min_admin_level( mysql_conn, uid, SLURMDB_ADMIN_OPERATOR))) { - if (assoc_cond->user_list - && (list_count(assoc_cond->user_list) == 1)) { + if (is_user_any_coord(mysql_conn, &user)) { + goto is_same_user; + } else if (assoc_cond->user_list + && (list_count(assoc_cond->user_list) == 1)) { uid_t pw_uid; char *name; name = list_peek(assoc_cond->user_list); @@ -3037,12 +3049,9 @@ extern List as_mysql_modify_assocs(mysql_conn_t *mysql_conn, uint32_t uid, } } - if (!is_user_any_coord(mysql_conn, &user)) { - error("Only admins/coordinators can " - "modify associations"); - errno = ESLURM_ACCESS_DENIED; - return NULL; - } + error("Only admins/coordinators can modify associations"); + errno = ESLURM_ACCESS_DENIED; + return NULL; } is_same_user: @@ -3135,6 +3144,7 @@ is_same_user: if (!ret_list) { reset_mysql_conn(mysql_conn); + errno = rc; return NULL; } else if 
(!list_count(ret_list)) { reset_mysql_conn(mysql_conn); @@ -3330,6 +3340,11 @@ extern List as_mysql_get_assocs(mysql_conn_t *mysql_conn, uid_t uid, assoc_mgr_fill_in_user( mysql_conn, &user, 1, NULL); } + if (!is_admin && !user.name) { + debug("User %u has no assocations, and is not admin, " + "so not returning any.", user.uid); + return NULL; + } } if ((assoc_cond->qos_list && list_count(assoc_cond->qos_list)) diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.h b/src/plugins/accounting_storage/mysql/as_mysql_assoc.h index 5174f682917db8091425c83f3ac0600c09440837..efabb613090bdfba6a77111112f9a13f2516de5c 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_cluster.c b/src/plugins/accounting_storage/mysql/as_mysql_cluster.c index 1fc7a8433dc23a182d987e8580c6aa44acc9c3f8..dbceb7d0a7c7514cd0ca089771f7937e3a5ea573 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_cluster.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_cluster.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_cluster.h b/src/plugins/accounting_storage/mysql/as_mysql_cluster.h index 3b2c6873b5ff3a366816b17ef871f85f822bd20d..8ed6eca431ecffcb25cd8c1c3cb07e9ca89670b7 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_cluster.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_cluster.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_convert.c b/src/plugins/accounting_storage/mysql/as_mysql_convert.c index 0c492be2288fcdfadcd6ce65dba77dff858584f7..5beb7f59bf80521e07c83eb29a7d26ffe4c6ef5c 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_convert.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_convert.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -659,6 +659,7 @@ extern int as_mysql_convert_tables(mysql_conn_t *mysql_conn) { "comp_code", "int default 0 not null" }, { "priority", "int not null" }, { "req_cpus", "int unsigned not null" }, + { "req_mem", "int unsigned default 0 not null" }, { "alloc_cpus", "int unsigned not null" }, { "alloc_nodes", "int unsigned not null" }, { "nodelist", "text" }, @@ -713,23 +714,32 @@ extern int as_mysql_convert_tables(mysql_conn_t *mysql_conn) { "sys_sec", "int unsigned default 0 not null" }, { "sys_usec", "int unsigned default 0 not null" }, { "max_vsize", "bigint unsigned default 0 not null" }, - { "max_vsize_task", "smallint unsigned default 0 not null" }, + { "max_vsize_task", "int unsigned default 0 not null" }, { "max_vsize_node", "int unsigned default 0 not null" }, { "ave_vsize", "double unsigned default 0.0 not null" }, { "max_rss", "bigint unsigned default 0 not null" }, - { "max_rss_task", "smallint unsigned default 0 not null" }, + { "max_rss_task", "int unsigned default 0 not null" }, { "max_rss_node", "int unsigned default 0 not null" }, { "ave_rss", "double unsigned default 0.0 not null" }, { "max_pages", "int unsigned default 0 not null" }, - { "max_pages_task", "smallint unsigned default 0 not null" }, + { "max_pages_task", "int unsigned default 0 not null" }, { "max_pages_node", "int unsigned default 0 not null" }, { "ave_pages", "double unsigned default 0.0 not null" }, { "min_cpu", "int unsigned default 0 not null" }, - { "min_cpu_task", "smallint unsigned default 0 not null" }, + { "min_cpu_task", "int unsigned default 0 not null" }, { "min_cpu_node", "int unsigned default 0 not null" }, { "ave_cpu", "double unsigned default 0.0 not null" }, { "act_cpufreq", "double unsigned default 0.0 not null" }, { "consumed_energy", "double unsigned default 99.0 not null" }, + { "req_cpufreq", "int unsigned default 0 not null" }, + { "max_disk_read", "double unsigned default 0 not null" 
}, + { "max_disk_read_task", "int unsigned default 0 not null" }, + { "max_disk_read_node", "int unsigned default 0 not null" }, + { "ave_disk_read", "double unsigned default 0.0 not null" }, + { "max_disk_write", "double unsigned default 0 not null" }, + { "max_disk_write_task", "int unsigned default 0 not null" }, + { "max_disk_write_node", "int unsigned default 0 not null" }, + { "ave_disk_write", "double unsigned default 0.0 not null" }, { NULL, NULL} }; diff --git a/src/plugins/accounting_storage/mysql/as_mysql_convert.h b/src/plugins/accounting_storage/mysql/as_mysql_convert.h index 99644f73ba1fbff22545cb14b49fbea1ada45781..7e1707fcdd93309d56da5d126d7851b6469d7993 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_convert.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_convert.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index 60fb073ba8fb7e6d95040343244a480aa31444a4..dea5a6e4bcf01ec87a87cb1e27c8a4fb28352e89 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -426,7 +426,7 @@ no_rollup_change: "id_group, nodelist, id_resv, timelimit, " "time_eligible, time_submit, time_start, " "job_name, track_steps, state, priority, cpus_req, " - "cpus_alloc, nodes_alloc", + "cpus_alloc, nodes_alloc, mem_req", mysql_conn->cluster_name, job_table); if (job_ptr->account) @@ -446,7 +446,7 @@ no_rollup_change: xstrfmtcat(query, ") values (%u, %u, %u, %u, %u, %u, '%s', %u, %u, " - "%ld, %ld, %ld, '%s', %u, %u, %u, %u, %u, %u", + "%ld, %ld, %ld, '%s', %u, %u, %u, %u, %u, %u, %u", job_ptr->job_id, job_ptr->assoc_id, job_ptr->qos_id, wckeyid, job_ptr->user_id, job_ptr->group_id, nodes, @@ -454,7 +454,8 @@ no_rollup_change: begin_time, submit_time, start_time, jname, track_steps, job_state, job_ptr->priority, job_ptr->details->min_cpus, - job_ptr->total_cpus, node_cnt); + job_ptr->total_cpus, node_cnt, + job_ptr->details->pn_min_memory); if (job_ptr->account) xstrfmtcat(query, ", '%s'", job_ptr->account); @@ -479,13 +480,15 @@ no_rollup_change: "time_submit=%ld, time_start=%ld, " "job_name='%s', track_steps=%u, id_qos=%u, " "state=greatest(state, %u), priority=%u, " - "cpus_req=%u, cpus_alloc=%u, nodes_alloc=%u", + "cpus_req=%u, cpus_alloc=%u, nodes_alloc=%u, " + "mem_req=%u", wckeyid, job_ptr->user_id, job_ptr->group_id, nodes, job_ptr->resv_id, job_ptr->time_limit, submit_time, start_time, jname, track_steps, job_ptr->qos_id, job_state, job_ptr->priority, job_ptr->details->min_cpus, - job_ptr->total_cpus, node_cnt); + job_ptr->total_cpus, node_cnt, + job_ptr->details->pn_min_memory); if (job_ptr->account) xstrfmtcat(query, ", account='%s'", job_ptr->account); @@ -542,12 +545,13 @@ no_rollup_change: xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, " "cpus_alloc=%u, nodes_alloc=%u, id_qos=%u, " "id_assoc=%u, id_wckey=%u, id_resv=%u, " - "timelimit=%u, " + "timelimit=%u, mem_req=%u, " "time_eligible=%ld where job_db_inx=%d", start_time, jname, job_state, 
job_ptr->total_cpus, node_cnt, job_ptr->qos_id, job_ptr->assoc_id, wckeyid, job_ptr->resv_id, job_ptr->time_limit, + job_ptr->details->pn_min_memory, begin_time, job_ptr->db_index); debug3("%d(%s:%d) query\n%s", @@ -629,12 +633,13 @@ extern List as_mysql_modify_job(mysql_conn_t *mysql_conn, uint32_t uid, } /* Here we want to get the last job submitted here */ - query = xstrdup_printf("select job_db_inx, id_job, time_submit " + query = xstrdup_printf("select job_db_inx, id_job, time_submit, " + "id_user " "from \"%s_%s\" where deleted=0 " - "&& id_job=%u && id_user=%u " + "&& id_job=%u " "order by time_submit desc limit 1;", job_cond->cluster, job_table, - job_cond->job_id, uid); + job_cond->job_id); debug3("%d(%s:%d) query\n%s", mysql_conn->conn, THIS_FILE, __LINE__, query); @@ -647,6 +652,17 @@ extern List as_mysql_modify_job(mysql_conn_t *mysql_conn, uint32_t uid, if ((row = mysql_fetch_row(result))) { char tmp_char[25]; time_t time_submit = atol(row[2]); + + if ((uid != atoi(row[3])) && + !is_user_min_admin_level(mysql_conn, uid, + SLURMDB_ADMIN_OPERATOR)) { + errno = ESLURM_ACCESS_DENIED; + xfree(vals); + xfree(query); + mysql_free_result(result); + return NULL; + } + slurm_make_time_str(&time_submit, tmp_char, sizeof(tmp_char)); xstrfmtcat(cond_char, "job_db_inx=%s", row[0]); @@ -925,19 +941,19 @@ extern int as_mysql_step_start(mysql_conn_t *mysql_conn, "insert into \"%s_%s\" (job_db_inx, id_step, time_start, " "step_name, state, " "cpus_alloc, nodes_alloc, task_cnt, nodelist, " - "node_inx, task_dist) " + "node_inx, task_dist, req_cpufreq) " "values (%d, %d, %d, '%s', %d, %d, %d, %d, " - "'%s', '%s', %d) " + "'%s', '%s', %d, %u) " "on duplicate key update cpus_alloc=%d, nodes_alloc=%d, " "task_cnt=%d, time_end=0, state=%d, " - "nodelist='%s', node_inx='%s', task_dist=%d", + "nodelist='%s', node_inx='%s', task_dist=%d, req_cpufreq=%u", mysql_conn->cluster_name, step_table, step_ptr->job_ptr->db_index, step_ptr->step_id, (int)start_time, step_name, JOB_RUNNING, 
cpus, nodes, tasks, node_list, node_inx, task_dist, - cpus, nodes, tasks, JOB_RUNNING, - node_list, node_inx, task_dist); + step_ptr->cpu_freq, cpus, nodes, tasks, JOB_RUNNING, + node_list, node_inx, task_dist, step_ptr->cpu_freq); debug3("%d(%s:%d) query\n%s", mysql_conn->conn, THIS_FILE, __LINE__, query); rc = mysql_db_query(mysql_conn, query); @@ -956,6 +972,8 @@ extern int as_mysql_step_complete(mysql_conn_t *mysql_conn, struct jobacctinfo *jobacct = (struct jobacctinfo *)step_ptr->jobacct; struct jobacctinfo dummy_jobacct; double ave_vsize = NO_VAL, ave_rss = NO_VAL, ave_pages = NO_VAL; + double ave_disk_read = (double)NO_VAL; + double ave_disk_write = (double)NO_VAL; double ave_cpu = (double)NO_VAL; char *query = NULL; int rc =SLURM_SUCCESS; @@ -1025,6 +1043,10 @@ extern int as_mysql_step_complete(mysql_conn_t *mysql_conn, ave_pages /= (double)tasks; ave_cpu = (double)jobacct->tot_cpu; ave_cpu /= (double)tasks; + ave_disk_read = (double)jobacct->tot_disk_read; + ave_disk_read /= (double)tasks; + ave_disk_write = (double)jobacct->tot_disk_write; + ave_disk_write /= (double)tasks; } if (!step_ptr->job_ptr->db_index) { @@ -1052,6 +1074,10 @@ extern int as_mysql_step_complete(mysql_conn_t *mysql_conn, "kill_requid=%d, exit_code=%d, " "user_sec=%u, user_usec=%u, " "sys_sec=%u, sys_usec=%u, " + "max_disk_read=%f, max_disk_read_task=%u, " + "max_disk_read_node=%u, ave_disk_read=%f, " + "max_disk_write=%f, max_disk_write_task=%u, " + "max_disk_write_node=%u, ave_disk_write=%f, " "max_vsize=%u, max_vsize_task=%u, " "max_vsize_node=%u, ave_vsize=%f, " "max_rss=%u, max_rss_task=%u, " @@ -1074,6 +1100,22 @@ extern int as_mysql_step_complete(mysql_conn_t *mysql_conn, jobacct->sys_cpu_sec, /* system microsecs */ jobacct->sys_cpu_usec, + /* max disk_read */ + jobacct->max_disk_read, + /* max disk_read task */ + jobacct->max_disk_read_id.taskid, + /* max disk_read node */ + jobacct->max_disk_read_id.nodeid, + /* ave disk_read */ + ave_disk_read, + /* max disk_write */ + 
jobacct->max_disk_write, + /* max disk_write task */ + jobacct->max_disk_write_id.taskid, + /* max disk_write node */ + jobacct->max_disk_write_id.nodeid, + /* ave disk_write */ + ave_disk_write, jobacct->max_vsize, /* max vsize */ jobacct->max_vsize_id.taskid, /* max vsize task */ jobacct->max_vsize_id.nodeid, /* max vsize node */ diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.h b/src/plugins/accounting_storage/mysql/as_mysql_job.h index 2de6601fede511d9d12e43349dbd651afbac6d46..343cc24323f5f298c29aad5ee1811e7e00eef41a 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c index 799104a6bc80065ccbb71772e3e5e0f5d59d3df1..6d17083f19f65a9d8a452a39020efebeda2f172b 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -71,6 +71,7 @@ char *job_req_inx[] = { "t1.job_db_inx", "t1.job_name", "t1.kill_requid", + "t1.mem_req", "t1.node_inx", "t1.nodelist", "t1.nodes_alloc", @@ -108,6 +109,7 @@ enum { JOB_REQ_ID, JOB_REQ_NAME, JOB_REQ_KILL_REQUID, + JOB_REQ_REQ_MEM, JOB_REQ_NODE_INX, JOB_REQ_NODELIST, JOB_REQ_ALLOC_NODES, @@ -149,6 +151,14 @@ char *step_req_inx[] = { "t1.user_usec", "t1.sys_sec", "t1.sys_usec", + "t1.max_disk_read", + "t1.max_disk_read_task", + "t1.max_disk_read_node", + "t1.ave_disk_read", + "t1.max_disk_write", + "t1.max_disk_write_task", + "t1.max_disk_write_node", + "t1.ave_disk_write", "t1.max_vsize", "t1.max_vsize_task", "t1.max_vsize_node", @@ -166,7 +176,8 @@ char *step_req_inx[] = { "t1.min_cpu_node", "t1.ave_cpu", "t1.act_cpufreq", - "t1.consumed_energy" + "t1.consumed_energy", + "t1.req_cpufreq" }; enum { @@ -188,6 +199,14 @@ enum { STEP_REQ_USER_USEC, STEP_REQ_SYS_SEC, STEP_REQ_SYS_USEC, + STEP_REQ_MAX_DISK_READ, + STEP_REQ_MAX_DISK_READ_TASK, + STEP_REQ_MAX_DISK_READ_NODE, + STEP_REQ_AVE_DISK_READ, + STEP_REQ_MAX_DISK_WRITE, + STEP_REQ_MAX_DISK_WRITE_TASK, + STEP_REQ_MAX_DISK_WRITE_NODE, + STEP_REQ_AVE_DISK_WRITE, STEP_REQ_MAX_VSIZE, STEP_REQ_MAX_VSIZE_TASK, STEP_REQ_MAX_VSIZE_NODE, @@ -206,6 +225,7 @@ enum { STEP_REQ_AVE_CPU, STEP_REQ_ACT_CPUFREQ, STEP_REQ_CONSUMED_ENERGY, + STEP_REQ_REQ_CPUFREQ, STEP_REQ_COUNT }; @@ -383,9 +403,19 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, prefix); } } - if (set) - xstrcat(extra,")"); + mysql_free_result(result); + + if (set) + xstrcat(extra, ")"); + else { + xfree(extra); + debug("User %s has no assocations, and is not admin, " + "so not returning any jobs.", user->name); + /* This user has no valid associations, so + * they will not have any jobs. 
*/ + goto end_it; + } } setup_job_cluster_cond_limits(mysql_conn, job_cond, @@ -611,6 +641,7 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, job->track_steps = slurm_atoul(row[JOB_REQ_TRACKSTEPS]); job->priority = slurm_atoul(row[JOB_REQ_PRIORITY]); job->req_cpus = slurm_atoul(row[JOB_REQ_REQ_CPUS]); + job->req_mem = slurm_atoul(row[JOB_REQ_REQ_MEM]); job->requid = slurm_atoul(row[JOB_REQ_KILL_REQUID]); job->qosid = slurm_atoul(row[JOB_REQ_QOS]); job->show_full = 1; @@ -752,6 +783,20 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, step->user_cpu_sec + step->sys_cpu_sec; step->tot_cpu_usec += step->user_cpu_usec + step->sys_cpu_usec; + step->stats.disk_read_max = + atof(step_row[STEP_REQ_MAX_DISK_READ]); + step->stats.disk_read_max_taskid = + slurm_atoul(step_row[ + STEP_REQ_MAX_DISK_READ_TASK]); + step->stats.disk_read_ave = + atof(step_row[STEP_REQ_AVE_DISK_READ]); + step->stats.disk_write_max = + atof(step_row[STEP_REQ_MAX_DISK_WRITE]); + step->stats.disk_write_max_taskid = + slurm_atoul(step_row[ + STEP_REQ_MAX_DISK_WRITE_TASK]); + step->stats.disk_write_ave = + atof(step_row[STEP_REQ_AVE_DISK_WRITE]); step->stats.vsize_max = slurm_atoul(step_row[STEP_REQ_MAX_VSIZE]); step->stats.vsize_max_taskid = @@ -776,9 +821,11 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, slurm_atoul(step_row[STEP_REQ_MIN_CPU_TASK]); step->stats.cpu_ave = atof(step_row[STEP_REQ_AVE_CPU]); step->stats.act_cpufreq = - atof(step_row[STEP_REQ_ACT_CPUFREQ]); + atof(step_row[STEP_REQ_ACT_CPUFREQ]); step->stats.consumed_energy = - atof(step_row[STEP_REQ_CONSUMED_ENERGY]); + atof(step_row[STEP_REQ_CONSUMED_ENERGY]); + step->req_cpufreq = + slurm_atoul(step_row[STEP_REQ_REQ_CPUFREQ]); step->stepname = xstrdup(step_row[STEP_REQ_NAME]); step->nodes = xstrdup(step_row[STEP_REQ_NODELIST]); step->stats.vsize_max_nodeid = @@ -1440,6 +1487,11 @@ extern List as_mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, */ is_user_any_coord(mysql_conn, &user); } + if (!is_admin 
&& !user.name) { + debug("User %u has no assocations, and is not admin, " + "so not returning any jobs.", user.uid); + return NULL; + } } if (job_cond diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h index 719d02306dee1e48d25aee45fa4f43007c4d3f8c..4edf10c86e15730154f57b658edc1b2c0306d3e1 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_problems.c b/src/plugins/accounting_storage/mysql/as_mysql_problems.c index df1f2ef2412b83f1dc24508515a792d289fdf225..7583bd2cbe065a9502db84cc1f4cacb64c7015e0 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_problems.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_problems.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_problems.h b/src/plugins/accounting_storage/mysql/as_mysql_problems.h index 2b48911ad143511867f874a8017e0b0ee27764a8..497014540558391a62a61dd10a403e4f649a6705 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_problems.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_problems.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_qos.c b/src/plugins/accounting_storage/mysql/as_mysql_qos.c index 73e2a481ec530245584f00228c071d5be44209c8..0b2dbd4bb7374932b1bcfa2d1711372f66f3443a 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_qos.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_qos.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -379,45 +379,49 @@ static int _setup_qos_limits(slurmdb_qos_rec_t *qos, if (qos->preempt_list && list_count(qos->preempt_list)) { char *preempt_val = NULL; - char *tmp_char = NULL, *begin_preempt = NULL; + char *tmp_char = NULL, *last_preempt = NULL; + char *tick = ""; ListIterator preempt_itr = list_iterator_create(qos->preempt_list); xstrcat(*cols, ", preempt"); - begin_preempt = xstrdup("preempt"); - while ((tmp_char = list_next(preempt_itr))) { if (tmp_char[0] == '-') { - xstrfmtcat(preempt_val, - "replace(%s, ',%s', '')", - begin_preempt, tmp_char+1); - xfree(begin_preempt); - begin_preempt = preempt_val; + preempt_val = xstrdup_printf( + "replace(%s, ',%s', '')", + last_preempt ? 
last_preempt : "preempt", + tmp_char+1); + xfree(last_preempt); + last_preempt = preempt_val; + preempt_val = NULL; } else if (tmp_char[0] == '+') { - xstrfmtcat(preempt_val, - "concat(" - "replace(%s, ',%s', ''), ',%s')", - begin_preempt, - tmp_char+1, tmp_char+1); + preempt_val = xstrdup_printf( + "concat(replace(%s, ',%s', ''), ',%s')", + last_preempt ? last_preempt : "preempt", + tmp_char+1, tmp_char+1); if (added_preempt) xstrfmtcat(*added_preempt, ",%s", tmp_char+1); - xfree(begin_preempt); - begin_preempt = preempt_val; + xfree(last_preempt); + last_preempt = preempt_val; + preempt_val = NULL; } else if (tmp_char[0]) { xstrfmtcat(preempt_val, ",%s", tmp_char); if (added_preempt) xstrfmtcat(*added_preempt, ",%s", tmp_char); + tick = "\'"; } else xstrcat(preempt_val, ""); } list_iterator_destroy(preempt_itr); - xfree(begin_preempt); - - xstrfmtcat(*vals, ", '%s'", preempt_val); - xstrfmtcat(*extra, ", preempt='%s'", preempt_val); + if (last_preempt) { + preempt_val = last_preempt; + last_preempt = NULL; + } + xstrfmtcat(*vals, ", %s%s%s", tick, preempt_val, tick); + xstrfmtcat(*extra, ", preempt=%s%s%s", tick, preempt_val, tick); xfree(preempt_val); } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_qos.h b/src/plugins/accounting_storage/mysql/as_mysql_qos.h index 4b2cedbad42f86a3b204d468d34674028cb14e55..98ddd9842770a28435cd4bab405dd973e2cc7fe2 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_qos.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_qos.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_resv.c b/src/plugins/accounting_storage/mysql/as_mysql_resv.c index 1e60d03542baaf4dcf27e4f81ec5dae3aeb45f37..96e04e65d314fe50885183023d9da1c2b58dfddf 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_resv.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_resv.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -517,26 +517,26 @@ extern List as_mysql_get_resvs(mysql_conn_t *mysql_conn, uid_t uid, /* if this changes you will need to edit the corresponding enum */ char *resv_req_inx[] = { "id_resv", - "resv_name", - "cpus", "assoclist", + "cpus", + "flags", "nodelist", "node_inx", + "resv_name", "time_start", "time_end", - "flags", }; enum { RESV_REQ_ID, - RESV_REQ_NAME, - RESV_REQ_CPUS, RESV_REQ_ASSOCS, + RESV_REQ_CPUS, + RESV_REQ_FLAGS, RESV_REQ_NODES, RESV_REQ_NODE_INX, + RESV_REQ_NAME, RESV_REQ_START, RESV_REQ_END, - RESV_REQ_FLAGS, RESV_REQ_COUNT }; diff --git a/src/plugins/accounting_storage/mysql/as_mysql_resv.h b/src/plugins/accounting_storage/mysql/as_mysql_resv.h index f0ddae89f07e31d2fa70d1861fb0d29af7986357..1cf8ff1c699844453830302afe7586a6101df53c 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_resv.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_resv.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c index 4108bee757b490e187089c2d51656eed185ec68f..da78164b36bd663208653dd5a8ead1d8863b5fc5 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -128,6 +128,12 @@ static int _process_purge(mysql_conn_t *mysql_conn, arch_cond.purge_job = slurmdbd_conf->purge_job; else arch_cond.purge_job = NO_VAL; + + if (purge_period & slurmdbd_conf->purge_resv) + arch_cond.purge_resv = slurmdbd_conf->purge_resv; + else + arch_cond.purge_resv = NO_VAL; + if (purge_period & slurmdbd_conf->purge_step) arch_cond.purge_step = slurmdbd_conf->purge_step; else @@ -979,15 +985,18 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, } /* now apply the down time from the slurmctld disconnects */ - list_iterator_reset(c_itr); - while ((loc_c_usage = list_next(c_itr))) - c_usage->d_cpu += loc_c_usage->total_time; + if (c_usage) { + list_iterator_reset(c_itr); + while ((loc_c_usage = list_next(c_itr))) + c_usage->d_cpu += loc_c_usage->total_time; - if ((rc = _process_cluster_usage( - mysql_conn, cluster_name, curr_start, - curr_end, now, c_usage)) != SLURM_SUCCESS) { - _destroy_local_cluster_usage(c_usage); - goto end_it; + if ((rc = _process_cluster_usage( + mysql_conn, cluster_name, curr_start, + curr_end, now, c_usage)) + != SLURM_SUCCESS) { + _destroy_local_cluster_usage(c_usage); + goto end_it; + } } list_iterator_reset(a_itr); diff --git 
a/src/plugins/accounting_storage/mysql/as_mysql_rollup.h b/src/plugins/accounting_storage/mysql/as_mysql_rollup.h index 276dd3f214cac6f5e751c9396829e7c027556f64..dd786b7c1510f04e9ec0be7a2165dfb2d76a12e1 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_rollup.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_rollup.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_txn.c b/src/plugins/accounting_storage/mysql/as_mysql_txn.c index 6933e4fb853156262125bed476d28e87bd507e71..a64539df2d822e370137af528c508bbd85f40677 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_txn.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_txn.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_txn.h b/src/plugins/accounting_storage/mysql/as_mysql_txn.h index d607c0d1a6a9ff6cc4224b0064e943dff16dc7bb..db874ad44134a8fccab27e5cc726c65ef6738896 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_txn.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_txn.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_usage.c b/src/plugins/accounting_storage/mysql/as_mysql_usage.c index c5552cfcfaafa98e23d71a608a7d497380381e43..9328f6599d46c6ed993fdb871e2184e2625599e8 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_usage.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_usage.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_usage.h b/src/plugins/accounting_storage/mysql/as_mysql_usage.h index b73c1f2bba53958f5692d0f0b3e76ca18d43f1dc..c61106aff0812ab13b4702c107f9a8f6bcff13b3 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_usage.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_usage.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_user.c b/src/plugins/accounting_storage/mysql/as_mysql_user.c index 7c53e0f122a94bbd1da76e179f727053d8bb5b42..41773706f844107c94fa2432a60574a84841af24 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_user.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_user.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -1121,12 +1121,13 @@ extern List as_mysql_get_users(mysql_conn_t *mysql_conn, uid_t uid, if (private_data & PRIVATE_DATA_USERS) { if (!(is_admin = is_user_min_admin_level( mysql_conn, uid, SLURMDB_ADMIN_OPERATOR))) { - if (!is_user_any_coord(mysql_conn, &user)) { - error("Only admins/coordinators can " - "access user data"); - errno = ESLURM_ACCESS_DENIED; - return NULL; - } + assoc_mgr_fill_in_user( + mysql_conn, &user, 1, NULL); + } + if (!is_admin && !user.name) { + debug("User %u has no assocations, and is not admin, " + "so not returning any users.", user.uid); + return NULL; } } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_user.h b/src/plugins/accounting_storage/mysql/as_mysql_user.h index f0de0f7a38d4d12c12224797098192d411e22779..71b5ce4621531284336ce56627ed189adbf0f3ab 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_user.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_user.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/mysql/as_mysql_wckey.c b/src/plugins/accounting_storage/mysql/as_mysql_wckey.c index dcbe189d65556c61607b020622971c22fde40f37..c10aa629b58024b7cf42068d3ac0223488c7386d 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_wckey.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_wckey.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -797,12 +797,13 @@ extern List as_mysql_get_wckeys(mysql_conn_t *mysql_conn, uid_t uid, if (private_data & PRIVATE_DATA_USERS) { if (!(is_admin = is_user_min_admin_level( mysql_conn, uid, SLURMDB_ADMIN_OPERATOR))) { - if (!is_user_any_coord(mysql_conn, &user)) { - error("Only admins/coordinators can " - "access wc key data"); - errno = ESLURM_ACCESS_DENIED; - return NULL; - } + assoc_mgr_fill_in_user( + mysql_conn, &user, 1, NULL); + } + if (!is_admin && !user.name) { + debug("User %u has no assocations, and is not admin, " + "so not returning any wckeys.", user.uid); + return NULL; } } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_wckey.h b/src/plugins/accounting_storage/mysql/as_mysql_wckey.h index 42bd53c85572510ace3f904d4441132dd328219d..a083ede1e8594d58061e6cc8a8c40bbf5992c53f 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_wckey.h +++ b/src/plugins/accounting_storage/mysql/as_mysql_wckey.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/none/Makefile.in b/src/plugins/accounting_storage/none/Makefile.in index 95c48d9dbd553c36a62806927f0d5c2c138e3036..8931820edc30799f5edfccba91658216c41d5860 100644 --- a/src/plugins/accounting_storage/none/Makefile.in +++ b/src/plugins/accounting_storage/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/accounting_storage/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = 
@FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c index 53d175f7d9f97ef7a5e8dea096e9b759c851b93d..6ba1b0b8daba2df510c0b86a784c01359392fda6 100644 --- a/src/plugins/accounting_storage/none/accounting_storage_none.c +++ b/src/plugins/accounting_storage/none/accounting_storage_none.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of 
SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/Makefile.in b/src/plugins/accounting_storage/pgsql/Makefile.in index aae9132c574ece9f67b530c2c56b5c981e16a1ce..f1e2ee3e09fcf537b0212a7168299f88c829dc30 100644 --- a/src/plugins/accounting_storage/pgsql/Makefile.in +++ b/src/plugins/accounting_storage/pgsql/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/accounting_storage/pgsql DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -228,6 +232,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ 
+CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -248,6 +254,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -257,6 +266,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -264,6 +275,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -298,6 +318,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -325,6 +348,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c index 975a08b7906fe57878f8bb0ffc44ee5bf9263a8f..3250ab5f9f0ad335d13c1d5f603fead9bae34553 100644 --- 
a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -100,7 +100,7 @@ static pgsql_db_info_t *_pgsql_acct_create_db_info() { pgsql_db_info_t *db_info = xmalloc(sizeof(pgsql_db_info_t)); db_info->port = slurm_get_accounting_storage_port(); - if(!db_info->port) { + if (!db_info->port) { db_info->port = DEFAULT_PGSQL_PORT; slurm_set_accounting_storage_port(db_info->port); } @@ -167,18 +167,20 @@ extern int init ( void ) /* since this can be loaded from many different places only tell us once. */ - if(!first) + if (!first) return SLURM_SUCCESS; first = 0; - if(!slurmdbd_conf) { + if (!slurmdbd_conf) { char *cluster_name = NULL; if (!(cluster_name = slurm_get_cluster_name())) fatal("%s requires ClusterName in slurm.conf", plugin_name); xfree(cluster_name); } - + error("account_storage/pgsql will be removed in the next version of " + "Slurm. Please make plans to switch to a different method of " + "storing data. 
We would suggest using The SlurmDBD over MySQL."); pgsql_db_info = _pgsql_acct_create_db_info(); /* pgsql_db_info = acct_create_db_info(DEFAULT_PGSQL_PORT); */ pgsql_db_name = acct_get_db_name(); @@ -226,7 +228,7 @@ extern void *acct_storage_p_get_connection(const slurm_trigger_callbacks_t *cb, { pgsql_conn_t *pg_conn = xmalloc(sizeof(pgsql_conn_t)); - if(!pgsql_db_info) + if (!pgsql_db_info) init(); debug2("as/pg: get_connection: request new connection: %d", rollback); @@ -241,7 +243,7 @@ extern void *acct_storage_p_get_connection(const slurm_trigger_callbacks_t *cb, pgsql_get_db_connection(&pg_conn->db_conn, pgsql_db_name, pgsql_db_info); - if(pg_conn->db_conn && rollback) { + if (pg_conn->db_conn && rollback) { pgsql_db_start_transaction(pg_conn->db_conn); } return (void *)pg_conn; @@ -249,7 +251,7 @@ extern void *acct_storage_p_get_connection(const slurm_trigger_callbacks_t *cb, extern int acct_storage_p_close_connection(pgsql_conn_t **pg_conn) { - if(!pg_conn || !*pg_conn) + if (!pg_conn || !*pg_conn) return SLURM_SUCCESS; acct_storage_p_commit((*pg_conn), 0); /* discard changes */ @@ -272,14 +274,14 @@ extern int acct_storage_p_commit(pgsql_conn_t *pg_conn, bool commit) debug4("as/pg: commit: got %d commits", list_count(pg_conn->update_list)); - if(pg_conn->rollback) { - if(!commit) { - if(pgsql_db_rollback(pg_conn->db_conn)) { + if (pg_conn->rollback) { + if (!commit) { + if (pgsql_db_rollback(pg_conn->db_conn)) { error("as/pg: commit: rollback failed"); return SLURM_ERROR; } } else { - if(pgsql_db_commit(pg_conn->db_conn)) { + if (pgsql_db_commit(pg_conn->db_conn)) { error("as/pg: commit: commit failed"); return SLURM_ERROR; } @@ -288,7 +290,7 @@ extern int acct_storage_p_commit(pgsql_conn_t *pg_conn, bool commit) pgsql_db_start_transaction(pg_conn->db_conn); } - if(commit && list_count(pg_conn->update_list)) { + if (commit && list_count(pg_conn->update_list)) { query = xstrdup_printf( "SELECT name, control_host, control_port, rpc_version " " FROM %s WHERE 
deleted=0 AND control_port!=0", @@ -564,7 +566,7 @@ extern int clusteracct_storage_p_node_down(pgsql_conn_t *pg_conn, time_t event_time, char *reason, uint32_t reason_uid) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -576,7 +578,7 @@ extern int clusteracct_storage_p_node_up(pgsql_conn_t *pg_conn, struct node_record *node_ptr, time_t event_time) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -587,7 +589,7 @@ extern int clusteracct_storage_p_node_up(pgsql_conn_t *pg_conn, extern int clusteracct_storage_p_register_ctld(pgsql_conn_t *pg_conn, uint16_t port) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -613,7 +615,7 @@ extern int clusteracct_storage_p_cluster_cpus(pgsql_conn_t *pg_conn, uint32_t cpus, time_t event_time) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -628,7 +630,7 @@ extern int clusteracct_storage_p_cluster_cpus(pgsql_conn_t *pg_conn, extern int jobacct_storage_p_job_start(pgsql_conn_t *pg_conn, struct job_record *job_ptr) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -642,7 +644,7 @@ extern int jobacct_storage_p_job_start(pgsql_conn_t *pg_conn, extern int jobacct_storage_p_job_complete(pgsql_conn_t *pg_conn, struct job_record *job_ptr) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -656,7 +658,7 @@ extern int jobacct_storage_p_job_complete(pgsql_conn_t *pg_conn, extern int jobacct_storage_p_step_start(pgsql_conn_t *pg_conn, struct step_record *step_ptr) { - if(!pg_conn->cluster_name) 
{ + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -670,7 +672,7 @@ extern int jobacct_storage_p_step_start(pgsql_conn_t *pg_conn, extern int jobacct_storage_p_step_complete(pgsql_conn_t *pg_conn, struct step_record *step_ptr) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } @@ -684,7 +686,7 @@ extern int jobacct_storage_p_step_complete(pgsql_conn_t *pg_conn, extern int jobacct_storage_p_suspend(pgsql_conn_t *pg_conn, struct job_record *job_ptr) { - if(!pg_conn->cluster_name) { + if (!pg_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; } diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h index 308ac898e96231a74441df804650f680c7c66de6..80cb01f8e6f8e047c8148c5cff74169a1aef908c 100644 --- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_acct.c b/src/plugins/accounting_storage/pgsql/as_pg_acct.c index a239e70e7def43ca23a1671d08456e8121851c38..f426b55615d2936688dbddb85c59a9b6555b5b17 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_acct.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_acct.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -88,12 +88,12 @@ _get_account_coords(pgsql_conn_t *pg_conn, slurmdb_account_rec_t *acct) DEF_VARS; slurmdb_coord_rec_t *coord = NULL; - if(!acct) { + if (!acct) { error("as/pg: _get_account_coords: account not given"); return SLURM_ERROR; } - if(!acct->coordinators) + if (!acct->coordinators) acct->coordinators = list_create(slurmdb_destroy_coord_rec); /* get direct coords */ @@ -101,7 +101,7 @@ _get_account_coords(pgsql_conn_t *pg_conn, slurmdb_account_rec_t *acct) "WHERE acct='%s' AND deleted=0", acct_coord_table, acct->name); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { @@ -128,7 +128,7 @@ _get_account_coords(pgsql_conn_t *pg_conn, slurmdb_account_rec_t *acct) } END_EACH_CLUSTER; result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { @@ -185,7 +185,7 @@ as_pg_add_accts(pgsql_conn_t *pg_conn, uint32_t uid, List acct_list) itr = list_iterator_create(acct_list); while((object = list_next(itr))) { - if(!object->name || !object->description + if (!object->name || !object->description || !object->organization) { error("as/pg: add_accts: We need an account name, " "description, and organization to add. 
%s %s %s", @@ -201,7 +201,7 @@ as_pg_add_accts(pgsql_conn_t *pg_conn, uint32_t uid, List acct_list) query = xstrdup_printf("SELECT public.add_acct(%s);", rec); xfree(rec); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("as/pg: couldn't add acct %s", object->name); continue; } @@ -209,7 +209,7 @@ as_pg_add_accts(pgsql_conn_t *pg_conn, uint32_t uid, List acct_list) info = xstrdup_printf("description='%s', organization='%s'", object->description, object->organization); - if(txn_query) + if (txn_query) xstrfmtcat(txn_query, ", (%ld, %u, '%s', '%s', $$%s$$)", now, DBD_ADD_ACCOUNTS, object->name, @@ -224,7 +224,7 @@ as_pg_add_accts(pgsql_conn_t *pg_conn, uint32_t uid, List acct_list) user_name, info); xfree(info); - if(!object->assoc_list) + if (!object->assoc_list) continue; list_transfer(assoc_list, object->assoc_list); @@ -232,12 +232,12 @@ as_pg_add_accts(pgsql_conn_t *pg_conn, uint32_t uid, List acct_list) list_iterator_destroy(itr); xfree(user_name); - if(rc == SLURM_SUCCESS) { - if(txn_query) { + if (rc == SLURM_SUCCESS) { + if (txn_query) { xstrcat(txn_query, ";"); rc = pgsql_db_query(pg_conn->db_conn, txn_query); xfree(txn_query); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("as/pg: add_accts: couldn't add txn"); rc = SLURM_SUCCESS; } @@ -245,8 +245,8 @@ as_pg_add_accts(pgsql_conn_t *pg_conn, uint32_t uid, List acct_list) } else xfree(txn_query); - if(rc == SLURM_SUCCESS && list_count(assoc_list)) { - if(acct_storage_p_add_associations(pg_conn, uid, assoc_list) + if (rc == SLURM_SUCCESS && list_count(assoc_list)) { + if (acct_storage_p_add_associations(pg_conn, uid, assoc_list) != SLURM_SUCCESS) { error("as/pg: add_accts: problem adding account " "associations"); @@ -279,14 +279,14 @@ as_pg_modify_accounts(pgsql_conn_t *pg_conn, uint32_t uid, char *vals = NULL, *cond = NULL, *name_char = NULL; time_t now = time(NULL); - if(!acct_cond || !acct) { + if (!acct_cond || !acct) { error("as/pg: modify_accounts: 
we need something to change"); return NULL; } if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; - if(acct_cond->assoc_cond) + if (acct_cond->assoc_cond) concat_cond_list(acct_cond->assoc_cond->acct_list, NULL, "name", &cond); concat_cond_list(acct_cond->description_list, @@ -299,11 +299,11 @@ as_pg_modify_accounts(pgsql_conn_t *pg_conn, uint32_t uid, return NULL; } - if(acct->description) + if (acct->description) xstrfmtcat(vals, ", description='%s'", acct->description); - if(acct->organization) + if (acct->organization) xstrfmtcat(vals, ", organization='%s'", acct->organization); - if(!vals) { + if (!vals) { xfree(cond); errno = SLURM_NO_CHANGE_IN_DATA; error("as/pg: modify_accounts: no new values given"); @@ -314,7 +314,7 @@ as_pg_modify_accounts(pgsql_conn_t *pg_conn, uint32_t uid, acct_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) { + if (!result) { xfree(vals); return NULL; } @@ -324,7 +324,7 @@ as_pg_modify_accounts(pgsql_conn_t *pg_conn, uint32_t uid, FOR_EACH_ROW { object = xstrdup(ROW(0)); list_append(ret_list, object); - if(!rc) { + if (!rc) { xstrfmtcat(name_char, "(name='%s'", object); rc = 1; } else { @@ -334,7 +334,7 @@ as_pg_modify_accounts(pgsql_conn_t *pg_conn, uint32_t uid, } END_EACH_ROW; PQclear(result); - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("as/pg: modify_accounts: didn't effect anything"); xfree(vals); @@ -422,7 +422,7 @@ _get_acct_running_jobs(pgsql_conn_t *pg_conn, char *assoc_cond) job = xstrdup_printf( "JobID = %-10s C = %-10s A = %-10s U = %-9s", ROW(0), ROW(4), ROW(1), ROW(2)); - if(!ISEMPTY(3)) + if (!ISEMPTY(3)) xstrfmtcat(job, " P = %s", ROW(3)); if (!job_list) job_list = list_create(slurm_destroy_char); @@ -499,7 +499,7 @@ _cluster_remove_acct_assoc(pgsql_conn_t *pg_conn, char *cluster, } } - if(rc == SLURM_SUCCESS) + if (rc == SLURM_SUCCESS) rc = pgsql_get_modified_lfts(pg_conn, cluster, smallest_lft); if (rc != SLURM_SUCCESS) { @@ 
-538,21 +538,21 @@ as_pg_remove_accts(pgsql_conn_t *pg_conn, uint32_t uid, time_t now = time(NULL); int rc = SLURM_SUCCESS, has_jobs; - if(!acct_cond) { + if (!acct_cond) { error("as/pg: remove_accts: we need something to remove"); return NULL; } if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; - if(acct_cond->assoc_cond) + if (acct_cond->assoc_cond) concat_cond_list(acct_cond->assoc_cond->acct_list, NULL, "name", &cond); concat_cond_list(acct_cond->description_list, NULL, "description", &cond); concat_cond_list(acct_cond->organization_list, NULL, "organization", &cond); - if(!cond) { + if (!cond) { error("as/pg: remove_accts: nothing to remove"); return NULL; } @@ -562,7 +562,7 @@ as_pg_remove_accts(pgsql_conn_t *pg_conn, uint32_t uid, acct_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) + if (!result) return NULL; rc = 0; @@ -570,7 +570,7 @@ as_pg_remove_accts(pgsql_conn_t *pg_conn, uint32_t uid, FOR_EACH_ROW { char *object = xstrdup(ROW(0)); list_append(ret_list, object); - if(!rc) { + if (!rc) { xstrfmtcat(name_char, "name='%s'", object); xstrfmtcat(assoc_char, "t2.acct='%s'", object); rc = 1; @@ -581,7 +581,7 @@ as_pg_remove_accts(pgsql_conn_t *pg_conn, uint32_t uid, } END_EACH_ROW; PQclear(result); - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("as/pg: remove_accts: didn't effect anything"); return ret_list; @@ -589,7 +589,7 @@ as_pg_remove_accts(pgsql_conn_t *pg_conn, uint32_t uid, /* remove these accounts from the coord's that have it */ tmp_list = acct_storage_p_remove_coord(pg_conn, uid, ret_list, NULL); - if(tmp_list) + if (tmp_list) list_destroy(tmp_list); /* if there are running jobs of the accounts, return the jobs */ @@ -688,17 +688,17 @@ as_pg_get_accts(pgsql_conn_t *pg_conn, uid_t uid, return NULL; } - if(!acct_cond) { + if (!acct_cond) { xstrcat(cond, "WHERE deleted=0"); goto empty; } - if(acct_cond->with_deleted) + if (acct_cond->with_deleted) xstrcat(cond, 
"WHERE (deleted=0 OR deleted=1)"); else xstrcat(cond, "WHERE deleted=0"); - if(acct_cond->assoc_cond) + if (acct_cond->assoc_cond) concat_cond_list(acct_cond->assoc_cond->acct_list, NULL, "name", &cond); concat_cond_list(acct_cond->description_list, @@ -707,12 +707,12 @@ as_pg_get_accts(pgsql_conn_t *pg_conn, uid_t uid, NULL, "organization", &cond); empty: - if(!is_admin) { + if (!is_admin) { slurmdb_coord_rec_t *coord = NULL; set = 0; itr = list_iterator_create(user.coord_accts); while((coord = list_next(itr))) { - if(set) { + if (set) { xstrfmtcat(cond, " OR name='%s'", coord->name); } else { @@ -722,7 +722,7 @@ empty: } } list_iterator_destroy(itr); - if(set) + if (set) xstrcat(cond,")"); } @@ -735,11 +735,11 @@ empty: acct_list = list_create(slurmdb_destroy_account_rec); - if(acct_cond && acct_cond->with_assocs) { - if(!acct_cond->assoc_cond) + if (acct_cond && acct_cond->with_assocs) { + if (!acct_cond->assoc_cond) acct_cond->assoc_cond = xmalloc( sizeof(slurmdb_association_cond_t)); - else if(acct_cond->assoc_cond->acct_list) + else if (acct_cond->assoc_cond->acct_list) list_destroy(acct_cond->assoc_cond->acct_list); acct_cond->assoc_cond->acct_list = list_create(NULL); } @@ -751,9 +751,9 @@ empty: acct->name = xstrdup(ROW(F_NAME)); acct->description = xstrdup(ROW(F_DESC)); acct->organization = xstrdup(ROW(F_ORG)); - if(acct_cond && acct_cond->with_coords) + if (acct_cond && acct_cond->with_coords) _get_account_coords(pg_conn, acct); - if(acct_cond && acct_cond->with_assocs) { + if (acct_cond && acct_cond->with_assocs) { list_append(acct_cond->assoc_cond->acct_list, acct->name); } @@ -761,7 +761,7 @@ empty: PQclear(result); /* get associations */ - if(acct_cond && acct_cond->with_assocs && + if (acct_cond && acct_cond->with_assocs && list_count(acct_cond->assoc_cond->acct_list)) { ListIterator assoc_itr = NULL; slurmdb_account_rec_t *acct = NULL; @@ -769,7 +769,7 @@ empty: List assoc_list = acct_storage_p_get_associations( pg_conn, uid, 
acct_cond->assoc_cond); - if(!assoc_list) { + if (!assoc_list) { error("as/pg: get_accounts: no associations"); return acct_list; } @@ -778,17 +778,17 @@ empty: assoc_itr = list_iterator_create(assoc_list); while((acct = list_next(itr))) { while((assoc = list_next(assoc_itr))) { - if(strcmp(assoc->acct, acct->name)) + if (strcmp(assoc->acct, acct->name)) continue; - if(!acct->assoc_list) + if (!acct->assoc_list) acct->assoc_list = list_create( slurmdb_destroy_association_rec); list_append(acct->assoc_list, assoc); list_remove(assoc_itr); } list_iterator_reset(assoc_itr); - if(!acct->assoc_list) /* problem acct */ + if (!acct->assoc_list) /* problem acct */ list_remove(itr); } list_iterator_destroy(itr); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_acct.h b/src/plugins/accounting_storage/pgsql/as_pg_acct.h index b38a1c64587e9ee8f0001c9c0d9a86eb2812dc4b..15a0e0eade384e251285b445beac7fd2d9ff3605 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_acct.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_acct.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_archive.c b/src/plugins/accounting_storage/pgsql/as_pg_archive.c index 9ae7cf655bdd8618e0d1cf03db8608d833fc0c06..b2e7e412154220c6fc675c234ab960f3f93fcb81 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_archive.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_archive.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -68,7 +68,7 @@ _archive_record(PGresult *result, char *cluster_name, time_t period_end, pack32(cnt, buffer); FOR_EACH_ROW { - if(!period_start) + if (!period_start) period_start = atoi(ROW(0)); /* XXX: ROW(0) must be time_start */ for (i = 0; i < field_cnt; i ++) { packstr(ROW(i), buffer); @@ -80,7 +80,7 @@ _archive_record(PGresult *result, char *cluster_name, time_t period_end, arch_dir, desc, archive_period); free_buf(buffer); - if(error_code != SLURM_SUCCESS) + if (error_code != SLURM_SUCCESS) return error_code; return cnt; @@ -99,7 +99,7 @@ _load_record(uint16_t rpc_version, Buf buffer, char *cluster_name, cluster_name, table, fields); for(i = 0; i < rec_cnt; i ++) { - if(i) + if (i) xstrcat(insert, ", "); for (j = 0; j < field_cnt; j ++) { @@ -291,18 +291,18 @@ _execute_archive(pgsql_conn_t *pg_conn, char *cluster_name, time_t curr_end; time_t last_submit = time(NULL); - if(arch_cond->archive_script) + if (arch_cond->archive_script) return archive_run_script(arch_cond, cluster_name, last_submit); - else if(!arch_cond->archive_dir) { + else if (!arch_cond->archive_dir) { error("No archive dir given, can't process"); return SLURM_ERROR; } - if(arch_cond->purge_event != NO_VAL) { + if (arch_cond->purge_event != NO_VAL) { /* remove all data from event table that was older than * period_start * arch_cond->purge_event. 
*/ - if(!(curr_end = archive_setup_end_time( + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_event))) { error("Parsing purge event"); return SLURM_ERROR; @@ -311,13 +311,13 @@ _execute_archive(pgsql_conn_t *pg_conn, char *cluster_name, debug4("Purging event entries before %ld for %s", (long)curr_end, cluster_name); - if(SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_event)) { + if (SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_event)) { rc = _archive_events(pg_conn, cluster_name, curr_end, arch_cond->archive_dir, arch_cond->purge_event); - if(!rc) + if (!rc) goto exit_events; - else if(rc == SLURM_ERROR) + else if (rc == SLURM_ERROR) return rc; } query = xstrdup_printf("DELETE FROM %s.%s WHERE " @@ -325,7 +325,7 @@ _execute_archive(pgsql_conn_t *pg_conn, char *cluster_name, cluster_name, event_table, (long)curr_end); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't remove old event data"); return SLURM_ERROR; } @@ -333,11 +333,11 @@ _execute_archive(pgsql_conn_t *pg_conn, char *cluster_name, exit_events: - if(arch_cond->purge_suspend != NO_VAL) { + if (arch_cond->purge_suspend != NO_VAL) { /* remove all data from suspend table that was older than * period_start * arch_cond->purge_suspend. 
*/ - if(!(curr_end = archive_setup_end_time( + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_suspend))) { error("Parsing purge suspend"); return SLURM_ERROR; @@ -346,13 +346,13 @@ exit_events: debug4("Purging suspend entries before %ld for %s", (long)curr_end, cluster_name); - if(SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_suspend)) { + if (SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_suspend)) { rc = _archive_suspend(pg_conn, cluster_name, curr_end, arch_cond->archive_dir, arch_cond->purge_suspend); - if(!rc) + if (!rc) goto exit_suspend; - else if(rc == SLURM_ERROR) + else if (rc == SLURM_ERROR) return rc; } query = xstrdup_printf("DELETE FROM %s.%s WHERE " @@ -360,7 +360,7 @@ exit_events: cluster_name, suspend_table, (long)curr_end); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't remove old suspend data"); return SLURM_ERROR; } @@ -368,11 +368,11 @@ exit_events: exit_suspend: - if(arch_cond->purge_step != NO_VAL) { + if (arch_cond->purge_step != NO_VAL) { /* remove all data from step table that was older than * start * arch_cond->purge_step. 
*/ - if(!(curr_end = archive_setup_end_time( + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_step))) { error("Parsing purge step"); return SLURM_ERROR; @@ -381,13 +381,13 @@ exit_suspend: debug4("Purging step entries before %ld for %s", (long)curr_end, cluster_name); - if(SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_step)) { + if (SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_step)) { rc = _archive_steps(pg_conn, cluster_name, curr_end, arch_cond->archive_dir, arch_cond->purge_step); - if(!rc) + if (!rc) goto exit_steps; - else if(rc == SLURM_ERROR) + else if (rc == SLURM_ERROR) return rc; } @@ -396,18 +396,18 @@ exit_suspend: cluster_name, step_table, (long)curr_end); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't remove old step data"); return SLURM_ERROR; } } exit_steps: - if(arch_cond->purge_job != NO_VAL) { + if (arch_cond->purge_job != NO_VAL) { /* remove all data from job table that was older than * last_submit * arch_cond->purge_job. 
*/ - if(!(curr_end = archive_setup_end_time( + if (!(curr_end = archive_setup_end_time( last_submit, arch_cond->purge_job))) { error("Parsing purge job"); return SLURM_ERROR; @@ -416,13 +416,13 @@ exit_steps: debug4("Purging job entires before %ld for %s", (long)curr_end, cluster_name); - if(SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_job)) { + if (SLURMDB_PURGE_ARCHIVE_SET(arch_cond->purge_job)) { rc = _archive_jobs(pg_conn, cluster_name, curr_end, arch_cond->archive_dir, arch_cond->purge_job); - if(!rc) + if (!rc) goto exit_jobs; - else if(rc == SLURM_ERROR) + else if (rc == SLURM_ERROR) return rc; } @@ -430,7 +430,7 @@ exit_steps: "time_submit<=%ld AND time_end!=0", cluster_name, job_table, (long)curr_end); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't remove old job data"); return SLURM_ERROR; } @@ -452,7 +452,7 @@ js_pg_archive(pgsql_conn_t *pg_conn, slurmdb_archive_cond_t *arch_cond) int rc = SLURM_SUCCESS; List cluster_list = NULL; - if(!arch_cond) { + if (!arch_cond) { error("No arch_cond was given to archive from. 
returning"); return SLURM_ERROR; } @@ -491,7 +491,7 @@ js_pg_archive_load(pgsql_conn_t *pg_conn, uint16_t type = 0, ver = 0; uint32_t data_size = 0, rec_cnt = 0, tmp32 = 0; - if(!arch_rec) { + if (!arch_rec) { error("We need a slurmdb_archive_rec to load anything."); return SLURM_ERROR; } @@ -499,9 +499,9 @@ js_pg_archive_load(pgsql_conn_t *pg_conn, if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; - if(arch_rec->insert) { + if (arch_rec->insert) { data = xstrdup(arch_rec->insert); - } else if(arch_rec->archive_file) { + } else if (arch_rec->archive_file) { int data_allocated, data_read = 0; int state_fd = open(arch_rec->archive_file, O_RDONLY); if (state_fd < 0) { @@ -530,7 +530,7 @@ js_pg_archive_load(pgsql_conn_t *pg_conn, } close(state_fd); } - if(error_code != SLURM_SUCCESS) { + if (error_code != SLURM_SUCCESS) { xfree(data); return error_code; } @@ -540,7 +540,7 @@ js_pg_archive_load(pgsql_conn_t *pg_conn, return SLURM_ERROR; } - if(!data) { + if (!data) { error("It doesn't appear we have anything to load."); return SLURM_ERROR; } @@ -563,7 +563,7 @@ js_pg_archive_load(pgsql_conn_t *pg_conn, unpackstr_ptr(&cluster_name, &tmp32, buffer); safe_unpack32(&rec_cnt, buffer); - if(!rec_cnt) { + if (!rec_cnt) { error("we didn't get any records from this file of type '%s'", slurmdbd_msg_type_2_str(type, 0)); free_buf(buffer); @@ -590,7 +590,7 @@ js_pg_archive_load(pgsql_conn_t *pg_conn, free_buf(buffer); got_sql: - if(!data) { + if (!data) { error("No data to load"); return SLURM_ERROR; } diff --git a/src/plugins/accounting_storage/pgsql/as_pg_archive.h b/src/plugins/accounting_storage/pgsql/as_pg_archive.h index f33cd0a16ebea7d391426d34888844ee7c0b90ea..ac269ab5de09360cb360cf9466cc8f1b48185e84 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_archive.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_archive.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_assoc.c b/src/plugins/accounting_storage/pgsql/as_pg_assoc.c index 523aaad8531c1b268721cd3125c6ed5a70069bac..20e3e3ccb9d9901a30de18d77fa10f1c67dc7dc5 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_assoc.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_assoc.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -551,12 +551,12 @@ _make_assoc_rec(slurmdb_association_rec_t *assoc, time_t now, int deleted, concat_limit_64("grp_cpu_mins", assoc->grp_cpu_mins, rec, txn); concat_limit_64("grp_cpu_run_mins", assoc->grp_cpu_run_mins, rec, txn); - if(assoc->def_qos_id == INFINITE) { + if (assoc->def_qos_id == INFINITE) { xstrcat(*rec, "NULL, "); xstrcat(*txn, ", def_qos_id=NULL"); /* 0 is the no def_qos_id, so it that way */ assoc->def_qos_id = 0; - } else if((assoc->def_qos_id != NO_VAL) + } else if ((assoc->def_qos_id != NO_VAL) && ((int32_t)assoc->def_qos_id > 0)) { xstrfmtcat(*rec, "%u, ", assoc->def_qos_id); xstrfmtcat(*txn, ", def_qos_id=%u", assoc->def_qos_id); @@ -573,7 +573,7 @@ _make_assoc_rec(slurmdb_association_rec_t *assoc, time_t now, int deleted, while((tmp = list_next(itr))) { if (!tmp[0]) continue; - if(!delta && (tmp[0] == '+' || tmp[0] == '-')) + if (!delta && (tmp[0] == '+' || tmp[0] == '-')) delta = 1; /* XXX: always with ',' prefix */ xstrfmtcat(qos_val, ",%s", tmp); @@ -668,9 +668,9 @@ _make_cluster_root_assoc_rec(time_t now, slurmdb_cluster_rec_t *cluster, 
concat_limit_64("grp_cpu_run_mins", ra->grp_cpu_run_mins, rec, txn); - if(ra->def_qos_id == INFINITE) { + if (ra->def_qos_id == INFINITE) { xstrcat(*rec, "NULL, "); - } else if((ra->def_qos_id != NO_VAL) + } else if ((ra->def_qos_id != NO_VAL) && ((int32_t)ra->def_qos_id > 0)) { xstrfmtcat(*rec, "%u, ", ra->def_qos_id); } else { @@ -685,7 +685,7 @@ _make_cluster_root_assoc_rec(time_t now, slurmdb_cluster_rec_t *cluster, while((tmp = list_next(itr))) { if (!tmp[0]) continue; - if(!delta && (tmp[0] == '+' || tmp[0] == '-')) + if (!delta && (tmp[0] == '+' || tmp[0] == '-')) delta = 1; /* XXX: always with ',' prefix */ xstrfmtcat(qos_val, ",%s", tmp); @@ -871,7 +871,7 @@ _move_parent(pgsql_conn_t *pg_conn, char *cluster, char *id, uint32_t *lft, } PQclear(result); - if(rc != SLURM_SUCCESS) + if (rc != SLURM_SUCCESS) return rc; /* @@ -880,10 +880,10 @@ _move_parent(pgsql_conn_t *pg_conn, char *cluster, char *id, uint32_t *lft, query = xstrdup_printf("SELECT lft, rgt FROM %s.%s WHERE id_assoc=%s;", cluster, assoc_table, id); result = DEF_QUERY_RET; - if(! result) + if (! result) return SLURM_ERROR; - if(PQntuples(result) > 0) { + if (PQntuples(result) > 0) { /* move account to destination */ *lft = atoi(PG_VAL(0)); *rgt = atoi(PG_VAL(1)); @@ -914,10 +914,10 @@ _make_assoc_cond(slurmdb_association_cond_t *assoc_cond) char *prefix = "t1"; int set = 0; - if(!assoc_cond) + if (!assoc_cond) return NULL; - if(assoc_cond->qos_list && list_count(assoc_cond->qos_list)) { + if (assoc_cond->qos_list && list_count(assoc_cond->qos_list)) { /* * QOSLevel applies to all sub-associations in hierarchy. 
* So find all sub-associations like WithSubAccounts @@ -930,7 +930,7 @@ _make_assoc_cond(slurmdb_association_cond_t *assoc_cond) set = 0; itr = list_iterator_create(assoc_cond->qos_list); while((object = list_next(itr))) { - if(set) + if (set) xstrcat(cond, " OR "); xstrfmtcat(cond, "(%s.qos ~ ',%s(,.+)?$' " @@ -940,7 +940,7 @@ _make_assoc_cond(slurmdb_association_cond_t *assoc_cond) } list_iterator_destroy(itr); xstrcat(cond, ") AND"); - } else if(assoc_cond->with_sub_accts) { + } else if (assoc_cond->with_sub_accts) { prefix = "t2"; xstrfmtcat(cond, ", %%s.%s AS t2 WHERE " "(t1.lft BETWEEN t2.lft AND t2.rgt) AND", @@ -948,7 +948,7 @@ _make_assoc_cond(slurmdb_association_cond_t *assoc_cond) } else /* No QOS condition, no WithSubAccounts */ xstrcat(cond, " WHERE"); - if(assoc_cond->with_deleted) + if (assoc_cond->with_deleted) xstrfmtcat(cond, " (%s.deleted=0 OR %s.deleted=1)", prefix, prefix); else @@ -1001,7 +1001,7 @@ _make_assoc_cond(slurmdb_association_cond_t *assoc_cond) concat_cond_list(assoc_cond->user_list, prefix, "user_name", &cond); /* user_name specified */ - } else if(assoc_cond->user_list) { + } else if (assoc_cond->user_list) { /* we want all the users, but no non-user(account) associations */ debug4("no user specified looking at users"); @@ -1023,10 +1023,10 @@ _make_assoc_limit_vals(slurmdb_association_rec_t *assoc, char **vals) { char *tmp = NULL; - if(!assoc) + if (!assoc) return SLURM_ERROR; - if((int)assoc->shares_raw >= 0) { + if ((int)assoc->shares_raw >= 0) { xstrfmtcat(*vals, ", shares=%u", assoc->shares_raw); } else if (((int)assoc->shares_raw == INFINITE)) { xstrcat(*vals, ", shares=1"); @@ -1122,7 +1122,7 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, F_COUNT }; - if(!ret_list || !acct) + if (!ret_list || !acct) return SLURM_ERROR; /* We want all the sub accounts and user accounts */ @@ -1133,7 +1133,7 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, " ORDER BY lft;", ma_fields, cluster, assoc_table, lft, rgt, 
acct, acct); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { @@ -1146,55 +1146,55 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, mod_assoc->id = atoi(ROW(F_ID)); mod_assoc->cluster = xstrdup(cluster); - if(ISNULL(F_MJ) && assoc->max_jobs != NO_VAL) { + if (ISNULL(F_MJ) && assoc->max_jobs != NO_VAL) { mod_assoc->max_jobs = assoc->max_jobs; modified = 1; } - if(ISNULL(F_MSJ) && assoc->max_submit_jobs != NO_VAL) { + if (ISNULL(F_MSJ) && assoc->max_submit_jobs != NO_VAL) { mod_assoc->max_submit_jobs = assoc->max_submit_jobs; modified = 1; } - if(ISNULL(F_MNPJ) && assoc->max_nodes_pj != NO_VAL) { + if (ISNULL(F_MNPJ) && assoc->max_nodes_pj != NO_VAL) { mod_assoc->max_nodes_pj = assoc->max_nodes_pj; modified = 1; } - if(ISNULL(F_MCPJ) && assoc->max_cpus_pj != NO_VAL) { + if (ISNULL(F_MCPJ) && assoc->max_cpus_pj != NO_VAL) { mod_assoc->max_cpus_pj = assoc->max_cpus_pj; modified = 1; } - if(ISNULL(F_MWPJ) && assoc->max_wall_pj != NO_VAL) { + if (ISNULL(F_MWPJ) && assoc->max_wall_pj != NO_VAL) { mod_assoc->max_wall_pj = assoc->max_wall_pj; modified = 1; } - if(ISNULL(F_MCMPJ) && + if (ISNULL(F_MCMPJ) && assoc->max_cpu_mins_pj != (uint64_t)NO_VAL) { mod_assoc->max_cpu_mins_pj = assoc->max_cpu_mins_pj; modified = 1; } - if(ISNULL(F_MCRM) && + if (ISNULL(F_MCRM) && assoc->max_cpu_run_mins != (uint64_t)NO_VAL) { mod_assoc->max_cpu_run_mins = assoc->max_cpu_run_mins; modified = 1; } - if(ISNULL(F_DEF_QOS) && assoc->def_qos_id != NO_VAL) { + if (ISNULL(F_DEF_QOS) && assoc->def_qos_id != NO_VAL) { mod_assoc->def_qos_id = assoc->def_qos_id; modified = 1; } - if(ISEMPTY(F_QOS) && assoc->qos_list) { + if (ISEMPTY(F_QOS) && assoc->qos_list) { List delta_qos_list = NULL; char *qos_char = NULL, *delta_char = NULL; ListIterator delta_itr = NULL; ListIterator qos_itr = list_iterator_create(assoc->qos_list); - if(! ISEMPTY(F_DELTA_QOS)) { + if (! 
ISEMPTY(F_DELTA_QOS)) { delta_qos_list = list_create(slurm_destroy_char); slurm_addto_char_list(delta_qos_list, @@ -1209,27 +1209,27 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, it to the parent. */ while((qos_char = list_next(qos_itr))) { - if(delta_itr && qos_char[0] != '=') { + if (delta_itr && qos_char[0] != '=') { while((delta_char = list_next(delta_itr))) { - if((qos_char[0] + if ((qos_char[0] != delta_char[0]) && (!strcmp(qos_char+1, delta_char+1))) break; } list_iterator_reset(delta_itr); - if(delta_char) + if (delta_char) continue; } list_append(mod_assoc->qos_list, xstrdup(qos_char)); } list_iterator_destroy(qos_itr); - if(delta_itr) + if (delta_itr) list_iterator_destroy(delta_itr); - if(list_count(mod_assoc->qos_list) + if (list_count(mod_assoc->qos_list) || !list_count(assoc->qos_list)) modified = 1; else { @@ -1239,11 +1239,11 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, } /* We only want to add those that are modified here */ - if(modified) { + if (modified) { /* Since we aren't really changing this non * user association we don't want to send it. */ - if(ISEMPTY(F_USER)) { + if (ISEMPTY(F_USER)) { /* This is a sub account so run it * through as if it is a parent. */ @@ -1259,7 +1259,7 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, } /* We do want to send all user accounts though */ mod_assoc->shares_raw = NO_VAL; - if(! ISEMPTY(F_PART)) { + if (! 
ISEMPTY(F_PART)) { // see if there is a partition name object = xstrdup_printf( "C = %-10s A = %-20s U = %-9s P = %s", @@ -1274,10 +1274,10 @@ _modify_unset_users(pgsql_conn_t *pg_conn, char *cluster, list_append(ret_list, object); - if(moved_parent) + if (moved_parent) slurmdb_destroy_association_rec(mod_assoc); else - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, SLURMDB_MODIFY_ASSOC, mod_assoc) != SLURM_SUCCESS) @@ -1344,7 +1344,7 @@ _get_parent_limits(pgsql_conn_t *pg_conn, char *cluster, "SELECT * FROM %s.get_parent_limits('%s');", cluster, pacct); result = DEF_QUERY_RET; - if(! result) + if (! result) return SLURM_ERROR; if (PQntuples(result) == 0) { @@ -1446,7 +1446,7 @@ _concat_user_get_assoc_cond(pgsql_conn_t *pg_conn, char *cluster, query = xstrdup_printf( "SELECT lft, rgt FROM %s.%s WHERE user_name='%s'", cluster, assoc_table, user->name); - if(user->coord_accts) { + if (user->coord_accts) { slurmdb_coord_rec_t *coord = NULL; itr = list_iterator_create(user->coord_accts); while((coord = list_next(itr))) { @@ -1455,11 +1455,11 @@ _concat_user_get_assoc_cond(pgsql_conn_t *pg_conn, char *cluster, list_iterator_destroy(itr); } result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { - if(set) { + if (set) { xstrfmtcat(*cond, " OR (t1.lft BETWEEN %s AND %s)", ROW(0), ROW(1)); } else { @@ -1468,7 +1468,7 @@ _concat_user_get_assoc_cond(pgsql_conn_t *pg_conn, char *cluster, ROW(0), ROW(1)); } } END_EACH_ROW; - if(set) + if (set) xstrcat(*cond,")"); PQclear(result); return SLURM_SUCCESS; @@ -1554,7 +1554,7 @@ _cluster_get_assocs(pgsql_conn_t *pg_conn, char *cluster, ga_fields, cluster, assoc_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; assoc_list = list_create(slurmdb_destroy_association_rec); @@ -1569,9 +1569,9 @@ _cluster_get_assocs(pgsql_conn_t *pg_conn, char *cluster, assoc->rgt = atoi(ROW(F_RGT)); assoc->cluster = xstrdup(cluster); 
assoc->acct = xstrdup(ROW(F_ACCT)); - if(! ISEMPTY(F_USER)) + if (! ISEMPTY(F_USER)) assoc->user = xstrdup(ROW(F_USER)); - if(! ISEMPTY(F_PART)) + if (! ISEMPTY(F_PART)) assoc->partition = xstrdup(ROW(F_PART)); assoc->grp_jobs = ISNULL(F_GJ) ? INFINITE : atoi(ROW(F_GJ)); @@ -1587,11 +1587,11 @@ _cluster_get_assocs(pgsql_conn_t *pg_conn, char *cluster, assoc->shares_raw = ISNULL(F_FS) ? INFINITE : atoi(ROW(F_FS)); parent_acct = ROW(F_ACCT); - if(!without_parent_info + if (!without_parent_info && !ISEMPTY(F_PARENT)) { assoc->parent_acct = xstrdup(ROW(F_PARENT)); parent_acct = ROW(F_PARENT); - } else if(!assoc->user) { + } else if (!assoc->user) { /* (parent_acct='' AND user_name='') => acct='root' */ parent_acct = NULL; parent_id = 0; @@ -1599,15 +1599,15 @@ _cluster_get_assocs(pgsql_conn_t *pg_conn, char *cluster, last_acct = NULL; } - if(!without_parent_info && parent_acct && + if (!without_parent_info && parent_acct && (!last_acct || strcmp(parent_acct, last_acct))) { _init_parent_limits(&p_assoc); xfree(p_qos); xfree(p_delta); parent_id = _get_parent_id(pg_conn, cluster, parent_acct); - if(!without_parent_limits) { - if(_get_parent_limits(pg_conn, cluster, + if (!without_parent_limits) { + if (_get_parent_limits(pg_conn, cluster, parent_acct, &p_assoc, &p_qos, &p_delta) != SLURM_SUCCESS) { @@ -1639,27 +1639,27 @@ _cluster_get_assocs(pgsql_conn_t *pg_conn, char *cluster, assoc->qos_list = list_create(slurm_destroy_char); /* alway with a ',' in qos and delta_qos */ - if(! ISEMPTY(F_QOS)) + if (! ISEMPTY(F_QOS)) slurm_addto_char_list(assoc->qos_list, ROW(F_QOS)+1); else { /* add the parents first */ - if(p_qos) + if (p_qos) slurm_addto_char_list(assoc->qos_list, p_qos+1); /* then add the parents delta */ - if(p_delta) + if (p_delta) slurm_addto_char_list(delta_qos_list, p_delta+1); /* now add the associations */ - if(! ISEMPTY(F_DELTA_QOS)) + if (! 
ISEMPTY(F_DELTA_QOS)) slurm_addto_char_list(delta_qos_list, ROW(F_DELTA_QOS)+1); } - if(with_raw_qos && list_count(delta_qos_list)) { + if (with_raw_qos && list_count(delta_qos_list)) { list_transfer(assoc->qos_list, delta_qos_list); - } else if(list_count(delta_qos_list)) { + } else if (list_count(delta_qos_list)) { merge_delta_qos_list(assoc->qos_list, delta_qos_list); } list_flush(delta_qos_list); @@ -1674,7 +1674,7 @@ _cluster_get_assocs(pgsql_conn_t *pg_conn, char *cluster, xfree(p_delta); xfree(p_qos); - if(with_usage && assoc_list) + if (with_usage && assoc_list) get_usage_for_assoc_list(pg_conn, cluster, assoc_list, assoc_cond->usage_start, assoc_cond->usage_end); @@ -1730,7 +1730,7 @@ _clusters_assoc_update(pgsql_conn_t *pg_conn, List cluster_list, uid_t uid) itr = list_iterator_create(assoc_list); while((assoc = list_next(itr))) { - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, SLURMDB_MODIFY_ASSOC, assoc) == SLURM_SUCCESS) list_remove(itr); @@ -1752,9 +1752,9 @@ _set_assoc_limits_for_add(pgsql_conn_t *pg_conn, xassert(assoc); - if(assoc->parent_acct) + if (assoc->parent_acct) p_acct = assoc->parent_acct; - else if(assoc->user) + else if (assoc->user) p_acct = assoc->acct; else return SLURM_SUCCESS; @@ -1763,37 +1763,37 @@ _set_assoc_limits_for_add(pgsql_conn_t *pg_conn, &p_qos, &p_delta) != SLURM_SUCCESS) return SLURM_ERROR; - if(p_assoc.def_qos_id && assoc->def_qos_id == NO_VAL) + if (p_assoc.def_qos_id && assoc->def_qos_id == NO_VAL) assoc->def_qos_id = p_assoc.def_qos_id; - else if(assoc->def_qos_id == NO_VAL) + else if (assoc->def_qos_id == NO_VAL) assoc->def_qos_id = 0; - if(p_assoc.max_jobs && assoc->max_jobs == NO_VAL) + if (p_assoc.max_jobs && assoc->max_jobs == NO_VAL) assoc->max_jobs = p_assoc.max_jobs; - if(p_assoc.max_submit_jobs && assoc->max_submit_jobs == NO_VAL) + if (p_assoc.max_submit_jobs && assoc->max_submit_jobs == NO_VAL) assoc->max_submit_jobs = p_assoc.max_submit_jobs; - 
if(p_assoc.max_cpus_pj && assoc->max_cpus_pj == NO_VAL) + if (p_assoc.max_cpus_pj && assoc->max_cpus_pj == NO_VAL) assoc->max_cpus_pj = p_assoc.max_cpus_pj; - if(p_assoc.max_nodes_pj && assoc->max_nodes_pj == NO_VAL) + if (p_assoc.max_nodes_pj && assoc->max_nodes_pj == NO_VAL) assoc->max_nodes_pj = p_assoc.max_nodes_pj; - if(p_assoc.max_wall_pj && assoc->max_wall_pj == NO_VAL) + if (p_assoc.max_wall_pj && assoc->max_wall_pj == NO_VAL) assoc->max_wall_pj = p_assoc.max_wall_pj; - if(p_assoc.max_cpu_mins_pj && assoc->max_cpu_mins_pj == (uint64_t)NO_VAL) + if (p_assoc.max_cpu_mins_pj && assoc->max_cpu_mins_pj == (uint64_t)NO_VAL) assoc->max_cpu_mins_pj = p_assoc.max_cpu_mins_pj; - if(p_assoc.max_cpu_run_mins && assoc->max_cpu_run_mins == (uint64_t)NO_VAL) + if (p_assoc.max_cpu_run_mins && assoc->max_cpu_run_mins == (uint64_t)NO_VAL) assoc->max_cpu_run_mins = p_assoc.max_cpu_run_mins; - if(assoc->qos_list) { + if (assoc->qos_list) { int set = 0; char *tmp_char = NULL; ListIterator qos_itr = list_iterator_create(assoc->qos_list); while((tmp_char = list_next(qos_itr))) { /* we don't want to include blank names */ - if(!tmp_char[0]) + if (!tmp_char[0]) continue; - if(!set) { - if(tmp_char[0] != '+' && tmp_char[0] != '-') + if (!set) { + if (tmp_char[0] != '+' && tmp_char[0] != '-') break; set = 1; } @@ -1801,20 +1801,20 @@ _set_assoc_limits_for_add(pgsql_conn_t *pg_conn, } list_iterator_destroy(qos_itr); - if(tmp_char) { + if (tmp_char) { goto end_it; } list_flush(assoc->qos_list); } else assoc->qos_list = list_create(slurm_destroy_char); - if(p_qos) + if (p_qos) slurm_addto_char_list(assoc->qos_list, p_qos+1); - if(p_delta) + if (p_delta) slurm_addto_char_list(assoc->qos_list, p_delta+1); - if(qos_delta) { + if (qos_delta) { slurm_addto_char_list(assoc->qos_list, qos_delta+1); } @@ -1872,7 +1872,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, F_COUNT }; - if(!assoc_list) { + if (!assoc_list) { error("as/pg: add_associations: no association list given"); 
return SLURM_ERROR; } @@ -1886,7 +1886,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, itr = list_iterator_create(assoc_list); while((object = list_next(itr))) { - if(!object->cluster || !object->cluster[0] || + if (!object->cluster || !object->cluster[0] || !object->acct || !object->acct[0]) { error("We need an association cluster and " "acct to add one."); @@ -1913,21 +1913,21 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, assoc_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("couldn't query the database"); rc = SLURM_ERROR; break; } - if(PQntuples(result) == 0) { /* assoc not in table */ - if(!old_parent || !old_cluster + if (PQntuples(result) == 0) { /* assoc not in table */ + if (!old_parent || !old_cluster || strcasecmp(parent, old_parent) || strcasecmp(object->cluster, old_cluster)) { - if(incr) { /* make space for newly + if (incr) { /* make space for newly * added assocs */ rc = _make_space(pg_conn, old_cluster, p_lft, incr); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't make space"); break; } @@ -1954,7 +1954,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, query = xstrdup_printf("SELECT %s.add_assoc(%s);", object->cluster, rec); xfree(rec); - } else if(atoi(PG_VAL(F_DELETED)) == 0) { + } else if (atoi(PG_VAL(F_DELETED)) == 0) { /* assoc exists and not deleted */ /* We don't need to do anything here */ debug("This association was added already"); @@ -1964,11 +1964,11 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, uint32_t lft = atoi(PG_VAL(F_LFT)); uint32_t rgt = atoi(PG_VAL(F_RGT)); - if(object->parent_acct + if (object->parent_acct && strcasecmp(object->parent_acct, PG_VAL(F_PACCT))) { /* We need to move the parent! 
*/ - if(_move_parent(pg_conn, + if (_move_parent(pg_conn, object->cluster, PG_VAL(F_ID), &lft, &rgt, @@ -2011,7 +2011,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, object->parent_id = p_id; _set_assoc_limits_for_add(pg_conn, object); - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, SLURMDB_ADD_ASSOC, object) == SLURM_SUCCESS) { list_remove(itr); @@ -2019,7 +2019,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, } /* add to txn query string */ - if(txn_query) + if (txn_query) xstrfmtcat(txn_query, ", (%ld, %d, '%d', '%s', $$%s$$)", now, DBD_ADD_ASSOCS, object->id, user_name, @@ -2036,28 +2036,28 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, list_iterator_destroy(itr); xfree(user_name); - if(rc == SLURM_SUCCESS && incr) { + if (rc == SLURM_SUCCESS && incr) { /* _make_space() change delete=2 => deleted=0 */ rc = _make_space(pg_conn, old_cluster, p_lft, incr); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't make space 2"); } } - if(!moved_parent) { + if (!moved_parent) { slurmdb_update_object_t *update_object = NULL; itr = list_iterator_create(pg_conn->update_list);; while((update_object = list_next(itr))) { - if(!update_object->objects + if (!update_object->objects || !list_count(update_object->objects)) continue; - if(update_object->type == SLURMDB_ADD_ASSOC) + if (update_object->type == SLURMDB_ADD_ASSOC) break; } list_iterator_destroy(itr); - if(update_object && update_object->objects + if (update_object && update_object->objects && list_count(update_object->objects)) { ListIterator itr2 = list_iterator_create(update_object->objects); @@ -2065,7 +2065,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, FOR_EACH_CLUSTER(NULL) { uint32_t smallest_lft = 0xFFFFFFFF; while((object = list_next(itr2))) { - if(object->lft < smallest_lft + if (object->lft < smallest_lft && !strcmp(object->cluster, cluster_name)) smallest_lft = object->lft; @@ 
-2073,7 +2073,7 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, list_iterator_reset(itr2); /* now get the lowest lft from the added files by cluster */ - if(smallest_lft != 0xFFFFFFFF) + if (smallest_lft != 0xFFFFFFFF) rc = pgsql_get_modified_lfts( pg_conn, cluster_name, smallest_lft); @@ -2082,19 +2082,19 @@ as_pg_add_associations(pgsql_conn_t *pg_conn, uint32_t uid, } } - if(rc == SLURM_SUCCESS) { - if(txn_query) { + if (rc == SLURM_SUCCESS) { + if (txn_query) { xstrcat(txn_query, ";"); debug3("as/pg(%s:%d) query\n%s", THIS_FILE, __LINE__, txn_query); rc = pgsql_db_query(pg_conn->db_conn, txn_query); xfree(txn_query); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add txn"); rc = SLURM_SUCCESS; } } - if(moved_parent) { + if (moved_parent) { list_flush(pg_conn->update_list); if (_clusters_assoc_update(pg_conn, update_cluster_list, uid) @@ -2146,7 +2146,7 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, ma_fields, cluster, assoc_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; if (PQntuples(result) == 0) { @@ -2179,14 +2179,14 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, * association so account is really the parent * of the user a coord can change that all day long. */ - if(!ISEMPTY(F_PACCT)) + if (!ISEMPTY(F_PACCT)) /* parent_acct != '' => user_name = '' */ account = ROW(F_PACCT); - if(!is_admin) { + if (!is_admin) { if (!is_user_coord(user, account)) { - if(!ISEMPTY(F_PACCT)) + if (!ISEMPTY(F_PACCT)) error("User %s(%d) can not modify " "account (%s) because they " "are not coordinators of " @@ -2207,19 +2207,19 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, } } - if(! ISEMPTY(F_PART)) { /* partition != '' */ + if (! ISEMPTY(F_PART)) { /* partition != '' */ object = xstrdup_printf( "C = %-10s A = %-20s U = %-9s P = %s", cluster, ROW(F_ACCT), ROW(F_USER), ROW(F_PART)); - } else if(! 
ISEMPTY(F_USER)){ /* user != '' */ + } else if (! ISEMPTY(F_USER)){ /* user != '' */ object = xstrdup_printf( "C = %-10s A = %-20s U = %-9s", cluster, ROW(F_ACCT), ROW(F_USER)); } else { - if(assoc->parent_acct) { - if(!strcasecmp(ROW(F_ACCT), + if (assoc->parent_acct) { + if (!strcasecmp(ROW(F_ACCT), assoc->parent_acct)) { error("You can't make an account be " "child of it's self"); @@ -2239,7 +2239,7 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, moved_parent = 1; } - if(! ISEMPTY(F_PACCT)) { + if (! ISEMPTY(F_PACCT)) { object = xstrdup_printf( "C = %-10s A = %s of %s", cluster, ROW(F_ACCT), @@ -2253,7 +2253,7 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, } list_append(ret_list, object); - if(!set) { + if (!set) { xstrfmtcat(name_char, "(id_assoc=%s", ROW(F_ID)); set = 1; } else { @@ -2269,10 +2269,10 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, /* no need to get the parent id since if we moved * parent id's we will get it when we send the total list */ - if(ISEMPTY(F_USER)) + if (ISEMPTY(F_USER)) mod_assoc->parent_acct = xstrdup(assoc->parent_acct); - if(assoc->qos_list && list_count(assoc->qos_list)) { + if (assoc->qos_list && list_count(assoc->qos_list)) { ListIterator new_qos_itr = list_iterator_create(assoc->qos_list); char *new_qos = NULL, *tmp_qos = NULL; @@ -2281,11 +2281,11 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, mod_assoc->qos_list = list_create(slurm_destroy_char); while((new_qos = list_next(new_qos_itr))) { - if(new_qos[0] == '-' || new_qos[0] == '+') { + if (new_qos[0] == '-' || new_qos[0] == '+') { list_append(mod_assoc->qos_list, xstrdup(new_qos)); delta = 1; - } else if(new_qos[0]) { + } else if (new_qos[0]) { list_append(mod_assoc->qos_list, xstrdup_printf("=%s", new_qos)); @@ -2350,11 +2350,11 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, /* TODO: how about here set_assoc_parent_limits_... 
*/ _set_assoc_limits_for_add(pg_conn, mod_assoc); /* XXX: parent account? */ - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, SLURMDB_MODIFY_ASSOC, mod_assoc) != SLURM_SUCCESS) error("couldn't add to the update list"); - if(account_type) { /* propagate change to sub account and users */ + if (account_type) { /* propagate change to sub account and users */ _modify_unset_users(pg_conn, cluster, mod_assoc, @@ -2366,13 +2366,13 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, } END_EACH_ROW; PQclear(result); - if(assoc->parent_acct) { + if (assoc->parent_acct) { if ((rc == ESLURM_INVALID_PARENT_ACCOUNT || rc == ESLURM_SAME_PARENT_ACCOUNT) && list_count(ret_list)) rc = SLURM_SUCCESS; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { list_destroy(ret_list); errno = rc; return SLURM_ERROR; @@ -2380,7 +2380,7 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, } - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { debug3("didn't effect anything"); xfree(vals); list_destroy(ret_list); @@ -2389,7 +2389,7 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, } xstrcat(name_char, ")"); - if(vals) { + if (vals) { char *table = xstrdup_printf("%s.%s", cluster, assoc_table); user_name = uid_to_string((uid_t) user->uid); rc = pgsql_modify_common(pg_conn, DBD_MODIFY_ASSOCS, now, @@ -2404,7 +2404,7 @@ _cluster_modify_associations(pgsql_conn_t *pg_conn, char *cluster, return rc; } } - if(moved_parent) { + if (moved_parent) { List cl = list_create(NULL); list_append(cl, cluster); rc = _clusters_assoc_update(pg_conn, cl, user->uid); @@ -2436,7 +2436,7 @@ as_pg_modify_associations(pgsql_conn_t *pg_conn, uint32_t uid, int is_admin=0, rc = SLURM_SUCCESS; slurmdb_user_rec_t user; - if(!assoc_cond || !assoc) { + if (!assoc_cond || !assoc) { error("as/pg: modify_associations: nothing to change"); return NULL; } @@ -2473,7 +2473,7 @@ as_pg_modify_associations(pgsql_conn_t *pg_conn, 
uint32_t uid, } _make_assoc_limit_vals(assoc, &vals); - if((!vals && !assoc->parent_acct + if ((!vals && !assoc->parent_acct && (!assoc->qos_list || !list_count(assoc->qos_list)))) { error("Nothing to change"); xfree(cond); @@ -2531,7 +2531,7 @@ _get_assoc_running_jobs(pgsql_conn_t *pg_conn, char *cluster, char *assoc_cond) job = xstrdup_printf( "JobID = %-10s C = %-10s A = %-10s U = %-9s", ROW(0), ROW(4), ROW(1), ROW(2)); - if(!ISEMPTY(3)) + if (!ISEMPTY(3)) xstrfmtcat(job, " P = %s", ROW(3)); if (!job_list) job_list = list_create(slurm_destroy_char); @@ -2595,11 +2595,11 @@ _cluster_remove_associations(pgsql_conn_t *pg_conn, char *cluster, cluster, assoc_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { - if(! name_char) + if (! name_char) xstrfmtcat(name_char, "lft BETWEEN %s AND %s", ROW(0), ROW(1)); else @@ -2608,7 +2608,7 @@ _cluster_remove_associations(pgsql_conn_t *pg_conn, char *cluster, } END_EACH_ROW; PQclear(result); - if(!name_char) { + if (!name_char) { return SLURM_SUCCESS; } @@ -2626,7 +2626,7 @@ _cluster_remove_associations(pgsql_conn_t *pg_conn, char *cluster, FOR_EACH_ROW { uint32_t lft; slurmdb_association_rec_t *rem_assoc = NULL; - if(!is_admin && !is_user_coord(user, ROW(F_ACCT))) { + if (!is_admin && !is_user_coord(user, ROW(F_ACCT))) { error("User %s(%d) does not have the " "ability to change this account (%s)", user->name, user->uid, ROW(F_ACCT)); @@ -2634,18 +2634,18 @@ _cluster_remove_associations(pgsql_conn_t *pg_conn, char *cluster, rc = SLURM_ERROR; break; } - if(! ISEMPTY(F_PART)) { + if (! ISEMPTY(F_PART)) { object = xstrdup_printf( "C = %-10s A = %-10s U = %-9s P = %s", cluster, ROW(F_ACCT), ROW(F_USER), ROW(F_PART)); - } else if(! ISEMPTY(F_USER)){ + } else if (! ISEMPTY(F_USER)){ object = xstrdup_printf( "C = %-10s A = %-10s U = %-9s", cluster, ROW(F_ACCT), ROW(F_USER)); } else { - if(! ISEMPTY(F_PACCT)) { + if (! 
ISEMPTY(F_PACCT)) { object = xstrdup_printf( "C = %-10s A = %s of %s", cluster, ROW(F_ACCT), @@ -2658,7 +2658,7 @@ _cluster_remove_associations(pgsql_conn_t *pg_conn, char *cluster, } list_append(ret_list, object); list_append(assoc_id_list, xstrdup(ROW(F_ID))); - if(! assoc_char) + if (! assoc_char) xstrfmtcat(assoc_char, "t1.id_assoc=%s", ROW(F_ID)); else xstrfmtcat(assoc_char, " OR t1.id_assoc=%s", ROW(F_ID)); @@ -2671,14 +2671,14 @@ _cluster_remove_associations(pgsql_conn_t *pg_conn, char *cluster, the modified lfts after it. */ lft = atoi(ROW(F_LFT)); - if(lft < smallest_lft) + if (lft < smallest_lft) smallest_lft = lft; rem_assoc = xmalloc(sizeof(slurmdb_association_rec_t)); slurmdb_init_association_rec(rem_assoc, 0); rem_assoc->id = atoi(ROW(F_ID)); rem_assoc->cluster = xstrdup(cluster); - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, SLURMDB_REMOVE_ASSOC, rem_assoc) != SLURM_SUCCESS) error("couldn't add to the update list"); @@ -2753,7 +2753,7 @@ as_pg_remove_associations(pgsql_conn_t *pg_conn, uint32_t uid, slurmdb_user_rec_t user; char *cond; - if(!assoc_cond) { + if (!assoc_cond) { error("as/pg: remove_associations: no condition given"); return NULL; } @@ -2830,7 +2830,7 @@ as_pg_get_associations(pgsql_conn_t *pg_conn, uid_t uid, } - if(!assoc_cond) + if (!assoc_cond) xstrcat(cond, " WHERE deleted=0"); else cond = _make_assoc_cond(assoc_cond); @@ -2901,7 +2901,7 @@ get_user_from_associd(pgsql_conn_t *pg_conn, char *cluster, query = xstrdup_printf("SELECT user_name FROM %s.%s WHERE id_assoc=%u", cluster, assoc_table, associd); result = DEF_QUERY_RET; - if(!result) + if (!result) return NULL; if (PQntuples(result)) user_name = xstrdup(PG_VAL(0)); @@ -2933,7 +2933,7 @@ pgsql_get_modified_lfts(pgsql_conn_t *pg_conn, assoc->id = atoi(ROW(0)); assoc->lft = atoi(ROW(1)); assoc->cluster = xstrdup(cluster_name); - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, 
SLURMDB_MODIFY_ASSOC, assoc) != SLURM_SUCCESS) slurmdb_destroy_association_rec(assoc); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_assoc.h b/src/plugins/accounting_storage/pgsql/as_pg_assoc.h index f5db3ada5e88359b546b77e8f613f657a24c7347..cbe0d1ca0902759167050baeb50f9d2cf00862f4 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_assoc.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_assoc.h @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_cluster.c b/src/plugins/accounting_storage/pgsql/as_pg_cluster.c index c2ce59facefc97e7090f53d32c0c77a1e20f82bc..f19c0c0a5825cd28afd3d044bd42ca5a39acccab 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_cluster.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_cluster.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -188,7 +188,7 @@ as_pg_add_clusters(pgsql_conn_t *pg_conn, uint32_t uid, user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(cluster_list); while((object = list_next(itr))) { - if(!object->name) { + if (!object->name) { error("as/pg: add_clusters: We need a cluster " "name to add."); rc = SLURM_ERROR; @@ -212,7 +212,7 @@ as_pg_add_clusters(pgsql_conn_t *pg_conn, uint32_t uid, (long)now, (long)now, object->name, object->classification); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add cluster %s", object->name); added = 0; /* rollback modification to DB */ break; @@ -251,7 +251,7 @@ as_pg_add_clusters(pgsql_conn_t *pg_conn, uint32_t uid, assoc->cluster = xstrdup(object->name); assoc->user = xstrdup("root"); assoc->acct = xstrdup("root"); - if(acct_storage_p_add_associations(pg_conn, uid, assoc_list) + if (acct_storage_p_add_associations(pg_conn, uid, assoc_list) == SLURM_ERROR) { error("Problem adding root user association"); rc = SLURM_ERROR; @@ -305,17 +305,17 @@ as_pg_modify_clusters(pgsql_conn_t *pg_conn, uint32_t uid, return NULL; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; - if(!pg_conn->cluster_name + if (!pg_conn->cluster_name && cluster_cond->cluster_list && list_count(cluster_cond->cluster_list)) pg_conn->cluster_name = xstrdup(list_peek(cluster_cond->cluster_list)); concat_cond_list(cluster_cond->cluster_list, NULL, "name", &cond); - if(cluster_cond->classification) { + if (cluster_cond->classification) { xstrfmtcat(cond, " AND (classification & %u)", cluster_cond->classification); } @@ -354,12 +354,12 @@ as_pg_modify_clusters(pgsql_conn_t *pg_conn, uint32_t uid, cluster->classification); } - if(!vals) { + if (!vals) { xfree(cond); errno = SLURM_NO_CHANGE_IN_DATA; error("as/pg: modify_clusters: nothing to change"); return NULL; - } else 
if(clust_reg && (set != 3)) { + } else if (clust_reg && (set != 3)) { xfree(vals); xfree(cond); errno = EFAULT; @@ -384,7 +384,7 @@ as_pg_modify_clusters(pgsql_conn_t *pg_conn, uint32_t uid, FOR_EACH_ROW { object = xstrdup(ROW(0)); list_append(ret_list, object); - if(!rc) { + if (!rc) { xstrfmtcat(name_char, "name='%s'", object); rc = 1; } else { @@ -393,14 +393,14 @@ as_pg_modify_clusters(pgsql_conn_t *pg_conn, uint32_t uid, } END_EACH_ROW; PQclear(result); - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("as/pg: modify_cluster: nothing effected"); xfree(vals); return ret_list; } - if(vals) { + if (vals) { send_char = xstrdup_printf("(%s)", name_char); user_name = uid_to_string((uid_t) uid); rc = pgsql_modify_common(pg_conn, DBD_MODIFY_CLUSTERS, now, @@ -450,7 +450,7 @@ _get_cluster_running_jobs(pgsql_conn_t *pg_conn, char *cluster) job = xstrdup_printf( "JobID = %-10s C = %-10s A = %-10s U = %-9s", ROW(0), cluster, ROW(1), ROW(2)); - if(!ISEMPTY(3)) + if (!ISEMPTY(3)) xstrfmtcat(job, " P = %s", ROW(3)); if (!job_list) job_list = list_create(slurm_destroy_char); @@ -495,15 +495,15 @@ as_pg_remove_clusters(pgsql_conn_t *pg_conn, uint32_t uid, char *cond = NULL, *user_name = NULL; time_t now = time(NULL); - if(!cluster_cond) { + if (!cluster_cond) { error("as/pg: remove_clusters: we need something to remove"); return NULL; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; concat_cond_list(cluster_cond->cluster_list, NULL, "name", &cond); - if(!cond) { + if (!cond) { error("as/pg: remove_clusters: nothing to remove"); return NULL; } @@ -618,12 +618,12 @@ as_pg_get_clusters(pgsql_conn_t *pg_conn, uid_t uid, if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; - if(!cluster_cond) { + if (!cluster_cond) { xstrcat(cond, "WHERE deleted=0"); goto empty; } - if(cluster_cond->with_deleted) + if (cluster_cond->with_deleted) xstrcat(cond, "WHERE 
(deleted=0 OR deleted=1)"); else xstrcat(cond, "WHERE deleted=0"); @@ -640,7 +640,7 @@ empty: cluster_list = list_create(slurmdb_destroy_cluster_rec); memset(&assoc_cond, 0, sizeof(slurmdb_association_cond_t)); - if(cluster_cond) { + if (cluster_cond) { /* I don't think we want the with_usage flag here. * We do need the with_deleted though. */ //assoc_cond.with_usage = cluster_cond->with_usage; @@ -657,7 +657,7 @@ empty: list_append(assoc_cond.cluster_list, cluster->name); /* get the usage if requested */ - if(cluster_cond && cluster_cond->with_usage) { + if (cluster_cond && cluster_cond->with_usage) { as_pg_get_usage(pg_conn, uid, cluster, DBD_GET_CLUSTER_USAGE, cluster_cond->usage_start, @@ -676,7 +676,7 @@ empty: } END_EACH_ROW; PQclear(result); - if(!list_count(assoc_cond.cluster_list)) { + if (!list_count(assoc_cond.cluster_list)) { list_destroy(assoc_cond.cluster_list); return cluster_list; } @@ -693,17 +693,17 @@ empty: list_destroy(assoc_cond.acct_list); list_destroy(assoc_cond.user_list); - if(!assoc_list) + if (!assoc_list) return cluster_list; itr = list_iterator_create(cluster_list); assoc_itr = list_iterator_create(assoc_list); while((cluster = list_next(itr))) { while((assoc = list_next(assoc_itr))) { - if(strcmp(assoc->cluster, cluster->name)) + if (strcmp(assoc->cluster, cluster->name)) continue; - if(cluster->root_assoc) { + if (cluster->root_assoc) { debug("This cluster %s already has " "an association.", cluster->name); continue; @@ -715,7 +715,7 @@ empty: } list_iterator_destroy(itr); list_iterator_destroy(assoc_itr); - if(list_count(assoc_list)) + if (list_count(assoc_list)) error("I have %d left over associations", list_count(assoc_list)); list_destroy(assoc_list); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_cluster.h b/src/plugins/accounting_storage/pgsql/as_pg_cluster.h index 68829a57475ffed4e742ef222250f4f6b283ed75..ed9dc5f336e62306a4c10ccbf0c86b096904e2bf 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_cluster.h +++ 
b/src/plugins/accounting_storage/pgsql/as_pg_cluster.h @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_common.c b/src/plugins/accounting_storage/pgsql/as_pg_common.c index a1fff6606d52771621fb451e22791be3c53ebf22..ed45a89615f739720c6b775ff10e272e815bcdac 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_common.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_common.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -72,11 +72,11 @@ concat_cond_list(List cond_list, char *prefix, char *col, char **cond_str) char *object; ListIterator itr = NULL; - if(cond_list && list_count(cond_list)) { + if (cond_list && list_count(cond_list)) { xstrcat(*cond_str, " AND ("); itr = list_iterator_create(cond_list); while((object = list_next(itr))) { - if(set) + if (set) xstrcat(*cond_str, " OR "); if (prefix) xstrfmtcat(*cond_str, "%s.%s='%s'", @@ -98,11 +98,11 @@ concat_node_state_cond_list(List cond_list, char *prefix, char *object; ListIterator itr = NULL; - if(cond_list && list_count(cond_list)) { + if (cond_list && list_count(cond_list)) { xstrcat(*cond_str, " AND ("); itr = list_iterator_create(cond_list); while((object = list_next(itr))) { - if(set) + if (set) xstrcat(*cond_str, " OR "); /* node states are numeric */ /* TODO: NODE_STATE_UNKNOWN == 0, fails the condition*/ @@ -136,11 +136,11 @@ concat_like_cond_list(List cond_list, char *prefix, char *col, char **cond_str) char *object; ListIterator itr = NULL; - if(cond_list && list_count(cond_list)) { + if (cond_list && list_count(cond_list)) { xstrcat(*cond_str, " AND ("); itr = list_iterator_create(cond_list); while((object = list_next(itr))) { - if(set) + if (set) xstrcat(*cond_str, " OR "); /* XXX: strings cond_list turned to lower case by slurm_addto_char_list(). 
@@ -238,7 +238,7 @@ pgsql_modify_common(pgsql_conn_t *pg_conn, uint16_t type, time_t now, rc = add_txn(pg_conn, now, cluster, type, name_char, user_name, (vals+2)); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { reset_pgsql_conn(pg_conn); return SLURM_ERROR; } @@ -259,7 +259,7 @@ check_db_connection(pgsql_conn_t *pg_conn) error("as/pg: we need a connection to run this"); errno = SLURM_ERROR; return SLURM_ERROR; - } else if(!pg_conn->db_conn || + } else if (!pg_conn->db_conn || PQstatus(pg_conn->db_conn) != CONNECTION_OK) { info("as/pg: database connection lost."); PQreset(pg_conn->db_conn); @@ -327,8 +327,8 @@ check_table(PGconn *db_conn, char *schema, char *table, static void _destroy_local_cluster(void *object) { local_cluster_t *local_cluster = (local_cluster_t *)object; - if(local_cluster) { - if(local_cluster->hl) + if (local_cluster) { + if (local_cluster->hl) hostlist_destroy(local_cluster->hl); FREE_NULL_BITMAP(local_cluster->asked_bitmap); xfree(local_cluster); @@ -349,10 +349,10 @@ setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond) hostlist_t temp_hl = NULL; hostlist_iterator_t h_itr = NULL; - if(!job_cond || !job_cond->used_nodes) + if (!job_cond || !job_cond->used_nodes) return NULL; - if(!job_cond->cluster_list || list_count(job_cond->cluster_list) != 1) { + if (!job_cond->cluster_list || list_count(job_cond->cluster_list) != 1) { error("If you are doing a query against nodes " "you must only have 1 cluster " "you are asking for."); @@ -360,7 +360,7 @@ setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond) } temp_hl = hostlist_create(job_cond->used_nodes); - if(!hostlist_count(temp_hl)) { + if (!hostlist_count(temp_hl)) { error("we didn't get any real hosts to look for."); hostlist_destroy(temp_hl); return NULL; @@ -372,8 +372,8 @@ setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond) (char *)list_peek(job_cond->cluster_list), event_table); - if(job_cond->usage_start) { - 
if(!job_cond->usage_end) + if (job_cond->usage_start) { + if (!job_cond->usage_end) job_cond->usage_end = now; xstrfmtcat(query, " AND ((time_start<%ld) " @@ -382,7 +382,7 @@ setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond) } result = DEF_QUERY_RET; - if(!result) { + if (!result) { hostlist_destroy(temp_hl); return NULL; } @@ -401,15 +401,15 @@ setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond) local_cluster->asked_bitmap = bit_alloc(hostlist_count(local_cluster->hl)); while((host = hostlist_next(h_itr))) { - if((loc = hostlist_find( + if ((loc = hostlist_find( local_cluster->hl, host)) != -1) bit_set(local_cluster->asked_bitmap, loc); free(host); } hostlist_iterator_reset(h_itr); - if(bit_ffs(local_cluster->asked_bitmap) != -1) { + if (bit_ffs(local_cluster->asked_bitmap) != -1) { list_append(cnodes->cluster_list, local_cluster); - if(local_cluster->end == 0) { + if (local_cluster->end == 0) { local_cluster->end = now; cnodes->curr_cluster = local_cluster; } @@ -418,7 +418,7 @@ setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond) } END_EACH_ROW; PQclear(result); hostlist_iterator_destroy(h_itr); - if(!list_count(cnodes->cluster_list)) { + if (!list_count(cnodes->cluster_list)) { destroy_cluster_nodes(cnodes); cnodes = NULL; } @@ -448,17 +448,17 @@ good_nodes_from_inx(cluster_nodes_t *cnodes, char *node_inx, int submit) if (! 
cnodes) return 1; - if(!node_inx || !node_inx[0]) + if (!node_inx || !node_inx[0]) return 0; - if(!cnodes->curr_cluster || + if (!cnodes->curr_cluster || (submit < (cnodes->curr_cluster)->start) || (submit > (cnodes->curr_cluster)->end)) { local_cluster_t *local_cluster = NULL; ListIterator itr = list_iterator_create(cnodes->cluster_list); while((local_cluster = list_next(itr))) { - if((submit >= local_cluster->start) + if ((submit >= local_cluster->start) && (submit <= local_cluster->end)) { cnodes->curr_cluster = local_cluster; break; @@ -470,7 +470,7 @@ good_nodes_from_inx(cluster_nodes_t *cnodes, char *node_inx, int submit) } job_bitmap = bit_alloc(hostlist_count((cnodes->curr_cluster)->hl)); bit_unfmt(job_bitmap, node_inx); - if(!bit_overlap((cnodes->curr_cluster)->asked_bitmap, job_bitmap)) { + if (!bit_overlap((cnodes->curr_cluster)->asked_bitmap, job_bitmap)) { FREE_NULL_BITMAP(job_bitmap); return 0; } @@ -484,7 +484,7 @@ reset_pgsql_conn(pgsql_conn_t *pg_conn) { int saved_errno = errno; - if(pg_conn->rollback) { + if (pg_conn->rollback) { pgsql_db_rollback(pg_conn->db_conn); } list_flush(pg_conn->update_list); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_common.h b/src/plugins/accounting_storage/pgsql/as_pg_common.h index 9d95c964dcd217735d098d8f95bbd82304d8ced8..a5fb17746824f50356da5abba5d51808bdf48e4b 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_common.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_common.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_event.c b/src/plugins/accounting_storage/pgsql/as_pg_event.c index 3d6aa243c8bdd3e9f38ce997ef8abae20e363bd3..76e804bf99daf1249678f919a57eb6110c047215 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_event.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_event.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -155,7 +155,7 @@ cs_pg_node_down(pgsql_conn_t *pg_conn, struct node_record *node_ptr, uint16_t cpus; char *query = NULL, *my_reason; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (!cluster_in_db(pg_conn, pg_conn->cluster_name)) { @@ -199,7 +199,7 @@ cs_pg_node_up(pgsql_conn_t *pg_conn, struct node_record *node_ptr, { char* query; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (!cluster_in_db(pg_conn, pg_conn->cluster_name)) { @@ -231,11 +231,11 @@ cs_pg_register_ctld(pgsql_conn_t *pg_conn, char *cluster, uint16_t port) time_t now = time(NULL); uint32_t flags = slurmdb_setup_cluster_flags(); - if(slurmdbd_conf) + if (slurmdbd_conf) fatal("clusteracct_storage_g_register_ctld " "should never be called from the slurmdbd."); - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (!pg_conn->cluster_name) @@ -251,7 +251,7 @@ cs_pg_register_ctld(pgsql_conn_t *pg_conn, char *cluster, uint16_t port) gethostname(hostname, sizeof(hostname)); /* check if we are running on the 
backup controller */ - if(slurmctld_conf.backup_controller + if (slurmctld_conf.backup_controller && !strcmp(slurmctld_conf.backup_controller, hostname)) { address = slurmctld_conf.backup_addr; } else @@ -290,7 +290,7 @@ cs_pg_cluster_cpus(pgsql_conn_t *pg_conn, char *cluster_nodes, char* query; int rc = SLURM_SUCCESS, got_cpus = 0, first = 0; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (!cluster_in_db(pg_conn, pg_conn->cluster_name)) { @@ -304,11 +304,11 @@ cs_pg_cluster_cpus(pgsql_conn_t *pg_conn, char *cluster_nodes, "AND node_name='' LIMIT 1;", pg_conn->cluster_name, event_table); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; /* we only are checking the first one here */ - if(!PQntuples(result)) { + if (!PQntuples(result)) { debug("We don't have an entry for this machine %s " "most likely a first time running.", pg_conn->cluster_name); @@ -324,12 +324,12 @@ cs_pg_cluster_cpus(pgsql_conn_t *pg_conn, char *cluster_nodes, goto add_it; } got_cpus = atoi(PG_VAL(0)); - if(got_cpus == cpus) { + if (got_cpus == cpus) { debug3("we have the same cpu count as before for %s, " "no need to update the database.", pg_conn->cluster_name); - if(cluster_nodes) { - if(PG_EMPTY(1)) { + if (cluster_nodes) { + if (PG_EMPTY(1)) { debug("Adding cluster nodes '%s' to " "last instance of cluster '%s'.", cluster_nodes, pg_conn->cluster_name); @@ -340,7 +340,7 @@ cs_pg_cluster_cpus(pgsql_conn_t *pg_conn, char *cluster_nodes, event_table, cluster_nodes); rc = DEF_QUERY_RET_RC; goto end_it; - } else if(!strcmp(cluster_nodes, + } else if (!strcmp(cluster_nodes, PG_VAL(1))) { debug3("we have the same nodes in the cluster " "as before no need to " @@ -361,7 +361,7 @@ cs_pg_cluster_cpus(pgsql_conn_t *pg_conn, char *cluster_nodes, pg_conn->cluster_name, event_table, (event_time-1)); rc = DEF_QUERY_RET_RC; first = 1; - if(rc != SLURM_SUCCESS) + if (rc != SLURM_SUCCESS) goto 
end_it; add_it: query = xstrdup_printf( @@ -409,15 +409,15 @@ as_pg_get_events(pgsql_conn_t *pg_conn, uid_t uid, F_COUNT }; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; cond = xstrdup("WHERE TRUE"); - if(!event_cond) + if (!event_cond) goto empty; - if(event_cond->cpus_min) { - if(event_cond->cpus_max) { + if (event_cond->cpus_min) { + if (event_cond->cpus_max) { xstrfmtcat(cond, " AND (cpu_count BETWEEN %u AND %u)", event_cond->cpus_min, event_cond->cpus_max); @@ -442,8 +442,8 @@ as_pg_get_events(pgsql_conn_t *pg_conn, uid_t uid, } concat_cond_list(event_cond->node_list, NULL, "node_name", &cond); - if(event_cond->period_start) { - if(!event_cond->period_end) + if (event_cond->period_start) { + if (!event_cond->period_end) event_cond->period_end = now; xstrfmtcat(cond, " AND (time_start < %ld) " @@ -479,7 +479,7 @@ empty: list_append(ret_list, event); event->cluster = xstrdup(cluster_name); - if(ISEMPTY(F_NODE)) { + if (ISEMPTY(F_NODE)) { event->event_type = SLURMDB_EVENT_CLUSTER; } else { event->node_name = xstrdup(ROW(F_NODE)); @@ -489,10 +489,10 @@ empty: event->state = atoi(ROW(F_STATE)); event->period_start = atoi(ROW(F_START)); event->period_end = atoi(ROW(F_END)); - if(!ISEMPTY(F_REASON)) + if (!ISEMPTY(F_REASON)) event->reason = xstrdup(ROW(F_REASON)); event->reason_uid = atoi(ROW(F_REASON_UID)); - if(!ISEMPTY(F_CNODES)) + if (!ISEMPTY(F_CNODES)) event->cluster_nodes = xstrdup(ROW(F_CNODES)); } END_EACH_ROW; diff --git a/src/plugins/accounting_storage/pgsql/as_pg_event.h b/src/plugins/accounting_storage/pgsql/as_pg_event.h index 3ea609e4a939174c9cbff449d9cec840d93897cb..904e051e396bbbe3b41a6cc9fd7e9a3ae21ccc3e 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_event.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_event.h @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_get_jobs.c b/src/plugins/accounting_storage/pgsql/as_pg_get_jobs.c index c1de50ea0f4a73ccde1b6e4ca814d63031757573..eb6910dcc31da890666da56a276c20c83fbea0fd 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_get_jobs.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_get_jobs.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -214,14 +214,14 @@ static void _state_time_string(char **extra, uint32_t state, { int base_state = state & JOB_STATE_BASE; - if(!start && !end) { + if (!start && !end) { xstrfmtcat(*extra, "t1.state=%u", state); return; } switch(base_state) { case JOB_PENDING: - if(start && !end) { + if (start && !end) { xstrfmtcat(*extra, "(t1.time_eligible!=0 AND (t1.time_start=0" " OR (%d BETWEEN " @@ -243,7 +243,7 @@ static void _state_time_string(char **extra, uint32_t state, /* FIX ME: this should do something with the suspended table, but it doesn't right now. 
*/ case JOB_RUNNING: - if(start && !end) { + if (start && !end) { xstrfmtcat(*extra, "(t1.time_start!=0 AND (t1.time_end=0 OR " "(%d BETWEEN t1.time_start AND " @@ -268,13 +268,13 @@ static void _state_time_string(char **extra, uint32_t state, case JOB_PREEMPTED: default: xstrfmtcat(*extra, "(t1.state=%u AND (t1.time_end!=0 AND ", state); - if(start && !end) { + if (start && !end) { xstrfmtcat(*extra, "(t1.time_end >= %d)))", start); } else if (start && end) { xstrfmtcat(*extra, "(t1.time_end BETWEEN %d AND %d)))", start, end); - } else if(end) { + } else if (end) { xstrfmtcat(*extra, "(t1.time_end <= %d)))", end); } break; @@ -301,11 +301,11 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, xstrcat (*cond, " WHERE TRUE"); - if(!job_cond) + if (!job_cond) return; /* THIS ASSOCID CHECK ALWAYS NEEDS TO BE FIRST!!!!!!! */ - if(job_cond->associd_list && list_count(job_cond->associd_list)) { + if (job_cond->associd_list && list_count(job_cond->associd_list)) { set = 0; xstrfmtcat(*extra_table, ", %%s.%s AS t3", assoc_table); @@ -327,12 +327,12 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, concat_cond_list(job_cond->partition_list, "t1", "partition", cond); concat_cond_list(job_cond->qos_list, "t1", "id_qos", cond); - if(job_cond->step_list && list_count(job_cond->step_list)) { + if (job_cond->step_list && list_count(job_cond->step_list)) { set = 0; xstrcat(*cond, " AND ("); itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if(set) + if (set) xstrcat(*cond, " OR "); xstrfmtcat(*cond, "t1.id_job=%u", selected_step->jobid); set = 1; @@ -342,13 +342,13 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, } - if(job_cond->state_list && list_count(job_cond->state_list)) { + if (job_cond->state_list && list_count(job_cond->state_list)) { set = 0; xstrcat(*cond, " AND ("); itr = list_iterator_create(job_cond->state_list); while((object = list_next(itr))) { - 
if(set) + if (set) xstrcat(*cond, " OR "); _state_time_string(cond, atoi(object), job_cond->usage_start, @@ -360,8 +360,8 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, } else { /* Only do this (default of all eligible jobs) if no state is given */ - if(job_cond->usage_start) { - if(!job_cond->usage_end) + if (job_cond->usage_start) { + if (!job_cond->usage_end) xstrfmtcat(*cond, " AND ((t1.time_end>=%ld " "OR t1.time_end=0))", (long)job_cond->usage_start); @@ -371,7 +371,7 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, " OR t1.time_end=0))", (long)job_cond->usage_end, (long)job_cond->usage_start); - } else if(job_cond->usage_end) { + } else if (job_cond->usage_end) { xstrfmtcat(*cond, " AND (t1.time_eligible<%ld)", (long)job_cond->usage_end); } @@ -380,8 +380,8 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, concat_cond_list(job_cond->state_list, "t1", "state", cond); concat_cond_list(job_cond->wckey_list, "t1", "wckey", cond); - if(job_cond->cpus_min) { - if(job_cond->cpus_max) + if (job_cond->cpus_min) { + if (job_cond->cpus_max) xstrfmtcat(*cond, " AND ((t1.cpus_alloc BETWEEN %u AND %u))", job_cond->cpus_min, job_cond->cpus_max); @@ -390,8 +390,8 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, job_cond->cpus_min); } - if(job_cond->nodes_min) { - if(job_cond->nodes_max) + if (job_cond->nodes_min) { + if (job_cond->nodes_max) xstrfmtcat(*cond, " AND ((t1.nodes_alloc BETWEEN %u AND %u))", job_cond->nodes_min, job_cond->nodes_max); @@ -401,8 +401,8 @@ _make_job_cond_str(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond, job_cond->nodes_min); } - if(job_cond->timelimit_min) { - if(job_cond->timelimit_max) { + if (job_cond->timelimit_min) { + if (job_cond->timelimit_max) { xstrfmtcat(*cond, " AND (t1.timelimit BETWEEN %u AND %u))", job_cond->timelimit_min, @@ -427,7 +427,7 @@ _concat_cluster_job_cond_str(pgsql_conn_t *pg_conn, char *cluster, /* this must be 
done before resvid_list since we set resvid_list up here */ - if(job_cond->resv_list && list_count(job_cond->resv_list)) { + if (job_cond->resv_list && list_count(job_cond->resv_list)) { query = xstrdup_printf( "SELECT DISTINCT id_resv FROM %s.%s WHERE TRUE ", cluster, resv_table); @@ -435,11 +435,11 @@ _concat_cluster_job_cond_str(pgsql_conn_t *pg_conn, char *cluster, concat_cond_list(job_cond->resv_list, NULL, "resv_name", &query); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("as/pg: couldn't get resv id"); goto no_resv; } - if(!job_cond->resvid_list) + if (!job_cond->resvid_list) job_cond->resvid_list = list_create(slurm_destroy_char); FOR_EACH_ROW { list_append(job_cond->resvid_list, xstrdup(ROW(0))); @@ -462,7 +462,7 @@ _concat_user_job_cond_str(pgsql_conn_t *pg_conn, char *cluster, query = xstrdup_printf("SELECT lft,rgt FROM %s.%s WHERE user_name='%s'", cluster, assoc_table, user->name); - if(user->coord_accts) { + if (user->coord_accts) { slurmdb_coord_rec_t *coord = NULL; itr = list_iterator_create(user->coord_accts); while((coord = list_next(itr))) { @@ -472,11 +472,11 @@ _concat_user_job_cond_str(pgsql_conn_t *pg_conn, char *cluster, list_iterator_destroy(itr); } result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { - if(set) { + if (set) { xstrfmtcat(*cond, " OR (%s.lft BETWEEN %s AND %s)", table_level, ROW(0), ROW(1)); @@ -487,7 +487,7 @@ _concat_user_job_cond_str(pgsql_conn_t *pg_conn, char *cluster, table_level, ROW(0), ROW(1)); } } END_EACH_ROW; - if(set) + if (set) xstrcat(*cond, ")"); PQclear(result); return SLURM_SUCCESS; @@ -513,7 +513,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, _concat_cluster_job_cond_str(pg_conn, cluster, job_cond, &cond); - if(!is_admin) { + if (!is_admin) { if (_concat_user_job_cond_str(pg_conn, cluster, user, sent_extra ? 
"t3" : "t2", &cond) @@ -527,7 +527,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, "AS t2 ON t1.id_assoc=t2.id_assoc ", job_fields, cluster, job_table, cluster, assoc_table); - if(sent_extra) { + if (sent_extra) { extra_table = xstrdup_printf(sent_extra, cluster); xstrcat(query, extra_table); xfree(extra_table); @@ -535,7 +535,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, xstrcat(query, cond); xfree(cond); - if(job_cond && job_cond->used_nodes) { + if (job_cond && job_cond->used_nodes) { cnodes = setup_cluster_nodes(pg_conn, job_cond); if (!cnodes) return SLURM_ERROR; @@ -544,13 +544,13 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, /* Here we want to order them this way in such a way so it is easy to look for duplicates */ - if(job_cond && !job_cond->duplicates) + if (job_cond && !job_cond->duplicates) xstrcat(query, " ORDER BY id_job, time_submit DESC;"); else xstrcat(query, " ORDER BY time_submit DESC;"); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; cluster_job_list = list_create(slurmdb_destroy_job_rec); @@ -564,14 +564,14 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, submit = atoi(ROW(JF_SUBMIT)); curr_id = atoi(ROW(JF_JOBID)); - if(job_cond && !job_cond->duplicates && curr_id == last_id) + if (job_cond && !job_cond->duplicates && curr_id == last_id) continue; last_id = curr_id; /* check the bitmap to see if this is one of the jobs we are looking for */ - if(!good_nodes_from_inx(cnodes, ROW(JF_NODE_INX), submit)) + if (!good_nodes_from_inx(cnodes, ROW(JF_NODE_INX), submit)) continue; debug3("as/pg: get_jobs_cond: job %d past node test", curr_id); @@ -587,26 +587,26 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, job->cluster = xstrdup(cluster); /* we want a blank wckey if the name is null */ - if(! ISNULL(JF_WCKEY)) + if (! ISNULL(JF_WCKEY)) job->wckey = xstrdup(ROW(JF_WCKEY)); else job->wckey = xstrdup(""); job->wckeyid = atoi(ROW(JF_WCKEYID)); - if(! 
ISNULL(JF_USER_NAME)) + if (! ISNULL(JF_USER_NAME)) job->user = xstrdup(ROW(JF_USER_NAME)); else job->uid = atoi(ROW(JF_UID)); - if(! ISNULL(JF_LFT)) + if (! ISNULL(JF_LFT)) job->lft = atoi(ROW(JF_LFT)); - if(! ISEMPTY(JF_ACCOUNT)) + if (! ISEMPTY(JF_ACCOUNT)) job->account = xstrdup(ROW(JF_ACCOUNT)); - else if(! ISEMPTY(JF_ACCOUNT1)) + else if (! ISEMPTY(JF_ACCOUNT1)) job->account = xstrdup(ROW(JF_ACCOUNT1)); - if(! ISNULL(JF_BLOCKID)) + if (! ISNULL(JF_BLOCKID)) job->blockid = xstrdup(ROW(JF_BLOCKID)); job->eligible = atoi(ROW(JF_ELIGIBLE)); @@ -616,26 +616,26 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, job->timelimit = atoi(ROW(JF_TIMELIMIT)); /* since the job->end could be set later end it here */ - if(job->end) { + if (job->end) { job_ended = 1; - if(!job->start || (job->start > job->end)) + if (!job->start || (job->start > job->end)) job->start = job->end; } - if(job_cond && !job_cond->without_usage_truncation + if (job_cond && !job_cond->without_usage_truncation && job_cond->usage_start) { - if(job->start && (job->start < job_cond->usage_start)) + if (job->start && (job->start < job_cond->usage_start)) job->start = job_cond->usage_start; - if(!job->end || job->end > job_cond->usage_end) + if (!job->end || job->end > job_cond->usage_end) job->end = job_cond->usage_end; - if(!job->start) + if (!job->start) job->start = job->end; job->elapsed = job->end - job->start; - if(ROW(JF_SUSPENDED)) { + if (ROW(JF_SUSPENDED)) { int local_start, local_end; /* get the suspended time for this job */ query = xstrdup_printf( @@ -648,7 +648,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, (long)job_cond->usage_end, (long)job_cond->usage_start, id); result2 = DEF_QUERY_RET; - if(!result2) { + if (!result2) { list_destroy(cluster_job_list); cluster_job_list = NULL; rc = SLURM_ERROR; @@ -657,15 +657,15 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, FOR_EACH_ROW2 { local_start = atoi(ROW2(0)); local_end = atoi(ROW2(1)); - if(!local_start) + 
if (!local_start) continue; - if(job->start > local_start) + if (job->start > local_start) local_start = job->start; - if(job->end < local_end) + if (job->end < local_end) local_end = job->end; - if((local_end - local_start) < 1) + if ((local_end - local_start) < 1) continue; job->elapsed -= @@ -681,9 +681,9 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, if (job->state == JOB_SUSPENDED) job->suspended = now - job->suspended; - if(!job->start) { + if (!job->start) { job->elapsed = 0; - } else if(!job->end) { + } else if (!job->end) { job->elapsed = now - job->start; } else { job->elapsed = job->end - job->start; @@ -692,7 +692,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, job->elapsed -= job->suspended; } - if((int)job->elapsed < 0) + if ((int)job->elapsed < 0) job->elapsed = 0; job->jobid = curr_id; @@ -700,10 +700,10 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, job->gid = atoi(ROW(JF_GID)); job->exitcode = atoi(ROW(JF_COMP_CODE)); - if(! ISEMPTY(JF_PARTITION)) + if (! ISEMPTY(JF_PARTITION)) job->partition = xstrdup(ROW(JF_PARTITION)); - if(! ISEMPTY(JF_NODELIST)) + if (! 
ISEMPTY(JF_NODELIST)) job->nodes = xstrdup(ROW(JF_NODELIST)); if (!job->nodes || !strcmp(job->nodes, "(null)")) { @@ -718,17 +718,17 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, job->qosid = atoi(ROW(JF_QOS)); job->show_full = 1; - if(only_pending || (job_cond && job_cond->without_steps)) + if (only_pending || (job_cond && job_cond->without_steps)) goto skip_steps; - if(job_cond && job_cond->step_list + if (job_cond && job_cond->step_list && list_count(job_cond->step_list)) { slurmdb_selected_step_t *selected_step = NULL; int set = 0; ListIterator itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if(selected_step->jobid != job->jobid) { + if (selected_step->jobid != job->jobid) { continue; } else if (selected_step->stepid == (uint32_t)NO_VAL) { @@ -736,7 +736,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, break; } - if(set) + if (set) xstrcat(cond, " OR "); else xstrcat(cond, " AND ("); @@ -747,19 +747,19 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, job->show_full = 0; } list_iterator_destroy(itr); - if(set) + if (set) xstrcat(cond, ")"); } query = xstrdup_printf( "SELECT %s FROM %s.%s AS t1 WHERE t1.job_db_inx=%s", step_fields, cluster, step_table, id); - if(cond) { + if (cond) { xstrcat(query, cond); xfree(cond); } result2 = DEF_QUERY_RET; - if(!result2) { + if (!result2) { list_destroy(cluster_job_list); cluster_job_list = NULL; rc = SLURM_ERROR; @@ -773,7 +773,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, FOR_EACH_ROW2 { /* check the bitmap to see if this is one of the steps we are looking for */ - if(!good_nodes_from_inx(cnodes, ROW2(SF_NODE_INX), + if (!good_nodes_from_inx(cnodes, ROW2(SF_NODE_INX), submit)) continue; @@ -781,7 +781,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, step->tot_cpu_sec = 0; step->tot_cpu_usec = 0; step->job_ptr = job; - if(!job->first_step_ptr) + if (!job->first_step_ptr) job->first_step_ptr = step; 
list_append(job->steps, step); step->stepid = atoi(ROW2(SF_STEPID)); @@ -794,42 +794,42 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, step->ntasks = atoi(ROW2(SF_TASKS)); step->task_dist = atoi(ROW2(SF_TASKDIST)); - if(!step->ntasks) + if (!step->ntasks) step->ntasks = step->ncpus; step->start = atoi(ROW2(SF_START)); step->end = atoi(ROW2(SF_END)); /* if the job has ended end the step also */ - if(!step->end && job_ended) { + if (!step->end && job_ended) { step->end = job->end; step->state = job->state; } - if(job_cond && !job_cond->without_usage_truncation + if (job_cond && !job_cond->without_usage_truncation && job_cond->usage_start) { - if(step->start + if (step->start && (step->start < job_cond->usage_start)) step->start = job_cond->usage_start; - if(!step->start && step->end) + if (!step->start && step->end) step->start = step->end; - if(!step->end + if (!step->end || (step->end > job_cond->usage_end)) step->end = job_cond->usage_end; } /* figure this out by start stop */ step->suspended = atoi(ROW2(SF_SUSPENDED)); - if(!step->end) { + if (!step->end) { step->elapsed = now - step->start; } else { step->elapsed = step->end - step->start; } step->elapsed -= step->suspended; - if((int)step->elapsed < 0) + if ((int)step->elapsed < 0) step->elapsed = 0; step->user_cpu_sec = atoi(ROW2(SF_USER_SEC)); @@ -879,7 +879,7 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, } END_EACH_ROW2; PQclear(result2); - if(!job->track_steps) { + if (!job->track_steps) { /* If we don't have track_steps we want to see if we have multiple steps. If we only have 1 step check the job name against the step @@ -887,10 +887,10 @@ _cluster_get_jobs(pgsql_conn_t *pg_conn, char *cluster, different. If it is different print out the step separate. 
*/ - if(list_count(job->steps) > 1) + if (list_count(job->steps) > 1) job->track_steps = 1; - else if(step && step->stepname && job->jobname) { - if(strcmp(step->stepname, job->jobname)) + else if (step && step->stepname && job->jobname) { + if (strcmp(step->stepname, job->jobname)) job->track_steps = 1; } } @@ -929,7 +929,7 @@ js_pg_get_jobs_cond(pgsql_conn_t *pg_conn, uid_t uid, List job_list = list_create(slurmdb_destroy_job_rec); char *cond = NULL, *extra_table = NULL; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; if (check_user_op(pg_conn, uid, PRIVATE_DATA_JOBS, &is_admin, &user) @@ -939,7 +939,7 @@ js_pg_get_jobs_cond(pgsql_conn_t *pg_conn, uid_t uid, return NULL; } - if(job_cond->state_list && (list_count(job_cond->state_list) == 1) + if (job_cond->state_list && (list_count(job_cond->state_list) == 1) && (atoi(list_peek(job_cond->state_list)) == JOB_PENDING)) only_pending = 1; diff --git a/src/plugins/accounting_storage/pgsql/as_pg_job.c b/src/plugins/accounting_storage/pgsql/as_pg_job.c index 2cda905a47691ce685c5937c467dd153f68368d7..b4e533d40c7aa00e70aae5d1f087867584813ec8 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_job.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_job.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -157,10 +157,10 @@ _get_db_index(pgsql_conn_t *pg_conn, time_t submit, uint32_t jobid, " AND id_job=%u AND id_assoc=%u", pg_conn->cluster_name, job_table, submit, jobid, associd); result = DEF_QUERY_RET; - if(!result) + if (!result) return 0; - if(!PQntuples(result)) { + if (!PQntuples(result)) { debug("We can't get a db_index for this combo, " "time_submit=%ld and id_job=%u and id_assoc=%u." "We must not have heard about the start yet, " @@ -190,7 +190,7 @@ _check_job_db_index(pgsql_conn_t *pg_conn, struct job_record *job_ptr) else submit_time = job_ptr->details->submit_time; - if(!job_ptr->db_index) { + if (!job_ptr->db_index) { job_ptr->db_index = _get_db_index( pg_conn, submit_time, @@ -200,7 +200,7 @@ _check_job_db_index(pgsql_conn_t *pg_conn, struct job_record *job_ptr) /* If we get an error with this just fall * through to avoid an infinite loop */ - if(jobacct_storage_p_job_start(pg_conn, job_ptr) + if (jobacct_storage_p_job_start(pg_conn, job_ptr) == SLURM_ERROR) { error("couldn't add job %u ", job_ptr->job_id); @@ -406,9 +406,9 @@ js_pg_job_start(pgsql_conn_t *pg_conn, * removed. This is most likely the only time we are going to * be notified of the change also so make the state without * the resize. */ - if(IS_JOB_RESIZING(job_ptr)) { + if (IS_JOB_RESIZING(job_ptr)) { /* If we have a db_index lets end the previous record. */ - if(job_ptr->db_index) + if (job_ptr->db_index) js_pg_job_complete(pg_conn, job_ptr); else error("We don't have a db_index for job %u, " @@ -440,7 +440,7 @@ js_pg_job_start(pgsql_conn_t *pg_conn, check_time = submit_time; slurm_mutex_lock(&usage_rollup_lock); - if(check_time < global_last_rollup) { + if (check_time < global_last_rollup) { PGresult *result = NULL; /* check to see if we are hearing about this time for the * first time. 
@@ -451,7 +451,7 @@ js_pg_job_start(pgsql_conn_t *pg_conn, pg_conn->cluster_name, job_table, job_ptr->job_id, submit_time, begin_time, start_time); result = DEF_QUERY_RET; - if(!result) { + if (!result) { slurm_mutex_unlock(&usage_rollup_lock); return SLURM_ERROR; } @@ -465,13 +465,13 @@ js_pg_job_start(pgsql_conn_t *pg_conn, } PQclear(result); - if(job_ptr->start_time) + if (job_ptr->start_time) debug("Need to reroll usage from %s Job %u " "from %s started then and we are just " "now hearing about it.", ctime(&check_time), job_ptr->job_id, pg_conn->cluster_name); - else if(begin_time) + else if (begin_time) debug("Need to reroll usage from %s Job %u " "from %s became eligible then and we are just " "now hearing about it.", @@ -509,17 +509,17 @@ no_rollup_change: else nodes = "None assigned"; - if(job_ptr->batch_flag) + if (job_ptr->batch_flag) track_steps = 1; - if(slurmdbd_conf) { + if (slurmdbd_conf) { block_id = xstrdup(job_ptr->comment); node_cnt = job_ptr->total_nodes; node_inx = job_ptr->network; } else { char temp_bit[BUF_SIZE]; - if(job_ptr->node_bitmap) { + if (job_ptr->node_bitmap) { node_inx = bit_fmt(temp_bit, sizeof(temp_bit), job_ptr->node_bitmap); } @@ -537,13 +537,13 @@ no_rollup_change: /* If there is a start_time get the wckeyid. If the job is * cancelled before the job starts we also want to grab it. 
*/ - if(job_ptr->assoc_id && + if (job_ptr->assoc_id && (job_ptr->start_time || IS_JOB_CANCELLED(job_ptr))) wckeyid = get_wckeyid(pg_conn, &job_ptr->wckey, job_ptr->user_id, pg_conn->cluster_name, job_ptr->assoc_id); - if(!job_ptr->db_index) { + if (!job_ptr->db_index) { if (!begin_time) begin_time = submit_time; @@ -595,7 +595,7 @@ no_rollup_change: job_ptr->db_index = pgsql_query_ret_id(pg_conn->db_conn, query); if (!job_ptr->db_index) { - if(!reinit) { + if (!reinit) { error("It looks like the storage has gone " "away trying to reconnect"); check_db_connection(pg_conn); @@ -608,17 +608,17 @@ no_rollup_change: } else { query = xstrdup_printf("UPDATE %s.%s SET nodelist='%s', ", pg_conn->cluster_name, job_table, nodes); - if(job_ptr->account) + if (job_ptr->account) xstrfmtcat(query, "account='%s', ", job_ptr->account); - if(job_ptr->partition) + if (job_ptr->partition) xstrfmtcat(query, "partition='%s', ", job_ptr->partition); - if(block_id) + if (block_id) xstrfmtcat(query, "blockid='%s', ", block_id); - if(job_ptr->wckey) + if (job_ptr->wckey) xstrfmtcat(query, "wckey='%s', ", job_ptr->wckey); - if(node_inx) + if (node_inx) xstrfmtcat(query, "node_inx='%s', ", node_inx); xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%d, " @@ -686,7 +686,7 @@ js_pg_job_complete(pgsql_conn_t *pg_conn, } slurm_mutex_lock(&usage_rollup_lock); - if(end_time < global_last_rollup) { + if (end_time < global_last_rollup) { global_last_rollup = job_ptr->end_time; slurm_mutex_unlock(&usage_rollup_lock); @@ -757,17 +757,17 @@ js_pg_step_start(pgsql_conn_t *pg_conn, if (step_ptr->job_ptr->resize_time) { submit_time = start_time = step_ptr->job_ptr->resize_time; - if(step_ptr->start_time > submit_time) + if (step_ptr->start_time > submit_time) start_time = step_ptr->start_time; } else { start_time = step_ptr->start_time; submit_time = step_ptr->job_ptr->details->submit_time; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) 
return ESLURM_DB_CONNECTION; - if(slurmdbd_conf) { + if (slurmdbd_conf) { tasks = step_ptr->job_ptr->details->num_tasks; cpus = step_ptr->cpu_count; snprintf(node_list, BUFFER_SIZE, "%s", @@ -778,7 +778,7 @@ js_pg_step_start(pgsql_conn_t *pg_conn, } else { char temp_bit[BUF_SIZE]; - if(step_ptr->step_node_bitmap) { + if (step_ptr->step_node_bitmap) { node_inx = bit_fmt(temp_bit, sizeof(temp_bit), step_ptr->step_node_bitmap); } @@ -787,7 +787,7 @@ js_pg_step_start(pgsql_conn_t *pg_conn, select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); - if(ionodes) { + if (ionodes) { snprintf(node_list, BUFFER_SIZE, "%s[%s]", step_ptr->job_ptr->nodes, ionodes); xfree(ionodes); @@ -798,7 +798,7 @@ js_pg_step_start(pgsql_conn_t *pg_conn, SELECT_JOBDATA_NODE_CNT, &nodes); #else - if(!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { + if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { tasks = cpus = step_ptr->job_ptr->total_cpus; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); @@ -888,14 +888,14 @@ js_pg_step_complete(pgsql_conn_t *pg_conn, if (step_ptr->job_ptr->resize_time) { submit_time = start_time = step_ptr->job_ptr->resize_time; - if(step_ptr->start_time > submit_time) + if (step_ptr->start_time > submit_time) start_time = step_ptr->start_time; } else { start_time = step_ptr->start_time; submit_time = step_ptr->job_ptr->details->submit_time; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (jobacct == NULL) { @@ -904,7 +904,7 @@ js_pg_step_complete(pgsql_conn_t *pg_conn, jobacct = &dummy_jobacct; } - if(slurmdbd_conf) { + if (slurmdbd_conf) { now = step_ptr->job_ptr->end_time; cpus = step_ptr->cpu_count; } else { @@ -913,7 +913,7 @@ js_pg_step_complete(pgsql_conn_t *pg_conn, cpus = step_ptr->job_ptr->details->min_cpus; #else - if(!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) + if 
(!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) cpus = step_ptr->job_ptr->total_cpus; else cpus = step_ptr->cpu_count; @@ -934,7 +934,7 @@ js_pg_step_complete(pgsql_conn_t *pg_conn, } /* figure out the ave of the totals sent */ - if(cpus > 0) { + if (cpus > 0) { ave_vsize = (double)jobacct->tot_vsize; ave_vsize /= (double)cpus; ave_rss = (double)jobacct->tot_rss; @@ -945,7 +945,7 @@ js_pg_step_complete(pgsql_conn_t *pg_conn, ave_cpu /= (double)cpus; } - if(jobacct->min_cpu != (uint32_t)NO_VAL) { + if (jobacct->min_cpu != (uint32_t)NO_VAL) { ave_cpu2 = (double)jobacct->min_cpu; } @@ -1087,7 +1087,7 @@ as_pg_flush_jobs_on_cluster(pgsql_conn_t *pg_conn, time_t event_time) char *id_char = NULL; char *suspended_char = NULL; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (! cluster_in_db(pg_conn, pg_conn->cluster_name) ) { @@ -1107,8 +1107,8 @@ as_pg_flush_jobs_on_cluster(pgsql_conn_t *pg_conn, time_t event_time) FOR_EACH_ROW { int state = atoi(ROW(1)); - if(state == JOB_SUSPENDED) { - if(suspended_char) + if (state == JOB_SUSPENDED) { + if (suspended_char) xstrfmtcat(suspended_char, " OR job_db_inx=%s", ROW(0)); else @@ -1116,14 +1116,14 @@ as_pg_flush_jobs_on_cluster(pgsql_conn_t *pg_conn, time_t event_time) ROW(0)); } - if(id_char) + if (id_char) xstrfmtcat(id_char, " OR job_db_inx=%s", ROW(0)); else xstrfmtcat(id_char, "job_db_inx=%s", ROW(0)); } END_EACH_ROW; PQclear(result); - if(suspended_char) { + if (suspended_char) { xstrfmtcat(query, "UPDATE %s.%s SET time_suspended=%ld-time_suspended " "WHERE %s;", pg_conn->cluster_name, job_table, @@ -1139,7 +1139,7 @@ as_pg_flush_jobs_on_cluster(pgsql_conn_t *pg_conn, time_t event_time) suspended_char); xfree(suspended_char); } - if(id_char) { + if (id_char) { xstrfmtcat(query, "UPDATE %s.%s SET state=%d, time_end=%ld WHERE %s;", pg_conn->cluster_name, job_table, @@ -1151,7 +1151,7 @@ 
as_pg_flush_jobs_on_cluster(pgsql_conn_t *pg_conn, time_t event_time) xfree(id_char); } - if(query) + if (query) rc = DEF_QUERY_RET_RC; return rc; diff --git a/src/plugins/accounting_storage/pgsql/as_pg_job.h b/src/plugins/accounting_storage/pgsql/as_pg_job.h index 403d20ddea72f93a6e58bcb1562dc4e2e898f0e7..01a1d29e535f1e6ababd5c6206da61e9078cfa70 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_job.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_job.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_problem.c b/src/plugins/accounting_storage/pgsql/as_pg_problem.c index 230ad0b52eb93e87bd4450842f129f7736dfc378..94909a9640044a16b675558200d373d70cc4b487 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_problem.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_problem.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -58,7 +58,7 @@ _get_acct_no_assocs(pgsql_conn_t *pg_conn, slurmdb_association_cond_t *assoc_q, concat_cond_list(assoc_q->acct_list, NULL, "name", &query); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { @@ -74,7 +74,7 @@ _get_acct_no_assocs(pgsql_conn_t *pg_conn, slurmdb_association_cond_t *assoc_q, } END_EACH_CLUSTER; xstrcat(query, " LIMIT 1;"); result2 = DEF_QUERY_RET; - if(!result2) { + if (!result2) { rc = SLURM_ERROR; break; } @@ -126,7 +126,7 @@ _get_acct_no_users(pgsql_conn_t *pg_conn, slurmdb_association_cond_t *assoc_q, xfree(cond); xstrcat(query, " ORDER BY cluster, acct;"); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { @@ -134,13 +134,13 @@ _get_acct_no_users(pgsql_conn_t *pg_conn, slurmdb_association_cond_t *assoc_q, xmalloc(sizeof(slurmdb_association_rec_t)); list_append(ret_list, assoc); assoc->id = SLURMDB_PROBLEM_ACCT_NO_USERS; -/* if(ROW(F_USER)[0]) */ +/* if (ROW(F_USER)[0]) */ /* assoc->user = xstrdup(ROW(F_USER)); */ assoc->acct = xstrdup(ROW(F_ACCT)); assoc->cluster = xstrdup(ROW(F_COUNT)); - if(ROW(F_PARENT)[0]) + if (ROW(F_PARENT)[0]) assoc->parent_acct = xstrdup(ROW(F_PARENT)); -/* if(ROW(F_PART)[0]) */ +/* if (ROW(F_PART)[0]) */ /* assoc->partition = xstrdup(ROW(F_PART)); */ } END_EACH_ROW; PQclear(result); @@ -161,11 +161,11 @@ _get_user_no_assocs_or_no_uid(pgsql_conn_t *pg_conn, query = xstrdup_printf("SELECT name FROM %s WHERE deleted=0", user_table); - if(assoc_q) + if (assoc_q) concat_cond_list(assoc_q->user_list, NULL, "name", &query); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; FOR_EACH_ROW { @@ -191,7 +191,7 @@ _get_user_no_assocs_or_no_uid(pgsql_conn_t *pg_conn, } END_EACH_CLUSTER; xstrcat(query, " LIMIT 1;"); result2 = DEF_QUERY_RET; - if(!result2) { + if (!result2) { rc = SLURM_ERROR; break; } @@ -221,20 +221,20 @@ 
as_pg_get_problems(pgsql_conn_t *pg_conn, uid_t uid, { List ret_list = NULL; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; ret_list = list_create(slurmdb_destroy_association_rec); - if(_get_acct_no_assocs(pg_conn, assoc_q, ret_list) + if (_get_acct_no_assocs(pg_conn, assoc_q, ret_list) != SLURM_SUCCESS) goto end_it; - if(_get_acct_no_users(pg_conn, assoc_q, ret_list) + if (_get_acct_no_users(pg_conn, assoc_q, ret_list) != SLURM_SUCCESS) goto end_it; - if(_get_user_no_assocs_or_no_uid(pg_conn, assoc_q, ret_list) + if (_get_user_no_assocs_or_no_uid(pg_conn, assoc_q, ret_list) != SLURM_SUCCESS) goto end_it; diff --git a/src/plugins/accounting_storage/pgsql/as_pg_problem.h b/src/plugins/accounting_storage/pgsql/as_pg_problem.h index d8e0667112d0c177658fa1ac13def5fe821ea4f7..d1444207fcc86b6cd6266f6b3c04321d7c56a115 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_problem.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_problem.h @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_qos.c b/src/plugins/accounting_storage/pgsql/as_pg_qos.c index 880efceeb67c46136eee7e54ebfe266036acd419..780333c17dbe4afd8f5f5d975bfc46a784070a95 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_qos.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_qos.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -209,7 +209,7 @@ _make_qos_record_for_add(slurmdb_qos_rec_t *object, time_t now, xstrfmtcat(*rec, "'', "); } - if((object->preempt_mode != (uint16_t)NO_VAL) + if ((object->preempt_mode != (uint16_t)NO_VAL) && ((int16_t)object->preempt_mode >= 0)) { object->preempt_mode &= (~PREEMPT_MODE_GANG); xstrfmtcat(*rec, "%u, ", object->preempt_mode); @@ -295,7 +295,7 @@ _make_qos_vals_for_modify(slurmdb_qos_rec_t *qos, char **vals, concat_limit_64("grp_cpu_mins", qos->grp_cpu_mins, NULL, vals); concat_limit_64("grp_cpu_run_mins", qos->grp_cpu_run_mins, NULL, vals); - if(qos->preempt_list && list_count(qos->preempt_list)) { + if (qos->preempt_list && list_count(qos->preempt_list)) { char *preempt_val = NULL; char *tmp_char = NULL, *begin_preempt = NULL; ListIterator preempt_itr = @@ -304,25 +304,25 @@ _make_qos_vals_for_modify(slurmdb_qos_rec_t *qos, char **vals, begin_preempt = xstrdup("preempt"); while((tmp_char = list_next(preempt_itr))) { - if(tmp_char[0] == '-') { + if (tmp_char[0] == '-') { xstrfmtcat(preempt_val, "replace(%s, ',%s', '')", begin_preempt, tmp_char+1); xfree(begin_preempt); begin_preempt = preempt_val; - } else if(tmp_char[0] == '+') { + } else if (tmp_char[0] == '+') { xstrfmtcat(preempt_val, "(replace(%s, ',%s', '') || ',%s')", begin_preempt, tmp_char+1, tmp_char+1); - if(added_preempt) + if (added_preempt) xstrfmtcat(*added_preempt, ",%s", tmp_char+1); xfree(begin_preempt); begin_preempt = preempt_val; - } else if(tmp_char[0]) { + } else if (tmp_char[0]) { xstrfmtcat(preempt_val, ",%s", tmp_char); - if(added_preempt) + if (added_preempt) xstrfmtcat(*added_preempt, ",%s", tmp_char); } else @@ -335,9 +335,9 @@ _make_qos_vals_for_modify(slurmdb_qos_rec_t *qos, char **vals, concat_limit_32("priority", qos->priority, NULL, vals); - if(qos->usage_factor >= 0) { + if (qos->usage_factor >= 0) { xstrfmtcat(*vals, ", usage_factor=%f", qos->usage_factor); - } else 
if((int)qos->usage_factor == INFINITE) { + } else if ((int)qos->usage_factor == INFINITE) { xstrcat(*vals, ", usage_factor=1.0"); } return; @@ -362,7 +362,7 @@ _preemption_loop(pgsql_conn_t *pg_conn, int begin_qosid, /* check in the preempt list for all qos's preempted */ for(i=0; i<bit_size(preempt_bitstr); i++) { - if(!bit_test(preempt_bitstr, i)) + if (!bit_test(preempt_bitstr, i)) continue; memset(&qos_rec, 0, sizeof(qos_rec)); @@ -372,19 +372,19 @@ _preemption_loop(pgsql_conn_t *pg_conn, int begin_qosid, NULL); /* check if the begin_qosid is preempted by this qos * if so we have a loop */ - if(qos_rec.preempt_bitstr + if (qos_rec.preempt_bitstr && bit_test(qos_rec.preempt_bitstr, begin_qosid)) { error("QOS id %d has a loop at QOS %s", begin_qosid, qos_rec.name); rc = 1; break; - } else if(qos_rec.preempt_bitstr) { + } else if (qos_rec.preempt_bitstr) { /* * qos_rec.preempt_bitstr are also (newly introduced) * preemptees of begin_qosid. * i.e., preemption is transitive */ - if((rc = _preemption_loop(pg_conn, begin_qosid, + if ((rc = _preemption_loop(pg_conn, begin_qosid, qos_rec.preempt_bitstr))) break; } @@ -402,7 +402,7 @@ _set_qos_cnt(PGconn *db_conn) result = pgsql_db_query_ret(db_conn, query); xfree(query); - if(!result) + if (!result) return SLURM_ERROR; if (PQntuples(result) == 0) { PQclear(result); @@ -443,7 +443,7 @@ check_qos_tables(PGconn *db_conn) char *qos = NULL, *desc = NULL, *query = NULL; time_t now = time(NULL); - if(slurmdbd_conf && slurmdbd_conf->default_qos) { + if (slurmdbd_conf && slurmdbd_conf->default_qos) { slurm_addto_char_list(char_list, slurmdbd_conf->default_qos); desc = "Added as default"; @@ -470,14 +470,14 @@ check_qos_tables(PGconn *db_conn) DEBUG_QUERY; qos_id = pgsql_query_ret_id(db_conn, query); xfree(query); - if(!qos_id) + if (!qos_id) fatal("problem add default qos '%s'", qos); xstrfmtcat(default_qos_str, ",%d", qos_id); } list_iterator_destroy(itr); list_destroy(char_list); - if(_set_qos_cnt(db_conn) != SLURM_SUCCESS) + 
if (_set_qos_cnt(db_conn) != SLURM_SUCCESS) return SLURM_ERROR; } return rc; @@ -506,7 +506,7 @@ as_pg_add_qos(pgsql_conn_t *pg_conn, uint32_t uid, List qos_list) user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(qos_list); while((object = list_next(itr))) { - if(!object->name || !object->name[0]) { + if (!object->name || !object->name[0]) { error("as/pg: add_qos: We need a qos name to add."); rc = SLURM_ERROR; continue; @@ -520,7 +520,7 @@ as_pg_add_qos(pgsql_conn_t *pg_conn, uint32_t uid, List qos_list) xstrfmtcat(query, "SELECT public.add_qos(%s);", rec); object->id = DEF_QUERY_RET_ID; - if(!object->id) { + if (!object->id) { error("as/pg: couldn't add qos %s", object->name); added=0; break; @@ -528,10 +528,10 @@ as_pg_add_qos(pgsql_conn_t *pg_conn, uint32_t uid, List qos_list) rc = add_txn(pg_conn, now, "", DBD_ADD_QOS, object->name, user_name, txn); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add txn"); } else { - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, SLURMDB_ADD_QOS, object) == SLURM_SUCCESS) list_remove(itr); @@ -541,7 +541,7 @@ as_pg_add_qos(pgsql_conn_t *pg_conn, uint32_t uid, List qos_list) list_iterator_destroy(itr); xfree(user_name); - if(!added) + if (!added) reset_pgsql_conn(pg_conn); return rc; @@ -568,15 +568,15 @@ as_pg_modify_qos(pgsql_conn_t *pg_conn, uint32_t uid, time_t now = time(NULL); int rc = SLURM_SUCCESS, loop = 0; - if(!qos_cond || !qos) { + if (!qos_cond || !qos) { error("as/pg: modify_qos: we need something to change"); return NULL; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; _make_qos_vals_for_modify(qos, &vals, &added_preempt); - if(!vals) { + if (!vals) { errno = SLURM_NO_CHANGE_IN_DATA; error("Nothing to change"); return NULL; @@ -598,7 +598,7 @@ as_pg_modify_qos(pgsql_conn_t *pg_conn, uint32_t uid, "WHERE deleted=0 %s;", qos_table, cond); xfree(cond); result = 
DEF_QUERY_RET; - if(!result) { + if (!result) { xfree (vals); return NULL; } @@ -616,7 +616,7 @@ as_pg_modify_qos(pgsql_conn_t *pg_conn, uint32_t uid, } object = xstrdup(ROW(0)); list_append(ret_list, object); - if(!rc) { + if (!rc) { xstrfmtcat(name_char, "(name='%s'", object); rc = 1; } else { @@ -645,7 +645,7 @@ as_pg_modify_qos(pgsql_conn_t *pg_conn, uint32_t uid, qos_rec->preempt_mode = qos->preempt_mode; qos_rec->priority = qos->priority; - if(qos->preempt_list) { + if (qos->preempt_list) { ListIterator new_preempt_itr = list_iterator_create(qos->preempt_list); char *preempt = ROW(0); @@ -653,18 +653,18 @@ as_pg_modify_qos(pgsql_conn_t *pg_conn, uint32_t uid, int cleared = 0; qos_rec->preempt_bitstr = bit_alloc(g_qos_count); - if(preempt && preempt[0]) + if (preempt && preempt[0]) bit_unfmt(qos_rec->preempt_bitstr, preempt+1); while((new_preempt = list_next(new_preempt_itr))) { - if(new_preempt[0] == '-') { + if (new_preempt[0] == '-') { bit_clear(qos_rec->preempt_bitstr, atoi(new_preempt+1)); - } else if(new_preempt[0] == '+') { + } else if (new_preempt[0] == '+') { bit_set(qos_rec->preempt_bitstr, atoi(new_preempt+1)); } else { - if(!cleared) { + if (!cleared) { cleared = 1; bit_nclear( qos_rec->preempt_bitstr, @@ -691,7 +691,7 @@ as_pg_modify_qos(pgsql_conn_t *pg_conn, uint32_t uid, return NULL; } - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("didn't effect anything"); xfree(vals); @@ -756,16 +756,16 @@ as_pg_remove_qos(pgsql_conn_t *pg_conn, uint32_t uid, char *qos = NULL, *delta_qos = NULL, *tmp = NULL; time_t now = time(NULL); - if(!qos_cond) { + if (!qos_cond) { error("as/pg: remove_qos: we need something to remove"); return NULL; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; cond = _make_qos_cond(qos_cond); - if(!cond) { + if (!cond) { error("Nothing to remove"); return NULL; } @@ -774,7 +774,7 @@ 
as_pg_remove_qos(pgsql_conn_t *pg_conn, uint32_t uid, qos_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) + if (!result) return NULL; name_char = NULL; @@ -787,7 +787,7 @@ as_pg_remove_qos(pgsql_conn_t *pg_conn, uint32_t uid, char *name = ROW(1); list_append(ret_list, xstrdup(name)); - if(!name_char) + if (!name_char) xstrfmtcat(name_char, "id_qos='%s'", id); else xstrfmtcat(name_char, " OR id_qos='%s'", id); @@ -809,7 +809,7 @@ as_pg_remove_qos(pgsql_conn_t *pg_conn, uint32_t uid, } END_EACH_ROW; PQclear(result); - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("didn't effect anything"); return ret_list; @@ -825,7 +825,7 @@ as_pg_remove_qos(pgsql_conn_t *pg_conn, uint32_t uid, xfree(qos); xfree(delta_qos); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { reset_pgsql_conn(pg_conn); list_destroy(ret_list); return NULL; @@ -903,15 +903,15 @@ as_pg_get_qos(pgsql_conn_t *pg_conn, uid_t uid, F_COUNT }; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; - if(!qos_cond) { + if (!qos_cond) { query = xstrdup_printf("SELECT %s FROM %s WHERE deleted=0;", gq_fields, qos_table); } else { cond = _make_qos_cond(qos_cond); - if(qos_cond->with_deleted) + if (qos_cond->with_deleted) query = xstrdup_printf("SELECT %s FROM %s WHERE " "(deleted=0 OR deleted=1) %s", gq_fields, qos_table, @@ -933,15 +933,15 @@ as_pg_get_qos(pgsql_conn_t *pg_conn, uid_t uid, slurmdb_qos_rec_t *qos = xmalloc(sizeof(slurmdb_qos_rec_t)); list_append(qos_list, qos); - if(! ISEMPTY(F_DESC)) + if (! ISEMPTY(F_DESC)) qos->description = xstrdup(ROW(F_DESC)); qos->id = atoi(ROW(F_ID)); - if(! ISEMPTY(F_NAME)) + if (! ISEMPTY(F_NAME)) qos->name = xstrdup(ROW(F_NAME)); - if(! ISNULL(F_GCM)) + if (! 
ISNULL(F_GCM)) qos->grp_cpu_mins = atoll(ROW(F_GCM)); else qos->grp_cpu_mins = (uint64_t)INFINITE; @@ -949,71 +949,71 @@ as_pg_get_qos(pgsql_conn_t *pg_conn, uid_t uid, qos->grp_cpu_run_mins = atoll(ROW(F_GCRM)); else qos->grp_cpu_run_mins = (uint64_t)INFINITE; - if(! ISNULL(F_GC)) + if (! ISNULL(F_GC)) qos->grp_cpus = atoi(ROW(F_GC)); else qos->grp_cpus = INFINITE; - if(! ISNULL(F_GJ)) + if (! ISNULL(F_GJ)) qos->grp_jobs = atoi(ROW(F_GJ)); else qos->grp_jobs = INFINITE; - if(! ISNULL(F_GMEM)) + if (! ISNULL(F_GMEM)) qos->grp_mem = atoi(ROW(F_GMEM)); else qos->grp_mem = INFINITE; - if(! ISNULL(F_GN)) + if (! ISNULL(F_GN)) qos->grp_nodes = atoi(ROW(F_GN)); else qos->grp_nodes = INFINITE; - if(! ISNULL(F_GSJ)) + if (! ISNULL(F_GSJ)) qos->grp_submit_jobs = atoi(ROW(F_GSJ)); else qos->grp_submit_jobs = INFINITE; - if(! ISNULL(F_GW)) + if (! ISNULL(F_GW)) qos->grp_wall = atoi(ROW(F_GW)); else qos->grp_wall = INFINITE; - if(! ISNULL(F_MCMPJ)) + if (! ISNULL(F_MCMPJ)) qos->max_cpu_mins_pj = atoll(ROW(F_MCMPJ)); else qos->max_cpu_mins_pj = (uint64_t)INFINITE; - if(! ISNULL(F_MCRMPU)) + if (! ISNULL(F_MCRMPU)) qos->max_cpu_run_mins_pu = atoll(ROW(F_MCRMPU)); else qos->max_cpu_run_mins_pu = (uint64_t)INFINITE; - if(! ISNULL(F_MCPJ)) + if (! ISNULL(F_MCPJ)) qos->max_cpus_pj = atoi(ROW(F_MCPJ)); else qos->max_cpus_pj = INFINITE; - if(! ISNULL(F_MJPU)) + if (! ISNULL(F_MJPU)) qos->max_jobs_pu = atoi(ROW(F_MJPU)); else qos->max_jobs_pu = INFINITE; - if(! ISNULL(F_MNPJ)) + if (! ISNULL(F_MNPJ)) qos->max_nodes_pj = atoi(ROW(F_MNPJ)); else qos->max_nodes_pj = INFINITE; - if(! ISNULL(F_MSJPU)) + if (! ISNULL(F_MSJPU)) qos->max_submit_jobs_pu = atoi(ROW(F_MSJPU)); else qos->max_submit_jobs_pu = INFINITE; - if(! ISNULL(F_MWPJ)) + if (! ISNULL(F_MWPJ)) qos->max_wall_pj = atoi(ROW(F_MWPJ)); else qos->max_wall_pj = INFINITE; - if(! ISEMPTY(F_PREE)) { - if(!qos->preempt_bitstr) + if (! 
ISEMPTY(F_PREE)) { + if (!qos->preempt_bitstr) qos->preempt_bitstr = bit_alloc(g_qos_count); bit_unfmt(qos->preempt_bitstr, ROW(F_PREE)+1); } - if(! ISNULL(F_PREEM)) + if (! ISNULL(F_PREEM)) qos->preempt_mode = atoi(ROW(F_PREEM)); - if(! ISNULL(F_PRIO)) + if (! ISNULL(F_PRIO)) qos->priority = atoi(ROW(F_PRIO)); - if(! ISNULL(F_UF)) + if (! ISNULL(F_UF)) qos->usage_factor = atof(ROW(F_UF)); } END_EACH_ROW; PQclear(result); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_qos.h b/src/plugins/accounting_storage/pgsql/as_pg_qos.h index c031b9ac856ee3775465e71c72f11ad4cf656a1a..26466e320a09f073eae232ecd46523ef85247c67 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_qos.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_qos.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_resv.c b/src/plugins/accounting_storage/pgsql/as_pg_resv.c index 57b7263b1430d3b985985580d6e0f700414f747f..c072450f183a2a08d683318b548e05b939b3d0a0 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_resv.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_resv.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -158,14 +158,14 @@ _make_resv_cond(slurmdb_reservation_cond_t *resv_cond, char **cond) concat_cond_list(resv_cond->id_list, NULL, "id_resv", cond); concat_cond_list(resv_cond->name_list, NULL, "resv_name", cond); - if(resv_cond->time_start) { - if(!resv_cond->time_end) + if (resv_cond->time_start) { + if (!resv_cond->time_end) resv_cond->time_end = now; xstrfmtcat(*cond, "AND (time_start<%ld " "AND (time_end>=%ld OR time_end=0))", resv_cond->time_end, resv_cond->time_start); - } else if(resv_cond->time_end) { + } else if (resv_cond->time_end) { xstrfmtcat(*cond, "AND (time_start < %ld)", resv_cond->time_end); } @@ -204,19 +204,19 @@ as_pg_add_reservation(pgsql_conn_t *pg_conn, slurmdb_reservation_rec_t *resv) int rc = SLURM_SUCCESS; char *query = NULL, *rec = NULL; - if(!resv) { + if (!resv) { error("as/pg: add_reservation: no reservation given"); return SLURM_ERROR; } - if(!resv->id) { + if (!resv->id) { error("as/pg: add_reservation: reservation id not given"); return SLURM_ERROR; } - if(!resv->time_start) { + if (!resv->time_start) { error("as/pg: add_reservation: start time not given"); return SLURM_ERROR; } - if(!resv->cluster || !resv->cluster[0]) { + if (!resv->cluster || !resv->cluster[0]) { error("as/pg: add_reservation: cluster name not given"); return SLURM_ERROR; } @@ -261,23 +261,23 @@ as_pg_modify_reservation(pgsql_conn_t *pg_conn, F_COUNT }; - if(!resv) { + if (!resv) { error("as/pg: modify_reservation: no reservation given"); return SLURM_ERROR; } - if(!resv->id) { + if (!resv->id) { error("as/pg: modify_reservation: reservation id not given"); return SLURM_ERROR; } - if(!resv->time_start) { + if (!resv->time_start) { error("as/pg: modify_reservation: time_start not given"); return SLURM_ERROR; } - if(!resv->cluster || !resv->cluster[0]) { + if (!resv->cluster || !resv->cluster[0]) { error("as/pg: modify_reservation: cluster not given"); return SLURM_ERROR; } - 
if(!resv->time_start_prev) { + if (!resv->time_start_prev) { error("as/pg: modify_reservation: time_start_prev not given"); return SLURM_ERROR; } @@ -305,7 +305,7 @@ try_again: error("as/pg: modify_reservation: There is no reservation" " by id %u, start %ld, and cluster '%s'", resv->id, resv->time_start_prev, resv->cluster); - if(!set && resv->time_end) { + if (!set && resv->time_end) { /* This should never really happen, but just incase the controller and the database get out of sync we check @@ -328,36 +328,36 @@ try_again: set = 0; /* check differences here */ - if(!resv->name && !PG_EMPTY(F_NAME)) + if (!resv->name && !PG_EMPTY(F_NAME)) // if this changes we just update the // record, no need to create a new one since // this doesn't really effect the // reservation accounting wise resv->name = xstrdup(PG_VAL(F_NAME)); - if(resv->assocs) + if (resv->assocs) set = 1; - else if(!PG_EMPTY(F_ASSOCS)) + else if (!PG_EMPTY(F_ASSOCS)) resv->assocs = xstrdup(PG_VAL(F_ASSOCS)); - if(resv->cpus != (uint32_t)NO_VAL) + if (resv->cpus != (uint32_t)NO_VAL) set = 1; else resv->cpus = atoi(PG_VAL(F_CPU)); - if(resv->flags != (uint16_t)NO_VAL) + if (resv->flags != (uint16_t)NO_VAL) set = 1; else resv->flags = atoi(PG_VAL(F_FLAGS)); - if(resv->nodes) + if (resv->nodes) set = 1; - else if(! PG_EMPTY(F_NODES)) { + else if (! 
PG_EMPTY(F_NODES)) { resv->nodes = xstrdup(PG_VAL(F_NODES)); resv->node_inx = xstrdup(PG_VAL(F_NODE_INX)); } - if(!resv->time_end) + if (!resv->time_end) resv->time_end = atoi(PG_VAL(F_END)); PQclear(result); @@ -367,7 +367,7 @@ try_again: * just incase we have a different one from being out * of sync */ - if((start > now) || !set) { + if ((start > now) || !set) { /* we haven't started the reservation yet, or we are changing the associations or end time which we can just update it */ @@ -405,11 +405,11 @@ as_pg_remove_reservation(pgsql_conn_t *pg_conn, int rc = SLURM_SUCCESS; char *query = NULL; - if(!resv) { + if (!resv) { error("as/pg: remove_reservation: no reservation given"); return SLURM_ERROR; } - if(!resv->id || !resv->time_start || !resv->cluster) { + if (!resv->id || !resv->time_start || !resv->cluster) { error("as/pg: remove_reservation: id, start time " " or cluster not given"); return SLURM_ERROR; @@ -487,20 +487,20 @@ as_pg_get_reservations(pgsql_conn_t *pg_conn, uid_t uid, return NULL; } - if(!resv_cond) { + if (!resv_cond) { goto empty; } with_usage = resv_cond->with_usage; memset(&job_cond, 0, sizeof(slurmdb_job_cond_t)); - if(resv_cond->nodes) { + if (resv_cond->nodes) { job_cond.usage_start = resv_cond->time_start; job_cond.usage_end = resv_cond->time_end; job_cond.used_nodes = resv_cond->nodes; job_cond.cluster_list = resv_cond->cluster_list; cnodes = setup_cluster_nodes(pg_conn, &job_cond); - } else if(with_usage) { + } else if (with_usage) { job_cond.usage_start = resv_cond->time_start; job_cond.usage_end = resv_cond->time_end; } @@ -521,9 +521,9 @@ empty: if (query) xstrcat(query, " ORDER BY cluster, resv_name;"); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("as/pg: get_reservations: failed to get resv from db"); - if(cnodes) + if (cnodes) destroy_cluster_nodes(cnodes); return NULL; } @@ -535,13 +535,13 @@ empty: int start; start = atoi(ROW(F_START)); - if(!good_nodes_from_inx(cnodes, ROW(F_NODE_INX), start)) + if 
(!good_nodes_from_inx(cnodes, ROW(F_NODE_INX), start)) continue; resv = xmalloc(sizeof(slurmdb_reservation_rec_t)); resv->id = atoi(ROW(F_ID)); - if(with_usage) { - if(!job_cond.resvid_list) + if (with_usage) { + if (!job_cond.resvid_list) job_cond.resvid_list = list_create(NULL); list_append(job_cond.resvid_list, ROW(F_ID)); } @@ -556,17 +556,17 @@ empty: list_append(resv_list, resv); } END_EACH_ROW; - if(cnodes) + if (cnodes) destroy_cluster_nodes(cnodes); - if(with_usage && resv_list && list_count(resv_list)) { + if (with_usage && resv_list && list_count(resv_list)) { ListIterator itr = NULL, itr2 = NULL; slurmdb_job_rec_t *job = NULL; slurmdb_reservation_rec_t *resv = NULL; List job_list = jobacct_storage_p_get_jobs_cond( pg_conn, uid, &job_cond); - if(!job_list || !list_count(job_list)) + if (!job_list || !list_count(job_list)) goto no_jobs; itr = list_iterator_create(job_list); @@ -582,24 +582,24 @@ empty: have to make sure we get the time in the correct record. */ - if(resv->id != job->resvid) + if (resv->id != job->resvid) continue; set = 1; - if(start < resv->time_start) + if (start < resv->time_start) start = resv->time_start; - if(!end || end > resv->time_end) + if (!end || end > resv->time_end) end = resv->time_end; - if((elapsed = (end - start)) < 1) + if ((elapsed = (end - start)) < 1) continue; - if(job->alloc_cpus) + if (job->alloc_cpus) resv->alloc_secs += elapsed * job->alloc_cpus; } list_iterator_reset(itr2); - if(!set) { + if (!set) { error("we got a job %u with no reservation " "associatied with it?", job->jobid); } @@ -607,11 +607,11 @@ empty: list_iterator_destroy(itr2); list_iterator_destroy(itr); no_jobs: - if(job_list) + if (job_list) list_destroy(job_list); } - if(job_cond.resvid_list) { + if (job_cond.resvid_list) { list_destroy(job_cond.resvid_list); job_cond.resvid_list = NULL; } diff --git a/src/plugins/accounting_storage/pgsql/as_pg_resv.h b/src/plugins/accounting_storage/pgsql/as_pg_resv.h index 
05ee26b575fe934424c6cb4b389a434696fff4a6..4033d890752bc4f01f81783beaf80410015ec023 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_resv.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_resv.h @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_rollup.c b/src/plugins/accounting_storage/pgsql/as_pg_rollup.c index d55c52e5ef354e31e18cadfda343a0efa6677a4b..b1da8a7c2978f8fb0ffce2b9b77625e0cb6d5fc5 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_rollup.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_rollup.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -82,22 +82,22 @@ typedef struct { static void _destroy_local_id_usage(void *object) { local_id_usage_t *a_usage = (local_id_usage_t *)object; - if(a_usage) { + if (a_usage) { xfree(a_usage); } } static void _destroy_local_cluster_usage(void *object) { local_cluster_usage_t *c_usage = (local_cluster_usage_t *)object; - if(c_usage) { + if (c_usage) { xfree(c_usage); } } static void _destroy_local_resv_usage(void *object) { local_resv_usage_t *r_usage = (local_resv_usage_t *)object; - if(r_usage) { - if(r_usage->local_assocs) + if (r_usage) { + if (r_usage->local_assocs) list_destroy(r_usage->local_assocs); xfree(r_usage); } @@ -137,7 +137,7 @@ _process_event_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, " ORDER BY node_name, time_start", ge_fields, cluster, event_table, NODE_STATE_MAINT, end, start); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("failed to get events"); return SLURM_ERROR; } @@ -147,26 +147,26 @@ _process_event_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, int row_end = atoi(ROW(F_END)); int row_cpu = atoi(ROW(F_CPU)); - if(row_start < start) + if (row_start < start) row_start = start; - if(!row_end || row_end > end) + if (!row_end || row_end > end) row_end = end; /* Ignore time less than 1 second. */ - if((row_end - row_start) < 1) + if ((row_end - row_start) < 1) continue; /* * node_name=='' means cluster registration entry, * else, node down entry */ - if(ISEMPTY(F_NAME)) { + if (ISEMPTY(F_NAME)) { /* if the cpu count changes we will * only care about the last cpu count but * we will keep a total of the time for * all cpus to get the correct cpu time * for the entire period. */ - if(!c_usage) { + if (!c_usage) { c_usage = xmalloc( sizeof(local_cluster_usage_t)); list_append(cu_list, c_usage); @@ -188,15 +188,15 @@ _process_event_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, are looking for. 
If it was during this time period we would already have it. */ - if(c_usage) { + if (c_usage) { int local_start = row_start; int local_end = row_end; - if(c_usage->start > local_start) + if (c_usage->start > local_start) local_start = c_usage->start; - if(c_usage->end < local_end) + if (c_usage->end < local_end) local_end = c_usage->end; - if((local_end - local_start) > 0) { + if ((local_end - local_start) > 0) { seconds = (local_end - local_start); c_usage->d_cpu += seconds * row_cpu; } @@ -232,7 +232,7 @@ _process_resv_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, " ORDER BY time_start", gr_fields, cluster, resv_table, end, start); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("failed to get resv"); return SLURM_ERROR; } @@ -254,12 +254,12 @@ _process_resv_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, int row_cpu = atoi(ROW(F_CPU)); int row_flags = atoi(ROW(F_FLAGS)); - if(row_start < start) + if (row_start < start) row_start = start; - if(!row_end || row_end > end) + if (!row_end || row_end > end) row_end = end; /* ignore time less than 1 seconds */ - if((row_end - row_start) < 1) + if ((row_end - row_start) < 1) continue; r_usage = xmalloc(sizeof(local_resv_usage_t)); @@ -283,7 +283,7 @@ _process_resv_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, if (!c_usage) continue; - if(row_flags & RESERVE_FLAG_MAINT) + if (row_flags & RESERVE_FLAG_MAINT) c_usage->pd_cpu += r_usage->total_time; else c_usage->a_cpu += r_usage->total_time; @@ -331,7 +331,7 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, "time_eligible", gj_fields, cluster, job_table, (long)end, (long)start); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("failed to get jobs"); return SLURM_ERROR; } @@ -349,37 +349,37 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, int row_rcpu = atoi(ROW(F_RCPU)); seconds = 0; - if(row_start && (row_start < start)) + if (row_start && (row_start < start)) 
row_start = start; - if(!row_start && row_end) + if (!row_start && row_end) row_start = row_end; - if(!row_end || row_end > end) + if (!row_end || row_end > end) row_end = end; - if(!row_start || ((row_end - row_start) < 1)) + if (!row_start || ((row_end - row_start) < 1)) goto calc_cluster; seconds = (row_end - row_start); - if(strcmp(ROW(F_SUSPENDED), "0")) { + if (strcmp(ROW(F_SUSPENDED), "0")) { query = xstrdup_printf( "SELECT %s.get_job_suspend_time(%s, %ld, %ld);", cluster, ROW(F_DB_INX), start, end); result2 = DEF_QUERY_RET; - if(!result2) { + if (!result2) { list_iterator_destroy(r_itr); return SLURM_ERROR; } seconds -= atoi(PQgetvalue(result2, 0, 0)); PQclear(result2); } - if(seconds < 1) { + if (seconds < 1) { debug4("This job (%u) was suspended " "the entire hour", job_id); /* TODO: how about resv usage? */ continue; } - if(last_id != assoc_id) { /* ORDER BY associd */ + if (last_id != assoc_id) { /* ORDER BY associd */ a_usage = xmalloc(sizeof(local_id_usage_t)); a_usage->id = assoc_id; list_append(au_list, a_usage); @@ -387,14 +387,14 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, } a_usage->a_cpu += seconds * row_acpu; - if(!track_wckey) + if (!track_wckey) goto calc_cluster; /* do the wckey calculation */ - if(last_wckeyid != wckey_id) { + if (last_wckeyid != wckey_id) { w_usage = list_find_first(wu_list, _cmp_local_id, &wckey_id); - if(!w_usage) { + if (!w_usage) { w_usage = xmalloc(sizeof(local_id_usage_t)); w_usage->id = wckey_id; list_append(wu_list, w_usage); @@ -407,8 +407,8 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, calc_cluster: /* first figure out the reservation */ - if(resv_id) { - if(seconds <= 0) + if (resv_id) { + if (seconds <= 0) continue; /* Since we have already added the entire reservation as used time on @@ -433,15 +433,15 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, sure all the reservations are checked to see if such a thing has happened */ - 
if((r_usage->id == resv_id)) { + if ((r_usage->id == resv_id)) { int temp_end = row_end; int temp_start = row_start; - if(r_usage->start > temp_start) + if (r_usage->start > temp_start) temp_start = r_usage->start; - if(r_usage->end < temp_end) + if (r_usage->end < temp_end) temp_end = r_usage->end; - if((temp_end - temp_start) > 0) { + if ((temp_end - temp_start) > 0) { r_usage->a_cpu += row_acpu * (temp_end - temp_start); } @@ -456,10 +456,10 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, registered. This continue should rarely if ever happen. */ - if(!c_usage) + if (!c_usage) continue; - if(row_start && (seconds > 0)) { + if (row_start && (seconds > 0)) { /* info("%d assoc %d adds " */ /* "(%d)(%d-%d) * %d = %d " */ /* "to %d", */ @@ -479,15 +479,15 @@ _process_job_usage(pgsql_conn_t *pg_conn, char *cluster, time_t start, * by (start_time - eligible_time) seconds * large r_cpu means cluster overload or bad scheduling? */ - if(!row_start || (row_start >= c_usage->start)) { + if (!row_start || (row_start >= c_usage->start)) { row_end = row_start; row_start = row_eligible; - if(c_usage->start > row_start) + if (c_usage->start > row_start) row_start = c_usage->start; - if(c_usage->end < row_end) + if (c_usage->end < row_end) row_end = c_usage->end; - if((row_end - row_start) > 0) { + if ((row_end - row_start) > 0) { seconds = (row_end - row_start) * row_rcpu; @@ -528,7 +528,7 @@ _process_resv_idle_time(List resv_usage_list, List assoc_usage_list) ListIterator tmp_itr = NULL; int64_t idle = r_usage->total_time - r_usage->a_cpu; - if(idle <= 0) + if (idle <= 0) continue; /* now divide that time by the number of @@ -541,12 +541,12 @@ _process_resv_idle_time(List resv_usage_list, List assoc_usage_list) tmp_itr = list_iterator_create(r_usage->local_assocs); while((assoc = list_next(tmp_itr))) { int associd = atoi(assoc); - if(last_id != associd) { + if (last_id != associd) { a_usage = list_find_first(assoc_usage_list, _cmp_local_id, &associd); 
} - if(!a_usage) { + if (!a_usage) { a_usage = xmalloc(sizeof(local_id_usage_t)); a_usage->id = associd; list_append(assoc_usage_list, a_usage); @@ -568,7 +568,7 @@ _cluster_usage_sanity_check(char *cluster, local_cluster_usage_t *c_usage, uint64_t total_used = 0; /* no more allocated cpus than possible. */ - if(c_usage->total_time < c_usage->a_cpu) { + if (c_usage->total_time < c_usage->a_cpu) { char *start_char = xstrdup(ctime(&curr_start)); char *end_char = xstrdup(ctime(&curr_end)); error("We have more allocated time than is " @@ -587,7 +587,7 @@ _cluster_usage_sanity_check(char *cluster, local_cluster_usage_t *c_usage, /* Make sure the total time we care about doesn't go over the limit */ - if(c_usage->total_time < (total_used)) { + if (c_usage->total_time < (total_used)) { char *start_char = xstrdup(ctime(&curr_start)); char *end_char = xstrdup(ctime(&curr_end)); int64_t overtime; @@ -611,14 +611,14 @@ _cluster_usage_sanity_check(char *cluster, local_cluster_usage_t *c_usage, overtime = (int64_t)(c_usage->total_time - (c_usage->a_cpu + c_usage->d_cpu)); - if(overtime < 0) + if (overtime < 0) c_usage->d_cpu += overtime; overtime = (int64_t)(c_usage->total_time - (c_usage->a_cpu + c_usage->d_cpu + c_usage->pd_cpu)); - if(overtime < 0) + if (overtime < 0) c_usage->pd_cpu += overtime; total_used = c_usage->a_cpu + @@ -640,13 +640,13 @@ _cluster_usage_sanity_check(char *cluster, local_cluster_usage_t *c_usage, */ /* info("%s got idle of %lld", c_usage->name, */ /* (int64_t)c_usage->i_cpu); */ - if((int64_t)c_usage->i_cpu < 0) { + if ((int64_t)c_usage->i_cpu < 0) { /* info("got %d %d %d", c_usage->r_cpu, */ /* c_usage->i_cpu, c_usage->o_cpu); */ c_usage->r_cpu += (int64_t)c_usage->i_cpu; c_usage->o_cpu -= (int64_t)c_usage->i_cpu; c_usage->i_cpu = 0; - if((int64_t)c_usage->r_cpu < 0) + if ((int64_t)c_usage->r_cpu < 0) c_usage->r_cpu = 0; } } @@ -751,7 +751,7 @@ pgsql_hourly_rollup(pgsql_conn_t *pg_conn, char *cluster, /* info("association (%d) %d alloc %d", */ /* 
a_usage->id, last_id, */ /* a_usage->a_cpu); */ - if(usage_recs) + if (usage_recs) xstrcat(usage_recs, ", "); xstrfmtcat(usage_recs, "CAST((%ld, %ld, 0, %d, %ld, " @@ -759,19 +759,19 @@ pgsql_hourly_rollup(pgsql_conn_t *pg_conn, char *cluster, now, now, a_usage->id, curr_start, a_usage->a_cpu, cluster, assoc_hour_table); } - if(usage_recs) { + if (usage_recs) { query = xstrdup_printf( "SELECT %s.add_assoc_hour_usages(ARRAY[%s]);", cluster, usage_recs); xfree(usage_recs); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add assoc hour rollup"); goto end_it; } } - if(!track_wckey) + if (!track_wckey) goto end_loop; list_iterator_reset(w_itr); @@ -779,7 +779,7 @@ pgsql_hourly_rollup(pgsql_conn_t *pg_conn, char *cluster, /* info("association (%d) %d alloc %d", */ /* w_usage->id, last_id, */ /* w_usage->a_cpu); */ - if(usage_recs) + if (usage_recs) xstrcat(usage_recs, ", "); xstrfmtcat(usage_recs, "CAST((%ld, %ld, 0, %d, %ld, " @@ -787,13 +787,13 @@ pgsql_hourly_rollup(pgsql_conn_t *pg_conn, char *cluster, now, now, w_usage->id, curr_start, w_usage->a_cpu, cluster, wckey_hour_table); } - if(usage_recs) { + if (usage_recs) { query = xstrdup_printf( "SELECT %s.add_wckey_hour_usages(ARRAY[%s]);", cluster, usage_recs); xfree(usage_recs); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add wckey hour rollup"); goto end_it; } @@ -876,7 +876,7 @@ pgsql_daily_rollup(pgsql_conn_t *pg_conn, char *cluster, time_t start, cluster, now, curr_start, curr_end); } rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); return SLURM_ERROR; } @@ -947,7 +947,7 @@ pgsql_monthly_rollup(pgsql_conn_t *pg_conn, char *cluster, time_t start, cluster, now, curr_start, curr_end); } rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add month rollup"); return SLURM_ERROR; } @@ -959,10 +959,10 @@ 
pgsql_monthly_rollup(pgsql_conn_t *pg_conn, char *cluster, time_t start, /* if we didn't ask for archive data return here and don't do anything extra just rollup */ - if(!archive_data) + if (!archive_data) return SLURM_SUCCESS; - if(!slurmdbd_conf) + if (!slurmdbd_conf) return SLURM_SUCCESS; memset(&arch_cond, 0, sizeof(arch_cond)); @@ -1009,14 +1009,14 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, F_COUNT }; - if(!sent_start) { + if (!sent_start) { query = xstrdup_printf("SELECT %s FROM %s.%s LIMIT 1", ru_fields, cluster, last_ran_table); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; - if(PQntuples(result)) { + if (PQntuples(result)) { last_hour = atoi(PG_VAL(F_HOUR)); last_day = atoi(PG_VAL(F_DAY)); last_month = atoi(PG_VAL(F_MONTH)); @@ -1027,7 +1027,7 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, query = xstrdup_printf("SELECT %s.init_last_ran(%ld);", cluster, now); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; last_hour = last_day = last_month = atoi(PG_VAL(0)); @@ -1040,14 +1040,14 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, } } - if(!my_time) + if (!my_time) my_time = time(NULL); - if(!localtime_r(&last_hour, &start_tm)) { + if (!localtime_r(&last_hour, &start_tm)) { error("Couldn't get localtime from hour start %ld", last_hour); return SLURM_ERROR; } - if(!localtime_r(&my_time, &end_tm)) { + if (!localtime_r(&my_time, &end_tm)) { error("Couldn't get localtime from hour end %ld", my_time); return SLURM_ERROR; } @@ -1076,14 +1076,14 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, global_last_rollup = end_time; //slurm_mutex_unlock(&rollup_lock); - if(end_time-start_time > 0) { + if (end_time-start_time > 0) { START_TIMER; - if((rc = pgsql_hourly_rollup(pg_conn, cluster, start_time, end_time)) + if ((rc = pgsql_hourly_rollup(pg_conn, cluster, start_time, end_time)) != SLURM_SUCCESS) return rc; END_TIMER3("hourly_rollup", 5000000); /* If we 
have a sent_end do not update the last_run_table */ - if(!sent_end) + if (!sent_end) query = xstrdup_printf( "UPDATE %s.%s SET hourly_rollup=%ld", cluster, last_ran_table, end_time); @@ -1093,7 +1093,7 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, } - if(!localtime_r(&last_day, &start_tm)) { + if (!localtime_r(&last_day, &start_tm)) { error("Couldn't get localtime from day %ld", last_day); return SLURM_ERROR; } @@ -1111,16 +1111,16 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, /* info("day end %s", ctime(&end_time)); */ /* info("diff is %d", end_time-start_time); */ - if(end_time-start_time > 0) { + if (end_time-start_time > 0) { START_TIMER; - if((rc = pgsql_daily_rollup(pg_conn, cluster, start_time, + if ((rc = pgsql_daily_rollup(pg_conn, cluster, start_time, end_time, archive_data)) != SLURM_SUCCESS) return rc; END_TIMER2("daily_rollup"); - if(query && !sent_end) + if (query && !sent_end) xstrfmtcat(query, ", daily_rollup=%ld", (long)end_time); - else if(!sent_end) + else if (!sent_end) query = xstrdup_printf( "UPDATE %s.%s SET daily_rollup=%ld", cluster, last_ran_table, (long)end_time); @@ -1129,7 +1129,7 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, (long)end_time, (long)start_time); } - if(!localtime_r(&last_month, &start_tm)) { + if (!localtime_r(&last_month, &start_tm)) { error("Couldn't get localtime from month %ld", last_month); return SLURM_ERROR; } @@ -1154,18 +1154,18 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, /* info("month end %s", ctime(&end_time)); */ /* info("diff is %d", end_time-start_time); */ - if(end_time-start_time > 0) { + if (end_time-start_time > 0) { START_TIMER; - if((rc = pgsql_monthly_rollup(pg_conn, cluster, start_time, + if ((rc = pgsql_monthly_rollup(pg_conn, cluster, start_time, end_time, archive_data)) != SLURM_SUCCESS) return rc; END_TIMER2("monthly_rollup"); - if(query && !sent_end) + if (query && !sent_end) xstrfmtcat(query, ", monthly_rollup=%ld", 
(long)end_time); - else if(!sent_end) + else if (!sent_end) query = xstrdup_printf( "UPDATE %s.%s SET monthly_rollup=%ld", cluster, last_ran_table, (long)end_time); @@ -1174,7 +1174,7 @@ _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster, (long)end_time, (long)start_time); } - if(query) { + if (query) { rc = DEF_QUERY_RET_RC; } return rc; @@ -1197,7 +1197,7 @@ as_pg_roll_usage(pgsql_conn_t *pg_conn, time_t sent_start, { int rc = SLURM_SUCCESS; - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; slurm_mutex_lock(&usage_rollup_lock); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_rollup.h b/src/plugins/accounting_storage/pgsql/as_pg_rollup.h index 75912196d747537e9a04550e907d90dc19ee10fc..96fd6abc104e84f511fa2ae937bb551219024a84 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_rollup.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_rollup.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_txn.c b/src/plugins/accounting_storage/pgsql/as_pg_txn.c index 42eef0cefe7018ac97637f256dc69d113de88947..2552bd6051378b98b9ad5c718837c4afd68ec2db 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_txn.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_txn.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -70,12 +70,12 @@ _concat_txn_cond_list(List cond_list, char *col, char **cond) char *object; ListIterator itr; - if(cond_list && list_count(cond_list)) { + if (cond_list && list_count(cond_list)) { xstrcat(*cond, " AND ("); itr = list_iterator_create(cond_list); list_iterator_destroy(itr); while((object = list_next(itr))) { - if(set) + if (set) xstrcat(*cond, " OR "); xstrfmtcat(*cond, "name LIKE '%%%s%%' OR " @@ -128,7 +128,7 @@ _make_txn_cond(pgsql_conn_t *pg_conn, slurmdb_txn_cond_t *txn_cond) /* handle query for associations first */ concat_cond_list(txn_cond->acct_list, NULL, "acct", &assoc_cond); concat_cond_list(txn_cond->user_list, NULL, "user_name", &assoc_cond); - if(assoc_cond) { + if (assoc_cond) { FOR_EACH_CLUSTER(txn_cond->cluster_list) { query = xstrdup_printf( "SELECT id_assoc FROM %s.%s WHERE TRUE %s", @@ -172,7 +172,7 @@ _make_txn_cond(pgsql_conn_t *pg_conn, slurmdb_txn_cond_t *txn_cond) concat_like_cond_list(txn_cond->info_list, NULL, "info", &cond); concat_like_cond_list(txn_cond->name_list, NULL, "name", &cond); - if(txn_cond->time_start) + if (txn_cond->time_start) xstrfmtcat(cond, " AND (timestamp >= %ld) ", txn_cond->time_start); if (txn_cond->time_end) @@ -225,16 +225,16 @@ as_pg_get_txn(pgsql_conn_t *pg_conn, uid_t uid, if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; - if(txn_cond) + if (txn_cond) cond = _make_txn_cond(pg_conn, txn_cond); query = xstrdup_printf("SELECT %s FROM %s", gt_fields, txn_table); - if(cond) { + if (cond) { xstrfmtcat(query, " WHERE TRUE %s", cond); xfree(cond); } xstrcat(query, " ORDER BY timestamp;"); result = DEF_QUERY_RET; - if(!result) + if (!result) return NULL; txn_list = list_create(slurmdb_destroy_txn_rec); @@ -250,7 +250,7 @@ as_pg_get_txn(pgsql_conn_t *pg_conn, uid_t uid, txn->where_query = xstrdup(ROW(F_NAME)); txn->clusters = xstrdup(ROW(F_CLUSTER)); - if(txn_cond && txn_cond->with_assoc_info + if (txn_cond && 
txn_cond->with_assoc_info && (txn->action == DBD_ADD_ASSOCS || txn->action == DBD_MODIFY_ASSOCS || txn->action == DBD_REMOVE_ASSOCS)) { diff --git a/src/plugins/accounting_storage/pgsql/as_pg_txn.h b/src/plugins/accounting_storage/pgsql/as_pg_txn.h index 0f3fdd2f5728630d8236642006806148947432ce..160135e3185d8e57b6c46cfe2c6a31acb1732b43 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_txn.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_txn.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_usage.c b/src/plugins/accounting_storage/pgsql/as_pg_usage.c index ac0353e0ae73bc050a3d3e213cc4d975aa4601f3..44ba927631cdba6f715b1c117a2cb8d5fa625cb8 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_usage.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_usage.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -626,7 +626,7 @@ _get_assoc_usage(pgsql_conn_t *pg_conn, uid_t uid, return SLURM_ERROR; } cluster = slurmdb_assoc->cluster; - if(!slurmdb_assoc->id) { + if (!slurmdb_assoc->id) { error("We need an assoc id to set data for getting usage"); return SLURM_ERROR; } @@ -642,27 +642,27 @@ _get_assoc_usage(pgsql_conn_t *pg_conn, uid_t uid, ListIterator itr = NULL; slurmdb_coord_rec_t *coord = NULL; - if(slurmdb_assoc->user && + if (slurmdb_assoc->user && !strcmp(slurmdb_assoc->user, user.name)) goto is_user; - if(!user.coord_accts) { + if (!user.coord_accts) { debug4("This user isn't a coord."); goto bad_user; } - if(!slurmdb_assoc->acct) { + if (!slurmdb_assoc->acct) { debug("No account name given " "in association."); goto bad_user; } itr = list_iterator_create(user.coord_accts); while((coord = list_next(itr))) { - if(!strcasecmp(coord->name, + if (!strcasecmp(coord->name, slurmdb_assoc->acct)) break; } list_iterator_destroy(itr); - if(coord) + if (coord) goto is_user; bad_user: @@ -672,7 +672,7 @@ _get_assoc_usage(pgsql_conn_t *pg_conn, uid_t uid, is_user: usage_table = assoc_day_table; - if(set_usage_information(&usage_table, DBD_GET_ASSOC_USAGE, + if (set_usage_information(&usage_table, DBD_GET_ASSOC_USAGE, &start, &end) != SLURM_SUCCESS) return SLURM_ERROR; @@ -686,10 +686,10 @@ is_user: cluster, usage_table, cluster, assoc_table, cluster, assoc_table, end, start, slurmdb_assoc->id); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; - if(!slurmdb_assoc->accounting_list) + if (!slurmdb_assoc->accounting_list) slurmdb_assoc->accounting_list = list_create(slurmdb_destroy_accounting_rec); @@ -737,7 +737,7 @@ _get_wckey_usage(pgsql_conn_t *pg_conn, uid_t uid, return SLURM_ERROR; } cluster = slurmdb_wckey->cluster; - if(!slurmdb_wckey->id) { + if (!slurmdb_wckey->id) { error("We need an wckey id to set data for getting usage"); return SLURM_ERROR; } @@ -750,7 +750,7 @@ 
_get_wckey_usage(pgsql_conn_t *pg_conn, uid_t uid, } if (!is_admin) { - if(! slurmdb_wckey->user || + if (! slurmdb_wckey->user || strcmp(slurmdb_wckey->user, user.name)) { errno = ESLURM_ACCESS_DENIED; return SLURM_ERROR; @@ -758,7 +758,7 @@ _get_wckey_usage(pgsql_conn_t *pg_conn, uid_t uid, } usage_table = wckey_day_table; - if(set_usage_information(&usage_table, DBD_GET_WCKEY_USAGE, + if (set_usage_information(&usage_table, DBD_GET_WCKEY_USAGE, &start, &end) != SLURM_SUCCESS) { return SLURM_ERROR; } @@ -769,10 +769,10 @@ _get_wckey_usage(pgsql_conn_t *pg_conn, uid_t uid, "AND id_wckey=%d ORDER BY id_wckey, time_start;", cluster, usage_table, end, start, slurmdb_wckey->id); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; - if(!slurmdb_wckey->accounting_list) + if (!slurmdb_wckey->accounting_list) slurmdb_wckey->accounting_list = list_create(slurmdb_destroy_accounting_rec); @@ -821,12 +821,12 @@ _get_cluster_usage(pgsql_conn_t *pg_conn, uid_t uid, F_COUNT }; - if(!cluster_rec->name || !cluster_rec->name[0]) { + if (!cluster_rec->name || !cluster_rec->name[0]) { error("We need a cluster name to set data for"); return SLURM_ERROR; } - if(set_usage_information(&usage_table, DBD_GET_CLUSTER_USAGE, + if (set_usage_information(&usage_table, DBD_GET_CLUSTER_USAGE, &start, &end) != SLURM_SUCCESS) { return SLURM_ERROR; } @@ -840,7 +840,7 @@ _get_cluster_usage(pgsql_conn_t *pg_conn, uid_t uid, if (!result) return SLURM_ERROR; - if(!cluster_rec->accounting_list) + if (!cluster_rec->accounting_list) cluster_rec->accounting_list = list_create(slurmdb_destroy_cluster_accounting_rec); @@ -926,19 +926,19 @@ get_usage_for_assoc_list(pgsql_conn_t *pg_conn, char *cluster, List assoc_list, F_COUNT }; - if(!assoc_list) { + if (!assoc_list) { error("We need an object to set data for getting usage"); return SLURM_ERROR; } usage_table = assoc_day_table; - if(set_usage_information(&usage_table, DBD_GET_ASSOC_USAGE, + if (set_usage_information(&usage_table, 
DBD_GET_ASSOC_USAGE, &start, &end) != SLURM_SUCCESS) { return SLURM_ERROR; } itr = list_iterator_create(assoc_list); while((assoc = list_next(itr))) { - if(id_str) + if (id_str) xstrfmtcat(id_str, " OR t3.id_assoc=%d", assoc->id); else xstrfmtcat(id_str, "t3.id_assoc=%d", assoc->id); @@ -956,7 +956,7 @@ get_usage_for_assoc_list(pgsql_conn_t *pg_conn, char *cluster, List assoc_list, assoc_table, end, start, id_str); xfree(id_str); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; usage_list = list_create(slurmdb_destroy_accounting_rec); @@ -974,16 +974,16 @@ get_usage_for_assoc_list(pgsql_conn_t *pg_conn, char *cluster, List assoc_list, itr = list_iterator_create(assoc_list); while((assoc = list_next(itr))) { int found = 0; - if(!assoc->accounting_list) + if (!assoc->accounting_list) assoc->accounting_list = list_create( slurmdb_destroy_accounting_rec); while((accounting_rec = list_next(u_itr))) { - if(assoc->id == accounting_rec->id) { + if (assoc->id == accounting_rec->id) { list_append(assoc->accounting_list, accounting_rec); list_remove(u_itr); found = 1; - } else if(found) { + } else if (found) { /* here we know the list is in id order so if the next record @@ -1002,7 +1002,7 @@ get_usage_for_assoc_list(pgsql_conn_t *pg_conn, char *cluster, List assoc_list, list_iterator_destroy(itr); list_iterator_destroy(u_itr); - if(list_count(usage_list)) + if (list_count(usage_list)) error("we have %d records not added " "to the association list", list_count(usage_list)); @@ -1037,20 +1037,20 @@ get_usage_for_wckey_list(pgsql_conn_t *pg_conn, char *cluster, List wckey_list, F_COUNT }; - if(!wckey_list) { + if (!wckey_list) { error("We need an object to set data for getting usage"); return SLURM_ERROR; } usage_table = wckey_day_table; - if(set_usage_information(&usage_table, DBD_GET_WCKEY_USAGE, + if (set_usage_information(&usage_table, DBD_GET_WCKEY_USAGE, &start, &end) != SLURM_SUCCESS) { return SLURM_ERROR; } itr = list_iterator_create(wckey_list); 
while((wckey = list_next(itr))) { - if(id_str) + if (id_str) xstrfmtcat(id_str, " OR id_wckey=%d", wckey->id); else xstrfmtcat(id_str, "id_wckey=%d", wckey->id); @@ -1064,7 +1064,7 @@ get_usage_for_wckey_list(pgsql_conn_t *pg_conn, char *cluster, List wckey_list, cluster, usage_table, end, start, id_str); xfree(id_str); result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; usage_list = list_create(slurmdb_destroy_accounting_rec); @@ -1082,16 +1082,16 @@ get_usage_for_wckey_list(pgsql_conn_t *pg_conn, char *cluster, List wckey_list, itr = list_iterator_create(wckey_list); while((wckey = list_next(itr))) { int found = 0; - if(!wckey->accounting_list) + if (!wckey->accounting_list) wckey->accounting_list = list_create( slurmdb_destroy_accounting_rec); while((accounting_rec = list_next(u_itr))) { - if(wckey->id == accounting_rec->id) { + if (wckey->id == accounting_rec->id) { list_append(wckey->accounting_list, accounting_rec); list_remove(u_itr); found = 1; - } else if(found) { + } else if (found) { /* here we know the list is in id order so if the next record @@ -1110,7 +1110,7 @@ get_usage_for_wckey_list(pgsql_conn_t *pg_conn, char *cluster, List wckey_list, list_iterator_destroy(itr); list_iterator_destroy(u_itr); - if(list_count(usage_list)) + if (list_count(usage_list)) error("we have %d records not added " "to the wckey list", list_count(usage_list)); diff --git a/src/plugins/accounting_storage/pgsql/as_pg_usage.h b/src/plugins/accounting_storage/pgsql/as_pg_usage.h index 703b31a05c3e00e73834c03308d5d1dc1c290f6d..24ed49c8b4b9ce9488d6b36f8e501e3fd958c03c 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_usage.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_usage.h @@ -10,7 +10,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_user.c b/src/plugins/accounting_storage/pgsql/as_pg_user.c index 211df3a109c86589dc458c31b8863e67e9c52516..1d86101d8d7fcf514fa5059c21aa40be53e184c4 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_user.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_user.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -167,7 +167,7 @@ _make_user_record(slurmdb_user_rec_t *object, time_t now, char **rec, char **txn now, now, object->name, object->default_acct); xstrfmtcat(*txn, "default_acct='%s'", object->default_acct); - if(object->default_wckey) { + if (object->default_wckey) { xstrfmtcat(*rec, ", '%s'", object->default_wckey); xstrfmtcat(*txn, ", default_wckey='%s'", object->default_wckey); @@ -177,7 +177,7 @@ _make_user_record(slurmdb_user_rec_t *object, time_t now, char **rec, char **txn xstrcat(*txn, ", default_wckey=''"); } - if(object->admin_level != SLURMDB_ADMIN_NOTSET) { + if (object->admin_level != SLURMDB_ADMIN_NOTSET) { xstrfmtcat(*rec, ", %u)", object->admin_level); xstrfmtcat(*txn, ", admin_level=%u", object->admin_level); @@ -201,12 +201,12 @@ _get_user_coords(pgsql_conn_t *pg_conn, slurmdb_user_rec_t *user) ListIterator itr = NULL; char *cond = NULL; - if(!user) { + if (!user) { error("as/pg: _get_user_coord: user not given."); return SLURM_ERROR; } - if(!user->coord_accts) + if (!user->coord_accts) user->coord_accts = list_create(slurmdb_destroy_coord_rec); query = xstrdup_printf( @@ -231,7 +231,7 @@ _get_user_coords(pgsql_conn_t *pg_conn, slurmdb_user_rec_t *user) return 
SLURM_SUCCESS; FOR_EACH_CLUSTER(NULL) { - if(query) + if (query) xstrcat(query, " UNION "); query = xstrdup_printf( "SELECT DISTINCT t1.acct FROM %s.%s AS t1, %s.%s AS t2 " @@ -242,20 +242,20 @@ _get_user_coords(pgsql_conn_t *pg_conn, slurmdb_user_rec_t *user) } END_EACH_CLUSTER; xfree(cond); - if(query) { + if (query) { result = DEF_QUERY_RET; - if(!result) + if (!result) return SLURM_ERROR; itr = list_iterator_create(user->coord_accts); FOR_EACH_ROW { char *acct = ROW(0); while((coord = list_next(itr))) { - if(!strcmp(coord->name, acct)) + if (!strcmp(coord->name, acct)) break; } list_iterator_reset(itr); - if(coord) /* already in list */ + if (coord) /* already in list */ continue; coord = xmalloc(sizeof(slurmdb_coord_rec_t)); @@ -279,7 +279,7 @@ _get_user_coords(pgsql_conn_t *pg_conn, slurmdb_user_rec_t *user) static void _make_user_cond(slurmdb_user_cond_t *user_cond, char **cond) { - if(user_cond->assoc_cond) + if (user_cond->assoc_cond) concat_cond_list(user_cond->assoc_cond->user_list, NULL, "name", cond); @@ -287,7 +287,7 @@ _make_user_cond(slurmdb_user_cond_t *user_cond, char **cond) NULL, "default_acct", cond); concat_cond_list(user_cond->def_wckey_list, NULL, "default_wckey", cond); - if(user_cond->admin_level != SLURMDB_ADMIN_NOTSET) { + if (user_cond->admin_level != SLURMDB_ADMIN_NOTSET) { xstrfmtcat(*cond, " AND admin_level=%u", user_cond->admin_level); } @@ -318,7 +318,7 @@ _change_user_name(pgsql_conn_t *pg_conn, slurmdb_user_rec_t *user) acct_coord_table, user->name, user->old_name); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) + if (rc != SLURM_SUCCESS) reset_pgsql_conn(pg_conn); return rc; @@ -374,7 +374,7 @@ as_pg_add_users(pgsql_conn_t *pg_conn, uint32_t uid, List user_list) itr = list_iterator_create(user_list); while((object = list_next(itr))) { - if(!object->name || !object->name[0] || + if (!object->name || !object->name[0] || !object->default_acct || !object->default_acct[0]) { error("as/pg: add_users: we need a user name and " 
"default acct to add."); @@ -386,18 +386,18 @@ as_pg_add_users(pgsql_conn_t *pg_conn, uint32_t uid, List user_list) query = xstrdup_printf("SELECT public.add_user(%s);", rec); xfree(rec); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add user %s", object->name); xfree(info); continue; } /* object moved to update_list, remove from user_list */ - if(addto_update_list(pg_conn->update_list, SLURMDB_ADD_USER, + if (addto_update_list(pg_conn->update_list, SLURMDB_ADD_USER, object) == SLURM_SUCCESS) list_remove(itr); - if(txn_query) + if (txn_query) xstrfmtcat(txn_query, ", (%ld, %u, '%s', '%s', $$%s$$)", now, DBD_ADD_USERS, object->name, @@ -412,20 +412,20 @@ as_pg_add_users(pgsql_conn_t *pg_conn, uint32_t uid, List user_list) user_name, info); xfree(info); - if(object->assoc_list) + if (object->assoc_list) list_transfer(assoc_list, object->assoc_list); - if(object->wckey_list) + if (object->wckey_list) list_transfer(wckey_list, object->wckey_list); } list_iterator_destroy(itr); xfree(user_name); - if(rc == SLURM_SUCCESS) { - if(txn_query) { + if (rc == SLURM_SUCCESS) { + if (txn_query) { xstrcat(txn_query, ";"); rc = pgsql_db_query(pg_conn->db_conn, txn_query); xfree(txn_query); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add txn"); /* rc = SLURM_SUCCESS; */ } @@ -433,8 +433,8 @@ as_pg_add_users(pgsql_conn_t *pg_conn, uint32_t uid, List user_list) } else xfree(txn_query); - if(rc == SLURM_SUCCESS && list_count(assoc_list)) { - if(acct_storage_p_add_associations(pg_conn, uid, assoc_list) + if (rc == SLURM_SUCCESS && list_count(assoc_list)) { + if (acct_storage_p_add_associations(pg_conn, uid, assoc_list) == SLURM_ERROR) { error("Problem adding user associations"); rc = SLURM_ERROR; @@ -442,8 +442,8 @@ as_pg_add_users(pgsql_conn_t *pg_conn, uint32_t uid, List user_list) } list_destroy(assoc_list); - if(rc == SLURM_SUCCESS && list_count(wckey_list)) { - if(acct_storage_p_add_wckeys(pg_conn, uid, 
wckey_list) + if (rc == SLURM_SUCCESS && list_count(wckey_list)) { + if (acct_storage_p_add_wckeys(pg_conn, uid, wckey_list) == SLURM_ERROR) { error("Problem adding user wckeys"); rc = SLURM_ERROR; @@ -474,25 +474,25 @@ as_pg_modify_users(pgsql_conn_t *pg_conn, uint32_t uid, time_t now = time(NULL); PGresult *result = NULL; - if(!user_cond || !user) { + if (!user_cond || !user) { error("as/pg: modify_users: we need something to change"); return NULL; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; /* make condition string */ _make_user_cond(user_cond, &cond); /* make value string */ - if(user->default_acct) + if (user->default_acct) xstrfmtcat(vals, ", default_acct='%s'", user->default_acct); - if(user->default_wckey) + if (user->default_wckey) xstrfmtcat(vals, ", default_wckey='%s'", user->default_wckey); - if(user->name) + if (user->name) xstrfmtcat(vals, ", name='%s'", user->name); - if(user->admin_level != SLURMDB_ADMIN_NOTSET) + if (user->admin_level != SLURMDB_ADMIN_NOTSET) xstrfmtcat(vals, ", admin_level=%u", user->admin_level); if (!cond || !vals) { @@ -505,13 +505,13 @@ as_pg_modify_users(pgsql_conn_t *pg_conn, uint32_t uid, user_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("as/pg: failed to retrieve users to modify"); xfree(vals); return NULL; } - if(user->name && (PQntuples(result) != 1)) { + if (user->name && (PQntuples(result) != 1)) { errno = ESLURM_ONE_CHANGE; xfree(vals); return NULL; @@ -532,12 +532,12 @@ as_pg_modify_users(pgsql_conn_t *pg_conn, uint32_t uid, user_rec = xmalloc(sizeof(slurmdb_user_rec_t)); - if(!user->name) + if (!user->name) user_rec->name = xstrdup(object); else { user_rec->name = xstrdup(user->name); user_rec->old_name = xstrdup(object); - if(_change_user_name(pg_conn, user_rec) + if (_change_user_name(pg_conn, user_rec) != SLURM_SUCCESS) break; } @@ -549,7 +549,7 @@ as_pg_modify_users(pgsql_conn_t *pg_conn, 
uint32_t uid, } END_EACH_ROW; PQclear(result); - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("didn't effect anything"); xfree(vals); @@ -634,7 +634,7 @@ _get_user_running_jobs(pgsql_conn_t *pg_conn, char *assoc_cond) job = xstrdup_printf( "JobID = %-10s C = %-10s A = %-10s U = %-9s", ROW(0), ROW(4), ROW(1), ROW(2)); - if(!ISEMPTY(3)) + if (!ISEMPTY(3)) xstrfmtcat(job, " P = %s", ROW(3)); if (!job_list) job_list = list_create(slurm_destroy_char); @@ -682,7 +682,7 @@ _cluster_remove_user_assoc(pgsql_conn_t *pg_conn, char *cluster, xstrfmtcat(assoc_char, "id_assoc=%s", ROW(0)); lft = atoi(ROW(1)); - if(lft < smallest_lft) + if (lft < smallest_lft) smallest_lft = lft; rem_assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -709,7 +709,7 @@ _cluster_remove_user_assoc(pgsql_conn_t *pg_conn, char *cluster, } } - if(rc == SLURM_SUCCESS) + if (rc == SLURM_SUCCESS) rc = pgsql_get_modified_lfts(pg_conn, cluster, smallest_lft); if (rc != SLURM_SUCCESS) { @@ -760,7 +760,7 @@ as_pg_remove_users(pgsql_conn_t *pg_conn, uint32_t uid, /* make condition string */ _make_user_cond(user_cond, &cond); - if(!cond) { + if (!cond) { error("Nothing to remove"); return NULL; } @@ -769,7 +769,7 @@ as_pg_remove_users(pgsql_conn_t *pg_conn, uint32_t uid, user_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("as/pg: remove_users: failed to get users to remove"); return NULL; } @@ -793,7 +793,7 @@ as_pg_remove_users(pgsql_conn_t *pg_conn, uint32_t uid, list_append(ret_list, object); list_append(assoc_cond.user_list, object); - if(!rc) { + if (!rc) { xstrfmtcat(name_char, "name='%s'", object); xstrfmtcat(assoc_char, "t1.user_name='%s'", object); rc = 1; @@ -808,7 +808,7 @@ as_pg_remove_users(pgsql_conn_t *pg_conn, uint32_t uid, } END_EACH_ROW; PQclear(result); - if(!list_count(ret_list)) { + if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("as/pg: remove_users: nothing affected"); 
list_destroy(assoc_cond.user_list); @@ -818,13 +818,13 @@ as_pg_remove_users(pgsql_conn_t *pg_conn, uint32_t uid, /* remove these users from the coord table */ tmp_list = acct_storage_p_remove_coord(pg_conn, uid, NULL, &user_coord_cond); - if(tmp_list) + if (tmp_list) list_destroy(tmp_list); /* remove these users from the wckey table */ wckey_cond.user_list = assoc_cond.user_list; tmp_list = acct_storage_p_remove_wckeys(pg_conn, uid, &wckey_cond); - if(tmp_list) + if (tmp_list) list_destroy(tmp_list); list_destroy(assoc_cond.user_list); @@ -916,11 +916,11 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, return NULL; } - if(!user_cond) { + if (!user_cond) { xstrcat(cond, "WHERE deleted=0"); } else { - if(user_cond->with_deleted) + if (user_cond->with_deleted) xstrcat(cond, "WHERE (deleted=0 OR deleted=1)"); else xstrcat(cond, "WHERE deleted=0"); @@ -928,7 +928,7 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, } /* only get the requesting user if this flag is set */ - if(!is_admin) { + if (!is_admin) { xstrfmtcat(cond, " AND name='%s'", user.name); } @@ -936,7 +936,7 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, user_table, cond); xfree(cond); result = DEF_QUERY_RET; - if(!result) + if (!result) return NULL; user_list = list_create(slurmdb_destroy_user_rec); @@ -946,7 +946,7 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, user->name = xstrdup(ROW(F_NAME)); user->default_acct = xstrdup(ROW(F_DEF_ACCT)); - if(! ISNULL(F_DEF_WCKEY)) + if (! 
ISNULL(F_DEF_WCKEY)) user->default_wckey = xstrdup(ROW(F_DEF_WCKEY)); else user->default_wckey = xstrdup(""); @@ -956,13 +956,13 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, * different machine where this user may not exist or * may have a different uid */ - if(user_cond && user_cond->with_coords) + if (user_cond && user_cond->with_coords) _get_user_coords(pg_conn, user); } END_EACH_ROW; PQclear(result); /* get associations for users */ - if(user_cond && user_cond->with_assocs) { + if (user_cond && user_cond->with_assocs) { ListIterator assoc_itr = NULL; slurmdb_user_rec_t *user = NULL; slurmdb_association_rec_t *assoc = NULL; @@ -971,16 +971,16 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, /* Make sure we don't get any non-user associations * this is done by at least having a user_list * defined */ - if(!user_cond->assoc_cond) + if (!user_cond->assoc_cond) user_cond->assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); - if(!user_cond->assoc_cond->user_list) + if (!user_cond->assoc_cond->user_list) user_cond->assoc_cond->user_list = list_create(NULL); assoc_list = acct_storage_p_get_associations( pg_conn, uid, user_cond->assoc_cond); - if(!assoc_list) { + if (!assoc_list) { error("as/pg: gt_users: no associations got"); goto get_wckeys; } @@ -989,10 +989,10 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, assoc_itr = list_iterator_create(assoc_list); while((user = list_next(itr))) { while((assoc = list_next(assoc_itr))) { - if(strcmp(assoc->user, user->name)) + if (strcmp(assoc->user, user->name)) continue; - if(!user->assoc_list) + if (!user->assoc_list) user->assoc_list = list_create( slurmdb_destroy_association_rec); list_append(user->assoc_list, assoc); @@ -1007,7 +1007,7 @@ as_pg_get_users(pgsql_conn_t *pg_conn, uid_t uid, get_wckeys: /* get wckey for users */ - if(user_cond && user_cond->with_wckeys) { + if (user_cond && user_cond->with_wckeys) { ListIterator wckey_itr = NULL; slurmdb_user_rec_t *user = NULL; slurmdb_wckey_rec_t 
*wckey = NULL; @@ -1015,7 +1015,7 @@ get_wckeys: slurmdb_wckey_cond_t wckey_cond; memset(&wckey_cond, 0, sizeof(slurmdb_wckey_cond_t)); - if(user_cond->assoc_cond) { + if (user_cond->assoc_cond) { wckey_cond.user_list = user_cond->assoc_cond->user_list; wckey_cond.cluster_list = @@ -1024,7 +1024,7 @@ get_wckeys: wckey_list = acct_storage_p_get_wckeys( pg_conn, uid, &wckey_cond); - if(!wckey_list) { + if (!wckey_list) { error("as/pg: get_users: no wckeys got"); return user_list; } @@ -1033,10 +1033,10 @@ get_wckeys: wckey_itr = list_iterator_create(wckey_list); while((user = list_next(itr))) { while((wckey = list_next(wckey_itr))) { - if(strcmp(wckey->user, user->name)) + if (strcmp(wckey->user, user->name)) continue; - if(!user->wckey_list) + if (!user->wckey_list) user->wckey_list = list_create( slurmdb_destroy_wckey_rec); list_append(user->wckey_list, wckey); @@ -1072,7 +1072,7 @@ as_pg_add_coord(pgsql_conn_t *pg_conn, uint32_t uid, int rc = SLURM_SUCCESS; slurmdb_user_rec_t *user_rec = NULL; - if(!user_cond || !user_cond->assoc_cond + if (!user_cond || !user_cond->assoc_cond || !user_cond->assoc_cond->user_list || !list_count(user_cond->assoc_cond->user_list) || !acct_list || !list_count(acct_list)) { @@ -1094,13 +1094,13 @@ as_pg_add_coord(pgsql_conn_t *pg_conn, uint32_t uid, * acct, user_name * CAST is required in ARRAY */ - if(vals) + if (vals) xstrcat(vals, ", "); xstrfmtcat(vals, "CAST((%ld, %ld, 0, '%s', '%s') AS %s)", now, now, acct, user, acct_coord_table); - if(txn_query) + if (txn_query) xstrfmtcat(txn_query, ", (%ld, %u, '%s', '%s', '%s')", now, DBD_ADD_ACCOUNT_COORDS, user, @@ -1122,13 +1122,13 @@ as_pg_add_coord(pgsql_conn_t *pg_conn, uint32_t uid, list_iterator_destroy(itr); list_iterator_destroy(itr2); - if(vals) { + if (vals) { xstrfmtcat(query, "SELECT public.add_coords(ARRAY[%s]); %s;", vals, txn_query); xfree(vals); xfree(txn_query); rc = DEF_QUERY_RET_RC; - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add account 
coordinator"); return rc; } @@ -1171,7 +1171,7 @@ as_pg_remove_coord(pgsql_conn_t *pg_conn, uint32_t uid, if (!user_cond && !acct_list) { error("as/pg: remove_coord: we need something to remove"); return NULL; - } else if(user_cond && user_cond->assoc_cond) + } else if (user_cond && user_cond->assoc_cond) user_list = user_cond->assoc_cond->user_list; if (check_db_connection(pg_conn) != SLURM_SUCCESS) @@ -1191,7 +1191,7 @@ as_pg_remove_coord(pgsql_conn_t *pg_conn, uint32_t uid, concat_cond_list(user_list, NULL, "user_name", &cond); concat_cond_list(acct_list, NULL, "acct", &cond); - if(!cond) { + if (!cond) { errno = SLURM_ERROR; debug3("as/pg: remove_coord: No conditions given"); return NULL; @@ -1202,7 +1202,7 @@ as_pg_remove_coord(pgsql_conn_t *pg_conn, uint32_t uid, acct_coord_table, cond); /* cond used below */ result = DEF_QUERY_RET; - if(!result) { + if (!result) { xfree(cond); errno = SLURM_ERROR; return NULL; @@ -1211,7 +1211,7 @@ as_pg_remove_coord(pgsql_conn_t *pg_conn, uint32_t uid, ret_list = list_create(slurm_destroy_char); user_list = list_create(slurm_destroy_char); FOR_EACH_ROW { - if(!is_admin && !is_user_coord(&user, ROW(1))) { + if (!is_admin && !is_user_coord(&user, ROW(1))) { error("as/pg: remove_coord: User %s(%d) does " "not have the ability to change this " "account (%s)", @@ -1224,7 +1224,7 @@ as_pg_remove_coord(pgsql_conn_t *pg_conn, uint32_t uid, return NULL; } /* record users affected */ - if(!last_user || strcasecmp(last_user, ROW(0))) { + if (!last_user || strcasecmp(last_user, ROW(0))) { list_append(user_list, xstrdup(ROW(0))); last_user = ROW(0); } diff --git a/src/plugins/accounting_storage/pgsql/as_pg_user.h b/src/plugins/accounting_storage/pgsql/as_pg_user.h index 3a6f49a587d401942c11c321ecf1cbff38d3bd87..2ae5e62b3f5d8f6fecf406735d4eabfcc003c95e 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_user.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_user.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file 
is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/pgsql/as_pg_wckey.c b/src/plugins/accounting_storage/pgsql/as_pg_wckey.c index 4077e5bc56106c7c098124659e60bf1b006fc67e..bfd2b2610eaa90557affbebd8c65bbfa4785fed8 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_wckey.c +++ b/src/plugins/accounting_storage/pgsql/as_pg_wckey.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -149,7 +149,7 @@ as_pg_add_wckeys(pgsql_conn_t *pg_conn, uint32_t uid, List wckey_list) user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(wckey_list); while((object = list_next(itr))) { - if(!object->cluster || !object->cluster[0] || + if (!object->cluster || !object->cluster[0] || !object->user || !object->user[0]) { error("as/pg: add_wckeys: we need a wckey name, " "cluster, and user to add."); @@ -164,7 +164,7 @@ as_pg_add_wckeys(pgsql_conn_t *pg_conn, uint32_t uid, List wckey_list) DEBUG_QUERY; object->id = pgsql_query_ret_id(pg_conn->db_conn, query); xfree(query); - if(!object->id) { + if (!object->id) { error("Couldn't add wckey %s", object->name); added=0; break; @@ -176,10 +176,10 @@ as_pg_add_wckeys(pgsql_conn_t *pg_conn, uint32_t uid, List wckey_list) id_str, user_name, info); xfree(id_str); xfree(info); - if(rc != SLURM_SUCCESS) { + if (rc != SLURM_SUCCESS) { error("Couldn't add txn"); } else { - if(addto_update_list(pg_conn->update_list, + if (addto_update_list(pg_conn->update_list, 
SLURMDB_ADD_WCKEY, object) == SLURM_SUCCESS) list_remove(itr); @@ -189,8 +189,8 @@ as_pg_add_wckeys(pgsql_conn_t *pg_conn, uint32_t uid, List wckey_list) list_iterator_destroy(itr); xfree(user_name); - if(!added) { - if(pg_conn->rollback) { + if (!added) { + if (pg_conn->rollback) { pgsql_db_rollback(pg_conn->db_conn); } list_flush(pg_conn->update_list); @@ -242,7 +242,7 @@ _cluster_remove_wckeys(pgsql_conn_t *pg_conn, char *cluster, char *user_name, FOR_EACH_ROW { slurmdb_wckey_rec_t *wckey_rec = NULL; list_append(ret_list, xstrdup(ROW(1))); - if(!name_char) + if (!name_char) xstrfmtcat(name_char, "id_wckey=%s", ROW(0)); else @@ -295,7 +295,7 @@ as_pg_remove_wckeys(pgsql_conn_t *pg_conn, uint32_t uid, return NULL; } - if(check_db_connection(pg_conn) != SLURM_SUCCESS) + if (check_db_connection(pg_conn) != SLURM_SUCCESS) return NULL; cond = _make_wckey_cond(wckey_cond); @@ -318,7 +318,7 @@ as_pg_remove_wckeys(pgsql_conn_t *pg_conn, uint32_t uid, reset_pgsql_conn(pg_conn); list_destroy(ret_list); ret_list = NULL; - } else if(!list_count(ret_list)) { + } else if (!list_count(ret_list)) { errno = SLURM_NO_CHANGE_IN_DATA; debug3("as/pg: remove_wckeys: didn't effect anything"); } @@ -351,7 +351,7 @@ _cluster_get_wckeys(pgsql_conn_t *pg_conn, char *cluster, "ORDER BY wckey_name, user_name;", gw_fields, cluster, wckey_table, cond ?: ""); result = DEF_QUERY_RET; - if(!result) { + if (!result) { error("as/pg: get_wckeys: failed to get wckey"); return SLURM_ERROR; } @@ -366,14 +366,14 @@ _cluster_get_wckeys(pgsql_conn_t *pg_conn, char *cluster, wckey->user = xstrdup(ROW(GW_USER)); wckey->cluster = xstrdup(cluster); /* we want a blank wckey if the name is null */ - if(ROW(GW_NAME)) + if (ROW(GW_NAME)) wckey->name = xstrdup(ROW(GW_NAME)); else wckey->name = xstrdup(""); } END_EACH_ROW; PQclear(result); - if(with_usage && list_count(wckey_list)) { + if (with_usage && list_count(wckey_list)) { get_usage_for_wckey_list(pg_conn, cluster, wckey_list, wckey_cond->usage_start, 
wckey_cond->usage_end); @@ -427,7 +427,7 @@ as_pg_get_wckeys(pgsql_conn_t *pg_conn, uid_t uid, */ continue; } - if(_cluster_get_wckeys(pg_conn, cluster_name, wckey_cond, + if (_cluster_get_wckeys(pg_conn, cluster_name, wckey_cond, cond, wckey_list) != SLURM_SUCCESS) { list_destroy(wckey_list); @@ -463,18 +463,18 @@ get_wckeyid(pgsql_conn_t *pg_conn, char **name, /* since we are unable to rely on uids here (someone could not have there uid in the system yet) we must first get the user name from the associd */ - if(!(user = get_user_from_associd(pg_conn, cluster, associd))) { + if (!(user = get_user_from_associd(pg_conn, cluster, associd))) { error("No user for associd %u", associd); return 0; } /* get the default key */ - if(!*name) { + if (!*name) { slurmdb_user_rec_t user_rec; memset(&user_rec, 0, sizeof(slurmdb_user_rec_t)); user_rec.uid = NO_VAL; user_rec.name = user; - if(assoc_mgr_fill_in_user(pg_conn, &user_rec, + if (assoc_mgr_fill_in_user(pg_conn, &user_rec, 1, NULL) != SLURM_SUCCESS) { error("No user by name of %s assoc %u", user, associd); @@ -482,7 +482,7 @@ get_wckeyid(pgsql_conn_t *pg_conn, char **name, goto no_wckeyid; } - if(user_rec.default_wckey) + if (user_rec.default_wckey) *name = xstrdup_printf("*%s", user_rec.default_wckey); else @@ -494,7 +494,7 @@ get_wckeyid(pgsql_conn_t *pg_conn, char **name, wckey_rec.uid = NO_VAL; wckey_rec.user = user; wckey_rec.cluster = cluster; - if(assoc_mgr_fill_in_wckey(pg_conn, &wckey_rec, + if (assoc_mgr_fill_in_wckey(pg_conn, &wckey_rec, ACCOUNTING_ENFORCE_WCKEYS, NULL) != SLURM_SUCCESS) { List wckey_list = NULL; @@ -513,7 +513,7 @@ get_wckeyid(pgsql_conn_t *pg_conn, char **name, /* we have already checked to make sure this was the slurm user before calling this */ - if(acct_storage_p_add_wckeys( + if (acct_storage_p_add_wckeys( pg_conn, slurm_get_slurm_user_id(), wckey_list) diff --git a/src/plugins/accounting_storage/pgsql/as_pg_wckey.h b/src/plugins/accounting_storage/pgsql/as_pg_wckey.h index 
be6c980e540ccc4c9bcb77c8d4e305b5b6e50529..dc58c490ac695389f39037624c0f068e8c8136cc 100644 --- a/src/plugins/accounting_storage/pgsql/as_pg_wckey.h +++ b/src/plugins/accounting_storage/pgsql/as_pg_wckey.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/accounting_storage/slurmdbd/Makefile.in b/src/plugins/accounting_storage/slurmdbd/Makefile.in index 41de2a4e018022e3c3e6cc02e13a5fc3cdb1dd54..c0d245d76e11786756898080bdaaaccb7acbbd77 100644 --- a/src/plugins/accounting_storage/slurmdbd/Makefile.in +++ b/src/plugins/accounting_storage/slurmdbd/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/accounting_storage/slurmdbd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ 
$(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -183,6 +187,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -203,6 +209,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -212,6 +221,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -219,6 +230,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -253,6 +273,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -280,6 +303,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = 
@RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index 0a154339d15323a3d56865079e53861ce154b1d3..9ad66892fefde48ac956f849b715a71c27010a69 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -194,8 +194,10 @@ static int _setup_job_start_msg(dbd_job_start_msg_t *req, } req->alloc_cpus = job_ptr->total_cpus; req->partition = xstrdup(job_ptr->partition); - if (job_ptr->details) + if (job_ptr->details) { req->req_cpus = job_ptr->details->min_cpus; + req->req_mem = job_ptr->details->pn_min_memory; + } req->resv_id = job_ptr->resv_id; req->priority = job_ptr->priority; req->timelimit = job_ptr->time_limit; @@ -399,7 +401,8 @@ extern int init ( void ) verbose("%s loaded with AuthInfo=%s", plugin_name, slurmdbd_auth_info); - if (job_list) { + if (job_list && !(slurm_get_accounting_storage_enforce() & + ACCOUNTING_ENFORCE_NO_JOBS)) { /* only do this when job_list is defined * (in the slurmctld) */ pthread_attr_t thread_attr; @@ -2217,7 +2220,6 @@ extern int jobacct_storage_p_step_start(void *db_conn, step_ptr->step_node_bitmap); } req.node_cnt = nodes; - if (step_ptr->start_time > step_ptr->job_ptr->resize_time) req.start_time = step_ptr->start_time; else @@ -2233,6 +2235,7 @@ extern int jobacct_storage_p_step_start(void *db_conn, req.task_dist = task_dist; req.total_cpus = cpus; req.total_tasks = tasks; + req.req_cpufreq = step_ptr->cpu_freq; msg.msg_type 
= DBD_STEP_START; msg.data = &req; diff --git a/src/plugins/acct_gather_energy/Makefile.in b/src/plugins/acct_gather_energy/Makefile.in index 24c965b1ede8bdad3471a9a0454a5624e1bed38a..466bf9fed8e45b8419aec108e62cbcc17521eec0 100644 --- a/src/plugins/acct_gather_energy/Makefile.in +++ b/src/plugins/acct_gather_energy/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/acct_gather_energy DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS 
= @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/acct_gather_energy/ipmi/Makefile.am b/src/plugins/acct_gather_energy/ipmi/Makefile.am index 3313587bfa61ad41e8ac02a3d47f33ca0b8f00fd..c1215800c8c437c986d3300c390e5b691eb3b3b2 100644 --- a/src/plugins/acct_gather_energy/ipmi/Makefile.am +++ b/src/plugins/acct_gather_energy/ipmi/Makefile.am @@ -5,15 +5,27 @@ AUTOMAKE_OPTIONS = foreign -PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(FREEIPMI_CPPFLAGS) + +if BUILD_IPMI -INCLUDES = 
-I$(top_srcdir) -I$(top_srcdir)/src/common +PLUGIN_FLAGS = -module -avoid-version --export-dynamic pkglib_LTLIBRARIES = acct_gather_energy_ipmi.la -# Null energy accounting ipmi plugin. -acct_gather_energy_ipmi_la_SOURCES = acct_gather_energy_ipmi.c +# Energy accounting ipmi plugin. +acct_gather_energy_ipmi_la_SOURCES = acct_gather_energy_ipmi.c \ + acct_gather_energy_ipmi_config.c \ + acct_gather_energy_ipmi_config.h #acct_gather_energy_ipmi_la_CFLAGS = $(MYSQL_CFLAGS) -acct_gather_energy_ipmi_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +acct_gather_energy_ipmi_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(FREEIPMI_LDFLAGS) +acct_gather_energy_ipmi_la_LIBADD = $(FREEIPMI_LIBS) + +else + +EXTRA_acct_gather_energy_ipmi_la_SOURCES = acct_gather_energy_ipmi.c \ + acct_gather_energy_ipmi_config.c \ + acct_gather_energy_ipmi_config.h +endif diff --git a/src/plugins/acct_gather_energy/ipmi/Makefile.in b/src/plugins/acct_gather_energy/ipmi/Makefile.in index 69a8bc8a52b4b6296f2e7d08f4d623d0ba3a3b1b..dc4979f869a549d78fe319abc36aeb803b61f97d 100644 --- a/src/plugins/acct_gather_energy/ipmi/Makefile.in +++ b/src/plugins/acct_gather_energy/ipmi/Makefile.in @@ -61,6 +61,7 @@ subdir = src/plugins/acct_gather_energy/ipmi DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -78,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -86,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -132,14 +136,26 @@ am__uninstall_files_from_dir = { \ } am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) -acct_gather_energy_ipmi_la_LIBADD = -am_acct_gather_energy_ipmi_la_OBJECTS = acct_gather_energy_ipmi.lo +am__DEPENDENCIES_1 = +@BUILD_IPMI_TRUE@acct_gather_energy_ipmi_la_DEPENDENCIES = \ +@BUILD_IPMI_TRUE@ $(am__DEPENDENCIES_1) +am__acct_gather_energy_ipmi_la_SOURCES_DIST = \ + acct_gather_energy_ipmi.c acct_gather_energy_ipmi_config.c \ + acct_gather_energy_ipmi_config.h +@BUILD_IPMI_TRUE@am_acct_gather_energy_ipmi_la_OBJECTS = \ +@BUILD_IPMI_TRUE@ acct_gather_energy_ipmi.lo \ +@BUILD_IPMI_TRUE@ acct_gather_energy_ipmi_config.lo +am__EXTRA_acct_gather_energy_ipmi_la_SOURCES_DIST = \ + acct_gather_energy_ipmi.c acct_gather_energy_ipmi_config.c \ + acct_gather_energy_ipmi_config.h acct_gather_energy_ipmi_la_OBJECTS = \ $(am_acct_gather_energy_ipmi_la_OBJECTS) acct_gather_energy_ipmi_la_LINK = $(LIBTOOL) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(acct_gather_energy_ipmi_la_LDFLAGS) \ $(LDFLAGS) -o $@ +@BUILD_IPMI_TRUE@am_acct_gather_energy_ipmi_la_rpath = -rpath \ +@BUILD_IPMI_TRUE@ $(pkglibdir) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -153,8 +169,10 @@ CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o 
$@ -SOURCES = $(acct_gather_energy_ipmi_la_SOURCES) -DIST_SOURCES = $(acct_gather_energy_ipmi_la_SOURCES) +SOURCES = $(acct_gather_energy_ipmi_la_SOURCES) \ + $(EXTRA_acct_gather_energy_ipmi_la_SOURCES) +DIST_SOURCES = $(am__acct_gather_energy_ipmi_la_SOURCES_DIST) \ + $(am__EXTRA_acct_gather_energy_ipmi_la_SOURCES_DIST) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -185,6 +203,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -205,6 +225,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -214,6 +237,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -221,6 +246,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -255,6 +289,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -282,6 +319,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = 
@REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -371,15 +411,23 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign -PLUGIN_FLAGS = -module -avoid-version --export-dynamic -INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -pkglib_LTLIBRARIES = acct_gather_energy_ipmi.la +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(FREEIPMI_CPPFLAGS) +@BUILD_IPMI_TRUE@PLUGIN_FLAGS = -module -avoid-version --export-dynamic +@BUILD_IPMI_TRUE@pkglib_LTLIBRARIES = acct_gather_energy_ipmi.la + +# Energy accounting ipmi plugin. +@BUILD_IPMI_TRUE@acct_gather_energy_ipmi_la_SOURCES = acct_gather_energy_ipmi.c \ +@BUILD_IPMI_TRUE@ acct_gather_energy_ipmi_config.c \ +@BUILD_IPMI_TRUE@ acct_gather_energy_ipmi_config.h -# Null energy accounting ipmi plugin. 
-acct_gather_energy_ipmi_la_SOURCES = acct_gather_energy_ipmi.c #acct_gather_energy_ipmi_la_CFLAGS = $(MYSQL_CFLAGS) -acct_gather_energy_ipmi_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +@BUILD_IPMI_TRUE@acct_gather_energy_ipmi_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(FREEIPMI_LDFLAGS) +@BUILD_IPMI_TRUE@acct_gather_energy_ipmi_la_LIBADD = $(FREEIPMI_LIBS) +@BUILD_IPMI_FALSE@EXTRA_acct_gather_energy_ipmi_la_SOURCES = acct_gather_energy_ipmi.c \ +@BUILD_IPMI_FALSE@ acct_gather_energy_ipmi_config.c \ +@BUILD_IPMI_FALSE@ acct_gather_energy_ipmi_config.h + all: all-am .SUFFIXES: @@ -447,7 +495,7 @@ clean-pkglibLTLIBRARIES: rm -f "$${dir}/so_locations"; \ done acct_gather_energy_ipmi.la: $(acct_gather_energy_ipmi_la_OBJECTS) $(acct_gather_energy_ipmi_la_DEPENDENCIES) $(EXTRA_acct_gather_energy_ipmi_la_DEPENDENCIES) - $(acct_gather_energy_ipmi_la_LINK) -rpath $(pkglibdir) $(acct_gather_energy_ipmi_la_OBJECTS) $(acct_gather_energy_ipmi_la_LIBADD) $(LIBS) + $(acct_gather_energy_ipmi_la_LINK) $(am_acct_gather_energy_ipmi_la_rpath) $(acct_gather_energy_ipmi_la_OBJECTS) $(acct_gather_energy_ipmi_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -456,6 +504,7 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_energy_ipmi.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_energy_ipmi_config.Plo@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c index 26ed8b8d3edf1278e5d92622074364102fe57176..0f744fc12160a880fd80534f628375de9aad0e2f 100644 --- a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c @@ -1,11 +1,11 @@ /*****************************************************************************\ * acct_gather_energy_ipmi.c - slurm energy 
accounting plugin for ipmi. ***************************************************************************** - * Written by Bull-HN-PHX/d.rusak, - * Copyright (C) 2012 Bull-HN-PHX. + * Copyright (C) 2012 + * Written by Bull- Thomas Cadeau * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -48,26 +48,42 @@ * including adjusting the makefiles for SLUrM building . */ +#include <stdlib.h> +#include <stdio.h> #include <fcntl.h> +#include <sys/stat.h> #include <signal.h> + #include "src/common/slurm_xlator.h" -#include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather_energy.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" +#include "src/common/fd.h" #include "src/slurmd/common/proctrack.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_inttypes.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_intf.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_sdr.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_sel.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_mc.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_sensor.h" -//#include "src/plugins/acct_gather_energy/ipmi/ipmi_sol.h" +#include "src/slurmd/slurmd/slurmd.h" +#include "acct_gather_energy_ipmi_config.h" +/* + * freeipmi includes for the lib + */ +#include <ipmi_monitoring.h> +#include <ipmi_monitoring_bitmasks.h> + +/* These are defined here so when we link with something other than + * the slurmctld we will have these symbols defined. They will get + * overwritten when linking with the slurmctld. 
+ */ +#if defined (__APPLE__) +slurmd_conf_t *conf __attribute__((weak_import)) = NULL; +#else +slurmd_conf_t *conf = NULL; +#endif #define _DEBUG 1 #define _DEBUG_ENERGY 1 +#define IPMI_VERSION 1 /* Data structure version number */ +#define NBFIRSTREAD 3 /* * These variables are required by the generic plugin interface. If they @@ -103,33 +119,724 @@ const char plugin_name[] = "AcctGatherEnergy IPMI plugin"; const char plugin_type[] = "acct_gather_energy/ipmi"; const uint32_t plugin_version = 100; +/* + * freeipmi variable declaration + */ +/* Global structure */ +struct ipmi_monitoring_ipmi_config ipmi_config; +ipmi_monitoring_ctx_t ipmi_ctx = NULL; +unsigned int sensor_reading_flags = 0; +/* Hostname, NULL for In-band communication, non-null for a hostname */ +char *hostname = NULL; +/* Set to an appropriate alternate if desired */ +char *sdr_cache_directory = "/tmp"; +char *sensor_config_file = NULL; +/* + * internal variables + */ +static uint32_t last_update_watt = 0; +static time_t last_update_time = 0; +static time_t previous_update_time = 0; +static acct_gather_energy_t *local_energy = NULL; +static slurm_ipmi_conf_t slurm_ipmi_conf; static uint32_t debug_flags = 0; +static bool flag_energy_accounting_shutdown = false; +static bool flag_thread_started = false; +static bool flag_init = false; +static pthread_mutex_t ipmi_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_t cleanup_handler_thread = 0; +pthread_t thread_ipmi_id_launcher = 0; +pthread_t thread_ipmi_id_run = 0; + +static bool _is_thread_launcher(void) +{ + static bool set = false; + static bool run = false; + + if (!set) { + set = 1; + run = run_in_daemon("slurmd"); + } + + return run; +} + +static bool _run_in_daemon(void) +{ + static bool set = false; + static bool run = false; + + if (!set) { + set = 1; + run = run_in_daemon("slurmd,slurmstepd"); + } + + return run; +} + +static void _task_sleep(int rem) +{ + while (rem) + rem = sleep(rem); // subject to interupt +} + +static int 
_running_profile(void) +{ + static bool run = false; + static uint32_t profile_opt = ACCT_GATHER_PROFILE_NOT_SET; + + if (profile_opt == ACCT_GATHER_PROFILE_NOT_SET) { + acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, + &profile_opt); + if (profile_opt & ACCT_GATHER_PROFILE_ENERGY) + run = true; + } + + return run; +} + +/* + * _get_additional_consumption computes consumption between 2 times + * method is set to third method strongly + */ +static uint32_t _get_additional_consumption(time_t time0, time_t time1, + uint32_t watt0, uint32_t watt1) +{ + uint32_t consumption; + consumption = (uint32_t) ((time1 - time0)*(watt1 + watt0)/2); + + return consumption; +} + +/* + * _init_ipmi_config initializes parameters for freeipmi library + */ +static int _init_ipmi_config (void) +{ + int errnum; + /* Initialization flags + * Most commonly bitwise OR IPMI_MONITORING_FLAGS_DEBUG and/or + * IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS for extra debugging + * information. + */ + unsigned int ipmimonitoring_init_flags = 0; + memset(&ipmi_config, 0, sizeof(struct ipmi_monitoring_ipmi_config)); + ipmi_config.driver_type = (int) slurm_ipmi_conf.driver_type; + ipmi_config.disable_auto_probe = + (int) slurm_ipmi_conf.disable_auto_probe; + ipmi_config.driver_address = + (unsigned int) slurm_ipmi_conf.driver_address; + ipmi_config.register_spacing = + (unsigned int) slurm_ipmi_conf.register_spacing; + ipmi_config.driver_device = slurm_ipmi_conf.driver_device; + ipmi_config.protocol_version = (int) slurm_ipmi_conf.protocol_version; + ipmi_config.username = slurm_ipmi_conf.username; + ipmi_config.password = slurm_ipmi_conf.password; + ipmi_config.k_g = slurm_ipmi_conf.k_g; + ipmi_config.k_g_len = (unsigned int) slurm_ipmi_conf.k_g_len; + ipmi_config.privilege_level = (int) slurm_ipmi_conf.privilege_level; + ipmi_config.authentication_type = + (int) slurm_ipmi_conf.authentication_type; + ipmi_config.cipher_suite_id = (int) slurm_ipmi_conf.cipher_suite_id; + 
ipmi_config.session_timeout_len = (int) slurm_ipmi_conf.session_timeout; + ipmi_config.retransmission_timeout_len = + (int) slurm_ipmi_conf.retransmission_timeout; + ipmi_config.workaround_flags = + (unsigned int) slurm_ipmi_conf.workaround_flags; + + if (ipmi_monitoring_init(ipmimonitoring_init_flags, &errnum) < 0) { + error("ipmi_monitoring_init: %s", + ipmi_monitoring_ctx_strerror(errnum)); + return SLURM_FAILURE; + } + if (!(ipmi_ctx = ipmi_monitoring_ctx_create())) { + error("ipmi_monitoring_ctx_create"); + return SLURM_FAILURE; + } + if (sdr_cache_directory) { + if (ipmi_monitoring_ctx_sdr_cache_directory( + ipmi_ctx, sdr_cache_directory) < 0) { + error("ipmi_monitoring_ctx_sdr_cache_directory: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + } + /* Must call otherwise only default interpretations ever used */ + if (ipmi_monitoring_ctx_sensor_config_file( + ipmi_ctx, sensor_config_file) < 0) { + error("ipmi_monitoring_ctx_sensor_config_file: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + + if (slurm_ipmi_conf.reread_sdr_cache) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + if (slurm_ipmi_conf.ignore_non_interpretable_sensors) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS; + if (slurm_ipmi_conf.bridge_sensors) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS; + if (slurm_ipmi_conf.interpret_oem_data) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA; + if (slurm_ipmi_conf.shared_sensors) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS; + if (slurm_ipmi_conf.discrete_reading) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING; + if (slurm_ipmi_conf.ignore_scanning_disabled) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED; + if 
(slurm_ipmi_conf.assume_bmc_owner) + sensor_reading_flags |= + IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER; + /* FIXME: This is not included until later versions of IPMI, so don't + always have it. + */ + /* if (slurm_ipmi_conf.entity_sensor_names) */ + /* sensor_reading_flags |= */ + /* IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; */ + + return SLURM_SUCCESS; +} + +/* + * _check_power_sensor check if the sensor is in Watt + */ +static int _check_power_sensor(void) +{ + unsigned int record_ids[] = {(int) slurm_ipmi_conf.power_sensor_num}; + unsigned int record_ids_length = 1; + int sensor_units; + void* sensor_reading; + + if ((ipmi_monitoring_sensor_readings_by_record_id( + ipmi_ctx, + hostname, + &ipmi_config, + sensor_reading_flags, + record_ids, + record_ids_length, + NULL, NULL)) != record_ids_length) { + error("ipmi_monitoring_sensor_readings_by_record_id: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + + if ((sensor_units = ipmi_monitoring_sensor_read_sensor_units(ipmi_ctx)) + < 0) { + error("ipmi_monitoring_sensor_read_sensor_units: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + + if (sensor_units != slurm_ipmi_conf.variable) { + error("Configured sensor is not in Watt, " + "please check ipmi.conf"); + return SLURM_FAILURE; + } + + ipmi_monitoring_sensor_iterator_first(ipmi_ctx); + if (ipmi_monitoring_sensor_read_record_id(ipmi_ctx) < 0) { + error("ipmi_monitoring_sensor_read_record_id: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + + sensor_reading = ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); + if (sensor_reading) { + last_update_watt = (uint32_t)(*((double *)sensor_reading)); + previous_update_time = last_update_time; + last_update_time = time(NULL); + } else { + error("ipmi read an empty value for power consumption"); + return SLURM_FAILURE; + } + return SLURM_SUCCESS; +} + +/* + * _find_power_sensor reads all sensors and find 
sensor in Watt + */ +static int _find_power_sensor(void) +{ + int sensor_count; + int i; + int rc = SLURM_FAILURE; + void* sensor_reading; + int sensor_units, record_id; + + sensor_count = ipmi_monitoring_sensor_readings_by_record_id( + ipmi_ctx, + hostname, + &ipmi_config, + sensor_reading_flags, + NULL, + 0, + NULL, + NULL); + + if (sensor_count < 0) { + error("ipmi_monitoring_sensor_readings_by_record_id: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + + for (i = 0; i < sensor_count; i++, + ipmi_monitoring_sensor_iterator_next(ipmi_ctx)) { + if ((sensor_units = + ipmi_monitoring_sensor_read_sensor_units(ipmi_ctx)) + < 0) { + error("ipmi_monitoring_sensor_read_sensor_units: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + + if (sensor_units != slurm_ipmi_conf.variable) + continue; + + if ((record_id = + ipmi_monitoring_sensor_read_record_id(ipmi_ctx)) + < 0) { + error("ipmi_monitoring_sensor_read_record_id: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + slurm_ipmi_conf.power_sensor_num = (uint32_t) record_id; + sensor_reading = ipmi_monitoring_sensor_read_sensor_reading( + ipmi_ctx); + if (sensor_reading) { + last_update_watt = + (uint32_t)(*((double *)sensor_reading)); + last_update_time = time(NULL); + } else { + error("ipmi read an empty value for power consumption"); + rc = SLURM_FAILURE; + continue; + } + rc = SLURM_SUCCESS; + break; + } + + if (rc != SLURM_SUCCESS) + info("Power sensor not found."); + else if (debug_flags & DEBUG_FLAG_ENERGY) + info("Power sensor found: %d", + slurm_ipmi_conf.power_sensor_num); + + return rc; +} + +/* + * _read_ipmi_values read the Power sensor and update last_update_watt and times + */ +static int _read_ipmi_values(void) +{ + unsigned int record_ids[] = {(int) slurm_ipmi_conf.power_sensor_num}; + unsigned int record_ids_length = 1; + void* sensor_reading; + + if ((ipmi_monitoring_sensor_readings_by_record_id( + ipmi_ctx, + hostname, + 
&ipmi_config, + sensor_reading_flags, + record_ids, + record_ids_length, + NULL,NULL)) != record_ids_length) { + error("ipmi_monitoring_sensor_readings_by_record_id: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + ipmi_monitoring_sensor_iterator_first(ipmi_ctx); + if (ipmi_monitoring_sensor_read_record_id(ipmi_ctx) < 0) { + error("ipmi_monitoring_sensor_read_record_id: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + sensor_reading = ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); + if (sensor_reading) { + last_update_watt = (uint32_t)(*((double *)sensor_reading)); + previous_update_time = last_update_time; + last_update_time = time(NULL); + } else { + error("ipmi read an empty value for power consumption"); + return SLURM_FAILURE; + } + + return SLURM_SUCCESS; +} + +/* + * _thread_update_node_energy calls _read_ipmi_values and updates all values + * for node consumption + */ +static int _thread_update_node_energy(void) +{ + int rc = SLURM_SUCCESS; + + if (local_energy->current_watts == NO_VAL) + return rc; + + rc = _read_ipmi_values(); + + if (rc == SLURM_SUCCESS) { + if (local_energy->current_watts != 0) { + local_energy->base_watts = local_energy->current_watts; + local_energy->current_watts = last_update_watt; + if (previous_update_time == 0) + local_energy->base_consumed_energy = 0; + else + local_energy->base_consumed_energy = + _get_additional_consumption( + previous_update_time, + last_update_time, + local_energy->base_watts, + local_energy->current_watts); + local_energy->previous_consumed_energy = + local_energy->consumed_energy; + local_energy->consumed_energy += + local_energy->base_consumed_energy; + } + if (previous_update_time == 0) + previous_update_time = last_update_time; + if (local_energy->current_watts == 0) { + local_energy->consumed_energy = 0; + local_energy->base_watts = 0; + local_energy->current_watts = last_update_watt; + } + local_energy->poll_time = time(NULL); + } + if 
(debug_flags & DEBUG_FLAG_ENERGY) { + info("ipmi-thread = %d sec, current %d Watts, " + "consumed %d Joules %d new", + (int) (last_update_time - previous_update_time), + local_energy->current_watts, + local_energy->consumed_energy, + local_energy->base_consumed_energy); + } + + return rc; +} + +/* + * _thread_init initializes values and conf for the ipmi thread + */ +static int _thread_init(void) +{ + static bool first = true; + static bool first_init = SLURM_FAILURE; + int rc = SLURM_SUCCESS; + + if (!first) + return first_init; + first = false; + + if (_init_ipmi_config() != SLURM_SUCCESS) { + //TODO verbose error? + rc = SLURM_FAILURE; + } else { + if ((slurm_ipmi_conf.power_sensor_num == -1 + && _find_power_sensor() != SLURM_SUCCESS) + || _check_power_sensor() != SLURM_SUCCESS) { + local_energy->current_watts = NO_VAL; + } else { + local_energy->current_watts = last_update_watt; + } + if (slurm_ipmi_conf.reread_sdr_cache) + //IPMI cache is reread only on initialisation + //This option need a big EnergyIPMITimeout + sensor_reading_flags ^= + IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + } + local_energy->consumed_energy = 0; + local_energy->base_watts = 0; + slurm_mutex_unlock(&ipmi_mutex); + + if (rc != SLURM_SUCCESS) + if (ipmi_ctx) + ipmi_monitoring_ctx_destroy(ipmi_ctx); + + if (debug_flags & DEBUG_FLAG_ENERGY) + info("%s thread init", plugin_name); + + first_init = SLURM_SUCCESS; + + return rc; +} + +static int _ipmi_send_profile(void) +{ + acct_energy_data_t ener; + + if (!_running_profile()) + return SLURM_SUCCESS; + + if (debug_flags & DEBUG_FLAG_ENERGY) + info("_ipmi_send_profile: consumed %d watts", + local_energy->current_watts); + + memset(&ener, 0, sizeof(acct_energy_data_t)); + /*TODO function to calculate Average CPUs Frequency*/ + /*ener->cpu_freq = // read /proc/...*/ + ener.cpu_freq = 1; + ener.time = time(NULL); + ener.power = local_energy->current_watts; + acct_gather_profile_g_add_sample_data( + ACCT_GATHER_PROFILE_ENERGY, &ener); 
+ + return SLURM_ERROR; +} + + +/* + * _thread_ipmi_run is the thread calling ipmi and launching _thread_ipmi_write + */ +static void *_thread_ipmi_run(void *no_data) +{ +// need input (attr) + int time_lost; + + (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + + flag_energy_accounting_shutdown = false; + if (debug_flags & DEBUG_FLAG_ENERGY) + info("ipmi-thread: launched"); + + slurm_mutex_lock(&ipmi_mutex); + if (_thread_init() != SLURM_SUCCESS) { + if (debug_flags & DEBUG_FLAG_ENERGY) + info("ipmi-thread: aborted"); + slurm_mutex_unlock(&ipmi_mutex); + return NULL; + } + slurm_mutex_unlock(&ipmi_mutex); + + flag_thread_started = true; + + //loop until slurm stop + while (!flag_energy_accounting_shutdown) { + time_lost = (int)(time(NULL) - last_update_time); + if (time_lost <= slurm_ipmi_conf.freq) + _task_sleep(slurm_ipmi_conf.freq - time_lost); + else + _task_sleep(1); + slurm_mutex_lock(&ipmi_mutex); + _thread_update_node_energy(); + slurm_mutex_unlock(&ipmi_mutex); + } + + if (debug_flags & DEBUG_FLAG_ENERGY) + info("ipmi-thread: ended"); + + return NULL; +} + +static void *_cleanup_thread(void *no_data) +{ + if (thread_ipmi_id_run) + pthread_join(thread_ipmi_id_run, NULL); + + if (ipmi_ctx) + ipmi_monitoring_ctx_destroy(ipmi_ctx); + reset_slurm_ipmi_conf(&slurm_ipmi_conf); + + return NULL; +} + +static void *_thread_launcher(void *no_data) +{ + //what arg would countain? frequency, socket? + pthread_attr_t attr_run; + time_t begin_time; + int rc = SLURM_SUCCESS; + + slurm_attr_init(&attr_run); + if (pthread_create(&thread_ipmi_id_run, &attr_run, + &_thread_ipmi_run, NULL)) { + //if (pthread_create(... 
(void *)arg)) { + debug("energy accounting failed to create _thread_ipmi_run " + "thread: %m"); + } + slurm_attr_destroy(&attr_run); + + begin_time = time(NULL); + while (rc == SLURM_SUCCESS) { + if (time(NULL) - begin_time > slurm_ipmi_conf.timeout) { + error("ipmi thread init timeout"); + rc = SLURM_ERROR; + break; + } + if (flag_thread_started) + break; + _task_sleep(1); + } + + if (rc != SLURM_SUCCESS) { + error("%s threads failed to start in a timely manner", + plugin_name); + + if (thread_ipmi_id_run) { + pthread_cancel(thread_ipmi_id_run); + pthread_join(thread_ipmi_id_run, NULL); + } + + flag_energy_accounting_shutdown = true; + } else { + /* This is here to join the decay thread so we don't core + * dump if in the sleep, since there is no other place to join + * we have to create another thread to do it. */ + slurm_attr_init(&attr_run); + if (pthread_create(&cleanup_handler_thread, &attr_run, + _cleanup_thread, NULL)) + fatal("pthread_create error %m"); + + slurm_attr_destroy(&attr_run); + } + + return NULL; +} + +static int _get_joules_task(uint16_t delta) +{ + acct_gather_energy_t *last_energy = NULL; + time_t now; + static bool first = true; + static uint32_t start_current_energy = 0; + uint32_t adjustment = 0; + + last_energy = local_energy; + local_energy = NULL; + + if (slurm_get_node_energy(NULL, delta, &local_energy)) { + error("_get_joules_task: can't get info from slurmd"); + local_energy = last_energy; + return SLURM_ERROR; + } + now = time(NULL); + + local_energy->previous_consumed_energy = last_energy->consumed_energy; + + if (slurm_ipmi_conf.adjustment) + adjustment = _get_additional_consumption( + local_energy->poll_time, now, + local_energy->current_watts, + local_energy->current_watts); + + if (!first) { + local_energy->consumed_energy -= start_current_energy; + + local_energy->base_consumed_energy = + (local_energy->consumed_energy + - last_energy->consumed_energy) + + adjustment; + } else { + /* This is just for the step, so take all the 
pervious + consumption out of the mix. + */ + start_current_energy = + local_energy->consumed_energy + adjustment; + local_energy->base_consumed_energy = 0; + first = false; + } + + local_energy->consumed_energy = local_energy->previous_consumed_energy + + local_energy->base_consumed_energy; + + acct_gather_energy_destroy(last_energy); + + if (debug_flags & DEBUG_FLAG_ENERGY) + info("_get_joules_task: consumed %u Joules " + "(received %u(%u watts) from slurmd)", + local_energy->consumed_energy, + local_energy->base_consumed_energy, + local_energy->current_watts); + + return SLURM_SUCCESS; +} /* * init() is called when the plugin is loaded, before any other functions * are called. Put global initialization here. */ -extern int init ( void ) +extern int init(void) { - verbose("%s loaded", plugin_name); debug_flags = slurm_get_debug_flags(); + /* put anything that requires the .conf being read in + acct_gather_energy_p_conf_parse + */ + + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + if (!_run_in_daemon()) + return SLURM_SUCCESS; + + flag_energy_accounting_shutdown = true; + + slurm_mutex_lock(&ipmi_mutex); + if (thread_ipmi_id_run) + pthread_cancel(thread_ipmi_id_run); + if (cleanup_handler_thread) + pthread_join(cleanup_handler_thread, NULL); + slurm_mutex_unlock(&ipmi_mutex); + + acct_gather_energy_destroy(local_energy); + local_energy = NULL; return SLURM_SUCCESS; } extern int acct_gather_energy_p_update_node_energy(void) { int rc = SLURM_SUCCESS; + xassert(_run_in_daemon()); + return rc; } extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy) + void *data) { int rc = SLURM_SUCCESS; + acct_gather_energy_t *energy = (acct_gather_energy_t *)data; + time_t *last_poll = (time_t *)data; + + xassert(_run_in_daemon()); + switch (data_type) { case ENERGY_DATA_JOULES_TASK: + slurm_mutex_lock(&ipmi_mutex); + if (_is_thread_launcher()) { + if (_thread_init() == SLURM_SUCCESS) + _thread_update_node_energy(); + } 
else + _get_joules_task(10); /* Since we don't have + access to the + frequency here just + send in something. + */ + memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); + slurm_mutex_unlock(&ipmi_mutex); break; case ENERGY_DATA_STRUCT: + slurm_mutex_lock(&ipmi_mutex); + memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); + slurm_mutex_unlock(&ipmi_mutex); + if (debug_flags & DEBUG_FLAG_ENERGY) { + info("_get_joules_node_ipmi = consumed %d Joules", + energy->consumed_energy); + } + break; + case ENERGY_DATA_LAST_POLL: + slurm_mutex_lock(&ipmi_mutex); + *last_poll = local_energy->poll_time; + slurm_mutex_unlock(&ipmi_mutex); break; default: error("acct_gather_energy_p_get_data: unknown enum %d", @@ -141,14 +848,23 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, } extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy) + void *data) { int rc = SLURM_SUCCESS; + int *delta = (int *)data; + + xassert(_run_in_daemon()); switch (data_type) { case ENERGY_DATA_RECONFIG: debug_flags = slurm_get_debug_flags(); break; + case ENERGY_DATA_PROFILE: + slurm_mutex_lock(&ipmi_mutex); + _get_joules_task(*delta); + _ipmi_send_profile(); + slurm_mutex_unlock(&ipmi_mutex); + break; default: error("acct_gather_energy_p_set_data: unknown enum %d", data_type); @@ -157,3 +873,174 @@ extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, } return rc; } + +extern void acct_gather_energy_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ +// s_p_options_t *full_options_ptr; + s_p_options_t options[] = { + {"EnergyIPMIDriverType", S_P_UINT32}, + {"EnergyIPMIDisableAutoProbe", S_P_UINT32}, + {"EnergyIPMIDriverAddress", S_P_UINT32}, + {"EnergyIPMIRegisterSpacing", S_P_UINT32}, + {"EnergyIPMIDriverDevice", S_P_STRING}, + {"EnergyIPMIProtocolVersion", S_P_UINT32}, + {"EnergyIPMIUsername", S_P_STRING}, + {"EnergyIPMIPassword", S_P_STRING}, +/* FIXME: remove these 
from the structure? */ +// {"EnergyIPMIk_g", S_P_STRING}, +// {"EnergyIPMIk_g_len", S_P_UINT32}, + {"EnergyIPMIPrivilegeLevel", S_P_UINT32}, + {"EnergyIPMIAuthenticationType", S_P_UINT32}, + {"EnergyIPMICipherSuiteId", S_P_UINT32}, + {"EnergyIPMISessionTimeout", S_P_UINT32}, + {"EnergyIPMIRetransmissionTimeout", S_P_UINT32}, + {"EnergyIPMIWorkaroundFlags", S_P_UINT32}, + {"EnergyIPMIRereadSdrCache", S_P_BOOLEAN}, + {"EnergyIPMIIgnoreNonInterpretableSensors", S_P_BOOLEAN}, + {"EnergyIPMIBridgeSensors", S_P_BOOLEAN}, + {"EnergyIPMIInterpretOemData", S_P_BOOLEAN}, + {"EnergyIPMISharedSensors", S_P_BOOLEAN}, + {"EnergyIPMIDiscreteReading", S_P_BOOLEAN}, + {"EnergyIPMIIgnoreScanningDisabled", S_P_BOOLEAN}, + {"EnergyIPMIAssumeBmcOwner", S_P_BOOLEAN}, + {"EnergyIPMIEntitySensorNames", S_P_BOOLEAN}, + {"EnergyIPMIFrequency", S_P_UINT32}, + {"EnergyIPMICalcAdjustment", S_P_BOOLEAN}, + {"EnergyIPMIPowerSensor", S_P_UINT32}, + {"EnergyIPMITimeout", S_P_UINT32}, + {"EnergyIPMIVariable", S_P_STRING}, + {NULL} }; + + transfer_s_p_options(full_options, options, full_options_cnt); +} + +extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) +{ + char *tmp_char; + + /* Set initial values */ + reset_slurm_ipmi_conf(&slurm_ipmi_conf); + + if (tbl) { + /* ipmi initialisation parameters */ + s_p_get_uint32(&slurm_ipmi_conf.driver_type, + "EnergyIPMIDriverType", tbl); + s_p_get_uint32(&slurm_ipmi_conf.disable_auto_probe, + "EnergyIPMIDisableAutoProbe", tbl); + s_p_get_uint32(&slurm_ipmi_conf.driver_address, + "EnergyIPMIDriverAddress", tbl); + s_p_get_uint32(&slurm_ipmi_conf.register_spacing, + "EnergyIPMIRegisterSpacing", tbl); + + s_p_get_string(&slurm_ipmi_conf.driver_device, + "EnergyIPMIDriverDevice", tbl); + + s_p_get_uint32(&slurm_ipmi_conf.protocol_version, + "EnergyIPMIProtocolVersion", tbl); + + if (!s_p_get_string(&slurm_ipmi_conf.username, + "EnergyIPMIUsername", tbl)) + slurm_ipmi_conf.username = xstrdup(DEFAULT_IPMI_USER); + + 
s_p_get_string(&slurm_ipmi_conf.password, + "EnergyIPMIPassword", tbl); + if (!slurm_ipmi_conf.password) + slurm_ipmi_conf.password = xstrdup("foopassword"); + + s_p_get_uint32(&slurm_ipmi_conf.privilege_level, + "EnergyIPMIPrivilegeLevel", tbl); + s_p_get_uint32(&slurm_ipmi_conf.authentication_type, + "EnergyIPMIAuthenticationType", tbl); + s_p_get_uint32(&slurm_ipmi_conf.cipher_suite_id, + "EnergyIPMICipherSuiteId", tbl); + s_p_get_uint32(&slurm_ipmi_conf.session_timeout, + "EnergyIPMISessionTimeout", tbl); + s_p_get_uint32(&slurm_ipmi_conf.retransmission_timeout, + "EnergyIPMIRetransmissionTimeout", tbl); + s_p_get_uint32(&slurm_ipmi_conf. workaround_flags, + "EnergyIPMIWorkaroundFlags", tbl); + + if (!s_p_get_boolean(&slurm_ipmi_conf.reread_sdr_cache, + "EnergyIPMIRereadSdrCache", tbl)) + slurm_ipmi_conf.reread_sdr_cache = false; + if (!s_p_get_boolean(&slurm_ipmi_conf. + ignore_non_interpretable_sensors, + "EnergyIPMIIgnoreNonInterpretableSensors", + tbl)) + slurm_ipmi_conf.ignore_non_interpretable_sensors = + false; + if (!s_p_get_boolean(&slurm_ipmi_conf.bridge_sensors, + "EnergyIPMIBridgeSensors", tbl)) + slurm_ipmi_conf.bridge_sensors = false; + if (!s_p_get_boolean(&slurm_ipmi_conf.interpret_oem_data, + "EnergyIPMIInterpretOemData", tbl)) + slurm_ipmi_conf.interpret_oem_data = false; + if (!s_p_get_boolean(&slurm_ipmi_conf.shared_sensors, + "EnergyIPMISharedSensors", tbl)) + slurm_ipmi_conf.shared_sensors = false; + if (!s_p_get_boolean(&slurm_ipmi_conf.discrete_reading, + "EnergyIPMIDiscreteReading", tbl)) + slurm_ipmi_conf.discrete_reading = false; + if (!s_p_get_boolean(&slurm_ipmi_conf.ignore_scanning_disabled, + "EnergyIPMIIgnoreScanningDisabled", tbl)) + slurm_ipmi_conf.ignore_scanning_disabled = false; + if (!s_p_get_boolean(&slurm_ipmi_conf.assume_bmc_owner, + "EnergyIPMIAssumeBmcOwner", tbl)) + slurm_ipmi_conf.assume_bmc_owner = false; + if (!s_p_get_boolean(&slurm_ipmi_conf.entity_sensor_names, + "EnergyIPMIEntitySensorNames", tbl)) + 
slurm_ipmi_conf.entity_sensor_names = false; + + s_p_get_uint32(&slurm_ipmi_conf.freq, + "EnergyIPMIFrequency", tbl); + + if ((int)slurm_ipmi_conf.freq <= 0) + fatal("EnergyIPMIFrequency must be a positive integer " + "in acct_gather.conf."); + + if (!s_p_get_boolean(&(slurm_ipmi_conf.adjustment), + "EnergyIPMICalcAdjustment", tbl)) + slurm_ipmi_conf.adjustment = false; + + s_p_get_uint32(&slurm_ipmi_conf.power_sensor_num, + "EnergyIPMIPowerSensor", tbl); + + s_p_get_uint32(&slurm_ipmi_conf.timeout, + "EnergyIPMITimeout", tbl); + + if (s_p_get_string(&tmp_char, "EnergyIPMIVariable", tbl)) { + if (!strcmp(tmp_char, "Temp")) + slurm_ipmi_conf.variable = + IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE; + xfree(tmp_char); + } + } + + if (!_run_in_daemon()) + return; + + if (!flag_init) { + local_energy = acct_gather_energy_alloc(); + local_energy->consumed_energy=0; + local_energy->base_consumed_energy=0; + local_energy->base_watts=0; + flag_init = true; + if (_is_thread_launcher()) { + pthread_attr_t attr; + slurm_attr_init(&attr); + if (pthread_create(&thread_ipmi_id_launcher, &attr, + &_thread_launcher, NULL)) { + //if (pthread_create(... 
(void *)arg)) { + debug("energy accounting failed to create " + "_thread_launcher thread: %m"); + } + slurm_attr_destroy(&attr); + if (debug_flags & DEBUG_FLAG_ENERGY) + info("%s thread launched", plugin_name); + } else + _get_joules_task(0); + } + + verbose("%s loaded", plugin_name); +} diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c new file mode 100644 index 0000000000000000000000000000000000000000..23d1d32259a5cf477cd6d22b0fd4f8146b9e7fda --- /dev/null +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c @@ -0,0 +1,95 @@ +/*****************************************************************************\ + * acct_gather_energy_ipmi_config.c - functions for reading ipmi.conf + ***************************************************************************** + * Copyright (C) 2012 + * Written by Bull- Thomas Cadeau + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. 
If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include <pwd.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "slurm/slurm_errno.h" +#include "src/common/log.h" +#include "src/common/list.h" +#include "src/common/macros.h" +#include "src/common/parse_config.h" +#include "src/common/parse_time.h" +#include "src/common/read_config.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" + +#include "acct_gather_energy_ipmi_config.h" + +#define TIMEOUT 10 + +/* Local functions */ +extern void reset_slurm_ipmi_conf(slurm_ipmi_conf_t *slurm_ipmi_conf) +{ + if (slurm_ipmi_conf) { + slurm_ipmi_conf->power_sensor_num = -1; + slurm_ipmi_conf->freq = DEFAULT_IPMI_FREQ; + slurm_ipmi_conf->adjustment = false; + slurm_ipmi_conf->timeout = TIMEOUT; + slurm_ipmi_conf->driver_type = -1; + slurm_ipmi_conf->disable_auto_probe = 0; + slurm_ipmi_conf->driver_address = 0; + slurm_ipmi_conf->register_spacing = 0; + xfree(slurm_ipmi_conf->driver_device); + slurm_ipmi_conf->protocol_version = -1; + xfree(slurm_ipmi_conf->username); + xfree(slurm_ipmi_conf->password); + xfree(slurm_ipmi_conf->k_g); + slurm_ipmi_conf->k_g_len = 0; + slurm_ipmi_conf->privilege_level = -1; + slurm_ipmi_conf->authentication_type = 
-1; + slurm_ipmi_conf->cipher_suite_id = 0; + slurm_ipmi_conf->session_timeout = 0; + slurm_ipmi_conf->retransmission_timeout = 0; + slurm_ipmi_conf->workaround_flags = 0; + slurm_ipmi_conf->reread_sdr_cache = false; + slurm_ipmi_conf->ignore_non_interpretable_sensors = true; + slurm_ipmi_conf->bridge_sensors = false; + slurm_ipmi_conf->interpret_oem_data = false; + slurm_ipmi_conf->shared_sensors = false; + slurm_ipmi_conf->discrete_reading = false; + slurm_ipmi_conf->ignore_scanning_disabled = false; + slurm_ipmi_conf->assume_bmc_owner = false; + slurm_ipmi_conf->entity_sensor_names = false; + slurm_ipmi_conf->variable = IPMI_MONITORING_SENSOR_UNITS_WATTS; + + } +} diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h new file mode 100644 index 0000000000000000000000000000000000000000..4acd8ec9e97a455171e0fee0a132ae4a5fe4af81 --- /dev/null +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h @@ -0,0 +1,174 @@ +/*****************************************************************************\ + * acct_gather_energy_ipmi_config.h - declarations for reading ipmi.conf + ***************************************************************************** + * Copyright (C) 2012 + * Written by Bull- Thomas Cadeau + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef _IPMI_READ_CONFIG_H +#define _IPMI_READ_CONFIG_H + +#include <ipmi_monitoring.h> + +#define DEFAULT_IPMI_FREQ 30 +#define DEFAULT_IPMI_USER "foousername" +#define DEFAULT_IPMI_VARIABLE IPMI_MONITORING_SENSOR_UNITS_WATTS + +typedef struct slurm_ipmi_conf { + /* Options for SLURM IPMI plugin*/ + /* sensor num (only for power) */ + uint32_t power_sensor_num; + /* frequency for ipmi call*/ + uint32_t freq; + /* Adjust/approach the consumption + * in function of time between ipmi update and read call */ + bool adjustment; + /*Timeout for the ipmi thread*/ + uint32_t timeout; + /* Options for IPMI configuration*/ + /* Use a specific in-band driver. 
+ * IPMI_MONITORING_DRIVER_TYPE_KCS = 0x00, + * IPMI_MONITORING_DRIVER_TYPE_SSIF = 0x01, + * IPMI_MONITORING_DRIVER_TYPE_OPENIPMI = 0x02, + * IPMI_MONITORING_DRIVER_TYPE_SUNBMC = 0x03, + * Pass < 0 for default of IPMI_MONITORING_DRIVER_TYPE_KCS.*/ + uint32_t driver_type; + /* Flag informs the library if in-band driver information should be + * probed or not.*/ + uint32_t disable_auto_probe; + /* Use this specified driver address instead of a probed one.*/ + uint32_t driver_address; + /* Use this register space instead of the probed one.*/ + uint32_t register_spacing; + /* Use this driver device for the IPMI driver.*/ + char *driver_device; + /* Out-of-band Communication Configuration */ + /* Indicate the IPMI protocol version to use + * IPMI_MONITORING_PROTOCOL_VERSION_1_5 = 0x00, + * IPMI_MONITORING_PROTOCOL_VERSION_2_0 = 0x01, + * Pass < 0 for default of IPMI_MONITORING_VERSION_1_5.*/ + uint32_t protocol_version; + /* BMC username. Pass NULL ptr for default username. Standard + * default is the null (e.g. empty) username. Maximum length of 16 + * bytes.*/ + char *username; + /* BMC password. Pass NULL ptr for default password. Standard + * default is the null (e.g. empty) password. Maximum length of 20 + * bytes.*/ + char *password; + /* BMC Key for 2-key authentication. Pass NULL ptr to use the + * default. Standard default is the null (e.g. empty) k_g, + * which will use the password as the BMC key. The k_g key need not + * be an ascii string.*/ + unsigned char *k_g; + /* Length of k_g. Necessary b/c k_g may contain null values in its + * key. Maximum length of 20 bytes.*/ + uint32_t k_g_len; + /* privilege level to authenticate with. 
+ * Supported privilege levels: + * 0 = IPMICONSOLE_PRIVILEGE_USER + * 1 = IPMICONSOLE_PRIVILEGE_OPERATOR + * 2 = IPMICONSOLE_PRIVILEGE_ADMIN + * Pass < 0 for default of IPMICONSOLE_PRIVILEGE_ADMIN.*/ + uint32_t privilege_level; + /* authentication type to use + * IPMI_MONITORING_AUTHENTICATION_TYPE_NONE = 0x00, + * IPMI_MONITORING_AUTHENTICATION_TYPE_STRAIGHT_PASSWORD_KEY = 0x01, + * IPMI_MONITORING_AUTHENTICATION_TYPE_MD2 = 0x02, + * IPMI_MONITORING_AUTHENTICATION_TYPE_MD5 = 0x03, + * Pass < 0 for default of IPMI_MONITORING_AUTHENTICATION_TYPE_MD5*/ + uint32_t authentication_type; + /* Cipher suite identifier to determine authentication, integrity, + * and confidentiality algorithms to use. + * Supported Cipher Suite IDs + * (Key: A - Authentication Algorithm + * I - Integrity Algorithm + * C - Confidentiality Algorithm) + * 0 - A = None; I = None; C = None + * 1 - A = HMAC-SHA1; I = None; C = None + * 2 - A = HMAC-SHA1; I = HMAC-SHA1-96; C = None + * 3 - A = HMAC-SHA1; I = HMAC-SHA1-96; C = AES-CBC-128 + * 6 - A = HMAC-MD5; I = None; C = None + * 7 - A = HMAC-MD5; I = HMAC-MD5-128; C = None + * 8 - A = HMAC-MD5; I = HMAC-MD5-128; C = AES-CBC-128 + * 11 - A = HMAC-MD5; I = MD5-128; C = None + * 12 - A = HMAC-MD5; I = MD5-128; C = AES-CBC-128 + * 15 - A = HMAC-SHA256; I = None; C = None + * 16 - A = HMAC-SHA256; I = HMAC-SHA256-128; C = None + * 17 - A = HMAC-SHA256; I = HMAC-SHA256-128; C = AES-CBC-128 + * Pass < 0 for default.of 3.*/ + uint32_t cipher_suite_id; + /* Specifies the session timeout length in milliseconds. Pass <= 0 + * to default 60000 (60 seconds).*/ + uint32_t session_timeout; + /* Specifies the packet retransmission timeout length in + * milliseconds. Pass <= 0 to default 500 (0.5 seconds).*/ + uint32_t retransmission_timeout; + /* Bitwise OR of flags indicating IPMI implementation changes. Some + * BMCs which are non-compliant and may require a workaround flag + * for correct operation. Pass IPMICONSOLE_WORKAROUND_DEFAULT for + * default. 
Standard default is 0, no modifications to the IPMI + * protocol.*/ + uint32_t workaround_flags; + /* Re-read the SDR cache*/ + bool reread_sdr_cache; + /* Do not read sensors that cannot be interpreted.*/ + bool ignore_non_interpretable_sensors; + /* Attempt to bridge sensors not owned by the BMC*/ + bool bridge_sensors; + /* Attempt to interpret OEM data if read.*/ + bool interpret_oem_data; + /* Iterate through shared sensors if found*/ + bool shared_sensors; + /* Allow sensor readings to be read even if the event/reading type + * code for the sensor is not valid. This option works around + * poorly defined (and arguably illegal) SDR records that list + * non-discrete sensor expectations along with discrete state + * conditions.*/ + bool discrete_reading; + /* Ignore the scanning bit and read sensors no matter + * what. This option works around motherboards + * that incorrectly indicate sensors as disabled.*/ + bool ignore_scanning_disabled; + /* Assume the BMC is the sensor owner no matter what. This option + * works around motherboards that incorrectly indicate a non-BMC + * sensor owner (e.g. 
usually bridging is required).*/ + bool assume_bmc_owner; + /* Return sensor names with appropriate entity + * id and instance prefixed when appropriate.*/ + bool entity_sensor_names; + uint32_t variable; +} slurm_ipmi_conf_t; + +extern void reset_slurm_ipmi_conf(slurm_ipmi_conf_t *slurm_ipmi_conf); + +#endif diff --git a/src/plugins/acct_gather_energy/none/Makefile.in b/src/plugins/acct_gather_energy/none/Makefile.in index 5ae6572c363ded22ee6a8fab5917be9c10d86c11..840bc04f7b381f35e807d50cba75541d7a9eed68 100644 --- a/src/plugins/acct_gather_energy/none/Makefile.in +++ b/src/plugins/acct_gather_energy/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/acct_gather_energy/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ 
CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/acct_gather_energy/none/acct_gather_energy_none.c b/src/plugins/acct_gather_energy/none/acct_gather_energy_none.c index 
aba7292b72de32ecd3d5533685c516d5c5d601dc..f2c13bcbe6711a30985fa2f77ec9a2346ef3468f 100644 --- a/src/plugins/acct_gather_energy/none/acct_gather_energy_none.c +++ b/src/plugins/acct_gather_energy/none/acct_gather_energy_none.c @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -122,3 +122,14 @@ extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, { return SLURM_SUCCESS; } + +extern void acct_gather_energy_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + return; +} + +extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) +{ + return; +} diff --git a/src/plugins/acct_gather_energy/rapl/Makefile.am b/src/plugins/acct_gather_energy/rapl/Makefile.am index a4555d55f2bfea2bf4f57da6cbae0fcd5c998545..d671bdf4c793bf4244be522a3b61a42123725514 100644 --- a/src/plugins/acct_gather_energy/rapl/Makefile.am +++ b/src/plugins/acct_gather_energy/rapl/Makefile.am @@ -9,8 +9,7 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = acct_gather_energy_rapl.la # cpu/core energy accounting plugin. 
-acct_gather_energy_rapl_la_SOURCES = acct_gather_energy_rapl.c \ - acct_gather_energy_rapl.h +acct_gather_energy_rapl_la_SOURCES = acct_gather_energy_rapl.c acct_gather_energy_rapl_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) acct_gather_energy_rapl_la_LIBADD = -lm diff --git a/src/plugins/acct_gather_energy/rapl/Makefile.in b/src/plugins/acct_gather_energy/rapl/Makefile.in index 4940328eef8ea96d4d4e07dcf345937103b5d8a6..7279f0f23e5dc4bbadfc4173a28734c73220ff04 100644 --- a/src/plugins/acct_gather_energy/rapl/Makefile.in +++ b/src/plugins/acct_gather_energy/rapl/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/acct_gather_energy/rapl DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ 
+CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -373,9 +399,7 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = acct_gather_energy_rapl.la # cpu/core energy accounting plugin. 
-acct_gather_energy_rapl_la_SOURCES = acct_gather_energy_rapl.c \ - acct_gather_energy_rapl.h - +acct_gather_energy_rapl_la_SOURCES = acct_gather_energy_rapl.c acct_gather_energy_rapl_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) acct_gather_energy_rapl_la_LIBADD = -lm all: all-am diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c index ff86cbc56e1e9a6ea6acae279bf0901ebc03324a..72a6576dbca639491c25ec05737216982364c490 100644 --- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c +++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -65,13 +65,28 @@ #include <inttypes.h> #include <unistd.h> #include <math.h> -#include "acct_gather_energy_rapl.h" /* From Linux sys/types.h */ #if defined(__FreeBSD__) typedef unsigned long int ulong; #endif +#define MAX_PKGS 256 + +#define MSR_RAPL_POWER_UNIT 0x606 + +/* Package RAPL Domain */ +#define MSR_PKG_RAPL_POWER_LIMIT 0x610 +#define MSR_PKG_ENERGY_STATUS 0x611 +#define MSR_PKG_PERF_STATUS 0x613 +#define MSR_PKG_POWER_INFO 0x614 + +/* DRAM RAPL Domain */ +#define MSR_DRAM_POWER_LIMIT 0x618 +#define MSR_DRAM_ENERGY_STATUS 0x619 +#define MSR_DRAM_PERF_STATUS 0x61B +#define MSR_DRAM_POWER_INFO 0x61C + union { uint64_t val; struct { @@ -116,15 +131,13 @@ const char plugin_name[] = "AcctGatherEnergy RAPL plugin"; const char plugin_type[] = "acct_gather_energy/rapl"; const uint32_t plugin_version = 100; -static int freq = 0; static acct_gather_energy_t *local_energy = NULL; -static bool acct_gather_energy_shutdown = true; static uint32_t debug_flags = 0; /* one cpu in the 
package */ static int pkg2cpu[MAX_PKGS] = {[0 ... MAX_PKGS-1] -1}; static int pkg_fd[MAX_PKGS] = {[0 ... MAX_PKGS-1] -1}; - +static char hostname[MAXHOSTNAMELEN]; static int nb_pkg = 0; @@ -140,15 +153,18 @@ static char *_msr_string(int which) static uint64_t _read_msr(int fd, int which) { uint64_t data = 0; + static bool first = true; if (lseek(fd, which, SEEK_SET) < 0) error("lseek of /dev/cpu/#/msr: %m"); if (read(fd, &data, sizeof(data)) != sizeof(data)) { if (which == MSR_DRAM_ENERGY_STATUS) { - if (debug_flags & DEBUG_FLAG_ENERGY) + if (first && (debug_flags & DEBUG_FLAG_ENERGY)) { + first = false; info("It appears you don't have any DRAM, " "this can be common. Check your system " "if you think this is in error."); + } } else { debug("Check if your CPU has RAPL support for %s: %m", _msr_string(which)); @@ -205,7 +221,7 @@ static int _open_msr(int core) } else if ( errno == EIO ) { error("CPU %d doesn't support MSRs", core); } else - error("MSR register problem: %m"); + error("MSR register problem (%s): %m", msr_filename); } else { /* If this is loaded in the slurmd we need to make sure it gets closed when a slurmstepd launches. 
@@ -220,10 +236,10 @@ static void _hardware(void) { char buf[1024]; FILE *fd; - int cpu, pkg; + int cpu = 0, pkg = 0; if ((fd = fopen("/proc/cpuinfo", "r")) == 0) - error("fopen"); + fatal("RAPL: error on attempt to open /proc/cpuinfo"); while (fgets(buf, 1024, fd)) { if (strncmp(buf, "processor", sizeof("processor") - 1) == 0) { sscanf(buf, "processor\t: %d", &cpu); @@ -251,78 +267,57 @@ static void _hardware(void) info("RAPL Found: %d packages", nb_pkg); } -extern int acct_gather_energy_p_update_node_energy(void) +static bool _run_in_daemon(void) { - int rc = SLURM_SUCCESS; - int i; - double energy_units; - uint64_t result; - double ret; + static bool set = false; + static bool run = false; - if (local_energy->current_watts == NO_VAL) - return rc; - acct_gather_energy_shutdown = false; - if (!acct_gather_energy_shutdown) { - uint32_t node_current_energy; - uint16_t node_freq; + if (!set) { + set = 1; + run = run_in_daemon("slurmd,slurmstepd"); + } - xassert(pkg_fd[0] != -1); + return run; +} - /* MSR_RAPL_POWER_UNIT - * Power Units - bits 3:0 - * Energy Status Units - bits 12:8 - * Time Units - bits 19:16 - * See: Intel 64 and IA-32 Architectures Software Developer's - * Manual, Volume 3 for details */ - result = _read_msr(pkg_fd[0], MSR_RAPL_POWER_UNIT); - energy_units = pow(0.5,(double)((result>>8)&0x1f)); - result = 0; - for (i = 0; i < nb_pkg; i++) - result += _get_package_energy(i) + _get_dram_energy(i); - ret = (double)result * energy_units; - - /* current_watts = the average power consumption between two - * measurements - * base_watts = base energy consumed - */ - node_current_energy = (int)ret; - if (local_energy->consumed_energy != 0) { - local_energy->consumed_energy = - node_current_energy - local_energy->base_watts; - local_energy->current_watts = - node_current_energy - - local_energy->previous_consumed_energy; - node_freq = slurm_get_acct_gather_node_freq(); - if (node_freq) /* Prevent divide by zero */ - local_energy->current_watts /= 
(float)node_freq; - } - if (local_energy->consumed_energy == 0) { - local_energy->consumed_energy = 1; - local_energy->base_watts = node_current_energy; - } - local_energy->previous_consumed_energy = node_current_energy; +/* _send_drain_request() + */ +static void +_send_drain_request(void) +{ + update_node_msg_t node_msg; + static char drain_request_sent; - if (debug_flags & DEBUG_FLAG_ENERGY) { - info("_getjoules_rapl = %d sec, current %.6f Joules, " - "consumed %d", - freq, ret, local_energy->consumed_energy); - } - } + if (drain_request_sent) + return; - if (debug_flags & DEBUG_FLAG_ENERGY) - info("_getjoules_rapl shutdown"); - return rc; + slurm_init_update_node_msg(&node_msg); + node_msg.node_names = hostname; + node_msg.reason = "Cannot collect energy data."; + node_msg.node_state = NODE_STATE_DRAIN; + + drain_request_sent = 1; + debug("%s: sending NODE_STATE_DRAIN to controller", __func__); + + if (slurm_update_node(&node_msg) != SLURM_SUCCESS) { + error("%s: Unable to drain node %s: %m", __func__, hostname); + drain_request_sent = 0; + } } static void _get_joules_task(acct_gather_energy_t *energy) { int i; - double energy_units, power_units; + double energy_units; uint64_t result; - ulong max_power; double ret; - xassert(pkg_fd[0] != -1); + if (pkg_fd[0] < 0) { + error("%s: device /dev/cpu/#msr not opened " + "energy data cannot be collected.", __func__); + _send_drain_request(); + return; + } /* MSR_RAPL_POWER_UNIT * Power Units - bits 3:0 @@ -331,46 +326,106 @@ static void _get_joules_task(acct_gather_energy_t *energy) * See: Intel 64 and IA-32 Architectures Software Developer's * Manual, Volume 3 for details */ result = _read_msr(pkg_fd[0], MSR_RAPL_POWER_UNIT); - power_units = pow(0.5, (double)(result&0xf)); energy_units = pow(0.5, (double)((result>>8)&0x1f)); - if (debug_flags & DEBUG_FLAG_ENERGY) + + if (debug_flags & DEBUG_FLAG_ENERGY) { + double power_units = pow(0.5, (double)(result&0xf)); + ulong max_power; + info("RAPL powercapture_debug Energy 
units = %.6f, " "Power Units = %.6f", energy_units, power_units); - - /* MSR_PKG_POWER_INFO - * Thermal Spec Power - bits 14:0 - * Minimum Power - bits 30:16 - * Maximum Power - bits 46:32 - * Maximum Time Window - bits 53:48 - * See: Intel 64 and IA-32 Architectures Software Developer's - * Manual, Volume 3 for details */ - result = _read_msr(pkg_fd[0], MSR_PKG_POWER_INFO); - max_power = power_units * ((result >> 32) & 0x7fff); - if (debug_flags & DEBUG_FLAG_ENERGY) + /* MSR_PKG_POWER_INFO + * Thermal Spec Power - bits 14:0 + * Minimum Power - bits 30:16 + * Maximum Power - bits 46:32 + * Maximum Time Window - bits 53:48 + * See: Intel 64 and IA-32 Architectures Software Developer's + * Manual, Volume 3 for details */ + result = _read_msr(pkg_fd[0], MSR_PKG_POWER_INFO); + max_power = power_units * ((result >> 32) & 0x7fff); info("RAPL Max power = %ld w", max_power); + } result = 0; for (i = 0; i < nb_pkg; i++) result += _get_package_energy(i) + _get_dram_energy(i); - if (debug_flags & DEBUG_FLAG_ENERGY) - info("RAPL Result = %"PRIu64"", result); + ret = (double)result * energy_units; + if (debug_flags & DEBUG_FLAG_ENERGY) - info("RAPL Result float %.6f Joules", ret); + info("RAPL Result %"PRIu64" = %.6f Joules", result, ret); if (energy->consumed_energy != 0) { - energy->consumed_energy = ret - energy->base_consumed_energy; + uint16_t node_freq; + energy->consumed_energy = (uint32_t)ret - energy->base_watts; + energy->current_watts = + (uint32_t)ret - energy->previous_consumed_energy; + node_freq = slurm_get_acct_gather_node_freq(); + if (node_freq) /* Prevent divide by zero */ + local_energy->current_watts /= (float)node_freq; } if (energy->consumed_energy == 0) { energy->consumed_energy = 1; - energy->base_consumed_energy = ret; + energy->base_watts = (uint32_t)ret; } + energy->previous_consumed_energy = (uint32_t)ret; + energy->poll_time = time(NULL); - if (debug_flags & DEBUG_FLAG_ENERGY) { - info("_get_joules_task energy = %.6f, base %u , current %u", - ret, 
energy->base_consumed_energy, - energy->consumed_energy); + if (debug_flags & DEBUG_FLAG_ENERGY) + info("_get_joules_task: current %.6f Joules, consumed %u", + ret, energy->consumed_energy); +} + +static int _running_profile(void) +{ + static bool run = false; + static uint32_t profile_opt = ACCT_GATHER_PROFILE_NOT_SET; + + if (profile_opt == ACCT_GATHER_PROFILE_NOT_SET) { + acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, + &profile_opt); + if (profile_opt & ACCT_GATHER_PROFILE_ENERGY) + run = true; } + + return run; +} + +static int _send_profile(void) +{ + acct_energy_data_t ener; + + if (!_running_profile()) + return SLURM_SUCCESS; + + if (debug_flags & DEBUG_FLAG_ENERGY) + info("_send_profile: consumed %d watts", + local_energy->current_watts); + + memset(&ener, 0, sizeof(acct_energy_data_t)); + /*TODO function to calculate Average CPUs Frequency*/ + /*ener->cpu_freq = // read /proc/...*/ + ener.cpu_freq = 1; + ener.time = time(NULL); + ener.power = local_energy->current_watts; + acct_gather_profile_g_add_sample_data( + ACCT_GATHER_PROFILE_ENERGY, &ener); + + return SLURM_ERROR; +} + +extern int acct_gather_energy_p_update_node_energy(void) +{ + int rc = SLURM_SUCCESS; + + xassert(_run_in_daemon()); + + if (local_energy->current_watts == NO_VAL) + return rc; + + _get_joules_task(local_energy); + + return rc; } /* @@ -379,21 +434,14 @@ static void _get_joules_task(acct_gather_energy_t *energy) */ extern int init(void) { - int i; - uint64_t result; - - _hardware(); - for (i = 0; i < nb_pkg; i++) - pkg_fd[i] = _open_msr(pkg2cpu[i]); + debug_flags = slurm_get_debug_flags(); - local_energy = acct_gather_energy_alloc(); + gethostname(hostname, MAXHOSTNAMELEN); - result = _read_msr(pkg_fd[0], MSR_RAPL_POWER_UNIT); - if (result == 0) - local_energy->current_watts = NO_VAL; + /* put anything that requires the .conf being read in + acct_gather_energy_p_conf_parse + */ - debug_flags = slurm_get_debug_flags(); - verbose("%s loaded", plugin_name); return 
SLURM_SUCCESS; } @@ -401,6 +449,9 @@ extern int fini(void) { int i; + if (!_run_in_daemon()) + return SLURM_SUCCESS; + for (i = 0; i < nb_pkg; i++) { if (pkg_fd[i] != -1) { close(pkg_fd[i]); @@ -414,9 +465,14 @@ extern int fini(void) } extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy) + void *data) { int rc = SLURM_SUCCESS; + acct_gather_energy_t *energy = (acct_gather_energy_t *)data; + time_t *last_poll = (time_t *)data; + + xassert(_run_in_daemon()); + switch (data_type) { case ENERGY_DATA_JOULES_TASK: if (local_energy->current_watts == NO_VAL) @@ -427,6 +483,9 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, case ENERGY_DATA_STRUCT: memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); break; + case ENERGY_DATA_LAST_POLL: + *last_poll = local_energy->poll_time; + break; default: error("acct_gather_energy_p_get_data: unknown enum %d", data_type); @@ -437,14 +496,20 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, } extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy) + void *data) { int rc = SLURM_SUCCESS; + xassert(_run_in_daemon()); + switch (data_type) { case ENERGY_DATA_RECONFIG: debug_flags = slurm_get_debug_flags(); break; + case ENERGY_DATA_PROFILE: + _get_joules_task(local_energy); + _send_profile(); + break; default: error("acct_gather_energy_p_set_data: unknown enum %d", data_type); @@ -453,3 +518,32 @@ extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, } return rc; } + +extern void acct_gather_energy_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + return; +} + +extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) +{ + int i; + uint64_t result; + + if (!_run_in_daemon()) + return; + + _hardware(); + for (i = 0; i < nb_pkg; i++) + pkg_fd[i] = _open_msr(pkg2cpu[i]); + + local_energy = acct_gather_energy_alloc(); + + result = 
_read_msr(pkg_fd[0], MSR_RAPL_POWER_UNIT); + if (result == 0) + local_energy->current_watts = NO_VAL; + + verbose("%s loaded", plugin_name); + + return; +} diff --git a/src/plugins/acct_gather_filesystem/Makefile.am b/src/plugins/acct_gather_filesystem/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..5b95ab898dfab565337baefd4cc83890ff6ae455 --- /dev/null +++ b/src/plugins/acct_gather_filesystem/Makefile.am @@ -0,0 +1,3 @@ +# Makefile for accounting gather filesystem plugins + +SUBDIRS = lustre none diff --git a/src/plugins/acct_gather_filesystem/Makefile.in b/src/plugins/acct_gather_filesystem/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..73ea535d51b7109afcc7e29b828714c110f70bb5 --- /dev/null +++ b/src/plugins/acct_gather_filesystem/Makefile.in @@ -0,0 +1,735 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for accounting gather filesystem plugins +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_filesystem +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + 
$(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = 
ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ 
+GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ 
+build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = lustre none +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/plugins/acct_gather_filesystem/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/plugins/acct_gather_filesystem/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ 
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/acct_gather_filesystem/lustre/Makefile.am b/src/plugins/acct_gather_filesystem/lustre/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..476e129af73a05e6e29799a0422a05470925c8a2 --- /dev/null +++ b/src/plugins/acct_gather_filesystem/lustre/Makefile.am @@ -0,0 +1,18 @@ +# Makefile for acct_gather_filesystem/lustre plugin + +#dprx need to uncomment this when safe to build +#SUBDIRS = include lib + +AUTOMAKE_OPTIONS = foreign + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +pkglib_LTLIBRARIES = acct_gather_filesystem_lustre.la + +# Infiniband accounting lustre plugin. +acct_gather_filesystem_lustre_la_SOURCES = acct_gather_filesystem_lustre.c + +acct_gather_filesystem_lustre_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +acct_gather_filesystem_lustre_la_LIBADD = -lm diff --git a/src/plugins/acct_gather_filesystem/lustre/Makefile.in b/src/plugins/acct_gather_filesystem/lustre/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..8e1a97cef31a29bef75d71a3df2a6cfb251b8536 --- /dev/null +++ b/src/plugins/acct_gather_filesystem/lustre/Makefile.in @@ -0,0 +1,722 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for acct_gather_filesystem/lustre plugin + +#dprx need to uncomment this when safe to build +#SUBDIRS = include lib + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_filesystem/lustre +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + 
$(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| 
$$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +acct_gather_filesystem_lustre_la_DEPENDENCIES = +am_acct_gather_filesystem_lustre_la_OBJECTS = \ + acct_gather_filesystem_lustre.lo +acct_gather_filesystem_lustre_la_OBJECTS = \ + $(am_acct_gather_filesystem_lustre_la_OBJECTS) +acct_gather_filesystem_lustre_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) \ + $(acct_gather_filesystem_lustre_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(acct_gather_filesystem_lustre_la_SOURCES) +DIST_SOURCES = $(acct_gather_filesystem_lustre_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) 
(install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = 
@HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ 
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ 
+program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +pkglib_LTLIBRARIES = acct_gather_filesystem_lustre.la + +# Infiniband accounting lustre plugin. +acct_gather_filesystem_lustre_la_SOURCES = acct_gather_filesystem_lustre.c +acct_gather_filesystem_lustre_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +acct_gather_filesystem_lustre_la_LIBADD = -lm +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_filesystem/lustre/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_filesystem/lustre/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +acct_gather_filesystem_lustre.la: $(acct_gather_filesystem_lustre_la_OBJECTS) $(acct_gather_filesystem_lustre_la_DEPENDENCIES) $(EXTRA_acct_gather_filesystem_lustre_la_DEPENDENCIES) + $(acct_gather_filesystem_lustre_la_LINK) -rpath $(pkglibdir) $(acct_gather_filesystem_lustre_la_OBJECTS) $(acct_gather_filesystem_lustre_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_filesystem_lustre.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f 
"$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' 
| \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c b/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c new file mode 100644 index 0000000000000000000000000000000000000000..0e942d69d0560a3963f4c77d4df328d9ca7001e2 --- /dev/null +++ b/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c @@ -0,0 +1,337 @@ +/*****************************************************************************\ + * acct_gather_filesystem_lustre.c -slurm filesystem accounting plugin for lustre + ***************************************************************************** + * Copyright (C) 2013 + * Written by Bull- Yiannis Georgiou + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * This file is patterned after jobcomp_linux.c, written by Morris Jette and
+ * Copyright (C) 2002 The Regents of the University of California.
+\*****************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <dirent.h>
+
+#include <unistd.h>
+#include <getopt.h>
+#include <netinet/in.h>
+
+
+#include "src/common/slurm_xlator.h"
+#include "src/common/slurm_acct_gather_filesystem.h"
+#include "src/common/slurm_protocol_api.h"
+#include "src/common/slurm_protocol_defs.h"
+#include "src/slurmd/common/proctrack.h"
+#include "src/common/slurm_acct_gather_profile.h"
+
+#include "src/slurmd/slurmd/slurmd.h"
+
+
+/***************************************************************/
+
+
+
+#define _DEBUG 1
+#define _DEBUG_FILESYSTEM 1
+#define FILESYSTEM_DEFAULT_PORT 1
+
+/*
+ * These variables are required by the generic plugin interface.  If they
+ * are not found in the plugin, the plugin loader will ignore it.
+ *
+ * plugin_name - a string giving a human-readable description of the
+ * plugin.  There is no maximum length, but the symbol must refer to
+ * a valid string.
+ *
+ * plugin_type - a string suggesting the type of the plugin or its
+ * applicability to a particular form of data or method of data handling.
+ * If the low-level plugin API is used, the contents of this string are
+ * unimportant and may be anything.  SLURM uses the higher-level plugin
+ * interface which requires this string to be of the form
+ *
+ *	<application>/<method>
+ *
+ * where <application> is a description of the intended application of
+ * the plugin (here "acct_gather_filesystem" for filesystem accounting)
+ * and <method> is a description of how this plugin satisfies that
+ * application (here "lustre", read from the llite stats files).  The prefix
+ * "acct_gather_filesystem/" is what the plugin_type below carries.
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin.  If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000.  Various SLURM versions will likely require a certain
+ * minimum version for their plugins as the filesystem accounting API
+ * matures.
+ */
+
+const char plugin_name[] = "AcctGatherFilesystem LUSTRE plugin";
+const char plugin_type[] = "acct_gather_filesystem/lustre";
+const uint32_t plugin_version = 100;
+
+
+/*
+ * Counters filled in by _read_lustre_counters().
+ *
+ * The lustre_* fields hold the values parsed from the stats file that was
+ * read most recently; the all_lustre_* fields hold the sums over every
+ * stats file read during the latest pass.  update_time is the time of the
+ * latest successful pass and last_update_time the one before it.
+ */
+typedef struct {
+	time_t last_update_time;
+	time_t update_time;
+	uint64_t lustre_nb_writes;
+	uint64_t lustre_nb_reads;
+	uint64_t all_lustre_nb_writes;
+	uint64_t all_lustre_nb_reads;
+	uint64_t lustre_write_bytes;
+	uint64_t lustre_read_bytes;
+	uint64_t all_lustre_write_bytes;
+	uint64_t all_lustre_read_bytes;
+} lustre_sens_t;
+
+/* Read and written by _update_node_filesystem() with lustre_lock held. */
+static lustre_sens_t lustre_se = {0};
+
+static uint32_t debug_flags = 0;
+static pthread_mutex_t lustre_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* Default path to lustre stats */
+const char proc_base_path[] = "/proc/fs/lustre/";
+
+/*
+ * Check once whether Lustre accounting is usable on this node.
+ *
+ * The answer is computed on the first call and cached for later calls:
+ * SLURM_SUCCESS when the running profile includes ACCT_GATHER_PROFILE_LUSTRE
+ * and the llite directory can be opened, SLURM_ERROR when the profile does
+ * not include it, SLURM_FAILURE when the directory cannot be opened.
+ */
+static int _check_lustre_fs(void)
+{
+	static bool set = false;
+	static int rc = SLURM_SUCCESS;
+	uint32_t profile = 0;
+	char lustre_directory[BUFSIZ];
+	DIR *proc_dir;
+
+	if (set)
+		return rc;
+	set = true;
+
+	acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, &profile);
+	if (!(profile & ACCT_GATHER_PROFILE_LUSTRE)) {
+		rc = SLURM_ERROR;
+		return rc;
+	}
+
+	snprintf(lustre_directory, sizeof(lustre_directory), "%s/llite",
+		 proc_base_path);
+	/* Probe the llite directory itself, not just its parent. */
+	proc_dir = opendir(lustre_directory);
+	if (!proc_dir) {
+		debug2("not able to read %s", lustre_directory);
+		rc = SLURM_FAILURE;
+	} else {
+		closedir(proc_dir);
+	}
+
+	return rc;
+}
+
+/*
+ * Read the counters of every filesystem listed under the llite directory.
+ *
+ * For each entry of <proc_base_path>/llite the file <entry>/stats is read;
+ * the first and last numbers of its "write_bytes" and "read_bytes" lines are
+ * stored as call count and byte count and added to the all_lustre_* sums of
+ * lustre_se.  The sums are rebuilt from zero on every call, and an entry
+ * whose stats file cannot be opened contributes nothing.
+ *
+ * Returns SLURM_SUCCESS, or SLURM_FAILURE if the llite directory cannot be
+ * opened (lustre_se is then left unchanged).
+ */
+static int _read_lustre_counters(void)
+{
+	char lustre_dir[PATH_MAX];
+	char path_stats[PATH_MAX];
+	char buffer[BUFSIZ];
+	DIR *proc_dir;
+	struct dirent *entry;
+	FILE *fff;
+
+	snprintf(lustre_dir, sizeof(lustre_dir), "%s/llite", proc_base_path);
+
+	proc_dir = opendir(lustre_dir);
+	if (proc_dir == NULL) {
+		error("Cannot open %s\n", lustre_dir);
+		return SLURM_FAILURE;
+	}
+
+	/*
+	 * Rebuild the node-wide sums from zero: adding this pass on top of
+	 * the previous one would count every filesystem again on each call.
+	 */
+	lustre_se.all_lustre_write_bytes = 0;
+	lustre_se.all_lustre_read_bytes = 0;
+	lustre_se.all_lustre_nb_writes = 0;
+	lustre_se.all_lustre_nb_reads = 0;
+
+	while ((entry = readdir(proc_dir)) != NULL) {
+		/* "." and ".." are directory links, not filesystems. */
+		if (!strcmp(entry->d_name, ".") ||
+		    !strcmp(entry->d_name, ".."))
+			continue;
+
+		snprintf(path_stats, sizeof(path_stats), "%s/%s/stats",
+			 lustre_dir, entry->d_name);
+		debug3("Found file %s\n", path_stats);
+
+		fff = fopen(path_stats, "r");
+		if (!fff)
+			continue;
+
+		/* Do not let a previous entry's values leak into this one. */
+		lustre_se.lustre_write_bytes = 0;
+		lustre_se.lustre_read_bytes = 0;
+		lustre_se.lustre_nb_writes = 0;
+		lustre_se.lustre_nb_reads = 0;
+
+		while (fgets(buffer, sizeof(buffer), fff)) {
+			if (strstr(buffer, "write_bytes")) {
+				sscanf(buffer,
+				       "%*s %"PRIu64" %*s %*s "
+				       "%*d %*d %"PRIu64"",
+				       &lustre_se.lustre_nb_writes,
+				       &lustre_se.lustre_write_bytes);
+				debug3("Lustre Counter "
+				       "%"PRIu64" "
+				       "write_bytes %"PRIu64" "
+				       "writes\n",
+				       lustre_se.lustre_write_bytes,
+				       lustre_se.lustre_nb_writes);
+			}
+
+			if (strstr(buffer, "read_bytes")) {
+				sscanf(buffer,
+				       "%*s %"PRIu64" %*s %*s "
+				       "%*d %*d %"PRIu64"",
+				       &lustre_se.lustre_nb_reads,
+				       &lustre_se.lustre_read_bytes);
+				debug3("Lustre Counter "
+				       "%"PRIu64" "
+				       "read_bytes %"PRIu64" "
+				       "reads\n",
+				       lustre_se.lustre_read_bytes,
+				       lustre_se.lustre_nb_reads);
+			}
+		}
+		fclose(fff);
+
+		lustre_se.all_lustre_write_bytes +=
+			lustre_se.lustre_write_bytes;
+		lustre_se.all_lustre_read_bytes += lustre_se.lustre_read_bytes;
+		lustre_se.all_lustre_nb_writes += lustre_se.lustre_nb_writes;
+		lustre_se.all_lustre_nb_reads += lustre_se.lustre_nb_reads;
+	}
+	closedir(proc_dir);
+
+	lustre_se.last_update_time = lustre_se.update_time;
+	lustre_se.update_time = time(NULL);
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Refresh the Lustre counters and hand the node totals to the profile
+ * layer as one ACCT_GATHER_PROFILE_LUSTRE sample; the byte totals are
+ * divided by 1048576 before being stored in read_size/write_size.
+ * Nothing is sampled when the counters cannot be read.  lustre_lock is
+ * held for the whole operation.
+ *
+ * Returns the status of _read_lustre_counters().
+ */
+static int _update_node_filesystem(void)
+{
+	acct_filesystem_data_t *fls;
+	int rc;
+
+	slurm_mutex_lock(&lustre_lock);
+
+	rc = _read_lustre_counters();
+	if (rc != SLURM_SUCCESS) {
+		/* Never publish totals that were not actually read. */
+		slurm_mutex_unlock(&lustre_lock);
+		return rc;
+	}
+
+	fls = xmalloc(sizeof(acct_filesystem_data_t));
+
+	fls->reads = lustre_se.all_lustre_nb_reads;
+	fls->writes = lustre_se.all_lustre_nb_writes;
+	fls->read_size = (double) lustre_se.all_lustre_read_bytes / 1048576;
+	fls->write_size = (double) lustre_se.all_lustre_write_bytes / 1048576;
+	acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE, fls);
+
+	debug3("Collection of Lustre counters Finished");
+	xfree(fls);
+
+	if (debug_flags & DEBUG_FLAG_FILESYSTEM) {
+		/* Arguments are read bytes first, then written bytes. */
+		info("lustre-thread = %d sec, read %"PRIu64" bytes, "
+		     "wrote %"PRIu64" bytes",
+		     (int) (lustre_se.update_time - lustre_se.last_update_time),
+		     lustre_se.all_lustre_read_bytes,
+		     lustre_se.all_lustre_write_bytes);
+	}
+	slurm_mutex_unlock(&lustre_lock);
+
+	return rc;
+}
+
+/* Cache and return the result of run_in_daemon("slurmstepd"). */
+static bool _run_in_daemon(void)
+{
+	static bool set = false;
+	static bool run = false;
+
+	if (!set) {
+		set = true;
+		run = run_in_daemon("slurmstepd");
+	}
+
+	return run;
+}
+
+
+/*
+ * init() is called when the plugin is loaded, before any other functions
+ * are called.  Put global initialization here.
+ */
+extern int init(void)
+{
+	debug_flags = slurm_get_debug_flags();
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Plugin teardown.  There is nothing to release; the only effect is an
+ * optional log line when _run_in_daemon() is true and the filesystem debug
+ * flag is set.
+ */
+extern int fini(void)
+{
+	if (!_run_in_daemon())
+		return SLURM_SUCCESS;
+
+	if (debug_flags & DEBUG_FLAG_FILESYSTEM)
+		info("lustre: ended");
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Collect one sample when _run_in_daemon() is true and _check_lustre_fs()
+ * succeeds.  Always returns SLURM_SUCCESS; a failed collection is not
+ * reported to the caller.
+ */
+extern int acct_gather_filesystem_p_node_update(void)
+{
+	if (_run_in_daemon() && (_check_lustre_fs() == SLURM_SUCCESS))
+		_update_node_filesystem();
+
+	return SLURM_SUCCESS;
+}
+
+
+/* No setting is read from tbl; only logs that the plugin is loaded. */
+extern void acct_gather_filesystem_p_conf_set(s_p_hashtbl_t *tbl)
+{
+	if (!_run_in_daemon())
+		return;
+
+	verbose("%s loaded", plugin_name);
+}
+
+/* This plugin adds no configuration options; both arguments are untouched. */
+extern void acct_gather_filesystem_p_conf_options(s_p_options_t **full_options,
+						  int *full_options_cnt)
+{
+}
diff --git a/src/plugins/acct_gather_filesystem/none/Makefile.am b/src/plugins/acct_gather_filesystem/none/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..18bf771ecf0b1955f3fbb79fc771a307c51300c3
--- /dev/null
+++ b/src/plugins/acct_gather_filesystem/none/Makefile.am
@@ -0,0 +1,15 @@
+# Makefile for acct_gather_filesystem/none plugin
+
+AUTOMAKE_OPTIONS = foreign
+
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+
+pkglib_LTLIBRARIES = acct_gather_filesystem_none.la
+
+# Null job completion logging plugin.
+acct_gather_filesystem_none_la_SOURCES = acct_gather_filesystem_none.c
+
+acct_gather_filesystem_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+
diff --git a/src/plugins/acct_gather_filesystem/none/Makefile.in b/src/plugins/acct_gather_filesystem/none/Makefile.in
new file mode 100644
index 0000000000000000000000000000000000000000..e49aa65b7551986012218af7dc94cf36bd9076ca
--- /dev/null
+++ b/src/plugins/acct_gather_filesystem/none/Makefile.in
@@ -0,0 +1,718 @@
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
+# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for acct_gather_filesystem/none plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_filesystem/none +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + 
$(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 
's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +acct_gather_filesystem_none_la_LIBADD = +am_acct_gather_filesystem_none_la_OBJECTS = \ + acct_gather_filesystem_none.lo +acct_gather_filesystem_none_la_OBJECTS = \ + $(am_acct_gather_filesystem_none_la_OBJECTS) +acct_gather_filesystem_none_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) \ + $(acct_gather_filesystem_none_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(acct_gather_filesystem_none_la_SOURCES) +DIST_SOURCES = $(acct_gather_filesystem_none_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT 
= @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ 
+REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = 
@install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common +pkglib_LTLIBRARIES = acct_gather_filesystem_none.la + +# Null job completion logging plugin. +acct_gather_filesystem_none_la_SOURCES = acct_gather_filesystem_none.c +acct_gather_filesystem_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_filesystem/none/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_filesystem/none/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +acct_gather_filesystem_none.la: $(acct_gather_filesystem_none_la_OBJECTS) $(acct_gather_filesystem_none_la_DEPENDENCIES) $(EXTRA_acct_gather_filesystem_none_la_DEPENDENCIES) + $(acct_gather_filesystem_none_la_LINK) -rpath $(pkglibdir) $(acct_gather_filesystem_none_la_OBJECTS) $(acct_gather_filesystem_none_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_filesystem_none.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo 
$$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; 
\ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/acct_gather_filesystem/none/acct_gather_filesystem_none.c b/src/plugins/acct_gather_filesystem/none/acct_gather_filesystem_none.c new file mode 100644 index 0000000000000000000000000000000000000000..745e50334d2d8378cd85fbac26415e89dab0f821 --- /dev/null +++ b/src/plugins/acct_gather_filesystem/none/acct_gather_filesystem_none.c @@ -0,0 +1,121 @@ +/*****************************************************************************\ + * acct_gather_filesystem_none.c - slurm filesystem accounting plugin for none. + ***************************************************************************** + * Copyright (C) 2013 BULL + * Written by Yiannis Georgiou <yiannis.georgiou@bull.net>, + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * This file is patterned after jobcomp_linux.c, written by Morris Jette and + * Copyright (C) 2002 The Regents of the University of California. +\*****************************************************************************/ + + +/* acct_gather_filesystem_none + * This plugin does not initiate a node-level thread. + * It is the acct_gather_filesystem stub. + */ + +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" + +#include <fcntl.h> +#include <signal.h> + +#define _DEBUG 1 +#define _DEBUG_INFINIBAND 1 + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. 
SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "acct_gather_filesystem" for SLURM filesystem accounting) + * and <method> is a description of how this plugin satisfies that application. + * SLURM will only load filesystem accounting plugins if the plugin_type string + * has a prefix of "acct_gather_filesystem/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the filesystem accounting API + * matures. + */ +const char plugin_name[] = "AcctGatherFilesystem NONE plugin"; +const char plugin_type[] = "acct_gather_filesystem/none"; +const uint32_t plugin_version = 100; + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. Put global initialization here. 
+ */ +extern int init(void) +{ + verbose("%s loaded", plugin_name); /* log the load; nothing else to set up */ + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + return SLURM_SUCCESS; /* stub: no resources to release */ +} + +extern int acct_gather_filesystem_p_node_update(void) +{ + return SLURM_SUCCESS; /* stub: gathers no data, always succeeds */ +} + +extern void acct_gather_filesystem_p_conf_set(s_p_hashtbl_t *tbl) +{ + return; /* stub: tbl is ignored */ +} + +extern void acct_gather_filesystem_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + return; /* stub: full_options and full_options_cnt are left unmodified */ +} + diff --git a/src/plugins/acct_gather_infiniband/Makefile.am b/src/plugins/acct_gather_infiniband/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..4de761647fad2ef3b43637a9e3feab1ce0897607 --- /dev/null +++ b/src/plugins/acct_gather_infiniband/Makefile.am @@ -0,0 +1,3 @@ +# Makefile for accounting gather infiniband plugins + +SUBDIRS = ofed none diff --git a/src/plugins/acct_gather_infiniband/Makefile.in b/src/plugins/acct_gather_infiniband/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..974104eb01ba3cb9fc094d6b48b3edd37f0c63e3 --- /dev/null +++ b/src/plugins/acct_gather_infiniband/Makefile.in @@ -0,0 +1,735 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for accounting gather infiniband plugins +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_infiniband +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + 
$(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = 
ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ 
+GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ 
+build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = ofed none +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/plugins/acct_gather_infiniband/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/plugins/acct_gather_infiniband/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ 
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/acct_gather_infiniband/none/Makefile.am b/src/plugins/acct_gather_infiniband/none/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..d7f212edf5a2ba9f8faeed9e0043d3d86fa1dbac --- /dev/null +++ b/src/plugins/acct_gather_infiniband/none/Makefile.am @@ -0,0 +1,15 @@ +# Makefile for acct_gather_infiniband/none plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +pkglib_LTLIBRARIES = acct_gather_infiniband_none.la + +# Null (stub) InfiniBand accounting gather plugin. +acct_gather_infiniband_none_la_SOURCES = acct_gather_infiniband_none.c + +acct_gather_infiniband_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) + diff --git a/src/plugins/acct_gather_infiniband/none/Makefile.in b/src/plugins/acct_gather_infiniband/none/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..00a47548ba57dda9785e4be3d170173bae5d11aa --- /dev/null +++ b/src/plugins/acct_gather_infiniband/none/Makefile.in @@ -0,0 +1,718 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for acct_gather_energy/none plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_infiniband/none +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + 
$(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, 
files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +acct_gather_infiniband_none_la_LIBADD = +am_acct_gather_infiniband_none_la_OBJECTS = \ + acct_gather_infiniband_none.lo +acct_gather_infiniband_none_la_OBJECTS = \ + $(am_acct_gather_infiniband_none_la_OBJECTS) +acct_gather_infiniband_none_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) \ + $(acct_gather_infiniband_none_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(acct_gather_infiniband_none_la_SOURCES) +DIST_SOURCES = $(acct_gather_infiniband_none_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ 
+AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = 
@LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = 
@SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = 
@top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common +pkglib_LTLIBRARIES = acct_gather_infiniband_none.la + +# Null infiniband accounting plugin. +acct_gather_infiniband_none_la_SOURCES = acct_gather_infiniband_none.c +acct_gather_infiniband_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_infiniband/none/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_infiniband/none/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +acct_gather_infiniband_none.la: $(acct_gather_infiniband_none_la_OBJECTS) $(acct_gather_infiniband_none_la_DEPENDENCIES) $(EXTRA_acct_gather_infiniband_none_la_DEPENDENCIES) + $(acct_gather_infiniband_none_la_LINK) -rpath $(pkglibdir) $(acct_gather_infiniband_none_la_OBJECTS) $(acct_gather_infiniband_none_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_infiniband_none.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo 
$$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; 
\ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/acct_gather_infiniband/none/acct_gather_infiniband_none.c b/src/plugins/acct_gather_infiniband/none/acct_gather_infiniband_none.c new file mode 100644 index 0000000000000000000000000000000000000000..b9abd7a7cfbaa7709f0d7864eb2ef27e242f3e6b --- /dev/null +++ b/src/plugins/acct_gather_infiniband/none/acct_gather_infiniband_none.c @@ -0,0 +1,121 @@ +/*****************************************************************************\ + * acct_gather_infiniband_none.c - no-op slurm infiniband accounting plugin. + ***************************************************************************** + * Copyright (C) 2013 BULL + * Written by Yiannis Georgiou <yiannis.georgiou@bull.net>, + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * This file is patterned after jobcomp_linux.c, written by Morris Jette and + * Copyright (C) 2002 The Regents of the University of California. +\*****************************************************************************/ + + +/* acct_gather_infiniband_none + * This plugin does not initiate a node-level thread. + * It is the acct_gather_infiniband stub. + */ + +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" + +#include <fcntl.h> +#include <signal.h> + +#define _DEBUG 1 /* NOTE(review): unused in this file; confirm need */ +#define _DEBUG_INFINIBAND 1 /* NOTE(review): unused in this file; confirm need */ + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. 
SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "acct_gather_infiniband" for SLURM InfiniBand accounting) + * and <method> is a description of how this plugin satisfies that application. + * SLURM will only load InfiniBand accounting plugins if the plugin_type string + * has a prefix of "acct_gather_infiniband/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the infiniband accounting API + * matures. + */ +const char plugin_name[] = "AcctGatherInfiniband NONE plugin"; +const char plugin_type[] = "acct_gather_infiniband/none"; +const uint32_t plugin_version = 100; + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. This stub needs no setup; it only logs that it was loaded. 
+ */ +extern int init(void) +{ + verbose("%s loaded", plugin_name); + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + return SLURM_SUCCESS; /* stub: init() acquired nothing to release */ +} + +extern int acct_gather_infiniband_p_node_update(void) +{ + return SLURM_SUCCESS; /* stub: does nothing and reports success */ +} + +extern void acct_gather_infiniband_p_conf_set(s_p_hashtbl_t *tbl) +{ + return; /* stub: tbl is ignored */ +} + +extern void acct_gather_infiniband_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + return; /* stub: both output arguments are left untouched */ +} + diff --git a/src/plugins/acct_gather_infiniband/ofed/Makefile.am b/src/plugins/acct_gather_infiniband/ofed/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..ad77d8bf6ec872a472b5028108f4ed24fa337bc7 --- /dev/null +++ b/src/plugins/acct_gather_infiniband/ofed/Makefile.am @@ -0,0 +1,29 @@ +# Makefile for acct_gather_infiniband/ofed plugin + +#dprx need to uncomment this when safe to build +#SUBDIRS = include lib + +AUTOMAKE_OPTIONS = foreign + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +if BUILD_OFED + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +pkglib_LTLIBRARIES = acct_gather_infiniband_ofed.la + +# Infiniband accounting ofed plugin. +acct_gather_infiniband_ofed_la_SOURCES = acct_gather_infiniband_ofed.c \ + acct_gather_infiniband_ofed.h + +acct_gather_infiniband_ofed_la_CFLAGS = $(OFED_CPPFLAGS) + +acct_gather_infiniband_ofed_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(OFED_LDFLAGS) +acct_gather_infiniband_ofed_la_LIBADD = $(OFED_LIBS) + +else + +EXTRA_acct_gather_infiniband_ofed_la_SOURCES = acct_gather_infiniband_ofed.c \ + acct_gather_infiniband_ofed.h +endif diff --git a/src/plugins/acct_gather_infiniband/ofed/Makefile.in b/src/plugins/acct_gather_infiniband/ofed/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..798c2c81d9a2f20cdafd160d0502db1742743b64 --- /dev/null +++ b/src/plugins/acct_gather_infiniband/ofed/Makefile.in @@ -0,0 +1,744 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for acct_gather_infiniband/ofed plugin + +#dprx need to uncomment this when safe to build +#SUBDIRS = include lib + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_infiniband/ofed +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + 
$(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +am__DEPENDENCIES_1 = +@BUILD_OFED_TRUE@acct_gather_infiniband_ofed_la_DEPENDENCIES = \ +@BUILD_OFED_TRUE@ $(am__DEPENDENCIES_1) +am__acct_gather_infiniband_ofed_la_SOURCES_DIST = \ + acct_gather_infiniband_ofed.c acct_gather_infiniband_ofed.h +@BUILD_OFED_TRUE@am_acct_gather_infiniband_ofed_la_OBJECTS = acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.lo +am__EXTRA_acct_gather_infiniband_ofed_la_SOURCES_DIST = \ + acct_gather_infiniband_ofed.c acct_gather_infiniband_ofed.h +acct_gather_infiniband_ofed_la_OBJECTS = \ + $(am_acct_gather_infiniband_ofed_la_OBJECTS) +acct_gather_infiniband_ofed_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(acct_gather_infiniband_ofed_la_CFLAGS) $(CFLAGS) \ + $(acct_gather_infiniband_ofed_la_LDFLAGS) $(LDFLAGS) -o $@ +@BUILD_OFED_TRUE@am_acct_gather_infiniband_ofed_la_rpath 
= -rpath \ +@BUILD_OFED_TRUE@ $(pkglibdir) +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(acct_gather_infiniband_ofed_la_SOURCES) \ + $(EXTRA_acct_gather_infiniband_ofed_la_SOURCES) +DIST_SOURCES = $(am__acct_gather_infiniband_ofed_la_SOURCES_DIST) \ + $(am__EXTRA_acct_gather_infiniband_ofed_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ 
+DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ 
+OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ 
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common +@BUILD_OFED_TRUE@PLUGIN_FLAGS = -module -avoid-version --export-dynamic +@BUILD_OFED_TRUE@pkglib_LTLIBRARIES = acct_gather_infiniband_ofed.la + +# Infiniband accounting ofed plugin. 
+@BUILD_OFED_TRUE@acct_gather_infiniband_ofed_la_SOURCES = acct_gather_infiniband_ofed.c \ +@BUILD_OFED_TRUE@ acct_gather_infiniband_ofed.h + +@BUILD_OFED_TRUE@acct_gather_infiniband_ofed_la_CFLAGS = $(OFED_CPPFLAGS) +@BUILD_OFED_TRUE@acct_gather_infiniband_ofed_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(OFED_LDFLAGS) +@BUILD_OFED_TRUE@acct_gather_infiniband_ofed_la_LIBADD = $(OFED_LIBS) +@BUILD_OFED_FALSE@EXTRA_acct_gather_infiniband_ofed_la_SOURCES = acct_gather_infiniband_ofed.c \ +@BUILD_OFED_FALSE@ acct_gather_infiniband_ofed.h + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_infiniband/ofed/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_infiniband/ofed/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +acct_gather_infiniband_ofed.la: $(acct_gather_infiniband_ofed_la_OBJECTS) $(acct_gather_infiniband_ofed_la_DEPENDENCIES) $(EXTRA_acct_gather_infiniband_ofed_la_DEPENDENCIES) + $(acct_gather_infiniband_ofed_la_LINK) $(am_acct_gather_infiniband_ofed_la_rpath) $(acct_gather_infiniband_ofed_la_OBJECTS) $(acct_gather_infiniband_ofed_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.lo: acct_gather_infiniband_ofed.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(acct_gather_infiniband_ofed_la_CFLAGS) $(CFLAGS) -MT acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.lo -MD -MP -MF $(DEPDIR)/acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.Tpo -c -o acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.lo `test -f 'acct_gather_infiniband_ofed.c' || echo '$(srcdir)/'`acct_gather_infiniband_ofed.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.Tpo $(DEPDIR)/acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='acct_gather_infiniband_ofed.c' object='acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(acct_gather_infiniband_ofed_la_CFLAGS) $(CFLAGS) -c -o acct_gather_infiniband_ofed_la-acct_gather_infiniband_ofed.lo `test -f 'acct_gather_infiniband_ofed.c' || echo '$(srcdir)/'`acct_gather_infiniband_ofed.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ 
+ END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c b/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c new file mode 100644 index 0000000000000000000000000000000000000000..30c0c2a284e425995f08065c13e7a4586449678a --- /dev/null +++ b/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c @@ -0,0 +1,382 @@ +/*****************************************************************************\ + * acct_gather_infiniband_ofed.c -slurm infiniband accounting plugin for ofed + ***************************************************************************** + * Copyright (C) 2013 + * Written by Bull- Yiannis Georgiou + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * This file is patterned after jobcomp_linux.c, written by Morris Jette and + * Copyright (C) 2002 The Regents of the University of California. +\*****************************************************************************/ + + +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <signal.h> + +#include <unistd.h> +#include <getopt.h> +#include <netinet/in.h> + + +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_acct_gather_infiniband.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" +#include "src/common/slurm_acct_gather_profile.h" + +#include "src/slurmd/slurmd/slurmd.h" +#include "acct_gather_infiniband_ofed.h" + +/* + * ofed includes for the lib + */ + +#include <infiniband/umad.h> +#include <infiniband/mad.h> + +/***************************************************************/ + +#define ALL_PORTS 0xFF + + +#define _DEBUG 1 +#define _DEBUG_INFINIBAND 1 +#define TIMEOUT 20 +#define IB_FREQ 4 + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. 
+ * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method> + * is a description of how this plugin satisfies that application. SLURM will + * only load job completion logging plugins if the plugin_type string has a + * prefix of "jobacct/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the job accounting API + * matures. + */ + +const char plugin_name[] = "AcctGatherInfiniband OFED plugin"; +const char plugin_type[] = "acct_gather_infiniband/ofed"; +const uint32_t plugin_version = 100; + +typedef struct { + uint32_t port; +} slurm_ofed_conf_t; + + +struct ibmad_port *srcport = NULL; +static ib_portid_t portid; +static int ibd_timeout = 0; +static int port = 0; + +typedef struct { + time_t last_update_time; + time_t update_time; + uint64_t xmtdata; + uint64_t rcvdata; + uint64_t xmtpkts; + uint64_t rcvpkts; + uint64_t total_xmtdata; + uint64_t total_rcvdata; + uint64_t total_xmtpkts; + uint64_t total_rcvpkts; +} ofed_sens_t; + +static ofed_sens_t ofed_sens = {0,0,0,0,0,0,0,0}; + +static uint8_t pc[1024]; + +static slurm_ofed_conf_t ofed_conf; +static uint32_t debug_flags = 0; +static pthread_mutex_t ofed_lock = PTHREAD_MUTEX_INITIALIZER; + +static uint8_t *_slurm_pma_query_via(void *rcvbuf, ib_portid_t * dest, int port, + unsigned timeout, unsigned id, + const struct ibmad_port *srcport) +{ +#ifdef HAVE_OFED_PMA_QUERY_VIA + return pma_query_via(rcvbuf, 
dest, port, timeout, id, srcport); +#else + switch (id) { + case CLASS_PORT_INFO: + return perf_classportinfo_query_via( + pc, &portid, port, ibd_timeout, srcport); + break; + case IB_GSI_PORT_COUNTERS_EXT: + return port_performance_ext_query_via( + pc, &portid, port, ibd_timeout, srcport); + break; + default: + error("_slurm_pma_query_via: unhandled id"); + } + return NULL; +#endif +} + +/* + * _read_ofed_values read the IB sensor and update last_update values and times + */ +static int _read_ofed_values(void) +{ + static uint64_t last_update_xmtdata = 0; + static uint64_t last_update_rcvdata = 0; + static uint64_t last_update_xmtpkts = 0; + static uint64_t last_update_rcvpkts = 0; + static bool first = true; + + int rc = SLURM_SUCCESS; + + uint16_t cap_mask; + uint64_t send_val, recv_val, send_pkts, recv_pkts; + + ofed_sens.last_update_time = ofed_sens.update_time; + ofed_sens.update_time = time(NULL); + + if (first) { + char *ibd_ca = NULL; + int mgmt_classes[4] = {IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, + IB_SA_CLASS, IB_PERFORMANCE_CLASS}; + srcport = mad_rpc_open_port(ibd_ca, ofed_conf.port, + mgmt_classes, 4); + if (!srcport){ + error("Failed to open '%s' port '%d'", ibd_ca, + ofed_conf.port); + debug("INFINIBAND: failed"); + return SLURM_ERROR; + } + + if (ib_resolve_self_via(&portid, &port, 0, srcport) < 0) + error("can't resolve self port %d", port); + + memset(pc, 0, sizeof(pc)); + if (!_slurm_pma_query_via(pc, &portid, port, ibd_timeout, + CLASS_PORT_INFO, srcport)) + error("classportinfo query: %m"); + + memcpy(&cap_mask, pc + 2, sizeof(cap_mask)); + if (!_slurm_pma_query_via(pc, &portid, port, ibd_timeout, + IB_GSI_PORT_COUNTERS_EXT, srcport)) { + error("ofed: %m"); + return SLURM_ERROR; + } + + mad_decode_field(pc, IB_PC_EXT_XMT_BYTES_F, + &last_update_xmtdata); + mad_decode_field(pc, IB_PC_EXT_RCV_BYTES_F, + &last_update_rcvdata); + mad_decode_field(pc, IB_PC_EXT_XMT_PKTS_F, + &last_update_xmtpkts); + mad_decode_field(pc, IB_PC_EXT_RCV_PKTS_F, + 
&last_update_rcvpkts); + + if (debug_flags & DEBUG_FLAG_INFINIBAND) + info("%s ofed init", plugin_name); + + first = 0; + return SLURM_SUCCESS; + } + + memset(pc, 0, sizeof(pc)); + memcpy(&cap_mask, pc + 2, sizeof(cap_mask)); + if (!_slurm_pma_query_via(pc, &portid, port, ibd_timeout, + IB_GSI_PORT_COUNTERS_EXT, srcport)) { + error("ofed: %m"); + return SLURM_ERROR; + } + + mad_decode_field(pc, IB_PC_EXT_XMT_BYTES_F, &send_val); + mad_decode_field(pc, IB_PC_EXT_RCV_BYTES_F, &recv_val); + mad_decode_field(pc, IB_PC_EXT_XMT_PKTS_F, &send_pkts); + mad_decode_field(pc, IB_PC_EXT_RCV_PKTS_F, &recv_pkts); + + ofed_sens.xmtdata = (send_val - last_update_xmtdata) * 4; + ofed_sens.total_xmtdata += ofed_sens.xmtdata; + ofed_sens.rcvdata = (recv_val - last_update_rcvdata) * 4; + ofed_sens.total_rcvdata += ofed_sens.rcvdata; + ofed_sens.xmtpkts = send_pkts - last_update_xmtpkts; + ofed_sens.total_xmtpkts += ofed_sens.xmtpkts; + ofed_sens.rcvpkts = recv_pkts - last_update_rcvpkts; + ofed_sens.total_rcvpkts += ofed_sens.rcvpkts; + + last_update_xmtdata = send_val; + last_update_rcvdata = recv_val; + last_update_xmtpkts = send_pkts; + last_update_rcvpkts = recv_pkts; + + return rc; +} + + +/* + * _thread_update_node_energy calls _read_ipmi_values and updates all values + * for node consumption + */ +static int _update_node_infiniband(void) +{ + acct_network_data_t net; + int rc = SLURM_SUCCESS; + + slurm_mutex_lock(&ofed_lock); + rc = _read_ofed_values(); + + memset(&net, 0, sizeof(acct_network_data_t)); + + net.packets_in = ofed_sens.rcvpkts; + net.packets_out = ofed_sens.xmtpkts; + net.size_in = (double) ofed_sens.rcvdata / 1048576; + net.size_out = (double) ofed_sens.xmtdata / 1048576; + acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_NETWORK, + &net); + + if (debug_flags & DEBUG_FLAG_INFINIBAND) { + info("ofed-thread = %d sec, transmitted %"PRIu64" bytes, " + "received %"PRIu64" bytes", + (int) (ofed_sens.update_time - ofed_sens.last_update_time), + 
ofed_sens.xmtdata, ofed_sens.rcvdata); + } + slurm_mutex_unlock(&ofed_lock); + + return rc; +} + +static bool _run_in_daemon(void) +{ + static bool set = false; + static bool run = false; + + if (!set) { + set = 1; + run = run_in_daemon("slurmstepd"); + } + + return run; +} + + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. Put global initialization here. + */ +extern int init(void) +{ + debug_flags = slurm_get_debug_flags(); + + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + if (!_run_in_daemon()) + return SLURM_SUCCESS; + + if (srcport) { + _update_node_infiniband(); + mad_rpc_close_port(srcport); + } + + if (debug_flags & DEBUG_FLAG_INFINIBAND) + info("ofed: ended"); + + return SLURM_SUCCESS; +} + +extern int acct_gather_infiniband_p_node_update(void) +{ + uint32_t profile; + int rc = SLURM_SUCCESS; + static bool set = false; + static bool run = true; + + if (!set) { + set = true; + acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, + &profile); + + if (!(profile & ACCT_GATHER_PROFILE_NETWORK)) + run = false; + } + + if (run) + _update_node_infiniband(); + + return rc; +} + + +extern void acct_gather_infiniband_p_conf_set(s_p_hashtbl_t *tbl) +{ + if (tbl) { + if (!s_p_get_uint32(&ofed_conf.port, + "InfinibandOFEDPort", tbl)) + ofed_conf.port = INFINIBAND_DEFAULT_PORT; + } + + if (!_run_in_daemon()) + return; + + verbose("%s loaded", plugin_name); + ofed_sens.update_time = time(NULL); +} + +extern void acct_gather_infiniband_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + s_p_options_t options[] = { + {"InfinibandOFEDPort", S_P_UINT32}, + {NULL} }; + + transfer_s_p_options(full_options, options, full_options_cnt); + + return; +} diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.h b/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.h similarity index 68% rename from src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.h rename to 
src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.h index c269be519fbc8a96c7bb7f260251f8506dfdc0e8..ac9bc737ced5fea27dc5507e6913d08e82238eb2 100644 --- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.h +++ b/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.h @@ -1,9 +1,8 @@ /*****************************************************************************\ - * acct_gather_energy_rapl.h - slurm energy accounting plugin for rapl. + * acct_gather_infiniband_ofed.h -slurm infiniband accounting plugin.for ofed ***************************************************************************** - * Copyright (C) 2012 + * Copyright (C) 2013 * Written by Bull- Yiannis Georgiou - * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. * For details, see <http://www.schedmd.com/slurmdocs/>. @@ -38,31 +37,18 @@ * Copyright (C) 2002 The Regents of the University of California. \*****************************************************************************/ -#ifndef _ENERGY_GATHER_RAPL_H_ -#define _ENERGY_GATHER_RAPL_H_ +#ifndef _GATHER_INFINIBAND_H_ +#define _GATHER_INFINIBAND_H_ -#define MAX_PKGS 256 +#define INFINIBAND_DEFAULT_PORT 1 -#define MSR_RAPL_POWER_UNIT 0x606 - -/* Package RAPL Domain */ -#define MSR_PKG_RAPL_POWER_LIMIT 0x610 -#define MSR_PKG_ENERGY_STATUS 0x611 -#define MSR_PKG_PERF_STATUS 0x613 -#define MSR_PKG_POWER_INFO 0x614 - -/* DRAM RAPL Domain */ -#define MSR_DRAM_POWER_LIMIT 0x618 -#define MSR_DRAM_ENERGY_STATUS 0x619 -#define MSR_DRAM_PERF_STATUS 0x61B -#define MSR_DRAM_POWER_INFO 0x61C - -extern int acct_gather_energy_p_update_node_energy(void); -extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy); -extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, - acct_gather_energy_t *energy); extern int init ( void ); extern int fini ( void ); +extern int acct_gather_infiniband_p_update_node(void); 
+extern void acct_gather_infiniband_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +extern void acct_gather_infiniband_p_conf_set(s_p_hashtbl_t *tbl); + #endif + diff --git a/src/plugins/acct_gather_profile/Makefile.am b/src/plugins/acct_gather_profile/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..5e67c2099cdd55d65cf61791495d33738db8ca42 --- /dev/null +++ b/src/plugins/acct_gather_profile/Makefile.am @@ -0,0 +1,6 @@ +# Makefile for accounting gather profile plugins + +SUBDIRS = none +if BUILD_HDF5 +SUBDIRS += hdf5 +endif diff --git a/src/plugins/acct_gather_profile/Makefile.in b/src/plugins/acct_gather_profile/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..a7e6311264cd8a1b50315ee9e3e29682641c70cd --- /dev/null +++ b/src/plugins/acct_gather_profile/Makefile.in @@ -0,0 +1,736 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for accounting gather profile plugins +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@BUILD_HDF5_TRUE@am__append_1 = hdf5 +subdir = src/plugins/acct_gather_profile +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + 
$(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = 
ctags +DIST_SUBDIRS = none hdf5 +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ 
+GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ 
+build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = none $(am__append_1) +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/plugins/acct_gather_profile/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/plugins/acct_gather_profile/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ 
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/acct_gather_profile/hdf5/Makefile.am b/src/plugins/acct_gather_profile/hdf5/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..e19b873b9bf9164e2686d685211140db30cced8e --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/Makefile.am @@ -0,0 +1,38 @@ +# Makefile for acct_gather_profile/hdf5 plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +# Do not put a link to common here. src/common contains an mpi.h which +# hdf5 could have been installed with a link to the generic mpi.h. +INCLUDES = -I$(top_srcdir) + +# Sources for the hdf5 profile plugin and its hdf5_api helper library. +HDF5_SOURCES = acct_gather_profile_hdf5.c +HDF5_API_SOURCES = hdf5_api.c hdf5_api.h + +if BUILD_HDF5 + +SUBDIRS = sh5util + +pkglib_LTLIBRARIES = acct_gather_profile_hdf5.la +noinst_LTLIBRARIES = libhdf5_api.la + +libhdf5_api_la_SOURCES = $(HDF5_API_SOURCES) +libhdf5_api_la_LDFLAGS = $(HDF5_LDFLAGS) +libhdf5_api_la_CPPFLAGS = $(HDF5_CPPFLAGS) +libhdf5_api_la_LIBADD = $(HDF5_LIBS) + +acct_gather_profile_hdf5_la_SOURCES = $(HDF5_SOURCES) +acct_gather_profile_hdf5_la_LDFLAGS = \ + $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(HDF5_LDFLAGS) +acct_gather_profile_hdf5_la_CPPFLAGS = $(HDF5_CPPFLAGS) +acct_gather_profile_hdf5_la_LIBADD = $(HDF5_LIBS) libhdf5_api.la + +else + +EXTRA_acct_gather_profile_hdf5_la_SOURCES = $(HDF5_SOURCES) +EXTRA_libhdf5_api_la_SOURCES = $(HDF5_API_SOURCES) + +endif diff --git a/src/plugins/acct_gather_profile/hdf5/Makefile.in b/src/plugins/acct_gather_profile/hdf5/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..56c85861899c46980547a5576e2923a7376b0c96 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/Makefile.in @@ -0,0 +1,936 @@ +#
Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for acct_gather_profile/hdf5 plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_profile/hdf5 +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + 
$(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; 
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(noinst_LTLIBRARIES) $(pkglib_LTLIBRARIES) +am__DEPENDENCIES_1 = +@BUILD_HDF5_TRUE@acct_gather_profile_hdf5_la_DEPENDENCIES = \ +@BUILD_HDF5_TRUE@ $(am__DEPENDENCIES_1) libhdf5_api.la +am__acct_gather_profile_hdf5_la_SOURCES_DIST = \ + acct_gather_profile_hdf5.c +am__objects_1 = \ + acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.lo +@BUILD_HDF5_TRUE@am_acct_gather_profile_hdf5_la_OBJECTS = \ +@BUILD_HDF5_TRUE@ $(am__objects_1) +am__EXTRA_acct_gather_profile_hdf5_la_SOURCES_DIST = \ + acct_gather_profile_hdf5.c +acct_gather_profile_hdf5_la_OBJECTS = \ + $(am_acct_gather_profile_hdf5_la_OBJECTS) +acct_gather_profile_hdf5_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(acct_gather_profile_hdf5_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@BUILD_HDF5_TRUE@am_acct_gather_profile_hdf5_la_rpath = -rpath \ +@BUILD_HDF5_TRUE@ $(pkglibdir) 
+@BUILD_HDF5_TRUE@libhdf5_api_la_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__libhdf5_api_la_SOURCES_DIST = hdf5_api.c hdf5_api.h +am__objects_2 = libhdf5_api_la-hdf5_api.lo +@BUILD_HDF5_TRUE@am_libhdf5_api_la_OBJECTS = $(am__objects_2) +am__EXTRA_libhdf5_api_la_SOURCES_DIST = hdf5_api.c hdf5_api.h +libhdf5_api_la_OBJECTS = $(am_libhdf5_api_la_OBJECTS) +libhdf5_api_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libhdf5_api_la_LDFLAGS) $(LDFLAGS) -o $@ +@BUILD_HDF5_TRUE@am_libhdf5_api_la_rpath = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(acct_gather_profile_hdf5_la_SOURCES) \ + $(EXTRA_acct_gather_profile_hdf5_la_SOURCES) \ + $(libhdf5_api_la_SOURCES) $(EXTRA_libhdf5_api_la_SOURCES) +DIST_SOURCES = $(am__acct_gather_profile_hdf5_la_SOURCES_DIST) \ + $(am__EXTRA_acct_gather_profile_hdf5_la_SOURCES_DIST) \ + $(am__libhdf5_api_la_SOURCES_DIST) \ + $(am__EXTRA_libhdf5_api_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +am__can_run_installinfo = \ + case 
$$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = sh5util +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS 
= @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ 
+OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ 
+ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +# Do not put a link to common here. src/common contains an mpi.h which +# hdf5 could of been installed with a link to the generic mpi.h. +INCLUDES = -I$(top_srcdir) + +# cpu/core energy accounting plugin. 
+HDF5_SOURCES = acct_gather_profile_hdf5.c +HDF5_API_SOURCES = hdf5_api.c hdf5_api.h +@BUILD_HDF5_TRUE@SUBDIRS = sh5util +@BUILD_HDF5_TRUE@pkglib_LTLIBRARIES = acct_gather_profile_hdf5.la +@BUILD_HDF5_TRUE@noinst_LTLIBRARIES = libhdf5_api.la +@BUILD_HDF5_TRUE@libhdf5_api_la_SOURCES = $(HDF5_API_SOURCES) +@BUILD_HDF5_TRUE@libhdf5_api_la_LDFLAGS = $(HDF5_LDFLAGS) +@BUILD_HDF5_TRUE@libhdf5_api_la_CPPFLAGS = $(HDF5_CPPFLAGS) +@BUILD_HDF5_TRUE@libhdf5_api_la_LIBADD = $(HDF5_LIBS) +@BUILD_HDF5_TRUE@acct_gather_profile_hdf5_la_SOURCES = $(HDF5_SOURCES) +@BUILD_HDF5_TRUE@acct_gather_profile_hdf5_la_LDFLAGS = \ +@BUILD_HDF5_TRUE@ $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(HDF5_LDFLAGS) + +@BUILD_HDF5_TRUE@acct_gather_profile_hdf5_la_CPPFLAGS = $(HDF5_CPPFLAGS) +@BUILD_HDF5_TRUE@acct_gather_profile_hdf5_la_LIBADD = $(HDF5_LIBS) libhdf5_api.la +@BUILD_HDF5_FALSE@EXTRA_acct_gather_profile_hdf5_la_SOURCES = $(HDF5_SOURCES) +@BUILD_HDF5_FALSE@EXTRA_libhdf5_api_la_SOURCES = $(HDF5_API_SOURCES) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/hdf5/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/hdf5/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +acct_gather_profile_hdf5.la: $(acct_gather_profile_hdf5_la_OBJECTS) $(acct_gather_profile_hdf5_la_DEPENDENCIES) $(EXTRA_acct_gather_profile_hdf5_la_DEPENDENCIES) + $(acct_gather_profile_hdf5_la_LINK) $(am_acct_gather_profile_hdf5_la_rpath) $(acct_gather_profile_hdf5_la_OBJECTS) $(acct_gather_profile_hdf5_la_LIBADD) $(LIBS) +libhdf5_api.la: $(libhdf5_api_la_OBJECTS) $(libhdf5_api_la_DEPENDENCIES) $(EXTRA_libhdf5_api_la_DEPENDENCIES) + $(libhdf5_api_la_LINK) $(am_libhdf5_api_la_rpath) $(libhdf5_api_la_OBJECTS) $(libhdf5_api_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhdf5_api_la-hdf5_api.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c 
`$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.lo: acct_gather_profile_hdf5.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(acct_gather_profile_hdf5_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.lo -MD -MP -MF $(DEPDIR)/acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.Tpo -c -o acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.lo `test -f 'acct_gather_profile_hdf5.c' || echo '$(srcdir)/'`acct_gather_profile_hdf5.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.Tpo $(DEPDIR)/acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='acct_gather_profile_hdf5.c' object='acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(acct_gather_profile_hdf5_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o acct_gather_profile_hdf5_la-acct_gather_profile_hdf5.lo `test -f 'acct_gather_profile_hdf5.c' || echo '$(srcdir)/'`acct_gather_profile_hdf5.c + +libhdf5_api_la-hdf5_api.lo: hdf5_api.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libhdf5_api_la_CPPFLAGS) $(CPPFLAGS) 
$(AM_CFLAGS) $(CFLAGS) -MT libhdf5_api_la-hdf5_api.lo -MD -MP -MF $(DEPDIR)/libhdf5_api_la-hdf5_api.Tpo -c -o libhdf5_api_la-hdf5_api.lo `test -f 'hdf5_api.c' || echo '$(srcdir)/'`hdf5_api.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libhdf5_api_la-hdf5_api.Tpo $(DEPDIR)/libhdf5_api_la-hdf5_api.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='hdf5_api.c' object='libhdf5_api_la-hdf5_api.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libhdf5_api_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libhdf5_api_la-hdf5_api.lo `test -f 'hdf5_api.c' || echo '$(srcdir)/'`hdf5_api.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + 
+distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + clean-pkglibLTLIBRARIES mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES clean-pkglibLTLIBRARIES ctags \ + ctags-recursive distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ 
+ install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-pkglibLTLIBRARIES install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c new file mode 100644 index 0000000000000000000000000000000000000000..8a36ceacae08bfa4c85adce52c11a86f74551038 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c @@ -0,0 +1,616 @@ +/*****************************************************************************\ + * acct_gather_profile_hdf5.c - slurm energy accounting plugin for + * hdf5 profiling. + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Portions Copyright (C) 2013 SchedMD LLC. + * Written by Danny Auble <da@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * This file is patterned after jobcomp_linux.c, written by Morris Jette and + * Copyright (C) 2002 The Regents of the University of California. +\*****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <inttypes.h> +#include <unistd.h> +#include <math.h> + +#include "src/common/slurm_xlator.h" +#include "src/common/fd.h" +#include "src/common/slurm_acct_gather_profile.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" +#include "hdf5_api.h" + +/* + * These variables are required by the generic plugin interface. 
If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method> + * is a description of how this plugin satisfies that application. SLURM will + * only load job completion logging plugins if the plugin_type string has a + * prefix of "jobacct/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the job accounting API + * matures. + */ +const char plugin_name[] = "AcctGatherProfile hdf5 plugin"; +const char plugin_type[] = "acct_gather_profile/hdf5"; +const uint32_t plugin_version = 100; + +hid_t typTOD; + +typedef struct { + char *dir; + uint32_t def; +} slurm_hdf5_conf_t; + +// Global HDF5 Variables +// The HDF5 file and base objects will remain open for the duration of the +// step. This avoids reconstruction on every acct_gather_sample and +// flushing the buffers on every put. +// Static variables ok as add function are inside a lock. 
+static hid_t file_id = -1;	// File
+static hid_t gid_node = -1;
+static hid_t gid_tasks = -1;
+static hid_t gid_samples = -1;
+static hid_t gid_totals = -1;
+static char group_node[MAX_GROUP_NAME+1];
+static slurm_hdf5_conf_t hdf5_conf;
+static uint32_t debug_flags = 0;
+static uint32_t g_profile_running = ACCT_GATHER_PROFILE_NOT_SET;
+static slurmd_job_t *g_job = NULL;
+
+/*
+ * Release the configured profile directory string and reset the default
+ * profile to ACCT_GATHER_PROFILE_NONE.
+ */
+static void _reset_slurm_profile_conf(void)
+{
+	xfree(hdf5_conf.dir);
+	hdf5_conf.def = ACCT_GATHER_PROFILE_NONE;
+}
+
+/*
+ * Select the profile value for the current step.
+ * A value already stored in g_profile_running takes precedence, then the
+ * job's own request when it is above ACCT_GATHER_PROFILE_NONE, and finally
+ * the configured default (hdf5_conf.def).
+ * g_job must have been set before this is called.
+ */
+static uint32_t _determine_profile(void)
+{
+	uint32_t profile;
+
+	xassert(g_job);
+
+	if (g_profile_running != ACCT_GATHER_PROFILE_NOT_SET)
+		profile = g_profile_running;
+	else if (g_job->profile > ACCT_GATHER_PROFILE_NONE)
+		profile = g_job->profile;
+	else
+		profile = hdf5_conf.def;
+
+	return profile;
+}
+
+/*
+ * Look up the task of g_job whose pid equals "pid".
+ * IN  pid  - process id to search for
+ * OUT gtid - set to the matching task's gtid; untouched when no task matches
+ * RET SLURM_SUCCESS if a task was found, SLURM_ERROR otherwise
+ */
+static int _get_taskid_from_pid(pid_t pid, uint32_t *gtid)
+{
+	int tx;
+
+	xassert(g_job);
+
+	for (tx=0; tx<g_job->node_tasks; tx++) {
+		if (g_job->task[tx]->pid == pid) {
+			*gtid = g_job->task[tx]->gtid;
+			return SLURM_SUCCESS;
+		}
+	}
+
+	return SLURM_ERROR;
+}
+
+/*
+ * Make sure the profile directory (hdf5_conf.dir) and the per-user
+ * subdirectory "<dir>/<pw_name>" exist with the expected modes and owner.
+ * Problems with the top-level directory, and a failed mkdir() of either
+ * directory, are fatal; other problems with the per-user directory are
+ * only logged.
+ * RET SLURM_SUCCESS (every hard failure goes through fatal())
+ */
+static int _create_directories(void)
+{
+	int rc;
+	struct stat st;
+	char *user_dir = NULL;
+
+	xassert(g_job);
+	xassert(hdf5_conf.dir);
+	/*
+	 * If the profile directory does not exist, try to create it.
+	 * Otherwise, ensure the path is a directory as expected, and that
+	 * we have permission to write to it.
+	 * Also make sure the per-user subdirectory exists.
+	 */
+
+	if (((rc = stat(hdf5_conf.dir, &st)) < 0) && (errno == ENOENT)) {
+		if (mkdir(hdf5_conf.dir, 0755) < 0)
+			fatal("mkdir(%s): %m", hdf5_conf.dir);
+	} else if (rc < 0)
+		fatal("Unable to stat acct_gather_profile_dir: %s: %m",
+		      hdf5_conf.dir);
+	else if (!S_ISDIR(st.st_mode))
+		fatal("acct_gather_profile_dir: %s: Not a directory!",
+		      hdf5_conf.dir);
+	else if (access(hdf5_conf.dir, R_OK|W_OK|X_OK) < 0)
+		fatal("Incorrect permissions on acct_gather_profile_dir: %s",
+		      hdf5_conf.dir);
+	/* The result of this chmod() is not checked. */
+	chmod(hdf5_conf.dir, 0755);
+
+	user_dir = xstrdup_printf("%s/%s", hdf5_conf.dir, g_job->pwd->pw_name);
+	if (((rc = stat(user_dir, &st)) < 0) && (errno == ENOENT)) {
+		if (mkdir(user_dir, 0700) < 0)
+			fatal("mkdir(%s): %m", user_dir);
+	} else if (rc < 0) {
+		/* Any other stat() failure used to pass silently. */
+		error("Unable to stat %s: %m", user_dir);
+	}
+	/* The result of this chmod() is not checked. */
+	chmod(user_dir, 0700);
+	if (chown(user_dir, (uid_t)g_job->pwd->pw_uid,
+		  (gid_t)g_job->pwd->pw_gid) < 0)
+		error("chown(%s): %m", user_dir);
+
+	xfree(user_dir);
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Decide whether data of kind "profile" should be recorded.
+ * IN profile      - kind being offered; ACCT_GATHER_PROFILE_NOT_SET matches
+ *                   any request
+ * IN req_profiles - profile value in effect for the step
+ * RET false when req_profiles is ACCT_GATHER_PROFILE_NONE or lower,
+ *     true when profile is NOT_SET or shares a bit with req_profiles,
+ *     false otherwise
+ */
+static bool _do_profile(uint32_t profile, uint32_t req_profiles)
+{
+	if (req_profiles <= ACCT_GATHER_PROFILE_NONE)
+		return false;
+	if ((profile == ACCT_GATHER_PROFILE_NOT_SET)
+	    || (req_profiles & profile))
+		return true;
+
+	return false;
+}
+
+/*
+ * Report whether run_in_daemon("slurmstepd") holds for this process.
+ * The answer is computed on the first call and cached in a static.
+ */
+static bool _run_in_daemon(void)
+{
+	static bool set = false;
+	static bool run = false;
+
+	if (!set) {
+		set = true;
+		run = run_in_daemon("slurmstepd");
+	}
+
+	return run;
+}
+
+/*
+ * init() is called when the plugin is loaded, before any other functions
+ * are called. Put global initialization here.
+ */ +extern int init(void) +{ + if (!_run_in_daemon()) + return SLURM_SUCCESS; + + debug_flags = slurm_get_debug_flags(); + + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + return SLURM_SUCCESS; +} + +extern void acct_gather_profile_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + s_p_options_t options[] = { + {"ProfileHDF5Dir", S_P_STRING}, + {"ProfileHDF5Default", S_P_STRING}, + {NULL} }; + + transfer_s_p_options(full_options, options, full_options_cnt); + return; +} + +extern void acct_gather_profile_p_conf_set(s_p_hashtbl_t *tbl) +{ + char *tmp = NULL; + _reset_slurm_profile_conf(); + if (tbl) { + s_p_get_string(&hdf5_conf.dir, "ProfileHDF5Dir", tbl); + + if (s_p_get_string(&tmp, "ProfileHDF5Default", tbl)) { + hdf5_conf.def = acct_gather_profile_from_string(tmp); + xfree(tmp); + if (hdf5_conf.def == ACCT_GATHER_PROFILE_NOT_SET) + fatal("ProfileHDF5Default can not be " + "set to NotSet, please specify a valid " + "option"); + } + } + + if (!hdf5_conf.dir) + fatal("No ProfileHDF5Dir in your acct_gather.conf file. 
" + "This is required to use the %s plugin", plugin_type); + + verbose("%s loaded", plugin_name); +} + +extern void acct_gather_profile_p_get(enum acct_gather_profile_info info_type, + void *data) +{ + uint32_t *uint32 = (uint32_t *) data; + char **tmp_char = (char **) data; + + switch (info_type) { + case ACCT_GATHER_PROFILE_DIR: + *tmp_char = xstrdup(hdf5_conf.dir); + break; + case ACCT_GATHER_PROFILE_DEFAULT: + *uint32 = hdf5_conf.def; + break; + case ACCT_GATHER_PROFILE_RUNNING: + *uint32 = g_profile_running; + break; + default: + debug2("acct_gather_profile_p_get info_type %d invalid", + info_type); + } +} + +extern int acct_gather_profile_p_node_step_start(slurmd_job_t* job) +{ + int rc = SLURM_SUCCESS; + + time_t start_time; + char *profile_file_name; + char *profile_str; + + xassert(_run_in_daemon()); + + g_job = job; + + if (g_job->stepid == NO_VAL) { + g_profile_running = ACCT_GATHER_PROFILE_NONE; + return rc; + } + + xassert(hdf5_conf.dir); + + if (debug_flags & DEBUG_FLAG_PROFILE) { + profile_str = acct_gather_profile_to_string(g_job->profile); + info("PROFILE: option --profile=%s", profile_str); + } + + if (g_profile_running == ACCT_GATHER_PROFILE_NOT_SET) + g_profile_running = _determine_profile(); + + if (g_profile_running <= ACCT_GATHER_PROFILE_NONE) + return rc; + + _create_directories(); + + profile_file_name = xstrdup_printf( + "%s/%s/%u_%u_%s.h5", + hdf5_conf.dir, g_job->pwd->pw_name, + g_job->jobid, g_job->stepid, g_job->node_name); + + if (debug_flags & DEBUG_FLAG_PROFILE) { + profile_str = acct_gather_profile_to_string(g_profile_running); + info("PROFILE: node_step_start, opt=%s file=%s", + profile_str, profile_file_name); + } + + // Create a new file using the default properties. 
+	profile_init();
+	file_id = H5Fcreate(profile_file_name, H5F_ACC_TRUNC, H5P_DEFAULT,
+			    H5P_DEFAULT);
+	if (file_id < 1) {
+		/*
+		 * Check the create result before touching the file, and
+		 * report the real failure (this used to claim the Node
+		 * group failed).  Leave file_id at -1, the "not open" value
+		 * the other entry points test for.
+		 */
+		info("PROFILE: Failed to create HDF5 file %s",
+		     profile_file_name);
+		xfree(profile_file_name);
+		file_id = -1;
+		return SLURM_FAILURE;
+	}
+
+	/* Hand the new file to the job's user and make it private. */
+	if (chown(profile_file_name, (uid_t)g_job->pwd->pw_uid,
+		  (gid_t)g_job->pwd->pw_gid) < 0)
+		error("chown(%s): %m", profile_file_name);
+	if (chmod(profile_file_name, 0600) < 0)
+		error("chmod(%s): %m", profile_file_name);
+	xfree(profile_file_name);
+
+	/*
+	 * NOTE(review): group_node is declared outside this hunk; its size
+	 * is not visible here, so the unbounded sprintf is left as it was.
+	 * Confirm the buffer can hold "/<GRP_NODE>_<node_name>".
+	 */
+	sprintf(group_node, "/%s_%s", GRP_NODE, g_job->node_name);
+	gid_node = H5Gcreate(file_id, group_node, H5P_DEFAULT,
+			     H5P_DEFAULT, H5P_DEFAULT);
+	if (gid_node < 1) {
+		H5Fclose(file_id);
+		file_id = -1;
+		info("PROFILE: Failed to create Node group");
+		return SLURM_FAILURE;
+	}
+	put_string_attribute(gid_node, ATTR_NODENAME, g_job->node_name);
+	put_int_attribute(gid_node, ATTR_NTASKS, g_job->node_tasks);
+	start_time = time(NULL);
+	put_string_attribute(gid_node, ATTR_STARTTIME, ctime(&start_time));
+
+	return rc;
+}
+
+/*
+ * Close every HDF5 group opened for this step and then the file itself.
+ *
+ * Returns SLURM_SUCCESS in all cases; it is a no-op for a step id of
+ * NO_VAL or when profiling is not running.
+ */
+extern int acct_gather_profile_p_node_step_end()
+{
+	int rc = SLURM_SUCCESS;
+
+	xassert(_run_in_daemon());
+	xassert(g_job);
+
+	if (g_job->stepid == NO_VAL)
+		return rc;
+
+	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
+
+	/* Nothing was opened when profiling is off, so nothing to close. */
+	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
+		return rc;
+
+	if (debug_flags & DEBUG_FLAG_PROFILE)
+		info("PROFILE: node_step_end (shutdown)");
+
+	/*
+	 * Reset each id after closing it so that a later call cannot
+	 * mistake a closed handle for an open one (task_end and
+	 * add_sample_data create their groups lazily when the id is < 0).
+	 */
+	if (gid_totals > 0)
+		H5Gclose(gid_totals);
+	gid_totals = -1;
+	if (gid_samples > 0)
+		H5Gclose(gid_samples);
+	gid_samples = -1;
+	if (gid_tasks > 0)
+		H5Gclose(gid_tasks);
+	gid_tasks = -1;
+	if (gid_node > 0)
+		H5Gclose(gid_node);
+	gid_node = -1;
+	if (file_id > 0)
+		H5Fclose(file_id);
+	profile_fini();
+	file_id = -1;
+
+	return rc;
+}
+
+/*
+ * Called when a task starts.  Only emits a debug line when profiling is
+ * active; no HDF5 object is created here.
+ */
+extern int acct_gather_profile_p_task_start(uint32_t taskid)
+{
+	int rc = SLURM_SUCCESS;
+
+	xassert(_run_in_daemon());
+	xassert(g_job);
+
+	if (g_job->stepid == NO_VAL)
+		return rc;
+
+	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
+
+	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
+		return rc;
+
+	if (debug_flags & DEBUG_FLAG_PROFILE)
+		info("PROFILE: task_start");
+
+	return rc;
+}
+
+/*
+ * Record the per-task attributes (task id, cpus per task) for the task
+ * whose pid is taskpid, creating the Tasks group and the task's own group
+ * on first use.
+ *
+ * Returns SLURM_SUCCESS, or SLURM_FAILURE when the pid cannot be mapped to
+ * a task id, the HDF5 file is not open, or a group cannot be created.
+ */
+extern int acct_gather_profile_p_task_end(pid_t taskpid)
+{
+	hid_t gid_task;
+	char group_task[MAX_GROUP_NAME+1];
+	uint32_t task_id;
+	int rc = SLURM_SUCCESS;
+
+	xassert(_run_in_daemon());
+	xassert(g_job);
+
+	if (g_job->stepid == NO_VAL)
+		return rc;
+
+	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
+
+	if (!_do_profile(ACCT_GATHER_PROFILE_NOT_SET, g_profile_running))
+		return rc;
+
+	if (_get_taskid_from_pid(taskpid, &task_id) != SLURM_SUCCESS)
+		return SLURM_FAILURE;
+	if (file_id == -1) {
+		info("PROFILE: add_task_data, HDF5 file is not open");
+		return SLURM_FAILURE;
+	}
+	if (gid_tasks < 0) {
+		gid_tasks = make_group(gid_node, GRP_TASKS);
+		if (gid_tasks < 1) {
+			info("PROFILE: Failed to create Tasks group");
+			return SLURM_FAILURE;
+		}
+	}
+	/* Bounded, and %u because task_id is unsigned (same form as the
+	 * task group name built in add_sample_data). */
+	snprintf(group_task, sizeof(group_task), "%s_%u", GRP_TASK, task_id);
+	gid_task = get_group(gid_tasks, group_task);
+	if (gid_task < 0) {
+		gid_task = make_group(gid_tasks, group_task);
+		if (gid_task < 0) {
+			info("Failed to open tasks %s", group_task);
+			return SLURM_FAILURE;
+		}
+		put_int_attribute(gid_task, ATTR_TASKID, task_id);
+	}
+	put_int_attribute(gid_task, ATTR_CPUPERTASK, g_job->cpus_per_task);
+	/* The handle is local to this call; release it instead of leaking
+	 * one open group per finished task. */
+	H5Gclose(gid_task);
+
+	if (debug_flags & DEBUG_FLAG_PROFILE)
+		info("PROFILE: task_end");
+	return rc;
+}
+
+/*
+ * Append one sample of the given profile type to the step's HDF5 file.
+ *
+ * type	one of ACCT_GATHER_PROFILE_{ENERGY,TASK,LUSTRE,NETWORK}
+ * data	the gatherer's record for that type; it is read through the
+ *		matching pointer type below
+ *
+ * Returns SLURM_SUCCESS when the sample is written or profiling of this
+ * type is not requested, SLURM_ERROR for an unknown type or an unknown
+ * task pid, and SLURM_FAILURE when the file or a group is unavailable.
+ */
+extern int acct_gather_profile_p_add_sample_data(uint32_t type, void *data)
+{
+	hid_t g_sample_grp;
+	char group[MAX_GROUP_NAME+1];
+	char group_sample[MAX_GROUP_NAME+1];
+	static uint32_t sample_no = 0;
+	uint32_t task_id = 0;
+	void *send_profile = NULL;
+	char *type_name = NULL;
+
+	profile_task_t profile_task;
+	profile_network_t profile_network;
+	profile_energy_t profile_energy;
+	profile_io_t profile_io;
+
+	/* Same buffer seen through each producer's record type; only the
+	 * view matching "type" is dereferenced. */
+	struct jobacctinfo *jobacct = (struct jobacctinfo *)data;
+	acct_network_data_t *net = (acct_network_data_t *)data;
+	acct_energy_data_t *ener = (acct_energy_data_t *)data;
+	struct lustre_data *lus = (struct lustre_data *)data;
+
+	xassert(_run_in_daemon());
+	xassert(g_job);
+
+	if (g_job->stepid == NO_VAL)
+		return SLURM_SUCCESS;
+
+	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
+
+	if (!_do_profile(type, g_profile_running))
+		return SLURM_SUCCESS;
+
+	switch (type) {
+	case ACCT_GATHER_PROFILE_ENERGY:
+		snprintf(group, sizeof(group), "%s", GRP_ENERGY);
+
+		memset(&profile_energy, 0, sizeof(profile_energy_t));
+		profile_energy.time = ener->time;
+		profile_energy.cpu_freq = ener->cpu_freq;
+		profile_energy.power = ener->power;
+
+		send_profile = &profile_energy;
+		break;
+	case ACCT_GATHER_PROFILE_TASK:
+		if (_get_taskid_from_pid(jobacct->pid, &task_id)
+		    != SLURM_SUCCESS)
+			return SLURM_ERROR;
+
+		snprintf(group, sizeof(group), "%s_%u", GRP_TASK, task_id);
+
+		memset(&profile_task, 0, sizeof(profile_task_t));
+		profile_task.time = time(NULL);
+		profile_task.cpu_freq = jobacct->act_cpufreq;
+		profile_task.cpu_time = jobacct->tot_cpu;
+		/* NOTE(review): cpu_utilization is filled from the same
+		 * field as cpu_time -- confirm this is intended. */
+		profile_task.cpu_utilization = jobacct->tot_cpu;
+		profile_task.pages = jobacct->tot_pages;
+		profile_task.read_size = jobacct->tot_disk_read;
+		profile_task.rss = jobacct->tot_rss;
+		profile_task.vm_size = jobacct->tot_vsize;
+		profile_task.write_size = jobacct->tot_disk_write;
+
+		send_profile = &profile_task;
+		break;
+	case ACCT_GATHER_PROFILE_LUSTRE:
+		snprintf(group, sizeof(group), "%s", GRP_LUSTRE);
+
+		memset(&profile_io, 0, sizeof(profile_io_t));
+		profile_io.time = time(NULL);
+		profile_io.reads = lus->reads;
+		profile_io.read_size = lus->read_size;
+		profile_io.writes = lus->writes;
+		profile_io.write_size = lus->write_size;
+
+		send_profile = &profile_io;
+
+		break;
+	case ACCT_GATHER_PROFILE_NETWORK:
+
+		snprintf(group, sizeof(group), "%s", GRP_NETWORK);
+
+		memset(&profile_network, 0, sizeof(profile_network_t));
+		profile_network.time = time(NULL);
+		profile_network.packets_in = net->packets_in;
+		profile_network.size_in = net->size_in;
+		profile_network.packets_out = net->packets_out;
+		profile_network.size_out = net->size_out;
+
+		send_profile = &profile_network;
+
+		break;
+	default:
+		error("acct_gather_profile_p_add_sample_data: "
+		      "Unknown type %u sent", type);
+		return SLURM_ERROR;
+	}
+
+	type_name = acct_gather_profile_type_to_string(type);
+
+	if (debug_flags & DEBUG_FLAG_PROFILE)
+		info("PROFILE: add_sample_data Group-%s Type=%s",
+		     group, type_name);
+
+	if (file_id == -1) {
+		if (debug_flags & DEBUG_FLAG_PROFILE) {
+			// This can happen from samples from the gather threads
+			// before the step actually starts.
+			info("PROFILE: add_sample_data, HDF5 file not open");
+		}
+		return SLURM_FAILURE;
+	}
+	if (gid_samples < 0) {
+		gid_samples = make_group(gid_node, GRP_SAMPLES);
+		if (gid_samples < 1) {
+			info("PROFILE: failed to create TimeSeries group");
+			return SLURM_FAILURE;
+		}
+	}
+	g_sample_grp = get_group(gid_samples, group);
+	if (g_sample_grp < 0) {
+		g_sample_grp = make_group(gid_samples, group);
+		if (g_sample_grp < 0) {
+			info("PROFILE: failed to open TimeSeries %s", group);
+			return SLURM_FAILURE;
+		}
+		put_string_attribute(g_sample_grp, ATTR_DATATYPE, type_name);
+	}
+	/*
+	 * group_sample is no larger than group, so the "_" plus ten-digit
+	 * suffix can overflow it with plain sprintf; bound the write.
+	 * %u because sample_no is unsigned.
+	 */
+	snprintf(group_sample, sizeof(group_sample), "%s_%10.10u",
+		 group, ++sample_no);
+	put_hdf5_data(g_sample_grp, type, SUBDATA_SAMPLE,
+		      group_sample, send_profile, 1);
+	H5Gclose(g_sample_grp);
+
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/acct_gather_profile/hdf5/hdf5_api.c b/src/plugins/acct_gather_profile/hdf5/hdf5_api.c
new file mode 100644
index 0000000000000000000000000000000000000000..25f0580fa590bffc22ac962ea149920d5fac6d78
--- /dev/null
+++ b/src/plugins/acct_gather_profile/hdf5/hdf5_api.c
@@ -0,0 +1,2046 @@
+/****************************************************************************\
+ *  hdf5_api.c
+ *****************************************************************************
+ *  Copyright (C) 2013 Bull S. A. S.
+ *		Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois.
+ *
+ *  Written by Rod Schultz <rod.schultz@bull.com>
+ *
+ *  Provide support for acct_gather_profile plugins based on HDF5 files.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.schedmd.com/slurmdocs/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\****************************************************************************/ + +#include "src/common/macros.h" +#include "src/common/xassert.h" +#include "src/common/xstring.h" + +#include "hdf5_api.h" + + +// Static variables ok as add function are inside a lock. +static time_t seriesStart; +static hid_t typTOD; +static int i; // General index used in some macros. 
+static int moffset; // General variable used by insert macros
+
+/*
+ * NOTE: the H5Tinsert wrappers below expand to an "if" that closes the
+ * datatype and executes "return -1" on failure, so they may only be used
+ * inside functions returning an integer/hid_t.  The FILE_ADD_* forms also
+ * read and advance the file-scope "moffset"; the INCR_* forms use the
+ * file-scope index "i".
+ */
+
+/*
+ * Macro to insert a date string type into a compound memory type
+ *
+ * Parameters
+ *	p	parent (group) memory type
+ *	label	description of item
+ *	type	profile struct type
+ *	item	data item in type
+ */
+#define MEM_ADD_DATE_TIME(p, label, type, item) \
+	if(H5Tinsert(p, label, HOFFSET(type, item), typTOD) < 0) { \
+		debug3("PROFILE: failed insert into memory datatype"); \
+		H5Tclose(p); \
+		return -1; \
+	}
+/*
+ * Macro to insert a date string type into a compound file type
+ *
+ * Parameters
+ *	p	parent (group) file type
+ *	label	description of item
+ *	offset	offset into record
+ */
+#define FILE_ADD_DATE_TIME(p, label, offset) \
+	if(H5Tinsert(p, label, offset, typTOD) < 0) { \
+		debug3("PROFILE: failed insert into file datatype"); \
+		H5Tclose(p); \
+		return -1; \
+	}
+
+/*
+ * Macro to insert an uint64 into a compound memory type
+ *
+ * Parameters
+ *	p	parent (group) memory type
+ *	label	description of item
+ *	type	profile struct type
+ *	item	data item in type
+ */
+#define MEM_ADD_UINT64(p, label, type, item) \
+	if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_UINT64) < 0) { \
+		debug3("PROFILE: failed insert64 into memory datatype"); \
+		H5Tclose(p); \
+		return -1; \
+	}
+/*
+ * Macro to insert a uint64 into a compound file type
+ * (placed at the current moffset, which is then advanced by 8)
+ *
+ * Parameters
+ *	p	parent (group) file type
+ *	label	description of item
+ */
+#define FILE_ADD_UINT64(p, label) \
+	if(H5Tinsert(p, label, moffset, H5T_NATIVE_UINT64) < 0) { \
+		debug3("PROFILE: failed insert64 into file datatype"); \
+		H5Tclose(p); \
+		return -1; \
+	} \
+	moffset += 8;
+
+/*
+ * Macro to insert a double into a compound memory type
+ *
+ * Parameters
+ *	p	parent (group) memory type
+ *	label	description of item
+ *	type	profile struct type
+ *	item	data item in type
+ */
+#define MEM_ADD_DBL(p, label, type, item) \
+	if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_DOUBLE) < 0) { \
+		debug3("PROFILE: failed insertdbl into memory datatype"); \
+		H5Tclose(p); \
+		return -1; \
+	}
+/*
+ * Macro to insert a double into a compound file type
+ * (placed at the current moffset, which is then advanced by 8)
+ *
+ * Parameters
+ *	p	parent (group) file type
+ *	label	description of item
+ */
+#define FILE_ADD_DBL(p, label) \
+	if(H5Tinsert(p, label, moffset, H5T_NATIVE_DOUBLE) < 0) { \
+		debug3("PROFILE: failed insertdbl into file datatype"); \
+		H5Tclose(p); \
+		return -1; \
+	} \
+	moffset += 8;
+
+/*
+ * Macro to increment a sample in a difference series
+ * -- Difference means each sample represents counts for only that interval
+ *	(assumes consistent naming convention)
+ *
+ * The minimum is seeded from the first sample through "tot" itself; the
+ * macro no longer depends on the caller's variable being named "total".
+ *
+ * Parameters
+ *	tot	total pointer
+ *	smp	sample pointer
+ *	var	variable name in sample
+ *	count	number of items in series (must be non-zero)
+ */
+#define INCR_DIF_SAMPLE(tot, smp, var, count) \
+	for (i=0; i<(count); i++) { \
+		if (i == 0) { \
+			(tot)->var.min = (smp)[i].var; \
+		} \
+		(tot)->var.total += (smp)[i].var; \
+		(tot)->var.min = MIN((smp)[i].var, (tot)->var.min); \
+		(tot)->var.max = MAX((smp)[i].var, (tot)->var.max); \
+	} \
+	(tot)->var.ave = (tot)->var.total / (count);
+
+/*
+ * Macro to increment a sample in a running total
+ * -- Running total means first sample is initial conditions
+ *	(assumes consistent naming convention)
+ *
+ * NOTE(review): sample 0 is skipped by the loop but the average still
+ * divides by count, not count-1 -- confirm that is intended.
+ *
+ * Parameters
+ *	tot	total pointer
+ *	smp	sample pointer
+ *	var	variable name in sample
+ *	count	number of items in series (must be non-zero)
+ */
+#define INCR_RT_SAMPLE(tot, smp, var, count) \
+	for (i=1; i<(count); i++) { \
+		if (i == 1) { \
+			(tot)->var.min = (smp)[i].var; \
+		} \
+		(tot)->var.total += (smp)[i].var; \
+		(tot)->var.min = MIN((smp)[i].var, (tot)->var.min); \
+		(tot)->var.max = MAX((smp)[i].var, (tot)->var.max); \
+	} \
+	(tot)->var.ave = (tot)->var.total / (count);
+
+/* Macro to put an int min,ave,max,total for a variable to extract file
+ *
+ * Parameters
+ *	fp	file descriptor
+ *	var	variable name
+ *	prfx	prefix for series (usually ",")
+ */
+#define PUT_UINT_SUM(fp, var, prfx) \
+	fprintf(fp, "%s%ld,%ld,%ld,%ld", prfx, \
+		(var).min, (var).ave, (var).max, (var).total);
+/* Macro
to put a double min,ave,max,total for a variable to extract file
+ *
+ * Parameters
+ *	fp	file descriptor
+ *	var	variable name
+ *	prfx	prefix for series (usually ",")
+ */
+#define PUT_DBL_SUM(fp, var, prfx) \
+	fprintf(fp, "%s%.3f,%.3f,%.3f,%.3f", prfx, \
+		var.min, var.ave, var.max, var.total);
+
+
+// ============================================================================
+// Routines supporting Energy Data type
+// ============================================================================
+
+/* Size in bytes of one in-memory energy sample. */
+static int _energy_dataset_size(void)
+{
+	return sizeof(profile_energy_t);
+}
+
+/* Build the HDF5 compound type describing profile_energy_t in memory.
+ * Returns the type id, or -1 on failure. */
+static hid_t _energy_create_memory_datatype(void)
+{
+	hid_t mem_type;
+
+	mem_type = H5Tcreate(H5T_COMPOUND, sizeof(profile_energy_t));
+	if (mem_type < 0) {
+		debug3("PROFILE: failed to create Energy memory datatype");
+		return -1;
+	}
+	MEM_ADD_DATE_TIME(mem_type, "Date_Time", profile_energy_t, tod);
+	MEM_ADD_UINT64(mem_type, "Time", profile_energy_t, time);
+	MEM_ADD_UINT64(mem_type, "Power", profile_energy_t, power);
+	MEM_ADD_UINT64(mem_type, "CPU_Frequency", profile_energy_t, cpu_freq);
+
+	return mem_type;
+}
+
+/* Build the on-disk compound type for an energy sample: the date string
+ * at offset 0 followed by three 8-byte fields.  Returns -1 on failure. */
+static hid_t _energy_create_file_datatype(void)
+{
+	hid_t file_type;
+
+	file_type = H5Tcreate(H5T_COMPOUND, (TOD_LEN+3*8));
+	if (file_type < 0) {
+		debug3("PROFILE: failed to create Energy file datatype");
+		return -1;
+	}
+	moffset = TOD_LEN;
+	FILE_ADD_DATE_TIME(file_type, "Date_Time", 0);
+	FILE_ADD_UINT64(file_type, "Time");
+	FILE_ADD_UINT64(file_type, "Power");
+	FILE_ADD_UINT64(file_type, "CPU_Frequency");
+
+	return file_type;
+}
+
+/* In-memory compound type for the energy summary (profile_energy_s_t).
+ * Returns -1 on failure. */
+static hid_t _energy_s_create_memory_datatype(void)
+{
+	hid_t mem_type;
+
+	mem_type = H5Tcreate(H5T_COMPOUND, sizeof(profile_energy_s_t));
+	if (mem_type < 0) {
+		debug3("PROFILE: failed to create Energy_s memory datatype");
+		return -1;
+	}
+	MEM_ADD_DATE_TIME(mem_type, "Start Time",
+			  profile_energy_s_t, start_time);
+	MEM_ADD_UINT64(mem_type, "Elapsed Time",
+		       profile_energy_s_t, elapsed_time);
+	MEM_ADD_UINT64(mem_type, "Min Power", profile_energy_s_t, power.min);
+	MEM_ADD_UINT64(mem_type, "Ave Power", profile_energy_s_t, power.ave);
+	MEM_ADD_UINT64(mem_type, "Max Power", profile_energy_s_t, power.max);
+	MEM_ADD_UINT64(mem_type, "Total Power",
+		       profile_energy_s_t, power.total);
+	MEM_ADD_UINT64(mem_type, "Min CPU Frequency",
+		       profile_energy_s_t, cpu_freq.min);
+	MEM_ADD_UINT64(mem_type, "Ave CPU Frequency",
+		       profile_energy_s_t, cpu_freq.ave);
+	MEM_ADD_UINT64(mem_type, "Max CPU Frequency",
+		       profile_energy_s_t, cpu_freq.max);
+	MEM_ADD_UINT64(mem_type, "Total CPU Frequency",
+		       profile_energy_s_t, cpu_freq.total);
+
+	return mem_type;
+}
+
+/* On-disk compound type for the energy summary: date string followed by
+ * nine 8-byte fields.  Returns -1 on failure. */
+static hid_t _energy_s_create_file_datatype(void)
+{
+	hid_t file_type;
+
+	file_type = H5Tcreate(H5T_COMPOUND, (TOD_LEN+9*8));
+	if (file_type < 0) {
+		debug3("PROFILE: failed to create Energy_s file datatype");
+		return -1;
+	}
+	moffset = TOD_LEN;
+	FILE_ADD_DATE_TIME(file_type, "Start Time", 0);
+	FILE_ADD_UINT64(file_type, "Elapsed Time");
+	FILE_ADD_UINT64(file_type, "Min Power");
+	FILE_ADD_UINT64(file_type, "Ave Power");
+	FILE_ADD_UINT64(file_type, "Max Power");
+	FILE_ADD_UINT64(file_type, "Total Power");
+	FILE_ADD_UINT64(file_type, "Min CPU Frequency");
+	FILE_ADD_UINT64(file_type, "Ave CPU Frequency");
+	FILE_ADD_UINT64(file_type, "Max CPU Frequency");
+	FILE_ADD_UINT64(file_type, "Total CPU Frequency");
+
+	return file_type;
+}
+
+/* Allocate room for n_samples energy samples. */
+static void *_energy_init_job_series(int n_samples)
+{
+	profile_energy_t *series =
+		xmalloc(n_samples * sizeof(profile_energy_t));
+
+	if (!series) {
+		debug3("PROFILE: failed to get memory for energy data");
+		return NULL;
+	}
+	return series;
+}
+
+/* Return a newly allocated array holding a copy of each sample's
+ * time-of-day string. */
+static char** _energy_get_series_tod(void* data, int nsmp)
+{
+	profile_energy_t *series = data;
+	char **tods = xmalloc(nsmp*sizeof(char*));
+	int s;
+
+	if (!tods) {
+		info("Failed to get memory for energy tod");
+		return NULL;
+	}
+	for (s = 0; s < nsmp; s++)
+		tods[s] = xstrdup(series[s].tod);
+	return tods;
+}
+
+/* Return a newly allocated array of doubles holding the item named
+ * data_name ("Time", "Power" or "CPU_Frequency", case-insensitive) from
+ * each sample; NULL for any other name. */
+static double* _energy_get_series_values(char* data_name, void* data, int nsmp)
+{
+	profile_energy_t *series = data;
+	double *values = xmalloc(nsmp*sizeof(double));
+	int s;
+
+	if (!values) {
+		info("PROFILE: Failed to get memory for energy data");
+		return NULL;
+	}
+	if (!strcasecmp(data_name,"Time")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = (double) series[s].time;
+		return values;
+	}
+	if (!strcasecmp(data_name,"Power")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = (double) series[s].power;
+		return values;
+	}
+	if (!strcasecmp(data_name,"CPU_Frequency")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = (double) series[s].cpu_freq;
+		return values;
+	}
+	xfree(values);
+	info("PROFILE: %s is invalid data item for energy data", data_name);
+	return NULL;
+}
+
+/* Convert one raw energy sample (cur) into its merged form (buf): fill in
+ * the time-of-day string and make the time relative to the first sample
+ * of the series (prior == NULL marks that first sample). */
+static void _energy_merge_step_series(
+	hid_t group, void *prior, void *cur, void *buf)
+{
+	profile_energy_t *sample = cur;
+	profile_energy_t *merged = buf;
+	struct tm *ts = localtime(&sample->time);
+
+	strftime(merged->tod, TOD_LEN, TOD_FMT, ts);
+	if (!prior) {
+		/* First sample: remember where the series starts. */
+		seriesStart = sample->time;
+		merged->time = 0;
+	} else {
+		merged->time = sample->time - seriesStart;
+	}
+	merged->power = sample->power;
+	merged->cpu_freq = sample->cpu_freq;
+}
+
+/* Summarize n_samples energy samples (treated as a difference series)
+ * into a newly allocated profile_energy_s_t; NULL when n_samples < 1. */
+static void *_energy_series_total(int n_samples, void *data)
+{
+	profile_energy_t *energy_data = data;
+	profile_energy_s_t *total;
+
+	if (n_samples < 1)
+		return NULL;
+	total = xmalloc(sizeof(profile_energy_s_t));
+	if (!total) {
+		error("PROFILE: Out of memory getting energy total");
+		return NULL;
+	}
+	strcpy(total->start_time, energy_data[0].tod);
+	total->elapsed_time = energy_data[n_samples-1].time;
+	INCR_DIF_SAMPLE(total, energy_data, power, n_samples);
+	INCR_DIF_SAMPLE(total, energy_data, cpu_freq, n_samples);
+	return total;
+}
+
+/* Write the energy samples in data (size_data bytes) to fp as CSV rows,
+ * preceded by a header row when put_header is set. */
+static void _energy_extract_series(
+	FILE* fp, bool put_header, int job, int step,
+	char *node, char *series, void *data, int size_data)
+{
+	profile_energy_t *rows = data;
+	int n_rows = size_data / sizeof(profile_energy_t);
+	int r;
+
+	if (put_header) {
+		fprintf(fp, "Job,Step,Node,Series,Date_Time,Elapsed_Time,"
+			"Power, CPU_Frequency\n");
+	}
+	for (r = 0; r < n_rows; r++) {
+		fprintf(fp, "%d,%d,%s,%s,%s,%ld,%ld,%ld\n", job, step, node,
+			series, rows[r].tod, rows[r].time,
+			rows[r].power, rows[r].cpu_freq);
+	}
+}
+
+/* Write one energy summary record to fp as a CSV row, preceded by a
+ * header row when put_header is set. */
+static void _energy_extract_total(
+	FILE* fp, bool put_header, int job, int step,
+	char *node, char *series, void *data, int size_data)
+{
+	profile_energy_s_t *summary = data;
+
+	if (put_header) {
+		fprintf(fp, "Job,Step,Node,Series,Start_Time,Elapsed_Time,"
+			"Min_Power,Ave_Power,Max_Power,Total_Power,"
+			"Min_CPU Frequency,Ave_CPU Frequency,"
+			"Max_CPU Frequency,Total_CPU Frequency\n");
+	}
+	fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series,
+		summary->start_time, summary->elapsed_time);
+	PUT_UINT_SUM(fp, summary->power, ",");
+	PUT_UINT_SUM(fp, summary->cpu_freq, ",");
+	fprintf(fp, "\n");
+}
+
+/* Allocate an operations table wired to the energy routines above. */
+static hdf5_api_ops_t* _energy_profile_factory(void)
+{
+	hdf5_api_ops_t *ops = xmalloc(sizeof(hdf5_api_ops_t));
+
+	ops->dataset_size = _energy_dataset_size;
+	ops->create_memory_datatype = _energy_create_memory_datatype;
+	ops->create_file_datatype = _energy_create_file_datatype;
+	ops->create_s_memory_datatype = _energy_s_create_memory_datatype;
+	ops->create_s_file_datatype = _energy_s_create_file_datatype;
+	ops->init_job_series = _energy_init_job_series;
+	ops->get_series_tod = _energy_get_series_tod;
+	ops->get_series_values = _energy_get_series_values;
+	ops->merge_step_series = _energy_merge_step_series;
+	ops->series_total = _energy_series_total;
+	ops->extract_series = _energy_extract_series;
+	ops->extract_total = _energy_extract_total;
+	return ops;
+}
+
+
+// ============================================================================
+// Routines supporting I/O Data type
+// ============================================================================
+
+/* Size in bytes of one in-memory I/O sample. */
+static int _io_dataset_size(void)
+{
+	return sizeof(profile_io_t);
+}
+
+/* Build the HDF5 compound type describing profile_io_t in memory.
+ * Returns the type id, or -1 on failure. */
+static hid_t _io_create_memory_datatype(void)
+{
+	hid_t mem_type = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_t));
+
+	if (mem_type < 0) {
+		debug3("PROFILE: failed to create IO memory datatype");
+		return -1;
+	}
+	MEM_ADD_DATE_TIME(mem_type, "Date_Time", profile_io_t, tod);
+	MEM_ADD_UINT64(mem_type, "Time", profile_io_t, time);
+	MEM_ADD_UINT64(mem_type, "Reads", profile_io_t, reads);
+	MEM_ADD_DBL(mem_type, "Megabytes_Read", profile_io_t, read_size);
+	MEM_ADD_UINT64(mem_type, "Writes", profile_io_t, writes);
+	MEM_ADD_DBL(mem_type, "Megabytes_Write", profile_io_t, write_size);
+	return mem_type;
+}
+
+/* On-disk compound type for an I/O sample: date string followed by five
+ * 8-byte fields.  Returns -1 on failure. */
+static hid_t _io_create_file_datatype(void)
+{
+	hid_t file_type = H5Tcreate(H5T_COMPOUND, TOD_LEN+5*8);
+
+	if (file_type < 0) {
+		debug3("PROFILE: failed to create IO file datatype");
+		return -1;
+	}
+	moffset = TOD_LEN;
+	FILE_ADD_DATE_TIME(file_type, "Date_Time", 0);
+	FILE_ADD_UINT64(file_type, "Time");
+	FILE_ADD_UINT64(file_type, "Reads");
+	FILE_ADD_DBL(file_type, "Megabytes_Read");
+	FILE_ADD_UINT64(file_type, "Writes");
+	FILE_ADD_DBL(file_type, "Megabytes_Write");
+
+	return file_type;
+}
+
+/* In-memory compound type for the I/O summary (profile_io_s_t).
+ * Returns -1 on failure. */
+static hid_t _io_s_create_memory_datatype(void)
+{
+	hid_t mem_type = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_s_t));
+
+	if (mem_type < 0) {
+		debug3("PROFILE: failed to create IO memory datatype");
+		return -1;
+	}
+	MEM_ADD_DATE_TIME(mem_type, "Start Time", profile_io_s_t, start_time);
+	MEM_ADD_UINT64(mem_type, "Elapsed Time", profile_io_s_t, elapsed_time);
+	MEM_ADD_UINT64(mem_type, "Min Reads", profile_io_s_t, reads.min);
+	MEM_ADD_UINT64(mem_type, "Ave Reads", profile_io_s_t, reads.ave);
+	MEM_ADD_UINT64(mem_type, "Max Reads", profile_io_s_t, reads.max);
+	MEM_ADD_UINT64(mem_type, "Total Reads", profile_io_s_t, reads.total);
+	MEM_ADD_DBL(mem_type, "Min Read Megabytes",
+		    profile_io_s_t, read_size.min);
+	MEM_ADD_DBL(mem_type, "Ave Read Megabytes",
+		    profile_io_s_t, read_size.ave);
+	MEM_ADD_DBL(mem_type, "Max Read Megabytes",
+		    profile_io_s_t, read_size.max);
+	MEM_ADD_DBL(mem_type, "Total Read Megabytes",
+		    profile_io_s_t, read_size.total);
+	MEM_ADD_UINT64(mem_type, "Min Writes", profile_io_s_t, writes.min);
+	MEM_ADD_UINT64(mem_type, "Ave Writes", profile_io_s_t, writes.ave);
+	MEM_ADD_UINT64(mem_type, "Max Writes", profile_io_s_t, writes.max);
+	MEM_ADD_UINT64(mem_type, "Total Writes", profile_io_s_t, writes.total);
+	MEM_ADD_DBL(mem_type, "Min Write Megabytes",
+		    profile_io_s_t, write_size.min);
+	MEM_ADD_DBL(mem_type, "Ave Write Megabytes",
+		    profile_io_s_t, write_size.ave);
+	MEM_ADD_DBL(mem_type, "Max Write Megabytes",
+		    profile_io_s_t, write_size.max);
+	MEM_ADD_DBL(mem_type, "Total Write Megabytes",
+		    profile_io_s_t, write_size.total);
+
+	return mem_type;
+}
+
+/* On-disk compound type for the I/O summary: date string followed by
+ * seventeen 8-byte fields.  Returns -1 on failure. */
+static hid_t _io_s_create_file_datatype(void)
+{
+	hid_t file_type = H5Tcreate(H5T_COMPOUND, TOD_LEN+17*8);
+
+	if (file_type < 0) {
+		debug3("PROFILE: failed to create IO file datatype");
+		return -1;
+	}
+	moffset = TOD_LEN;
+	FILE_ADD_DATE_TIME(file_type, "Start Time", 0);
+	FILE_ADD_UINT64(file_type, "Elapsed Time");
+	FILE_ADD_UINT64(file_type, "Min Reads");
+	FILE_ADD_UINT64(file_type, "Ave Reads");
+	FILE_ADD_UINT64(file_type, "Max Reads");
+	FILE_ADD_UINT64(file_type, "Total Reads");
+	FILE_ADD_DBL(file_type, "Min Read Megabytes");
+	FILE_ADD_DBL(file_type, "Ave Read Megabytes");
+	FILE_ADD_DBL(file_type, "Max Read Megabytes");
+	FILE_ADD_DBL(file_type, "Total Read Megabytes");
+	FILE_ADD_UINT64(file_type, "Min Writes");
+	FILE_ADD_UINT64(file_type, "Ave Writes");
+	FILE_ADD_UINT64(file_type, "Max Writes");
+	FILE_ADD_UINT64(file_type, "Total Writes");
+	FILE_ADD_DBL(file_type, "Min Write Megabytes");
+	FILE_ADD_DBL(file_type, "Ave Write Megabytes");
+	FILE_ADD_DBL(file_type, "Max Write Megabytes");
+	FILE_ADD_DBL(file_type, "Total Write Megabytes");
+
+	return file_type;
+}
+
+/* Allocate room for n_samples I/O samples. */
+static void *_io_init_job_series(int n_samples)
+{
+	profile_io_t *series = xmalloc(n_samples * sizeof(profile_io_t));
+
+	if (!series) {
+		debug3("PROFILE: failed to get memory for combined io data");
+		return NULL;
+	}
+	return series;
+}
+
+/* Return a newly allocated array holding a copy of each sample's
+ * time-of-day string. */
+static char** _io_get_series_tod(void* data, int nsmp)
+{
+	profile_io_t *series = data;
+	char **tods = xmalloc(nsmp*sizeof(char*));
+	int s;
+
+	if (!tods) {
+		info("Failed to get memory for io tod");
+		return NULL;
+	}
+	for (s = 0; s < nsmp; s++)
+		tods[s] = xstrdup(series[s].tod);
+	return tods;
+}
+
+/* Return a newly allocated array of doubles holding the item named
+ * data_name ("Time", "Reads", "Megabytes_Read", "Writes" or
+ * "Megabytes_Write", case-insensitive) from each sample; NULL for any
+ * other name. */
+static double* _io_get_series_values(char* data_name, void* data, int nsmp)
+{
+	profile_io_t *series = data;
+	double *values = xmalloc(nsmp*sizeof(double));
+	int s;
+
+	if (!values) {
+		info("PROFILE: Failed to get memory for io data");
+		return NULL;
+	}
+	if (!strcasecmp(data_name,"Time")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = (double) series[s].time;
+		return values;
+	}
+	if (!strcasecmp(data_name,"Reads")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = (double) series[s].reads;
+		return values;
+	}
+	if (!strcasecmp(data_name,"Megabytes_Read")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = series[s].read_size;
+		return values;
+	}
+	if (!strcasecmp(data_name,"Writes")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = (double) series[s].writes;
+		return values;
+	}
+	if (!strcasecmp(data_name,"Megabytes_Write")) {
+		for (s = 0; s < nsmp; s++)
+			values[s] = series[s].write_size;
+		return values;
+	}
+	xfree(values);
+	info("PROFILE: %s is invalid data item for io data", data_name);
+	return NULL;
+}
+
+/* Convert one raw I/O sample (cur) into its merged form (buf): fill in
+ * the time-of-day string and express time and every counter relative to
+ * the first sample of the series (prior == NULL marks that first sample,
+ * whose values are remembered in the statics below). */
+static void _io_merge_step_series(
+	hid_t group, void *prior, void *cur, void *buf)
+{
+	static uint64_t start_reads = 0;
+	static uint64_t start_writes = 0;
+	static double start_read_size = 0;
+	static double start_write_size = 0;
+	profile_io_t *sample = cur;
+	profile_io_t *merged = buf;
+	struct tm *ts = localtime(&sample->time);
+
+	strftime(merged->tod, TOD_LEN, TOD_FMT, ts);
+	if (!prior) {
+		/* First sample: record the baseline, emit zeros. */
+		seriesStart = sample->time;
+		start_reads = sample->reads;
+		start_writes = sample->writes;
+		start_read_size = sample->read_size;
+		start_write_size = sample->write_size;
+		merged->time = 0;
+		merged->reads = 0;
+		merged->writes = 0;
+		merged->read_size = 0;
+		merged->write_size = 0;
+	} else {
+		merged->time = sample->time - seriesStart;
+		merged->reads = sample->reads - start_reads;
+		merged->writes = sample->writes - start_writes;
+		merged->read_size = sample->read_size - start_read_size;
+		merged->write_size = sample->write_size - start_write_size;
+	}
+}
+
+/* Summarize n_samples I/O samples into a newly allocated profile_io_s_t;
+ * NULL when n_samples < 1.
+ * NOTE(review): the original comment describes a running total, yet every
+ * item is accumulated with INCR_DIF_SAMPLE -- confirm which is intended. */
+static void *_io_series_total(int n_samples, void *data)
+{
+	profile_io_t *io_data = data;
+	profile_io_s_t *total;
+
+	if (n_samples < 1)
+		return NULL;
+	total = xmalloc(sizeof(profile_io_s_t));
+	if (!total) {
+		error("PROFILE: Out of memory getting I/O total");
+		return NULL;
+	}
+	strcpy(total->start_time, io_data[0].tod);
+	total->elapsed_time = io_data[n_samples-1].time;
+	INCR_DIF_SAMPLE(total, io_data, reads, n_samples);
+	INCR_DIF_SAMPLE(total, io_data, read_size, n_samples);
+	INCR_DIF_SAMPLE(total, io_data, writes, n_samples);
+	INCR_DIF_SAMPLE(total, io_data, write_size, n_samples);
+	return total;
+}
+
+/* Write the I/O samples in data (size_data bytes) to fp as CSV rows,
+ * preceded by a header row when put_header is set. */
+static void _io_extract_series(
+	FILE* fp, bool put_header, int job, int step,
+	char *node, char *series, void *data, int size_data)
+{
+	profile_io_t *rows = data;
+	int n_rows = size_data / sizeof(profile_io_t);
+	int r;
+
+	if (put_header) {
+		fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time,"
+			"Reads,Read Megabytes,Writes,Write Megabytes\n");
+	}
+	for (r = 0; r < n_rows; r++) {
+		fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n",
+			job, step, node, series,
+			rows[r].tod, rows[r].time,
+			rows[r].reads, rows[r].read_size,
+			rows[r].writes, rows[r].write_size);
+	}
+}
+
+/* Write one I/O summary record to fp as a CSV row, preceded by a header
+ * row when put_header is set. */
+static void _io_extract_total(
+	FILE* fp, bool put_header, int job, int step,
+	char *node, char *series, void *data, int size_data)
+{
+	profile_io_s_t *summary = data;
+
+	if (put_header) {
+		fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time,"
+			"Min_Reads,Ave_Reads,Max_Reads,Total_Reads,"
+			"Min_Read_Megabytes,Ave_Read_Megabytes,"
+			"Max_Read_Megabytes,Total_Read_Megabytes,"
+			"Min_Writes,Ave_Writes,Max_Writes,Total_Writes,"
+			"Min_Write_Megabytes,Ave_Write_Megabytes,"
+			"Max_Write_Megabytes,Total_Write_Megabytes\n");
+	}
+	fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series,
+		summary->start_time, summary->elapsed_time);
+	PUT_UINT_SUM(fp, summary->reads, ",");
+	PUT_DBL_SUM(fp, summary->read_size, ",");
+	PUT_UINT_SUM(fp, summary->writes, ",");
+	PUT_DBL_SUM(fp, summary->write_size, ",");
+	fprintf(fp, "\n");
+}
+
+/* Allocate an operations table wired to the I/O routines. */
+static hdf5_api_ops_t* _io_profile_factory(void)
+{
+	hdf5_api_ops_t *ops = xmalloc(sizeof(hdf5_api_ops_t));
+
+	ops->dataset_size = _io_dataset_size;
+	ops->create_memory_datatype = _io_create_memory_datatype;
+	ops->create_file_datatype = _io_create_file_datatype;
+	ops->create_s_memory_datatype = _io_s_create_memory_datatype;
+	ops->create_s_file_datatype = _io_s_create_file_datatype;
+	ops->init_job_series = _io_init_job_series;
+	ops->get_series_tod = _io_get_series_tod;
+	ops->get_series_values = _io_get_series_values;
+	ops->merge_step_series = _io_merge_step_series;
+	ops->series_total = _io_series_total;
+	ops->extract_series = _io_extract_series;
+	ops->extract_total = _io_extract_total;
+	return ops;
+}
+
+
+// ============================================================================
+// Routines supporting Network Data type
+// ============================================================================
+
+/* Size in bytes of one in-memory network sample. */
+static int _network_dataset_size(void)
+{
+	return sizeof(profile_network_t);
+}
+
+/* Build the HDF5 compound type describing profile_network_t in memory.
+ * Returns the type id, or -1 on failure. */
+static hid_t _network_create_memory_datatype(void)
+{
+	hid_t mem_type;
+
+	mem_type = H5Tcreate(H5T_COMPOUND, sizeof(profile_network_t));
+	if (mem_type < 0) {
+		debug3("PROFILE: failed to create Network memory datatype");
+		return -1;
+	}
+	MEM_ADD_DATE_TIME(mem_type, "Date_Time", profile_network_t, tod);
+	MEM_ADD_UINT64(mem_type, "Time", profile_network_t, time);
+	MEM_ADD_UINT64(mem_type, "Packets_In",
+		       profile_network_t, packets_in);
+	MEM_ADD_DBL(mem_type, "Megabytes_In", profile_network_t, size_in);
+	MEM_ADD_UINT64(mem_type, "Packets_Out",
+		       profile_network_t, packets_out);
+	MEM_ADD_DBL(mem_type, "Megabytes_Out", profile_network_t, size_out);
+
+	return mem_type;
+}
+
+/* On-disk compound type for a network sample: date string followed by
+ * five 8-byte fields.  Returns -1 on failure. */
+static hid_t _network_create_file_datatype(void)
+{
+	hid_t file_type;
+
+	file_type = H5Tcreate(H5T_COMPOUND, TOD_LEN+5*8);
+	if (file_type < 0) {
+		debug3("PROFILE: failed to create Network file datatype");
+		return -1;
+	}
+	moffset = TOD_LEN;
+	FILE_ADD_DATE_TIME(file_type, "Date_Time", 0);
+	FILE_ADD_UINT64(file_type, "Time");
+	FILE_ADD_UINT64(file_type, "Packets_In");
+	FILE_ADD_DBL(file_type, "Megabytes_In");
+	FILE_ADD_UINT64(file_type, "Packets_Out");
+	FILE_ADD_DBL(file_type, "Megabytes_Out");
+
+	return file_type;
+}
+
+/* In-memory compound type for the network summary (profile_network_s_t).
+ * Returns -1 on failure. */
+static hid_t _network_s_create_memory_datatype(void)
+{
+	hid_t mem_type = H5Tcreate(H5T_COMPOUND, sizeof(profile_network_s_t));
+
+	if (mem_type < 0) {
+		debug3("PROFILE: failed to create Network memory datatype");
+		return -1;
+	}
+	MEM_ADD_DATE_TIME(mem_type, "Start Time",
+			  profile_network_s_t, start_time);
+	MEM_ADD_UINT64(mem_type, "Elapsed Time",
+		       profile_network_s_t, elapsed_time);
+	MEM_ADD_UINT64(mem_type, "Min Packets In",
+		       profile_network_s_t, packets_in.min);
+	MEM_ADD_UINT64(mem_type, "Ave Packets In",
+		       profile_network_s_t, packets_in.ave);
+	MEM_ADD_UINT64(mem_type, "Max Packets In",
+		       profile_network_s_t, packets_in.max);
+	MEM_ADD_UINT64(mem_type, "Total Packets In",
+		       profile_network_s_t, packets_in.total);
+	MEM_ADD_DBL(mem_type, "Min Megabytes In",
+		    profile_network_s_t, size_in.min);
+	MEM_ADD_DBL(mem_type, "Ave Megabytes In",
+		    profile_network_s_t, size_in.ave);
+	MEM_ADD_DBL(mem_type, "Max Megabytes In",
+		    profile_network_s_t, size_in.max);
+	MEM_ADD_DBL(mem_type, "Total Megabytes In",
+		    profile_network_s_t, size_in.total);
+	MEM_ADD_UINT64(mem_type, "Min Packets Out",
+		       profile_network_s_t, packets_out.min);
+	MEM_ADD_UINT64(mem_type, "Ave Packets Out",
+		       profile_network_s_t, packets_out.ave);
+	MEM_ADD_UINT64(mem_type, "Max Packets Out",
+		       profile_network_s_t, packets_out.max);
+	MEM_ADD_UINT64(mem_type, "Total Packets Out",
+		       profile_network_s_t, packets_out.total);
+	MEM_ADD_DBL(mem_type, "Min Megabytes Out",
+		    profile_network_s_t, size_out.min);
+	MEM_ADD_DBL(mem_type, "Ave Megabytes Out",
+		    profile_network_s_t, size_out.ave);
+	MEM_ADD_DBL(mem_type, "Max Megabytes Out",
+		    profile_network_s_t, size_out.max);
+	MEM_ADD_DBL(mem_type, "Total Megabytes Out",
+		    profile_network_s_t, size_out.total);
+
+	return mem_type;
+}
+
+static hid_t _network_s_create_file_datatype(void)
+{
+	hid_t ftyp_network = H5Tcreate(H5T_COMPOUND, TOD_LEN+17*8);
+	if (ftyp_network < 0) {
+		debug3("PROFILE: failed to create Network file datatype");
+		return -1;
+	}
+	moffset = TOD_LEN;
+	FILE_ADD_DATE_TIME(ftyp_network, "Start Time", 0);
+	FILE_ADD_UINT64(ftyp_network, "Elapsed Time");
+	FILE_ADD_UINT64(ftyp_network, "Min Packets In");
+	FILE_ADD_UINT64(ftyp_network, "Ave Packets In");
+	FILE_ADD_UINT64(ftyp_network, "Max Packets In");
+	FILE_ADD_UINT64(ftyp_network, "Total Packets In");
+	FILE_ADD_DBL(ftyp_network, "Min Megabytes In");
+	FILE_ADD_DBL(ftyp_network, "Ave Megabytes In");
+	FILE_ADD_DBL(ftyp_network, "Max Megabytes In");
+	FILE_ADD_DBL(ftyp_network, "Total Megabytes In");
+	FILE_ADD_UINT64(ftyp_network, "Min Packets Out");
+	FILE_ADD_UINT64(ftyp_network, "Ave Packets Out");
+	FILE_ADD_UINT64(ftyp_network, "Max Packets Out");
+ FILE_ADD_UINT64(ftyp_network, "Total Packets Out"); + FILE_ADD_DBL(ftyp_network, "Min Megabytes Out"); + FILE_ADD_DBL(ftyp_network, "Ave Megabytes Out"); + FILE_ADD_DBL(ftyp_network, "Max Megabytes Out"); + FILE_ADD_DBL(ftyp_network, "Total Megabytes Out"); + + return ftyp_network; +} + +static void *_network_init_job_series(int n_samples) +{ + profile_network_t* network_data; + + network_data = xmalloc(n_samples * sizeof(profile_network_t)); + if (network_data == NULL) { + debug3("PROFILE: failed to get memory for network data"); + return NULL; + } + return (void*) network_data; +} + +static char** _network_get_series_tod(void* data, int nsmp) +{ + int ix; + char **tod_values = NULL; + profile_network_t* network_series = (profile_network_t*) data; + tod_values = (char**) xmalloc(nsmp*sizeof(char*)); + if (tod_values == NULL) { + info("Failed to get memory for network tod"); + return NULL; + } + for (ix=0; ix < nsmp; ix++) { + tod_values[ix] = xstrdup(network_series[ix].tod); + } + return tod_values; +} + +static double* _network_get_series_values(char* data_name, void* data, int nsmp) +{ + int ix; + profile_network_t* network_series = (profile_network_t*) data; + double *network_values = NULL; + network_values = xmalloc(nsmp*sizeof(double)); + if (network_values == NULL) { + info("PROFILE: Failed to get memory for network data"); + return NULL; + } + if (strcasecmp(data_name,"Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = (double) network_series[ix].time; + + } + return network_values; + } else if (strcasecmp(data_name,"Packets_In") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = + (double) network_series[ix].packets_in; + + } + return network_values; + } else if (strcasecmp(data_name,"Megabytes_In") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = network_series[ix].size_in; + + } + return network_values; + } else if (strcasecmp(data_name,"Packets_Out") == 0) { + for (ix=0; ix < nsmp; ix++) { + 
network_values[ix] = + (double) network_series[ix].packets_out; + + } + return network_values; + } else if (strcasecmp(data_name,"Megabytes_Out") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = network_series[ix].size_out; + + } + return network_values; + } + xfree(network_values); + info("PROFILE: %s is invalid data item for network data", data_name); + return NULL; +} + +static void _network_merge_step_series( + hid_t group, void *prior, void *cur, void *buf) +{ +// This is a difference series + profile_network_t* prf_cur = (profile_network_t*) cur; + profile_network_t* prf_buf = (profile_network_t*) buf; + struct tm *ts = localtime(&prf_cur->time); + strftime(prf_buf->tod, TOD_LEN, TOD_FMT, ts); + if (prior == NULL) { + // First sample. + seriesStart = prf_cur->time; + prf_buf->time = 0; + } else { + prf_buf->time = prf_cur->time - seriesStart; + } + prf_buf->packets_in = prf_cur->packets_in; + prf_buf->packets_out = prf_cur->packets_out; + prf_buf->size_in = prf_cur->size_in; + prf_buf->size_out = prf_cur->size_out; + return; +} + +static void *_network_series_total(int n_samples, void *data) +{ + profile_network_t* network_data; + profile_network_s_t* total; + if (n_samples < 1) + return NULL; + network_data = (profile_network_t*) data; + total = xmalloc(sizeof(profile_network_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting network total"); + return NULL; + } + // Assuming network series are a running total, and the first + // sample just sets the initial conditions + strcpy(total->start_time, network_data[0].tod); + total->elapsed_time = network_data[n_samples-1].time; + INCR_DIF_SAMPLE(total, network_data, packets_in, n_samples); + INCR_DIF_SAMPLE(total, network_data, size_in, n_samples); + INCR_DIF_SAMPLE(total, network_data, packets_out, n_samples); + INCR_DIF_SAMPLE(total, network_data, size_out, n_samples); + return total; +} + +static void _network_extract_series( + FILE* fp, bool put_header, int job, int step, + 
char *node, char *series, void *data, int size_data) +{ + int n_items, ix; + profile_network_t* network_data = (profile_network_t*) data; + + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time," + "Packets_In,MegaBytes_In,Packets_Out,MegaBytes_Out\n"); + } + n_items = size_data / sizeof(profile_network_t); + for (ix=0; ix < n_items; ix++) { + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", + job, step, node,series, + network_data[ix].tod, network_data[ix].time, + network_data[ix].packets_in, network_data[ix].size_in, + network_data[ix].packets_out, + network_data[ix].size_out); + } + return; +} + +static void _network_extract_total( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + profile_network_s_t* network_data = (profile_network_s_t*) data; + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min_Packets_In,Ave_Packets_In," + "Max_Packets_In,Total_Packets_In," + "Min_Megabytes_In,Ave_Megabytes_In," + "Max_Megabytes_In,Total_Megabytes_In," + "Min_Packets_Out,Ave_Packets_Out," + "Max_Packets_Out,Total_Packets_Out," + "Min_Megabytes_Out,Ave_Megabytes_Out," + "Max_Megabytes_Out,Total_Megabytes_Out\n"); + } + fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + network_data->start_time, network_data->elapsed_time); + PUT_UINT_SUM(fp, network_data->packets_in, ","); + PUT_DBL_SUM(fp, network_data->size_in, ","); + PUT_UINT_SUM(fp, network_data->packets_out, ","); + PUT_DBL_SUM(fp, network_data->size_out, ","); + fprintf(fp, "\n"); + return; +} + +static hdf5_api_ops_t *_network_profile_factory(void) +{ + hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); + ops->dataset_size = &_network_dataset_size; + ops->create_memory_datatype = &_network_create_memory_datatype; + ops->create_file_datatype = &_network_create_file_datatype; + ops->create_s_memory_datatype = &_network_s_create_memory_datatype; + ops->create_s_file_datatype = 
&_network_s_create_file_datatype; + ops->init_job_series = &_network_init_job_series; + ops->get_series_tod = &_network_get_series_tod; + ops->get_series_values = &_network_get_series_values; + ops->merge_step_series = &_network_merge_step_series; + ops->series_total = &_network_series_total; + ops->extract_series = &_network_extract_series; + ops->extract_total = &_network_extract_total; + return ops; +} + +// ============================================================================ +// Routines supporting Task Data type +// ============================================================================ + +static int _task_dataset_size(void) +{ + return sizeof(profile_task_t); +} + +static hid_t _task_create_memory_datatype(void) +{ + hid_t mtyp_task = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_t)); + if (mtyp_task < 0) { + debug3("PROFILE: failed to create Task memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_task, "Date_Time", profile_task_t, tod); + MEM_ADD_UINT64(mtyp_task, "Time", profile_task_t, time); + MEM_ADD_UINT64(mtyp_task, "CPU_Frequency", profile_task_t, cpu_freq); + MEM_ADD_UINT64(mtyp_task, "CPU_Time", profile_task_t, cpu_time); + MEM_ADD_DBL(mtyp_task, "CPU_Utilization", + profile_task_t, cpu_utilization); + MEM_ADD_UINT64(mtyp_task, "RSS", profile_task_t, rss); + MEM_ADD_UINT64(mtyp_task, "VM_Size", profile_task_t, vm_size); + MEM_ADD_UINT64(mtyp_task, "Pages", profile_task_t, pages); + MEM_ADD_DBL(mtyp_task, "Read_Megabytes", profile_task_t, read_size); + MEM_ADD_DBL(mtyp_task, "Write_Megabytes", profile_task_t, write_size); + + return mtyp_task; +} + +static hid_t _task_create_file_datatype(void) +{ + hid_t ftyp_task = H5Tcreate(H5T_COMPOUND, TOD_LEN+9*8); + if (ftyp_task < 0) { + debug3("PROFILE: failed to create Task file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_task, "Date_Time", 0); + FILE_ADD_UINT64(ftyp_task, "Time"); + FILE_ADD_UINT64(ftyp_task, "CPU_Frequency"); + 
FILE_ADD_UINT64(ftyp_task, "CPU_Time"); + FILE_ADD_DBL(ftyp_task, "CPU_Utilization"); + FILE_ADD_UINT64(ftyp_task, "RSS"); + FILE_ADD_UINT64(ftyp_task, "VM_Size"); + FILE_ADD_UINT64(ftyp_task, "Pages"); + FILE_ADD_DBL(ftyp_task, "Read_Megabytes"); + FILE_ADD_DBL(ftyp_task, "Write_Megabytes"); + + return ftyp_task; +} + +static hid_t _task_s_create_memory_datatype(void) +{ + hid_t mtyp_task = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_s_t)); + if (mtyp_task < 0) { + debug3("PROFILE: failed to create Task memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_task, "Start Time", profile_task_s_t, + start_time); + MEM_ADD_UINT64(mtyp_task, "Elapsed Time", profile_task_s_t, + elapsed_time); + MEM_ADD_UINT64(mtyp_task, "Min CPU Frequency", profile_task_s_t, + cpu_freq.min); + MEM_ADD_UINT64(mtyp_task, "Ave CPU Frequency", profile_task_s_t, + cpu_freq.ave); + MEM_ADD_UINT64(mtyp_task, "Max CPU Frequency", profile_task_s_t, + cpu_freq.max); + MEM_ADD_UINT64(mtyp_task, "Total CPU Frequency", profile_task_s_t, + cpu_freq.total); + MEM_ADD_UINT64(mtyp_task, "Min CPU Time", profile_task_s_t, + cpu_time.min); + MEM_ADD_UINT64(mtyp_task, "Ave CPU Time", profile_task_s_t, + cpu_time.ave); + MEM_ADD_UINT64(mtyp_task, "Max CPU Time", profile_task_s_t, + cpu_time.max); + MEM_ADD_UINT64(mtyp_task, "Total CPU Time", profile_task_s_t, + cpu_time.total); + MEM_ADD_DBL(mtyp_task, "Min CPU Utilization", profile_task_s_t, + cpu_utilization.min); + MEM_ADD_DBL(mtyp_task, "Ave CPU Utilization", profile_task_s_t, + cpu_utilization.ave); + MEM_ADD_DBL(mtyp_task, "Max CPU Utilization", profile_task_s_t, + cpu_utilization.max); + MEM_ADD_DBL(mtyp_task, "Total CPU Utilization", profile_task_s_t, + cpu_utilization.total); + MEM_ADD_UINT64(mtyp_task, "Min RSS", profile_task_s_t, rss.min); + MEM_ADD_UINT64(mtyp_task, "Ave RSS", profile_task_s_t, rss.ave); + MEM_ADD_UINT64(mtyp_task, "Max RSS", profile_task_s_t, rss.max); + MEM_ADD_UINT64(mtyp_task, "Total RSS", profile_task_s_t, rss.total); 
+ MEM_ADD_UINT64(mtyp_task, "Min VM Size", profile_task_s_t, vm_size.min); + MEM_ADD_UINT64(mtyp_task, "Ave VM Size", profile_task_s_t, vm_size.ave); + MEM_ADD_UINT64(mtyp_task, "Max VM Size", profile_task_s_t, vm_size.max); + MEM_ADD_UINT64(mtyp_task, "Total VM Size", + profile_task_s_t, vm_size.total); + MEM_ADD_UINT64(mtyp_task, "Min Pages", profile_task_s_t, pages.min); + MEM_ADD_UINT64(mtyp_task, "Ave Pages", profile_task_s_t, pages.ave); + MEM_ADD_UINT64(mtyp_task, "Max Pages", profile_task_s_t, pages.max); + MEM_ADD_UINT64(mtyp_task, "Total Pages", profile_task_s_t, pages.total); + MEM_ADD_DBL(mtyp_task, "Min Read Megabytes", profile_task_s_t, + read_size.min); + MEM_ADD_DBL(mtyp_task, "Ave Read Megabytes", profile_task_s_t, + read_size.ave); + MEM_ADD_DBL(mtyp_task, "Max Read Megabytes", profile_task_s_t, + read_size.max); + MEM_ADD_DBL(mtyp_task, "Total Read Megabytes", profile_task_s_t, + read_size.total); + MEM_ADD_DBL(mtyp_task, "Min Write Megabytes", profile_task_s_t, + write_size.min); + MEM_ADD_DBL(mtyp_task, "Ave Write Megabytes", profile_task_s_t, + write_size.ave); + MEM_ADD_DBL(mtyp_task, "Max Write Megabytes", profile_task_s_t, + write_size.max); + MEM_ADD_DBL(mtyp_task, "Total Write Megabytes", profile_task_s_t, + write_size.total); + + return mtyp_task; +} + +static hid_t _task_s_create_file_datatype(void) +{ + hid_t ftyp_task = H5Tcreate(H5T_COMPOUND, TOD_LEN+33*8); + if (ftyp_task < 0) { + debug3("PROFILE: failed to create Task file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_task, "Start Time", 0); + FILE_ADD_UINT64(ftyp_task, "Elapsed Time"); + FILE_ADD_UINT64(ftyp_task, "Min CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Ave CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Max CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Total CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Min CPU Time"); + FILE_ADD_UINT64(ftyp_task, "Ave CPU Time"); + FILE_ADD_UINT64(ftyp_task, "Max CPU Time"); + FILE_ADD_UINT64(ftyp_task, 
"Total CPU Time"); + FILE_ADD_DBL(ftyp_task, "Min CPU Utilization"); + FILE_ADD_DBL(ftyp_task, "Ave CPU Utilization"); + FILE_ADD_DBL(ftyp_task, "Max CPU Utilization"); + FILE_ADD_DBL(ftyp_task, "Total CPU Utilization"); + FILE_ADD_UINT64(ftyp_task, "Min RSS"); + FILE_ADD_UINT64(ftyp_task, "Ave RSS"); + FILE_ADD_UINT64(ftyp_task, "Max RSS"); + FILE_ADD_UINT64(ftyp_task, "Total RSS"); + FILE_ADD_UINT64(ftyp_task, "Min VM Size"); + FILE_ADD_UINT64(ftyp_task, "Ave VM Size"); + FILE_ADD_UINT64(ftyp_task, "Max VM Size"); + FILE_ADD_UINT64(ftyp_task, "Total VM Size"); + FILE_ADD_UINT64(ftyp_task, "Min Pages"); + FILE_ADD_UINT64(ftyp_task, "Ave Pages"); + FILE_ADD_UINT64(ftyp_task, "Max Pages"); + FILE_ADD_UINT64(ftyp_task, "Total Pages"); + FILE_ADD_DBL(ftyp_task, "Min Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Ave Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Max Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Total Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Min Write Megabytes"); + FILE_ADD_DBL(ftyp_task, "Ave Write Megabytes"); + FILE_ADD_DBL(ftyp_task, "Max Write Megabytes"); + FILE_ADD_DBL(ftyp_task, "Total Write Megabytes"); + + return ftyp_task; +} + +static void *_task_init_job_series(int n_samples) +{ + profile_task_t* task_data; + task_data = xmalloc(n_samples * sizeof(profile_task_t)); + if (task_data == NULL) { + debug3("PROFILE: failed to get memory for combined task data"); + return NULL; + } + return (void*) task_data; +} + +static char** _task_get_series_tod(void* data, int nsmp) +{ + int ix; + char **tod_values = NULL; + profile_task_t* task_series = (profile_task_t*) data; + tod_values = (char**) xmalloc(nsmp*sizeof(char*)); + if (tod_values == NULL) { + info("Failed to get memory for task tod"); + return NULL; + } + for (ix=0; ix < nsmp; ix++) { + tod_values[ix] = xstrdup(task_series[ix].tod); + } + return tod_values; +} + +static double* _task_get_series_values(char* data_name, void* data, int nsmp) +{ + int ix; + profile_task_t* task_series = 
(profile_task_t*) data; + double *task_values = NULL; + task_values = xmalloc(nsmp*sizeof(double)); + if (task_values == NULL) { + info("PROFILE: Failed to get memory for task data"); + return NULL; + } + if (strcasecmp(data_name,"Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].time; + + } + return task_values; + } else if (strcasecmp(data_name,"CPU_Frequency") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].cpu_freq; + + } + return task_values; + } else if (strcasecmp(data_name,"CPU_Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].cpu_time; + + } + return task_values; + } else if (strcasecmp(data_name,"CPU_Utilization") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = task_series[ix].cpu_utilization; + + } + return task_values; + } else if (strcasecmp(data_name,"RSS") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].rss; + + } + return task_values; + } else if (strcasecmp(data_name,"VM_Size") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].vm_size; + + } + return task_values; + } else if (strcasecmp(data_name,"Pages") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].pages; + + } + return task_values; + } else if (strcasecmp(data_name,"Read_Megabytes") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = task_series[ix].read_size; + + } + return task_values; + } else if (strcasecmp(data_name,"Write_Megabytes") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = task_series[ix].write_size; + + } + return task_values; + } + xfree(task_values); + info("PROFILE: %s is invalid data item for task data", data_name); + return NULL; +} + +static void _task_merge_step_series( + hid_t group, void *prior, void *cur, void *buf) +{ +// This is a running total series + profile_task_t* prf_prior = (profile_task_t*) prior; + 
profile_task_t* prf_cur = (profile_task_t*) cur; + profile_task_t* buf_prv = NULL; + profile_task_t* buf_cur = (profile_task_t*) buf; + + struct tm *ts; + ts = localtime(&prf_cur->time); + strftime(buf_cur->tod, TOD_LEN, TOD_FMT, ts); + if (prf_prior == NULL) { + // First sample. + seriesStart = prf_cur->time; + buf_cur->time = 0; + buf_cur->cpu_time = 0; + buf_cur->cpu_utilization = 0; + buf_cur->read_size = 0.0; + buf_cur->write_size = 0.0; + } else { + buf_prv = buf_cur - 1; + buf_cur->time = prf_cur->time - seriesStart; + buf_cur->cpu_time = prf_cur->cpu_time - prf_prior->cpu_time; + buf_cur->cpu_utilization = 100.0*((double) buf_cur->cpu_time / + (double) (buf_cur->time - buf_prv->time)); + buf_cur->read_size = + prf_cur->read_size - prf_prior->read_size; + buf_cur->write_size = + prf_cur->write_size - prf_prior->write_size; + } + buf_cur->cpu_freq = prf_cur->cpu_freq; + buf_cur->rss = prf_cur->rss; + buf_cur->vm_size = prf_cur->vm_size; + buf_cur->pages = prf_cur->pages; + return; +} + +static void *_task_series_total(int n_samples, void *data) +{ + profile_task_t* task_data; + profile_task_s_t* total; + task_data = (profile_task_t*) data; + total = xmalloc(sizeof(profile_task_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting task total"); + return NULL; + } + strcpy(total->start_time, task_data[0].tod); + total->elapsed_time = task_data[n_samples-1].time; + INCR_DIF_SAMPLE(total, task_data, cpu_freq, n_samples); + INCR_RT_SAMPLE(total, task_data, cpu_time, n_samples); + INCR_DIF_SAMPLE(total, task_data, cpu_utilization, n_samples); + INCR_DIF_SAMPLE(total, task_data, rss, n_samples); + INCR_DIF_SAMPLE(total, task_data, vm_size , n_samples); + INCR_DIF_SAMPLE(total, task_data, pages, n_samples); + INCR_RT_SAMPLE(total, task_data, read_size, n_samples); + INCR_RT_SAMPLE(total, task_data, write_size, n_samples); + return total; +} + +static void _task_extract_series( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, 
void *data, int size_data) +{ + int n_items, ix; + profile_task_t* task_data = (profile_task_t*) data; + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Date Time,ElapsedTime," + "CPU Frequency,CPU Time," + "CPU Utilization,rss,VM Size,Pages," + "Read_bytes,Write_bytes\n"); + } + n_items = size_data / sizeof(profile_task_t); + for (ix=0; ix < n_items; ix++) { + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%ld,%.3f", + job, step, node, series, + task_data[ix].tod, task_data[ix].time, + task_data[ix].cpu_freq, + task_data[ix].cpu_time, task_data[ix].cpu_utilization); + fprintf(fp,",%ld,%ld,%ld,%.3f,%.3f\n", task_data[ix].rss, + task_data[ix].vm_size, task_data[ix].pages, + task_data[ix].read_size, task_data[ix].write_size); + } + return; +} + +static void _task_extract_total( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + + profile_task_s_t* task_data = (profile_task_s_t*) data; + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min CPU Frequency,Ave CPU Frequency," + "Ave CPU Frequency,Total CPU Frequency," + "Min_CPU_Time,Ave_CPU_Time," + "Max_CPU_Time,Total_CPU_Time," + "Min_CPU_Utilization,Ave_CPU_Utilization," + "Max_CPU_Utilization,Total_CPU_Utilization," + "Min_RSS,Ave_RSS,Max_RSS,Total_RSS," + "Min_VMSize,Ave_VMSize,Max_VMSize,Total_VMSize," + "Min_Pages,Ave_Pages,Max_Pages,Total_Pages," + "Min_Read_Megabytes,Ave_Read_Megabytes," + "Max_Read_Megabytes,Total_Read_Megabytes," + "Min_Write_Megabytes,Ave_Write_Megabytes," + "Max_Write_Megabytes,Total_Write_Megabytes\n"); + } + fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + task_data->start_time, task_data->elapsed_time); + PUT_UINT_SUM(fp, task_data->cpu_freq, ","); + PUT_UINT_SUM(fp, task_data->cpu_time, ","); + PUT_DBL_SUM(fp, task_data->cpu_utilization, ","); + PUT_UINT_SUM(fp, task_data->rss, ","); + PUT_UINT_SUM(fp, task_data->vm_size, ","); + PUT_UINT_SUM(fp, task_data->pages, ","); + PUT_DBL_SUM(fp, 
task_data->read_size, ","); + PUT_DBL_SUM(fp, task_data->write_size, ","); + fprintf(fp, "\n"); + return; +} + +static hdf5_api_ops_t *_task_profile_factory(void) +{ + hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); + ops->dataset_size = &_task_dataset_size; + ops->create_memory_datatype = &_task_create_memory_datatype; + ops->create_file_datatype = &_task_create_file_datatype; + ops->create_s_memory_datatype = &_task_s_create_memory_datatype; + ops->create_s_file_datatype = &_task_s_create_file_datatype; + ops->init_job_series = &_task_init_job_series; + ops->get_series_tod = &_task_get_series_tod; + ops->get_series_values = &_task_get_series_values; + ops->merge_step_series = &_task_merge_step_series; + ops->series_total = &_task_series_total; + ops->extract_series = &_task_extract_series; + ops->extract_total = &_task_extract_total; + return ops; +} + +/* ============================================================================ + * Common support functions + ===========================================================================*/ + +extern hdf5_api_ops_t* profile_factory(uint32_t type) +{ + switch (type) { + case ACCT_GATHER_PROFILE_ENERGY: + return _energy_profile_factory(); + break; + case ACCT_GATHER_PROFILE_TASK: + return _task_profile_factory(); + break; + case ACCT_GATHER_PROFILE_LUSTRE: + return _io_profile_factory(); + break; + case ACCT_GATHER_PROFILE_NETWORK: + return _network_profile_factory(); + break; + default: + error("profile_factory: Unknown type %d sent", type); + return NULL; + } +} + + +extern void profile_init(void) +{ + typTOD = H5Tcopy (H5T_C_S1); + H5Tset_size (typTOD, TOD_LEN); /* create string of length TOD_LEN */ + + return; +} + +extern void profile_fini(void) +{ + H5Tclose(typTOD); + H5close(); /* make sure all H5 Objects are closed */ + + return; +} + +extern char *get_data_set_name(char *type) +{ + static char dset_name[MAX_DATASET_NAME+1]; + dset_name[0] = '\0'; + sprintf(dset_name, "%s Data", type); + + return 
dset_name; +} + + +static char* _H5O_type_t2str(H5O_type_t type) +{ + switch (type) + { + case H5O_TYPE_UNKNOWN: + return "H5O_TYPE_UNKNOWN"; + case H5O_TYPE_GROUP: + return "H5O_TYPE_GROUP"; + case H5O_TYPE_DATASET: + return "H5O_TYPE_DATASET"; + case H5O_TYPE_NAMED_DATATYPE: + return "H5O_TYPE_NAMED_DATATYPE"; + case H5O_TYPE_NTYPES: + return "H5O_TYPE_NTYPES"; + default: + return "Invalid H5O_TYPE"; + } +} + + +extern void hdf5_obj_info(hid_t group, char *nam_group) +{ + char buf[MAX_GROUP_NAME+1]; + hsize_t nobj, nattr; + hid_t aid; + int i, len; + H5G_info_t group_info; + H5O_info_t object_info; + + if (group < 0) { + info("PROFILE: Group is not HDF5 object"); + return; + } + H5Gget_info(group, &group_info); + nobj = group_info.nlinks; + H5Oget_info(group, &object_info); + nattr = object_info.num_attrs; + info("PROFILE group: %s NumObject=%d NumAttributes=%d", + nam_group, (int) nobj, (int) nattr); + for (i = 0; (nobj>0) && (i<nobj); i++) { + H5Oget_info_by_idx(group, ".", H5_INDEX_NAME, H5_ITER_INC, i, + &object_info, H5P_DEFAULT); + len = H5Lget_name_by_idx(group, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + info("PROFILE: Obj=%d Type=%s Name=%s", + i, _H5O_type_t2str(object_info.type), buf); + } else { + info("PROFILE: Obj=%d Type=%s Name=%s (is truncated)", + i, _H5O_type_t2str(object_info.type), buf); + } + } + for (i = 0; (nattr>0) && (i<nattr); i++) { + aid = H5Aopen_by_idx(group, ".", H5_INDEX_NAME, H5_ITER_INC, + i, H5P_DEFAULT, H5P_DEFAULT); + // Get the name of the attribute. 
+ len = H5Aget_name(aid, MAX_ATTR_NAME, buf); + if (len < MAX_ATTR_NAME) { + info("PROFILE: Attr=%d Name=%s", i, buf); + } else { + info("PROFILE: Attr=%d Name=%s (is truncated)", i, buf); + } + H5Aclose(aid); + } + + return; +} + +extern hid_t get_attribute_handle(hid_t parent, char *name) +{ + char buf[MAX_ATTR_NAME+1]; + int nattr, i, len; + hid_t aid; + H5O_info_t object_info; + + if (parent < 0) { + debug3("PROFILE: parent is not HDF5 object"); + return -1; + } + + H5Oget_info(parent, &object_info); + nattr = object_info.num_attrs; + for (i = 0; (nattr>0) && (i<nattr); i++) { + aid = H5Aopen_by_idx(parent, ".", H5_INDEX_NAME, H5_ITER_INC, + i, H5P_DEFAULT, H5P_DEFAULT); + // Get the name of the attribute. + len = H5Aget_name(aid, MAX_ATTR_NAME, buf); + if (len < MAX_ATTR_NAME) { + if (strcmp(buf, name) == 0) { + return aid; + } + } + H5Aclose(aid); + } + debug3("PROFILE: failed to find HDF5 attribute=%s\n", name); + + return -1; +} + +extern hid_t get_group(hid_t parent, char *name) +{ + char buf[MAX_GROUP_NAME]; + hsize_t nobj; + hid_t gid; + int i, len; + H5G_info_t group_info; + + if (parent < 0) { + debug3("PROFILE: parent is not HDF5 object"); + return -1; + } + H5Gget_info(parent, &group_info); + nobj = group_info.nlinks; + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. 
+ len = H5Lget_name_by_idx(parent, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + if (strcmp(buf, name) == 0) { + gid = H5Gopen(parent, name, H5P_DEFAULT); + if (gid < 0) + error("PROFILE: Failed to open %s", + name); + return gid; + } + } + } + + return -1; +} + +extern hid_t make_group(hid_t parent, char *name) +{ + hid_t gid = -1; + + if (parent < 0) { + debug3("PROFILE: parent is not HDF5 object"); + return -1; + } + gid = get_group(parent, name); + if (gid > 0) + return gid; + gid = H5Gcreate(parent, name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (gid < 0) { + debug3("PROFILE: failed to create HDF5 group=%s", name); + return -1; + } + + return gid; +} + +extern void put_string_attribute(hid_t parent, char *name, char *value) +{ + hid_t attr, space_attr, typ_attr; + hsize_t dim_attr[1] = {1}; // Single dimension array of values + + typ_attr = H5Tcopy(H5T_C_S1); + if (typ_attr < 0) { + debug3("PROFILE: failed to copy type for attribute %s", name); + return; + } + H5Tset_size(typ_attr, strlen(value)); + H5Tset_strpad(typ_attr, H5T_STR_NULLTERM); + space_attr = H5Screate_simple(1, dim_attr, NULL); + if (space_attr < 0) { + H5Tclose(typ_attr); + debug3("PROFILE: failed to create space for attribute %s", + name); + return; + } + attr = H5Acreate(parent, name, typ_attr, space_attr, + H5P_DEFAULT, H5P_DEFAULT); + if (attr < 0) { + H5Tclose(typ_attr); + H5Sclose(space_attr); + debug3("PROFILE: failed to create attribute %s", name); + return; + } + if (H5Awrite(attr, typ_attr, value) < 0) { + debug3("PROFILE: failed to write attribute %s", name); + // Fall through to release resources + } + H5Sclose(space_attr); + H5Tclose(typ_attr); + H5Aclose(attr); + + return; +} + +extern char *get_string_attribute(hid_t parent, char *name) +{ + char *value = NULL; + + hid_t attr, type; + size_t size; + + attr = get_attribute_handle(parent, name); + if (attr < 0) { + debug3("PROFILE: Attribute=%s does not 
exist", name); + return NULL; + } + type = H5Aget_type(attr); + if (H5Tget_class(type) != H5T_STRING) { + H5Aclose(attr); + debug3("PROFILE: Attribute=%s is not a string", name); + return NULL; + } + size = H5Tget_size(type); + value = xmalloc(size+1); + if (value == NULL) { + H5Tclose(type); + H5Aclose(attr); + debug3("PROFILE: failed to malloc %d bytes for attribute=%s", + (int) size, + name); + return NULL; + } + if (H5Aread(attr, type, value) < 0) { + xfree(value); + H5Tclose(type); + H5Aclose(attr); + debug3("PROFILE: failed to read attribute=%s", name); + return NULL; + } + H5Tclose(type); + H5Aclose(attr); + + return value; +} + +extern void put_int_attribute(hid_t parent, char *name, int value) +{ + hid_t attr, space_attr; + hsize_t dim_attr[1] = {1}; // Single dimension array of values + space_attr = H5Screate_simple(1, dim_attr, NULL); + if (space_attr < 0) { + debug3("PROFILE: failed to create space for attribute %s", + name); + return; + } + attr = H5Acreate(parent, name, H5T_NATIVE_INT, space_attr, + H5P_DEFAULT, H5P_DEFAULT); + if (attr < 0) { + H5Sclose(space_attr); + debug3("PROFILE: failed to create attribute %s", name); + return; + } + if (H5Awrite(attr, H5T_NATIVE_INT, &value) < 0) { + debug3("PROFILE: failed to write attribute %s", name); + // Fall through to release resources + } + H5Sclose(space_attr); + H5Aclose(attr); + + return; +} + +extern int get_int_attribute(hid_t parent, char *name) +{ + int value = 0; + + hid_t attr; + attr = get_attribute_handle(parent, name); + if (attr < 0) { + debug3("PROFILE: Attribute=%s does not exist, returning", name); + return value; + } + if (H5Aread(attr, H5T_NATIVE_INT, &value) < 0) { + debug3("PROFILE: failed to read attribute=%s, returning", name); + } + H5Aclose(attr); + + return value; +} + + +extern void put_uint32_attribute(hid_t parent, char *name, uint32_t value) +{ + hid_t attr, space_attr; + hsize_t dim_attr[1] = {1}; // Single dimension array of values + + space_attr = H5Screate_simple(1, 
dim_attr, NULL);
+	if (space_attr < 0) {
+		debug3("PROFILE: failed to create space for attribute %s",
+		       name);
+		return;
+	}
+	attr = H5Acreate(parent, name, H5T_NATIVE_UINT32, space_attr,
+			 H5P_DEFAULT, H5P_DEFAULT);
+	if (attr < 0) {
+		H5Sclose(space_attr);
+		debug3("PROFILE: failed to create attribute %s", name);
+		return;
+	}
+	if (H5Awrite(attr, H5T_NATIVE_UINT32, &value) < 0) {
+		debug3("PROFILE: failed to write attribute %s", name);
+		// Fall through to release resources
+	}
+	H5Sclose(space_attr);
+	H5Aclose(attr);
+
+	return;
+}
+
+/*
+ * Read the uint32_t attribute `name` of `parent`.
+ *
+ * Returns the value read, or 0 when the attribute handle cannot be
+ * obtained; a failed read is only logged.
+ */
+extern uint32_t get_uint32_attribute(hid_t parent, char *name)
+{
+	/* Must match H5T_NATIVE_UINT32 used in the read below and the
+	 * declared return type (was a signed int). */
+	uint32_t value = 0;
+	hid_t attr;
+
+	attr = get_attribute_handle(parent, name);
+	if (attr < 0) {
+		debug3("PROFILE: Attribute=%s does not exist, returning", name);
+		return value;
+	}
+	if (H5Aread(attr, H5T_NATIVE_UINT32, &value) < 0) {
+		debug3("PROFILE: failed to read attribute=%s, returning", name);
+	}
+	H5Aclose(attr);
+
+	return value;
+}
+
+/*
+ * Read the data set of a group into a newly allocated buffer.
+ *
+ * The memory datatype is selected from the ATTR_SUBDATATYPE attribute of
+ * `parent`: the summary datatype when it equals SUBDATA_SUMMARY, the
+ * regular datatype otherwise.
+ *
+ * Parameters
+ *	parent    - handle holding the attribute and the data set
+ *	type      - profile type handed to profile_factory()
+ *	nam_group - name from which the data set name is derived
+ *	size_data - OUT: H5Dget_storage_size() of the data set cast to int;
+ *		    only written once data set and datatype are open
+ *
+ * Returns the buffer (caller must xfree) or NULL on any failure or when
+ * the data set is empty.
+ */
+extern void *get_hdf5_data(hid_t parent, uint32_t type,
+			   char *nam_group, int *size_data)
+{
+	void * data = NULL;
+
+	hid_t id_data_set, dtyp_memory;
+	hsize_t szDset;
+	herr_t ec;
+	char *subtype = NULL;
+	hdf5_api_ops_t* ops = profile_factory(type);
+	char *type_name = acct_gather_profile_type_to_string(type);
+
+	if (ops == NULL) {
+		debug3("PROFILE: failed to create %s operations",
+		       type_name);
+		return NULL;
+	}
+	subtype = get_string_attribute(parent, ATTR_SUBDATATYPE);
+	/* get_string_attribute() reports failure with NULL; the former
+	 * "subtype < 0" pointer test never fired and let a NULL reach
+	 * strcmp() below. */
+	if (subtype == NULL) {
+		xfree(ops);
+		debug3("PROFILE: failed to get %s attribute",
+		       ATTR_SUBDATATYPE);
+		return NULL;
+	}
+	id_data_set = H5Dopen(parent, get_data_set_name(nam_group),
+			      H5P_DEFAULT);
+	if (id_data_set < 0) {
+		xfree(subtype);
+		xfree(ops);
+		debug3("PROFILE: failed to open %s Data Set",
+		       type_name);
+		return NULL;
+	}
+	/* strcmp() != 0 means "not a summary": use the regular datatype */
+	if (strcmp(subtype, SUBDATA_SUMMARY))
+		dtyp_memory = (*(ops->create_memory_datatype))();
+	else
+		dtyp_memory = (*(ops->create_s_memory_datatype))();
+	xfree(subtype);
+	if (dtyp_memory < 0) {
+		H5Dclose(id_data_set);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s memory datatype",
+		       type_name);
+		return NULL;
+	}
+	/* NOTE(review): the buffer is sized from the storage size of the
+	 * data set in the file; confirm this is never smaller than what
+	 * H5Dread() produces for dtyp_memory. */
+	szDset = H5Dget_storage_size(id_data_set);
+	*size_data = (int) szDset;
+	if (szDset == 0) {
+		H5Tclose(dtyp_memory);
+		H5Dclose(id_data_set);
+		xfree(ops);
+		debug3("PROFILE: %s data set is empty",
+		       type_name);
+		return NULL;
+	}
+	data = xmalloc(szDset);
+	if (data == NULL) {
+		H5Tclose(dtyp_memory);
+		H5Dclose(id_data_set);
+		xfree(ops);
+		debug3("PROFILE: failed to get memory for %s data set",
+		       type_name);
+		return NULL;
+	}
+	ec = H5Dread(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, H5P_DEFAULT,
+		     data);
+	if (ec < 0) {
+		H5Tclose(dtyp_memory);
+		H5Dclose(id_data_set);
+		xfree(data);
+		xfree(ops);
+		debug3("PROFILE: failed to read %s data",
+		       type_name);
+		return NULL;
+	}
+	H5Tclose(dtyp_memory);
+	H5Dclose(id_data_set);
+	xfree(ops);
+
+	return data;
+}
+
+/*
+ * Create group `group` under `parent` and write `n_item` items of `data`
+ * into a data set inside it. `subtype` selects the summary or the regular
+ * datatype pair. Failures are only reported through debug3().
+ */
+extern void put_hdf5_data(hid_t parent, uint32_t type, char *subtype,
+			  char *group, void *data, int n_item)
+{
+	hid_t id_group, dtyp_memory, dtyp_file, id_data_space, id_data_set;
+	hsize_t dims[1];
+	herr_t ec;
+	hdf5_api_ops_t* ops = profile_factory(type);
+	char *type_name = acct_gather_profile_type_to_string(type);
+
+	if (ops == NULL) {
+		debug3("PROFILE: failed to create %s operations",
+		       type_name);
+		return;
+	}
+	// Create the datatypes.
+	/* strcmp() != 0 means "not a summary": use the regular datatypes */
+	if (strcmp(subtype, SUBDATA_SUMMARY)) {
+		dtyp_memory = (*(ops->create_memory_datatype))();
+		dtyp_file = (*(ops->create_file_datatype))();
+	} else {
+		dtyp_memory = (*(ops->create_s_memory_datatype))();
+		dtyp_file = (*(ops->create_s_file_datatype))();
+	}
+
+	if (dtyp_memory < 0) {
+		/* Both datatypes were requested above; do not leak the file
+		 * datatype when only the memory one failed. */
+		if (dtyp_file >= 0)
+			H5Tclose(dtyp_file);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s memory datatype",
+		       type_name);
+		return;
+	}
+
+	if (dtyp_file < 0) {
+		H5Tclose(dtyp_memory);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s file datatype",
+		       type_name);
+		return;
+	}
+
+	/* One-dimensional data space holding n_item elements */
+	dims[0] = n_item;
+	id_data_space = H5Screate_simple(1, dims, NULL);
+	if (id_data_space < 0) {
+		H5Tclose(dtyp_file);
+		H5Tclose(dtyp_memory);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s space descriptor",
+		       type_name);
+		return;
+	}
+
+	id_group = H5Gcreate(parent, group, H5P_DEFAULT,
+			     H5P_DEFAULT, H5P_DEFAULT);
+	if (id_group < 0) {
+		H5Sclose(id_data_space);
+		H5Tclose(dtyp_file);
+		H5Tclose(dtyp_memory);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s group", group);
+		return;
+	}
+
+	/* Record type and subtype on the group; get_hdf5_data() reads
+	 * ATTR_SUBDATATYPE to pick the matching memory datatype. */
+	put_string_attribute(id_group, ATTR_DATATYPE, type_name);
+	put_string_attribute(id_group, ATTR_SUBDATATYPE, subtype);
+
+	id_data_set = H5Dcreate(id_group, get_data_set_name(group), dtyp_file,
+				id_data_space, H5P_DEFAULT, H5P_DEFAULT,
+				H5P_DEFAULT);
+	if (id_data_set < 0) {
+		H5Gclose(id_group);
+		H5Sclose(id_data_space);
+		H5Tclose(dtyp_file);
+		H5Tclose(dtyp_memory);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s dataset", group);
+		return;
+	}
+
+	ec = H5Dwrite(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, H5P_DEFAULT,
+		      data);
+	if (ec < 0) {
+		/* Message names the actual type instead of a hard-coded
+		 * "task", matching the read path in get_hdf5_data(). */
+		debug3("PROFILE: failed to write %s data", type_name);
+		// Fall through to release resources
+	}
+	H5Dclose(id_data_set);
+	H5Gclose(id_group);
+	H5Sclose(id_data_space);
+	H5Tclose(dtyp_file);
+	H5Tclose(dtyp_memory);
+	xfree(ops);
+
+
+	return;
+}
+
diff --git a/src/plugins/acct_gather_profile/hdf5/hdf5_api.h b/src/plugins/acct_gather_profile/hdf5/hdf5_api.h
new file mode 100644
index
0000000000000000000000000000000000000000..721bb189735e5c0f6c146351e0084b797062e2eb --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/hdf5_api.h @@ -0,0 +1,423 @@ +/****************************************************************************\ + * hdf5_api.h + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Portions Copyright (C) 2013 SchedMD LLC. + * Written by Danny Auble <da@schedmd.com> + * + * Provide support for acct_gather_profile plugins based on HDF5 files. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\****************************************************************************/ +#ifndef __ACCT_GATHER_HDF5_API_H__ +#define __ACCT_GATHER_HDF5_API_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#include <hdf5.h> +#include "src/common/slurm_acct_gather_profile.h" + +#define MAX_PROFILE_PATH 1024 +#define MAX_ATTR_NAME 64 +#define MAX_GROUP_NAME 64 +#define MAX_DATASET_NAME 64 + +#define ATTR_NODENAME "Node Name" +#define ATTR_STARTTIME "Start Time" +#define ATTR_NSTEPS "Number of Steps" +#define ATTR_NNODES "Number of Nodes" +#define ATTR_NTASKS "Number of Tasks" +#define ATTR_TASKID "Task Id" +#define ATTR_CPUPERTASK "CPUs per Task" +#define ATTR_DATATYPE "Data Type" +#define ATTR_SUBDATATYPE "Subdata Type" +#define ATTR_STARTTIME "Start Time" +#define ATTR_STARTSEC "Start Second" +#define SUBDATA_DATA "Data" +#define SUBDATA_NODE "Node" +#define SUBDATA_SAMPLE "Sample" +#define SUBDATA_SERIES "Series" +#define SUBDATA_TOTAL "Total" +#define SUBDATA_SUMMARY "Summary" + +#define GRP_ENERGY "Energy" +#define GRP_LUSTRE "Lustre" +#define GRP_STEP "Step" +#define GRP_NODES "Nodes" +#define GRP_NODE "Node" +#define GRP_NETWORK "Network" +#define GRP_SAMPLES "Time Series" +#define GRP_SAMPLE "Sample" +#define GRP_TASKS "Tasks" +#define GRP_TASK "Task" +#define GRP_TOTALS "Totals" + 
+// Data types supported by all HDF5 plugins of this type
+
+#define TOD_LEN 24
+#define TOD_FMT "%F %T"
+
+/*
+ * prof_uint_sum is a low level structure intended to hold the
+ * minimum, average, maximum, and total values of a data item.
+ * It is usually used in a summary data structure for an item
+ * that occurs in a time series.
+ */
+typedef struct prof_uint_sum {
+	uint64_t min; // Minimum value
+	uint64_t ave; // Average value
+	uint64_t max; // Maximum value
+	uint64_t total; // Accumulated value
+} prof_uint_sum_t;
+
+// Same as prof_uint_sum, but for double precision items
+typedef struct prof_dbl_sum {
+	double min; // Minimum value
+	double ave; // Average value
+	double max; // Maximum value
+	double total; // Accumulated value
+} prof_dbl_sum_t;
+
+#define PROFILE_ENERGY_DATA "Energy"
+// energy data structures
+// node_step file
+typedef struct profile_energy {
+	char tod[TOD_LEN]; // Not used in node-step
+	time_t time;
+	uint64_t power;
+	uint64_t cpu_freq;
+} profile_energy_t;
+// summary data in job-node-totals
+typedef struct profile_energy_s {
+	char start_time[TOD_LEN];
+	uint64_t elapsed_time;
+	prof_uint_sum_t power;
+	prof_uint_sum_t cpu_freq;
+} profile_energy_s_t; // series summary
+
+#define PROFILE_IO_DATA "I/O"
+// io data structure
+// node_step file
+typedef struct profile_io {
+	char tod[TOD_LEN]; // Not used in node-step
+	time_t time;
+	uint64_t reads;
+	double read_size; // currently in megabytes
+	uint64_t writes;
+	double write_size; // currently in megabytes
+} profile_io_t;
+// summary data in job-node-totals
+typedef struct profile_io_s {
+	char start_time[TOD_LEN];
+	uint64_t elapsed_time;
+	prof_uint_sum_t reads;
+	prof_dbl_sum_t read_size; // currently in megabytes
+	prof_uint_sum_t writes;
+	prof_dbl_sum_t write_size; // currently in megabytes
+} profile_io_s_t;
+
+#define PROFILE_NETWORK_DATA "Network"
+// Network data structure
+// node_step file
+typedef struct profile_network {
+	char tod[TOD_LEN]; // Not used in
node-step + time_t time; + uint64_t packets_in; + double size_in; // currently in megabytes + uint64_t packets_out; + double size_out; // currently in megabytes +} profile_network_t; +// summary data in job-node-totals +typedef struct profile_network_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t packets_in; + prof_dbl_sum_t size_in; // currently in megabytes + prof_uint_sum_t packets_out; + prof_dbl_sum_t size_out; // currently in megabytes +} profile_network_s_t; + +#define PROFILE_TASK_DATA "Task" +// task data structure +// node_step file +typedef struct profile_task { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t cpu_freq; + uint64_t cpu_time; + double cpu_utilization; + uint64_t rss; + uint64_t vm_size; + uint64_t pages; + double read_size; // currently in megabytes + double write_size; // currently in megabytes +} profile_task_t; +// summary data in job-node-totals +typedef struct profile_task_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t cpu_freq; + prof_uint_sum_t cpu_time; + prof_dbl_sum_t cpu_utilization; + prof_uint_sum_t rss; + prof_uint_sum_t vm_size; + prof_uint_sum_t pages; + prof_dbl_sum_t read_size; // currently in megabytes + prof_dbl_sum_t write_size; // currently in megabytes +} profile_task_s_t; + +/* + * Structure of function pointers of common operations on a profile data type. + * dataset_size -- size of one dataset (structure size) + * create_memory_datatype -- creates hdf5 memory datatype corresponding + * to the datatype structure. + * create_file_datatype -- creates hdf5 file datatype corresponding + * to the datatype structure. + * create_s_memory_datatype -- creates hdf5 memory datatype corresponding + * to the summary datatype structure. + * create_s_file_datatype -- creates hdf5 file datatype corresponding + * to the summary datatype structure. 
+ * init_job_series -- allocates a buffer for a complete time series
+ *	(in job merge) and initializes each member
+ * get_series_tod -- gets the date/time value of each sample in the series
+ * get_series_values -- gets a specific data item from each sample in the
+ *	series
+ * merge_step_series -- merges all the individual time samples into a
+ *	single data set with one item per sample.
+ *	Data items can be scaled (e.g. subtracting beginning time),
+ *	differenced (to show counts in interval) or other things
+ *	appropriate for the series.
+ * series_total -- accumulate or average members in the entire series to
+ *	be added to the file as totals for the node or task.
+ * extract_series -- format members of a structure for putting
+ *	to a file data extracted from a time series to be imported into
+ *	another analysis tool. (e.g. format as comma-separated values.)
+ * extract_total -- format members of a structure for putting
+ *	to a file data extracted from a time series total to be
+ *	imported into another analysis tool.
+ *	(format as comma-separated values, for example.)
+ */
+typedef struct hdf5_api_ops {
+	int (*dataset_size) (void);
+	hid_t (*create_memory_datatype) (void);
+	hid_t (*create_file_datatype) (void);
+	hid_t (*create_s_memory_datatype) (void);
+	hid_t (*create_s_file_datatype) (void);
+	void* (*init_job_series) (int);
+	char** (*get_series_tod) (void*, int);
+	double* (*get_series_values) (char*, void*, int);
+	void (*merge_step_series) (hid_t, void*, void*, void*);
+	void* (*series_total) (int, void*);
+	void (*extract_series) (FILE*, bool, int, int, char*, char*, void*,
+				int);
+	void (*extract_total) (FILE*, bool, int, int, char*, char*, void*,
+			       int);
+} hdf5_api_ops_t;
+
+/* ============================================================================
+ * Common support functions
+ ==========================================================================*/
+
+/*
+ * Create an ops table (hdf5_api_ops_t) from type
+ */
+hdf5_api_ops_t* profile_factory(uint32_t type);
+
+/*
+ * Initialize profile (initialize static memory)
+ */
+void profile_init(void);
+
+/*
+ * Finalize profile
+ * NOTE(review): the original comment said "initialize static memory",
+ * which looks copied from profile_init(); confirm what is released here.
+ */
+void profile_fini(void);
+
+/*
+ * Make a dataset name
+ *
+ * Parameters
+ *	type - series name
+ *
+ * Returns
+ *	common data set name based on type in static memory
+ */
+char* get_data_set_name(char* type);
+
+/*
+ * print info on an object for debugging
+ *
+ * Parameters
+ *	group - handle to group.
+ *	namGroup - name of the group
+ */
+void hdf5_obj_info(hid_t group, char* namGroup);
+
+/*
+ * get attribute handle by name.
+ *
+ * Parameters
+ *	parent - handle to parent group.
+ *	name - name of the attribute
+ *
+ * Returns - handle for attribute (or -1 when not found), caller must close
+ */
+hid_t get_attribute_handle(hid_t parent, char* name);
+
+/*
+ * get group by name.
+ *
+ * Parameters
+ *	parent - handle to parent group.
+ *	name - name of the group
+ *
+ * Returns - handle for group (or -1 when not found), caller must close
+ */
+hid_t get_group(hid_t parent, char* name);
+
+/*
+ * make group by name.
+ * + * Parameters + * parent - handle to parent group. + * name - name of the group + * + * Returns - handle for group (or -1 on error), caller must close + */ +hid_t make_group(hid_t parent, char* name); + +/* + * Put string attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * value - value of the attribute + */ +void put_string_attribute(hid_t parent, char* name, char* value); + +/* + * get string attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * + * Return: pointer to value. Caller responsibility to free!!! + */ +char* get_string_attribute(hid_t parent, char* name); + +/* + * Put integer attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * value - value of the attribute + */ +void put_int_attribute(hid_t parent, char* name, int value); + +/* + * get int attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * + * Return: value + */ +int get_int_attribute(hid_t parent, char* name); + +/* + * Put uint32_t attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * value - value of the attribute + */ +void put_uint32_attribute(hid_t parent, char* name, uint32_t value); + +/* + * get uint32_t attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * + * Return: value + */ +uint32_t get_uint32_attribute(hid_t parent, char* name); + +/* + * Get data from a group of a HDF5 file + * + * Parameters + * parent - handle to parent. + * type - type of data (ACCT_GATHER_PROFILE_* in slurm.h) + * namGroup - name of group + * sizeData - pointer to variable into which to put size of dataset + * + * Returns -- data set of type (or null), caller must free. 
+ */ +void* get_hdf5_data(hid_t parent, uint32_t type, char* namGroup, int* sizeData); + +/* + * Put one data sample into a new group in an HDF5 file + * + * Parameters + * parent - handle to parent group. + * type - type of data (ACCT_GATHER_PROFILE_* in slurm.h) + * subtype - generally source (node, series, ...) or summary + * group - name of new group + * data - data for the sample + * nItems - number of items of type in the data + */ +void put_hdf5_data(hid_t parent, uint32_t type, char* subtype, char* group, + void* data, int nItems); + +#endif /*__ACCT_GATHER_HDF5_API_H__*/ diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..dadb2a81f90b93274124300ee4bb6c926b12632e --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am @@ -0,0 +1,30 @@ +# +# Makefile for sh5util + +AUTOMAKE_OPTIONS = foreign + +# Do not put a link to common here. src/common contains an mpi.h which +# hdf5 could of been installed with a link to the generic mpi.h. 
+INCLUDES = -I$(top_srcdir) -I../ + +SHDF5_SOURCES = sh5util.c + +if BUILD_HDF5 + +bin_PROGRAMS = sh5util + +sh5util_SOURCES = $(SHDF5_SOURCES) +sh5util_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) \ + ../libhdf5_api.la + +sh5util_CPPFLAGS = $(HDF5_CPPFLAGS) +sh5util_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) \ + $(HWLOC_LDFLAGS) $(HWLOC_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) + +force: +$(sh5util_LDADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` + +else +EXTRA_sh5util_SOURCES = $(SHDF5_SOURCES) +endif diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..685776fbc0f9a362ecfce854180feb9300dd0821 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in @@ -0,0 +1,736 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# +# Makefile for sh5util + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@BUILD_HDF5_TRUE@bin_PROGRAMS = sh5util$(EXEEXT) +subdir = src/plugins/acct_gather_profile/hdf5/sh5util +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 
\ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am__sh5util_SOURCES_DIST = sh5util.c +am__objects_1 = sh5util-sh5util.$(OBJEXT) +@BUILD_HDF5_TRUE@am_sh5util_OBJECTS = $(am__objects_1) +am__EXTRA_sh5util_SOURCES_DIST = sh5util.c +sh5util_OBJECTS = $(am_sh5util_OBJECTS) +am__DEPENDENCIES_1 = +@BUILD_HDF5_TRUE@sh5util_DEPENDENCIES = \ +@BUILD_HDF5_TRUE@ $(top_builddir)/src/api/libslurm.o \ +@BUILD_HDF5_TRUE@ $(am__DEPENDENCIES_1) ../libhdf5_api.la +sh5util_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sh5util_LDFLAGS) \ + $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(sh5util_SOURCES) $(EXTRA_sh5util_SOURCES) +DIST_SOURCES = $(am__sh5util_SOURCES_DIST) \ + $(am__EXTRA_sh5util_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = 
@GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ 
+build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign + +# Do not put a link to common here. src/common contains an mpi.h which +# hdf5 could of been installed with a link to the generic mpi.h. 
+INCLUDES = -I$(top_srcdir) -I../ +SHDF5_SOURCES = sh5util.c +@BUILD_HDF5_TRUE@sh5util_SOURCES = $(SHDF5_SOURCES) +@BUILD_HDF5_TRUE@sh5util_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) \ +@BUILD_HDF5_TRUE@ ../libhdf5_api.la + +@BUILD_HDF5_TRUE@sh5util_CPPFLAGS = $(HDF5_CPPFLAGS) +@BUILD_HDF5_TRUE@sh5util_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) \ +@BUILD_HDF5_TRUE@ $(HWLOC_LDFLAGS) $(HWLOC_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) + +@BUILD_HDF5_FALSE@EXTRA_sh5util_SOURCES = $(SHDF5_SOURCES) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/hdf5/sh5util/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/hdf5/sh5util/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p || test -f $$p1; \ + then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + 
+uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +sh5util$(EXEEXT): $(sh5util_OBJECTS) $(sh5util_DEPENDENCIES) $(EXTRA_sh5util_DEPENDENCIES) + @rm -f sh5util$(EXEEXT) + $(sh5util_LINK) $(sh5util_OBJECTS) $(sh5util_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sh5util-sh5util.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' 
object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +sh5util-sh5util.o: sh5util.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sh5util_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sh5util-sh5util.o -MD -MP -MF $(DEPDIR)/sh5util-sh5util.Tpo -c -o sh5util-sh5util.o `test -f 'sh5util.c' || echo '$(srcdir)/'`sh5util.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sh5util-sh5util.Tpo $(DEPDIR)/sh5util-sh5util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sh5util.c' object='sh5util-sh5util.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sh5util_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sh5util-sh5util.o `test -f 'sh5util.c' || echo '$(srcdir)/'`sh5util.c + +sh5util-sh5util.obj: sh5util.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sh5util_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sh5util-sh5util.obj -MD -MP -MF $(DEPDIR)/sh5util-sh5util.Tpo -c -o sh5util-sh5util.obj `if test -f 'sh5util.c'; then $(CYGPATH_W) 'sh5util.c'; else $(CYGPATH_W) '$(srcdir)/sh5util.c'; fi` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sh5util-sh5util.Tpo $(DEPDIR)/sh5util-sh5util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sh5util.c' object='sh5util-sh5util.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sh5util_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sh5util-sh5util.obj `if test -f 'sh5util.c'; then $(CYGPATH_W) 'sh5util.c'; else $(CYGPATH_W) '$(srcdir)/sh5util.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + 
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case 
$$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool ctags distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am 
ps ps-am tags uninstall uninstall-am \ + uninstall-binPROGRAMS + + +@BUILD_HDF5_TRUE@force: +@BUILD_HDF5_TRUE@$(sh5util_LDADD) : force +@BUILD_HDF5_TRUE@ @cd `dirname $@` && $(MAKE) `basename $@` + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c new file mode 100644 index 0000000000000000000000000000000000000000..497ce5a642b1b5a988f7117be73565eedda58ad8 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c @@ -0,0 +1,1858 @@ +/*****************************************************************************\ + * sh5util.c - slurm profile accounting plugin for io and energy using hdf5. + * - Utility to merge node-step files into a job file + * - or extract data from an job file + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Copyright (C) 2013 SchedMD LLC + * + * Written by Danny Auble <da@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. 
You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +\*****************************************************************************/ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#if HAVE_GETOPT_H +# include <getopt.h> +#else +# include "src/common/getopt.h" +#endif + +#include <dirent.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> + +#include "src/common/uid.h" +#include "src/common/read_config.h" +#include "src/common/proc_args.h" +#include "src/common/xstring.h" +#include "../hdf5_api.h" + +typedef enum { + SH5UTIL_MODE_MERGE, + SH5UTIL_MODE_EXTRACT, + SH5UTIL_MODE_ITEM_EXTRACT, +} sh5util_mode_t; + +typedef struct { + char *dir; + int help; + char *input; + int job_id; + bool keepfiles; + char *level; + sh5util_mode_t mode; + char *node; + char *output; + char *series; + char *data_item; + int step_id; + char *user; + int verbose; +} sh5util_opts_t; + + +static sh5util_opts_t params; +static char **series_names; +static int num_series; + +static int _set_options(const 
int argc, char **argv); +static int _merge_step_files(void); +static int _extract_data(void); +static int _series_data(void); +static int _check_params(void); +static void _free_options(void); +static void _remove_empty_output(void); + +static void _help_msg(void) +{ + printf("\ +Usage sh5util [<OPTION>] -j <job[.stepid]>\n" +"\n" +"Valid <OPTION> values are:\n" +" -E, --extract Extract data series from job file.\n" +" -i, --input merged file to extract from (default ./job_$jobid.h5)\n" +" -N, --node Node name to extract (default is all)\n" +" -l, --level Level to which series is attached\n" +" [Node:Totals|Node:TimeSeries] (default Node:Totals)\n" +" -s, --series Name of series:\n" +" Energy | Lustre | Network | Tasks | Task_#\n" +" 'Tasks' is all tasks, Task_# is task_id (default is all)\n" +" -I, --item-extract Extract data item from one series from \n" +" all samples on all nodes from thejob file.\n" +" -i, --input merged file to extract from (default ./job_$jobid.h5)\n" +" -s, --series Name of series:\n" +" Energy | Lustre | Network | Task\n" +" -d, --data Name of data item in series (see man page) \n" +" -j, --jobs Format is <job(.step)>. Merge this job/step.\n" +" or comma-separated list of job steps. This option is\n" +" required. Not specifying a step will result in all\n" +" steps found to be processed.\n" +" -h, --help Print this description of use.\n" +" -o, --output Path to a file into which to write.\n" +" Default for merge is ./job_$jobid.h5\n" +" Default for extract is ./extract_$jobid.csv\n" +" -p, --profiledir Profile directory location where node-step files exist\n" +" default is what is set in acct_gather.conf\n" +" -S, --savefiles Don't remove node-step files after merging them \n" +" --user User who profiled job. 
(Handy for root user, defaults to \n" +" user running this command.)\n" +" --usage Display brief usage message\n"); +} + +int +main(int argc, char **argv) +{ + int cc; + + cc = _set_options(argc, argv); + if (cc < 0) + goto ouch; + + cc = _check_params(); + if (cc < 0) + goto ouch; + + profile_init(); + + switch (params.mode) { + + case SH5UTIL_MODE_MERGE: + + info("Merging node-step files into %s", + params.output); + cc = _merge_step_files(); + if (cc < 0) + goto ouch; + break; + + case SH5UTIL_MODE_EXTRACT: + + info("Extracting job data from %s into %s", + params.input, params.output); + cc = _extract_data(); + if (cc < 0) + goto ouch; + break; + + case SH5UTIL_MODE_ITEM_EXTRACT: + + info("Extracting '%s' from '%s' data from %s into %s", + params.data_item, params.series, + params.input, params.output); + cc = _series_data(); + if (cc < 0) + goto ouch; + break; + + default: + error("Unknown type %d", params.mode); + break; + } + + _remove_empty_output(); + profile_fini(); + _free_options(); + + return 0; + +ouch: + _remove_empty_output(); + _free_options(); + + return -1; +} + +/* _free_options() + */ +static void +_free_options(void) +{ + xfree(params.dir); + xfree(params.input); + xfree(params.node); + xfree(params.output); + xfree(params.series); + xfree(params.data_item); + xfree(params.user); +} +/* + * delete list of strings + * + * Parameters + * list - xmalloc'd list of pointers of xmalloc'd strings. + * listlen - number of strings in the list + */ +static void _delete_string_list(char **list, int listLen) +{ + int ix; + + if (list == NULL) + return; + + for (ix = 0; ix < listLen; ix++) { + xfree(list[ix]); + } + + xfree(list); + +} + +static void _remove_empty_output(void) +{ + struct stat sb; + + if (stat(params.output, &sb) == -1) { + /* Ignore the error as the file may have + * not been created yet. + */ + return; + } + + /* Remove the file if 0 size which means + * the program failed somewhere along the + * way and the file is just left hanging... 
+ */ + if (sb.st_size == 0) + remove(params.output); +} + +static void _init_opts(void) +{ + memset(&params, 0, sizeof(sh5util_opts_t)); + params.job_id = -1; + params.mode = SH5UTIL_MODE_MERGE; + params.step_id = -1; +} + +static int _set_options(const int argc, char **argv) +{ + int option_index = 0; + int cc; + log_options_t logopt = LOG_OPTS_STDERR_ONLY; + char *next_str = NULL; + uid_t u; + + static struct option long_options[] = { + {"extract", no_argument, 0, 'E'}, + {"item-extract", no_argument, 0, 'I'}, + {"data", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"jobs", required_argument, 0, 'j'}, + {"input", required_argument, 0, 'i'}, + {"level", required_argument, 0, 'l'}, + {"node", required_argument, 0, 'N'}, + {"output", required_argument, 0, 'o'}, + {"profiledir", required_argument, 0, 'p'}, + {"series", required_argument, 0, 's'}, + {"savefiles", no_argument, 0, 'S'}, + {"usage", no_argument, 0, 'U'}, + {"user", required_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0}}; + + log_init(xbasename(argv[0]), logopt, 0, NULL); + + _init_opts(); + + while ((cc = getopt_long(argc, argv, "d:Ehi:Ij:l:N:o:p:s:S:uUvV", + long_options, &option_index)) != EOF) { + switch (cc) { + case 'd': + params.data_item = xstrdup(optarg); + params.data_item = xstrtolower(params.data_item); + break; + case 'E': + params.mode = SH5UTIL_MODE_EXTRACT; + break; + case 'I': + params.mode = SH5UTIL_MODE_ITEM_EXTRACT; + break; + case 'h': + _help_msg(); + return -1; + break; + case 'i': + params.input = xstrdup(optarg); + break; + case 'j': + params.job_id = strtol(optarg, &next_str, 10); + if (next_str[0] == '.') + params.step_id = + strtol(next_str + 1, NULL, 10); + break; + case 'l': + params.level = xstrdup(optarg); + break; + case 'N': + params.node = xstrdup(optarg); + break; + case 'o': + params.output = xstrdup(optarg); + break; + case 'p': + params.dir = xstrdup(optarg); + break; + case 's': + if
(strcmp(optarg, GRP_ENERGY) + && strcmp(optarg, GRP_LUSTRE) + && strcmp(optarg, GRP_NETWORK) + && strncmp(optarg,GRP_TASK,strlen(GRP_TASK))) { + error("Bad value for --series=\"%s\"", optarg); + return -1; + } + params.series = xstrdup(optarg); + break; + case 'S': + params.keepfiles = 1; + break; + case 'u': + u = atoi(optarg); + if (uid_from_string(optarg, &u) < 0) { + error("No such user --uid=\"%s\"", optarg); + return -1; + } + break; + case 'U': + _help_msg(); + return -1; + break; + case 'v': + params.verbose++; + break; + case 'V': + print_slurm_version(); + return -1; + break; + case ':': + case '?': /* getopt() has explained it */ + return -1; + } + } + + if (params.verbose) { + logopt.stderr_level += params.verbose; + log_alter(logopt, SYSLOG_FACILITY_USER, NULL); + } + + return 0; +} + +/* _check_params() + */ +static int +_check_params(void) +{ + if (params.job_id == -1) { + error("JobID must be specified."); + return -1; + } + + if (params.user == NULL) + params.user = uid_to_string(getuid()); + + if (!params.dir) + acct_gather_profile_g_get(ACCT_GATHER_PROFILE_DIR, &params.dir); + + if (!params.dir) { + error("Cannot read/parse acct_gather.conf"); + return -1; + } + + if (params.mode == SH5UTIL_MODE_EXTRACT) { + if (!params.level) + params.level = xstrdup("Node:Totals"); + if (!params.input) + params.input = xstrdup_printf( + "./job_%d.h5", params.job_id); + if (!params.output) + params.output = xstrdup_printf( + "./extract_%d.csv", params.job_id); + + } + if (params.mode == SH5UTIL_MODE_ITEM_EXTRACT) { + if (!params.data_item) + fatal("Must specify data option --data "); + + if (!params.series) + fatal("Must specify series option --series"); + + if (!params.input) + params.input = xstrdup_printf("./job_%d.h5", params.job_id); + + if (!params.output) + params.output = xstrdup_printf("./%s_%s_%d.csv", + params.series, + params.data_item, + params.job_id); + } + + if (!params.output) + params.output = xstrdup_printf("./job_%d.h5", params.job_id); + +
return 0; +} + +/* ============================================================================ + * ============================================================================ + * Functions for merging samples from node step files into a job file + * ============================================================================ + * ========================================================================= */ + +static void* _get_all_samples(hid_t gid_series, char* nam_series, uint32_t type, + int nsamples) +{ + void* data = NULL; + + hid_t id_data_set, dtyp_memory, g_sample, sz_dest; + herr_t ec; + int smpx ,len; + void *data_prior = NULL, *data_cur = NULL; + char name_sample[MAX_GROUP_NAME+1]; + hdf5_api_ops_t* ops; + + ops = profile_factory(type); + if (ops == NULL) { + error("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + return NULL; + } + data = (*(ops->init_job_series))(nsamples); + if (data == NULL) { + xfree(ops); + error("Failed to get memory for combined data"); + return NULL; + } + dtyp_memory = (*(ops->create_memory_datatype))(); + if (dtyp_memory < 0) { + xfree(ops); + xfree(data); + error("Failed to create %s memory datatype", + acct_gather_profile_type_to_string(type)); + return NULL; + } + for (smpx=0; smpx<nsamples; smpx++) { + len = H5Lget_name_by_idx(gid_series, ".", H5_INDEX_NAME, + H5_ITER_INC, smpx, name_sample, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + error("Invalid group name %s", name_sample); + continue; + } + g_sample = H5Gopen(gid_series, name_sample, H5P_DEFAULT); + if (g_sample < 0) { + info("Failed to open %s", name_sample); + } + id_data_set = H5Dopen(g_sample, get_data_set_name(name_sample), + H5P_DEFAULT); + if (id_data_set < 0) { + H5Gclose(g_sample); + error("Failed to open %s dataset", + acct_gather_profile_type_to_string(type)); + continue; + } + sz_dest = (*(ops->dataset_size))(); + data_cur = xmalloc(sz_dest); + if (data_cur == NULL) { + H5Dclose(id_data_set); + 
H5Gclose(g_sample); + error("Failed to get memory for prior data"); + continue; + } + ec = H5Dread(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, + H5P_DEFAULT, data_cur); + if (ec < 0) { + xfree(data_cur); + H5Dclose(id_data_set); + H5Gclose(g_sample); + error("Failed to read %s data", + acct_gather_profile_type_to_string(type)); + continue; + } + (*(ops->merge_step_series))(g_sample, data_prior, data_cur, + data+(smpx)*sz_dest); + + xfree(data_prior); + data_prior = data_cur; + H5Dclose(id_data_set); + H5Gclose(g_sample); + } + xfree(data_cur); + H5Tclose(dtyp_memory); + xfree(ops); + + return data; +} + +static void _merge_series_data(hid_t jgid_tasks, hid_t jg_node, hid_t nsg_node) +{ + hid_t jg_samples, nsg_samples; + hid_t g_series, g_series_total = -1; + hsize_t num_samples, n_series; + int idsx, len; + void *data = NULL, *series_total = NULL; + uint32_t type; + char *data_type; + char nam_series[MAX_GROUP_NAME+1]; + hdf5_api_ops_t* ops = NULL; + H5G_info_t group_info; + H5O_info_t object_info; + + if (jg_node < 0) { + info("Job Node is not HDF5 object"); + return; + } + if (nsg_node < 0) { + info("Node-Step is not HDF5 object"); + return; + } + + jg_samples = H5Gcreate(jg_node, GRP_SAMPLES, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_samples < 0) { + info("Failed to create job node Samples"); + return; + } + nsg_samples = get_group(nsg_node, GRP_SAMPLES); + if (nsg_samples < 0) { + H5Gclose(jg_samples); + debug("Failed to get node-step Samples"); + return; + } + H5Gget_info(nsg_samples, &group_info); + n_series = group_info.nlinks; + if (n_series < 1) { + // No series? 
+ H5Gclose(jg_samples); + H5Gclose(nsg_samples); + info("No Samples"); + return; + } + for (idsx = 0; idsx < n_series; idsx++) { + H5Oget_info_by_idx(nsg_samples, ".", H5_INDEX_NAME, H5_ITER_INC, + idsx, &object_info, H5P_DEFAULT); + if (object_info.type != H5O_TYPE_GROUP) + continue; + + len = H5Lget_name_by_idx(nsg_samples, ".", H5_INDEX_NAME, + H5_ITER_INC, idsx, nam_series, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", nam_series); + continue; + } + g_series = H5Gopen(nsg_samples, nam_series, H5P_DEFAULT); + if (g_series < 0) { + info("Failed to open %s", nam_series); + continue; + } + H5Gget_info(g_series, &group_info); + num_samples = group_info.nlinks; + if (num_samples <= 0) { + H5Gclose(g_series); + info("_series %s has no samples", nam_series); + continue; + } + // Get first sample in series to find out how big the data is. + data_type = get_string_attribute(g_series, ATTR_DATATYPE); + if (!data_type) { + H5Gclose(g_series); + info("Failed to get datatype for Time Series Dataset"); + continue; + } + type = acct_gather_profile_type_from_string(data_type); + xfree(data_type); + data = _get_all_samples(g_series, nam_series, type, + num_samples); + if (data == NULL) { + H5Gclose(g_series); + info("Failed to get memory for Time Series Dataset"); + continue; + } + put_hdf5_data(jg_samples, type, SUBDATA_SERIES, nam_series, + data, num_samples); + ops = profile_factory(type); + if (ops == NULL) { + xfree(data); + H5Gclose(g_series); + info("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + continue; + } + series_total = (*(ops->series_total))(num_samples, data); + if (series_total != NULL) { + // Totals for series attaches to node + g_series_total = make_group(jg_node, GRP_TOTALS); + if (g_series_total < 0) { + H5Gclose(g_series); + xfree(series_total); + xfree(data); + xfree(ops); + info("Failed to make Totals for Node"); + continue; + } + put_hdf5_data(g_series_total, 
type, + SUBDATA_SUMMARY, + nam_series, series_total, 1); + H5Gclose(g_series_total); + } + xfree(series_total); + xfree(ops); + xfree(data); + H5Gclose(g_series); + } + + return; +} + +/* ============================================================================ + * Functions for merging tasks data into a job file + ==========================================================================*/ + +static void _merge_task_totals(hid_t jg_tasks, hid_t nsg_node, char* node_name) +{ + hid_t jg_task, jg_totals, nsg_totals, + g_total, nsg_tasks, nsg_task = -1; + hsize_t nobj, ntasks = -1; + int i, len, taskx, taskid, taskcpus, size_data; + void *data; + uint32_t type; + char buf[MAX_GROUP_NAME+1]; + char group_name[MAX_GROUP_NAME+1]; + H5G_info_t group_info; + + if (jg_tasks < 0) { + info("Job Tasks is not HDF5 object"); + return; + } + if (nsg_node < 0) { + info("Node-Step is not HDF5 object"); + return; + } + + nsg_tasks = get_group(nsg_node, GRP_TASKS); + if (nsg_tasks < 0) { + debug("No Tasks group in node-step file"); + return; + } + + H5Gget_info(nsg_tasks, &group_info); + ntasks = group_info.nlinks; + for (taskx = 0; ((int)ntasks>0) && (taskx<((int)ntasks)); taskx++) { + // Get the name of the group. 
+ len = H5Lget_name_by_idx(nsg_tasks, ".", H5_INDEX_NAME, + H5_ITER_INC, taskx, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", buf); + continue; + } + nsg_task = H5Gopen(nsg_tasks, buf, H5P_DEFAULT); + if (nsg_task < 0) { + debug("Failed to open %s", buf); + continue; + } + taskid = get_int_attribute(nsg_task, ATTR_TASKID); + sprintf(group_name, "%s_%d", GRP_TASK, taskid); + jg_task = H5Gcreate(jg_tasks, group_name, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_task < 0) { + H5Gclose(nsg_task); + info("Failed to create job task group"); + continue; + } + put_string_attribute(jg_task, ATTR_NODENAME, node_name); + put_int_attribute(jg_task, ATTR_TASKID, taskid); + taskcpus = get_int_attribute(nsg_task, ATTR_CPUPERTASK); + put_int_attribute(jg_task, ATTR_CPUPERTASK, taskcpus); + nsg_totals = get_group(nsg_task, GRP_TOTALS); + if (nsg_totals < 0) { + H5Gclose(jg_task); + H5Gclose(nsg_task); + continue; + } + jg_totals = H5Gcreate(jg_task, GRP_TOTALS, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_totals < 0) { + H5Gclose(jg_task); + H5Gclose(nsg_task); + info("Failed to create job task totals"); + continue; + } + H5Gget_info(nsg_totals, &group_info); + nobj = group_info.nlinks; + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. 
+ len = H5Lget_name_by_idx(nsg_totals, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", buf); + continue; + } + g_total = H5Gopen(nsg_totals, buf, H5P_DEFAULT); + if (g_total < 0) { + info("Failed to open %s", buf); + continue; + } + type = get_uint32_attribute(g_total, ATTR_DATATYPE); + if (!type) { + H5Gclose(g_total); + info("No %s attribute", ATTR_DATATYPE); + continue; + } + data = get_hdf5_data(g_total, type, buf, &size_data); + if (data == NULL) { + H5Gclose(g_total); + info("Failed to get group %s type %s data", buf, + acct_gather_profile_type_to_string(type)); + continue; + } + put_hdf5_data(jg_totals, type, SUBDATA_DATA, + buf, data, 1); + xfree(data); + H5Gclose(g_total); + } + H5Gclose(nsg_totals); + H5Gclose(nsg_task); + H5Gclose(jg_totals); + H5Gclose(jg_task); + } + H5Gclose(nsg_tasks); +} + +/* ============================================================================ + * Functions for merging node totals into a job file + ==========================================================================*/ + +static void _merge_node_totals(hid_t jg_node, hid_t nsg_node) +{ + hid_t jg_totals, nsg_totals, g_total; + hsize_t nobj; + int i, len, size_data; + void *data; + uint32_t type; + char buf[MAX_GROUP_NAME+1]; + H5G_info_t group_info; + + if (jg_node < 0) { + info("Job Node is not HDF5 object"); + return; + } + if (nsg_node < 0) { + info("Node-Step is not HDF5 object"); + return; + } + jg_totals = H5Gcreate(jg_node, GRP_TOTALS, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_totals < 0) { + info("Failed to create job node totals"); + return; + } + nsg_totals = get_group(nsg_node, GRP_TOTALS); + if (nsg_totals < 0) { + H5Gclose(jg_totals); + return; + } + + H5Gget_info(nsg_totals, &group_info); + nobj = group_info.nlinks; + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. 
+ len = H5Lget_name_by_idx(nsg_totals, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + info("invalid group name %s", buf); + continue; + } + g_total = H5Gopen(nsg_totals, buf, H5P_DEFAULT); + if (g_total < 0) { + info("Failed to open %s", buf); + continue; + } + type = get_uint32_attribute(g_total, ATTR_DATATYPE); + if (!type) { + H5Gclose(g_total); + info("No %s attribute", ATTR_DATATYPE); + continue; + } + data = get_hdf5_data(g_total, type, buf, &size_data); + if (data == NULL) { + H5Gclose(g_total); + info("Failed to get group %s type %s data", + buf, acct_gather_profile_type_to_string(type)); + continue; + } + put_hdf5_data(jg_totals, type, SUBDATA_DATA, buf, data, 1); + xfree(data); + H5Gclose(g_total); + } + H5Gclose(nsg_totals); + H5Gclose(jg_totals); + return; +} + +/* ============================================================================ + * Functions for merging step data into a job file + ==========================================================================*/ + +static void _merge_node_step_data(hid_t fid_job, char* file_name, int nodeIndex, + char* node_name, hid_t jgid_nodes, + hid_t jgid_tasks) +{ + hid_t fid_nodestep, jgid_node, nsgid_root, nsgid_node; + char *start_time; + char group_name[MAX_GROUP_NAME+1]; + + jgid_node = H5Gcreate(jgid_nodes, node_name, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgid_node < 0) { + error("Failed to create group %s",node_name); + return; + } + put_string_attribute(jgid_node, ATTR_NODENAME, node_name); + // Process node step file + // Open the file and the node group. 
+ fid_nodestep = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_nodestep < 0) { + H5Gclose(jgid_node); + error("Failed to open %s",file_name); + return; + } + nsgid_root = H5Gopen(fid_nodestep,"/", H5P_DEFAULT); + if (nsgid_root < 0) { + /* Release everything opened so far before giving up. */ + H5Fclose(fid_nodestep); + H5Gclose(jgid_node); + error("Failed to open root"); + return; + } + /* Bounded format: node_name is supplied by the caller. */ + snprintf(group_name, sizeof(group_name), "/%s_%s", GRP_NODE, node_name); + nsgid_node = H5Gopen(nsgid_root, group_name, H5P_DEFAULT); + if (nsgid_node < 0) { + /* fid_nodestep is a file handle, so it is closed with H5Fclose. */ + H5Gclose(nsgid_root); + H5Fclose(fid_nodestep); + H5Gclose(jgid_node); + error("Failed to open node group"); + return; + } + start_time = get_string_attribute(nsgid_node,ATTR_STARTTIME); + if (start_time == NULL) { + info("No %s attribute", ATTR_STARTTIME); + } else { + put_string_attribute(jgid_node, ATTR_STARTTIME, start_time); + xfree(start_time); + } + _merge_node_totals(jgid_node, nsgid_node); + _merge_task_totals(jgid_tasks, nsgid_node, node_name); + _merge_series_data(jgid_tasks, jgid_node, nsgid_node); + /* Close handles in reverse order of acquisition. */ + H5Gclose(nsgid_node); + H5Gclose(nsgid_root); + H5Fclose(fid_nodestep); + H5Gclose(jgid_node); + + if (!params.keepfiles) + remove(file_name); + + return; +} + +static int _merge_step_files(void) +{ + hid_t fid_job = -1; + hid_t jgid_step = -1; + hid_t jgid_nodes = -1; + hid_t jgid_tasks = -1; + DIR *dir; + struct dirent *de; + char file_name[MAX_PROFILE_PATH+1]; + char step_dir[MAX_PROFILE_PATH+1]; + char step_path[MAX_PROFILE_PATH+1]; + char jgrp_step_name[MAX_GROUP_NAME+1]; + char jgrp_nodes_name[MAX_GROUP_NAME+1]; + char jgrp_tasks_name[MAX_GROUP_NAME+1]; + char *step_node; + char *pos_char; + char *stepno; + int stepx = 0; + int num_steps = 0; + int nodex = -1; + int max_step = -1; + int jobid, stepid; + bool found_files = false; + + sprintf(step_dir, "%s/%s", params.dir, params.user); + + while (max_step == -1 || stepx <= max_step) { + + if (!(dir = opendir(step_dir))) { + error("Cannot open %s job profile directory: %m", step_dir); + return -1; + } + + nodex = 0; + while ((de = readdir(dir))) { + + strcpy(file_name, de->d_name); + if (file_name[0] == '.') + continue; + + pos_char = strstr(file_name,".h5"); + if (!pos_char) + continue; +
*pos_char = 0; + + pos_char = strchr(file_name,'_'); + if (!pos_char) + continue; + *pos_char = 0; + + jobid = strtol(file_name, NULL, 10); + if (jobid != params.job_id) + continue; + + stepno = pos_char + 1; + pos_char = strchr(stepno,'_'); + if (!pos_char) { + continue; + } + *pos_char = 0; + + stepid = strtol(stepno, NULL, 10); + if (stepid > max_step) + max_step = stepid; + if (stepid != stepx) + continue; + + step_node = pos_char + 1; + + if (!found_files) { + fid_job = H5Fcreate(params.output, + H5F_ACC_TRUNC, + H5P_DEFAULT, + H5P_DEFAULT); + if (fid_job < 0) { + error("Failed create HDF5 file %s", params.output); + return -1; + } + found_files = true; + } + + if (nodex == 0) { + + num_steps++; + sprintf(jgrp_step_name, "/%s_%d", GRP_STEP, + stepx); + + jgid_step = make_group(fid_job, jgrp_step_name); + if (jgid_step < 0) { + error("Failed to create %s", jgrp_step_name); + continue; + } + + sprintf(jgrp_nodes_name,"%s/%s", + jgrp_step_name, + GRP_NODES); + jgid_nodes = make_group(jgid_step, + jgrp_nodes_name); + if (jgid_nodes < 0) { + error("Failed to create %s", jgrp_nodes_name); + continue; + } + + sprintf(jgrp_tasks_name,"%s/%s", + jgrp_step_name, + GRP_TASKS); + jgid_tasks = make_group(jgid_step, + jgrp_tasks_name); + if (jgid_tasks < 0) { + error("Failed to create %s", jgrp_tasks_name); + continue; + } + } + + sprintf(step_path, "%s/%s", step_dir, de->d_name); + debug("Adding %s to the job file", step_path); + _merge_node_step_data(fid_job, step_path, + nodex, step_node, + jgid_nodes, jgid_tasks); + nodex++; + } + + closedir(dir); + + if (nodex > 0) { + put_int_attribute(jgid_step, ATTR_NNODES, nodex); + H5Gclose(jgid_tasks); + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + } + stepx++; + } + + if (!found_files) + info("No node-step files found for jobid %d", params.job_id); + else + put_int_attribute(fid_job, ATTR_NSTEPS, num_steps); + + if (fid_job != -1) + H5Fclose(fid_job); + + return 0; +} + +/* 
============================================================================ + * ============================================================================ + * Functions for data extraction + * ============================================================================ + * ========================================================================= */ + +static hid_t _get_series_parent(hid_t group) +{ + hid_t gid_level = -1; + + if (strcasecmp(params.level, "Node:Totals") == 0) { + gid_level = get_group(group, GRP_TOTALS); + if (gid_level < 0) { + info("Failed to open group %s", GRP_TOTALS); + } + } else if (strcasecmp(params.level, "Node:TimeSeries") == 0) { + gid_level = get_group(group, GRP_SAMPLES); + if (gid_level < 0) { + info("Failed to open group %s", GRP_SAMPLES); + } + } else { + info("%s is an illegal level", params.level); + return -1; + + } + + return gid_level; +} + + +static void _get_series_names(hid_t group) +{ + int i, len; + char buf[MAX_GROUP_NAME+1]; + H5G_info_t group_info; + + H5Gget_info(group, &group_info); + num_series = (int)group_info.nlinks; + if (num_series < 0) { + debug("No Data Series in group"); + return; + } + series_names = xmalloc(sizeof(char*)*num_series); + for (i = 0; (num_series>0) && (i<num_series); i++) { + len = H5Lget_name_by_idx(group, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + info("Invalid series name=%s", buf); + // put into list anyway so list doesn't have a null. + } + series_names[i] = xstrdup(buf); + } + +} + +static void _extract_series(FILE* fp, int stepx, bool header, hid_t gid_level, + char* node_name, char* data_set_name) { + hid_t gid_series; + int size_data; + void *data; + uint32_t type; + char *data_type, *subtype; + hdf5_api_ops_t* ops; + gid_series = get_group(gid_level, data_set_name); + if (gid_series < 0) { + // This is okay, may not have ran long enough for + // a sample (hostname????) 
+ // OR trying to get all tasks + return; + } + data_type = get_string_attribute(gid_series, ATTR_DATATYPE); + if (!data_type) { + H5Gclose(gid_series); + info("No datatype in %s", data_set_name); + return; + } + type = acct_gather_profile_type_from_string(data_type); + xfree(data_type); + subtype = get_string_attribute(gid_series, ATTR_SUBDATATYPE); + if (subtype == NULL) { + H5Gclose(gid_series); + info("No %s attribute", ATTR_SUBDATATYPE); + return; + } + ops = profile_factory(type); + if (ops == NULL) { + xfree(subtype); + H5Gclose(gid_series); + info("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + return; + } + data = get_hdf5_data( + gid_series, type, data_set_name, &size_data); + if (data) { + if (strcmp(subtype,SUBDATA_SUMMARY) != 0) + (*(ops->extract_series)) (fp, header, params.job_id, + stepx, node_name, data_set_name, + data, size_data); + else + (*(ops->extract_total)) (fp, header, params.job_id, + stepx, node_name, data_set_name, + data, size_data); + xfree(data); + } else { + fprintf(fp, "%d,%d,%s,No %s Data\n", + params.job_id, stepx, node_name, + data_set_name); + } + /* subtype is an allocated string; release it on the normal path too. */ + xfree(subtype); + xfree(ops); + H5Gclose(gid_series); + +} +/* + * Extract the series data_set_name for each of the nnodes node groups under + * jgid_nodes, optionally limited to the node named by params.node. + * The header flag is passed as true only for the first node extracted. + */ +static void _extract_node_level(FILE* fp, int stepx, hid_t jgid_nodes, + int nnodes, char* data_set_name) +{ + + hid_t jgid_node, gid_level; + int nodex, len; + char jgrp_node_name[MAX_GROUP_NAME+1]; + bool header = true; + for (nodex=0; nodex<nnodes; nodex++) { + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, nodex, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + info("Invalid node name=%s", jgrp_node_name); + continue; + } + /* Apply the node filter before opening the group so that a skipped + * node never holds an open handle. */ + if (params.node + && strcmp(params.node, "*") + && strcmp(params.node, jgrp_node_name)) + continue; + jgid_node = get_group(jgid_nodes, jgrp_node_name); + if (jgid_node < 0) { + info("Failed to open group %s", jgrp_node_name); + continue; + } + gid_level = _get_series_parent(jgid_node); + if (gid_level == -1) { +
H5Gclose(jgid_node); + continue; + } + _extract_series(fp, stepx, header, gid_level, jgrp_node_name, + data_set_name); + header = false; + H5Gclose(gid_level); + H5Gclose(jgid_node); + } +} + +static void _extract_all_tasks(FILE *fp, hid_t gid_step, hid_t gid_nodes, + int nnodes, int stepx) +{ + + hid_t gid_tasks, gid_task = 0, gid_node = -1, gid_level = -1; + H5G_info_t group_info; + int ntasks, itx, len, task_id; + char task_name[MAX_GROUP_NAME+1]; + char* node_name; + char buf[MAX_GROUP_NAME+1]; + bool hd = true; + + gid_tasks = get_group(gid_step, GRP_TASKS); + if (gid_tasks < 0) + fatal("No tasks in step %d", stepx); + H5Gget_info(gid_tasks, &group_info); + ntasks = (int) group_info.nlinks; + if (ntasks <= 0) + fatal("No tasks in step %d", stepx); + + for (itx = 0; itx<ntasks; itx++) { + // Get the name of the group. + len = H5Lget_name_by_idx(gid_tasks, ".", H5_INDEX_NAME, + H5_ITER_INC, itx, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + gid_task = H5Gopen(gid_tasks, buf, H5P_DEFAULT); + if (gid_task < 0) + fatal("Failed to open %s", buf); + } else + fatal("Illegal task name %s",buf); + task_id = get_int_attribute(gid_task, ATTR_TASKID); + node_name = get_string_attribute(gid_task, ATTR_NODENAME); + sprintf(task_name,"%s_%d", GRP_TASK, task_id); + gid_node = H5Gopen(gid_nodes, node_name, H5P_DEFAULT); + if (gid_node < 0) + fatal("Failed to open %s for Task_%d", + node_name, task_id); + gid_level = get_group(gid_node, GRP_SAMPLES); + if (gid_level < 0) + fatal("Failed to open group %s for node=%s task=%d", + GRP_SAMPLES,node_name, task_id); + _extract_series(fp, stepx, hd, gid_level, node_name, task_name); + + hd = false; + xfree(node_name); + H5Gclose(gid_level); + H5Gclose(gid_node); + H5Gclose(gid_task); + } + H5Gclose(gid_tasks); +} + +/* _extract_data() + */ +static int _extract_data(void) +{ + hid_t fid_job; + hid_t jgid_root; + hid_t jgid_step; + hid_t jgid_nodes; + hid_t jgid_node; + hid_t jgid_level; + int nsteps; 
+ int nnodes; + int stepx; + int isx; + int len; + char jgrp_step_name[MAX_GROUP_NAME+1]; + char jgrp_node_name[MAX_GROUP_NAME+1]; + FILE *fp; + + fp = fopen(params.output, "w"); + if (fp == NULL) { + error("Failed to create output file %s -- %m", + params.output); + /* Nothing can be written without the output stream. */ + return -1; + } + + fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_job < 0) { + /* Do not leak the output stream on the error paths. */ + fclose(fp); + error("Failed to open %s", params.input); + return -1; + } + + jgid_root = H5Gopen(fid_job, "/", H5P_DEFAULT); + if (jgid_root < 0) { + fclose(fp); + H5Fclose(fid_job); + error("Failed to open root"); + return -1; + } + + nsteps = get_int_attribute(jgid_root, ATTR_NSTEPS); + for (stepx = 0; stepx < nsteps; stepx++) { + + if ((params.step_id != -1) && (stepx != params.step_id)) + continue; + + sprintf(jgrp_step_name, "%s_%d", GRP_STEP, stepx); + jgid_step = get_group(jgid_root, jgrp_step_name); + if (jgid_step < 0) { + error("Failed to open group %s", jgrp_step_name); + continue; + } + + if (params.level && !strncasecmp(params.level, "Node:", 5)) { + + nnodes = get_int_attribute(jgid_step, ATTR_NNODES); + + jgid_nodes = get_group(jgid_step, GRP_NODES); + if (jgid_nodes < 0) { + H5Gclose(jgid_step); + error("Failed to open group %s", GRP_NODES); + continue; + } + + /* The first node group is used only to discover the series names. */ + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, 0, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + error("Invalid node name %s", jgrp_node_name); + continue; + } + + jgid_node = get_group(jgid_nodes, jgrp_node_name); + if (jgid_node < 0) { + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + info("Failed to open group %s", jgrp_node_name); + continue; + } + + jgid_level = _get_series_parent(jgid_node); + if (jgid_level == -1) { + H5Gclose(jgid_node); + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + continue; + } + + _get_series_names(jgid_level); + H5Gclose(jgid_level); + H5Gclose(jgid_node); + + if (!params.series || !strcmp(params.series, "*")) { + for
(isx = 0; isx < num_series; isx++) { + if (strncasecmp(series_names[isx], + GRP_TASK, + strlen(GRP_TASK)) == 0) + continue; + _extract_node_level(fp, stepx, jgid_nodes, + nnodes, + series_names[isx]); + // Now handle all tasks. + } + } else if (strcasecmp(params.series, GRP_TASKS) == 0 + || strcasecmp(params.series, GRP_TASK) == 0) { + for (isx = 0; isx < num_series; isx++) { + if (strstr(series_names[isx], + GRP_TASK)) { + _extract_node_level(fp, stepx, jgid_nodes, + nnodes, + series_names[isx]); + } + } + } else { + _extract_node_level(fp, stepx, jgid_nodes, + nnodes, + params.series); + } + + _delete_string_list(series_names, num_series); + series_names = NULL; + num_series = 0; + if (!params.series || !strcmp(params.series, "*")) + _extract_all_tasks(fp, jgid_step, jgid_nodes, + nnodes, stepx); + + H5Gclose(jgid_nodes); + } else { + error("%s is an illegal level", params.level); + } + H5Gclose(jgid_step); + } + + H5Gclose(jgid_root); + H5Fclose(fid_job); + fclose(fp); + + return 0; +} + + +/* ============================================================================ + * ============================================================================ + * Functions for data item extraction + * ============================================================================ + * ========================================================================= */ + +// Get the data_set for a node +static void *_get_series_data(hid_t jgid_node, char* series, + hdf5_api_ops_t **ops_p, int *nsmp) +{ + + hid_t gid_level, gid_series; + int size_data; + void *data; + uint32_t type; + char *data_type; + hdf5_api_ops_t* ops; + + *nsmp = 0; // Initialize return arguments. 
+ *ops_p = NULL; + + // Navigate from the node group to the data set + gid_level = get_group(jgid_node, GRP_SAMPLES); + if (gid_level == -1) { + return NULL; + } + gid_series = get_group(gid_level, series); + if (gid_series < 0) { + // This is okay, may not have ran long enough for + // a sample (srun hostname) + H5Gclose(gid_level); + return NULL; + } + data_type = get_string_attribute(gid_series, ATTR_DATATYPE); + if (!data_type) { + H5Gclose(gid_series); + H5Gclose(gid_level); + debug("No datatype in %s", series); + return NULL; + } + // Invoke the data type operator to get the data set + type = acct_gather_profile_type_from_string(data_type); + xfree(data_type); + ops = profile_factory(type); + if (ops == NULL) { + H5Gclose(gid_series); + H5Gclose(gid_level); + debug("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + return NULL; + } + data = get_hdf5_data(gid_series, type, series, &size_data); + if (data) { + *nsmp = (size_data / ops->dataset_size()); + *ops_p = ops; + } else { + xfree(ops); + } + H5Gclose(gid_series); + H5Gclose(gid_level); + return data; +} + +static void _series_analysis(FILE *fp, bool hd, int stepx, int nseries, + int nsmp, char **series_name, char **tod, double *et, + double **all_series, uint64_t *series_smp) +{ + double *mn_series; // Min Value, each sample + double *mx_series; // Max value, each sample + double *sum_series; // Total of all series, each sample + double *smp_series; // all samples for one node + uint64_t *mn_sx; // Index of series with minimum value + uint64_t *mx_sx; // Index of series with maximum value + uint64_t *series_in_smp; // Number of series in the sample + int max_smpx = 0; + double max_smp_series = 0; + double ave_series; + int ix, isx; + + mn_series = xmalloc(nsmp * sizeof(double)); + mx_series = xmalloc(nsmp * sizeof(double)); + sum_series =xmalloc(nsmp * sizeof(double)); + mn_sx = xmalloc(nsmp * sizeof(uint64_t)); + mx_sx = xmalloc(nsmp * sizeof(uint64_t)); + series_in_smp 
= xmalloc(nsmp * sizeof(uint64_t)); + + for (ix = 0; ix < nsmp; ix++) { + for (isx=0; isx<nseries; isx++) { + if (series_smp[isx]<nsmp && ix>=series_smp[isx]) + continue; + series_in_smp[ix]++; + smp_series = all_series[isx]; + if (smp_series) { + sum_series[ix] += smp_series[ix]; + if (mn_series[ix] == 0 + || smp_series[ix] < mn_series[ix]) { + mn_series[ix] = smp_series[ix]; + mn_sx[ix] = isx; + } + if (mx_series[ix] == 0 + || smp_series[ix] > mx_series[ix]) { + mx_series[ix] = smp_series[ix]; + mx_sx[ix] = isx; + } + } + } + } + + for (ix = 0; ix < nsmp; ix++) { + if (sum_series[ix] > max_smp_series) { + max_smpx = ix; + max_smp_series = sum_series[ix]; + } + } + + ave_series = sum_series[max_smpx] / series_in_smp[max_smpx]; + printf(" Step %d Maximum accumulated %s Value (%f) occurred " + "at %s (Elapsed Time=%d) Ave Node %f\n", + stepx, params.data_item, max_smp_series, + tod[max_smpx], (int) et[max_smpx], ave_series); + + // Put data for step + if (!hd) { + fprintf(fp,"TOD,Et,JobId,StepId,Min Node,Min %s," + "Ave %s,Max Node,Max %s,Total %s," + "Num Nodes",params.data_item,params.data_item, + params.data_item,params.data_item); + for (isx = 0; isx < nseries; isx++) { + fprintf(fp,",%s",series_name[isx]); + } + fprintf(fp,"\n"); + } + + for (ix = 0; ix < nsmp; ix++) { + fprintf(fp,"%s, %d",tod[ix], (int) et[ix]); + fprintf(fp,",%d,%d",params.job_id,stepx); + fprintf(fp,",%s,%f",series_name[mn_sx[ix]], + mn_series[ix]); + ave_series = sum_series[ix] / series_in_smp[ix]; + fprintf(fp,",%f",ave_series); + fprintf(fp,",%s,%f",series_name[mx_sx[ix]], + mx_series[ix]); + fprintf(fp,",%f",sum_series[ix]); + fprintf(fp,",%"PRIu64"",series_in_smp[ix]); + for (isx = 0; isx < nseries; isx++) { + if (series_smp[isx]<nsmp && ix>=series_smp[isx]) { + fprintf(fp,",0.0"); + } else { + smp_series = all_series[isx]; + fprintf(fp,",%f",smp_series[ix]); + } + } + fprintf(fp,"\n"); + } + + xfree(mn_series); + xfree(mx_series); + xfree(sum_series); + xfree(mn_sx); + xfree(mx_sx); + /* series_in_smp is allocated with the arrays above and must be released with them. */ + xfree(series_in_smp);
+} + +/* + * Collect params.data_item from the series params.series of every node group + * in the step and pass the per-node values to _series_analysis(). + */ +static void _get_all_node_series(FILE *fp, bool hd, hid_t jgid_step, int stepx) +{ + char **tod = NULL; // Date time at each sample + char **node_name; // Node Names + double **all_series; // Pointers to all sampled for each node + double *et = NULL; // Elapsed time at each sample + uint64_t *series_smp; // Number of samples in this series + + hid_t jgid_nodes, jgid_node; + int nnodes, ndx, len, nsmp = 0, nitem = -1; + char jgrp_node_name[MAX_GROUP_NAME+1]; + void* series_data = NULL; + hdf5_api_ops_t* ops; + + nnodes = get_int_attribute(jgid_step, ATTR_NNODES); + // allocate node arrays + + series_smp = xmalloc(nnodes * (sizeof(uint64_t))); + if (series_smp == NULL) + fatal("Failed to get memory for node_samples"); + + node_name = xmalloc(nnodes * (sizeof(char*))); + if (node_name == NULL) + fatal("Failed to get memory for node_name"); + + all_series = xmalloc(nnodes * (sizeof(double*))); + if (all_series == NULL) + fatal("Failed to get memory for all_series"); + + jgid_nodes = get_group(jgid_step, GRP_NODES); + if (jgid_nodes < 0) + fatal("Failed to open group %s", GRP_NODES); + + for (ndx=0; ndx<nnodes; ndx++) { + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, ndx, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + debug("Invalid node name=%s", jgrp_node_name); + continue; + } + node_name[ndx] = xstrdup(jgrp_node_name); + jgid_node = get_group(jgid_nodes, jgrp_node_name); + if (jgid_node < 0) { + debug("Failed to open group %s", jgrp_node_name); + continue; + } + ops = NULL; + nitem = 0; + series_data = _get_series_data(jgid_node, params.series, + &ops, &nitem); + if (series_data==NULL || nitem==0 || ops==NULL) { + if (ops != NULL) + xfree(ops); + /* Skipping this node: release its data buffer and group handle. */ + xfree(series_data); + H5Gclose(jgid_node); + continue; + } + all_series[ndx] = ops->get_series_values( + params.data_item, series_data, nitem); + if (!all_series[ndx]) + fatal("No data item %s",params.data_item); + series_smp[ndx] = nitem; + if (ndx == 0) { + nsmp = nitem; + tod =
ops->get_series_tod(series_data, nitem); + et = ops->get_series_values("time", + series_data, nitem); + } else { + if (nitem > nsmp) { + // new largest number of samples + _delete_string_list(tod, nsmp); + xfree(et); + nsmp = nitem; + tod = ops->get_series_tod(series_data, + nitem); + et = ops->get_series_values("time", + series_data, nitem); + } + } + xfree(ops); + xfree(series_data); + H5Gclose(jgid_node); + } + if (nsmp == 0) { + // May be bad series name + info("No values %s for series %s found in step %d", + params.data_item,params.series, + stepx); + } else { + _series_analysis(fp, hd, stepx, nnodes, nsmp, + node_name, tod, et, all_series, series_smp); + } + for (ndx=0; ndx<nnodes; ndx++) { + xfree(node_name[ndx]); + xfree(all_series[ndx]); + } + xfree(node_name); + xfree(all_series); + xfree(series_smp); + _delete_string_list(tod, nsmp); + xfree(et); + + H5Gclose(jgid_nodes); + +} + +static void _get_all_task_series(FILE *fp, bool hd, hid_t jgid_step, int stepx) +{ + + hid_t jgid_tasks, jgid_task = 0, jgid_nodes, jgid_node; + H5G_info_t group_info; + int ntasks,itx, tid; + uint64_t *task_id; + char **task_node_name; /* Node Name for each task */ + char **tod = NULL; /* Date time at each sample */ + char **series_name; /* Node Names */ + double **all_series; /* Pointers to all sampled for each node */ + double *et = NULL; /* Elapsed time at each sample */ + uint64_t *series_smp; /* Number of samples in this series */ + int nnodes, ndx, len, nsmp = 0, nitem = -1; + char jgrp_node_name[MAX_GROUP_NAME+1]; + char jgrp_task_name[MAX_GROUP_NAME+1]; + char buf[MAX_GROUP_NAME+1]; + void* series_data = NULL; + hdf5_api_ops_t* ops; + + jgid_nodes = get_group(jgid_step, GRP_NODES); + if (jgid_nodes < 0) + fatal("Failed to open group %s", GRP_NODES); + jgid_tasks = get_group(jgid_step, GRP_TASKS); + if (jgid_tasks < 0) + fatal("No tasks in step %d", stepx); + H5Gget_info(jgid_tasks, &group_info); + ntasks = (int) group_info.nlinks; + if (ntasks <= 0) + fatal("No tasks in 
step %d", stepx); + task_id = xmalloc(ntasks*sizeof(uint64_t)); + if (task_id == NULL) + fatal("Failed to get memory for task_ids"); + task_node_name = xmalloc(ntasks*sizeof(char*)); + if (task_node_name == NULL) + fatal("Failed to get memory for task_node_names"); + + for (itx = 0; itx<ntasks; itx++) { + // Get the name of the group. + len = H5Lget_name_by_idx(jgid_tasks, ".", H5_INDEX_NAME, + H5_ITER_INC, itx, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + jgid_task = H5Gopen(jgid_tasks, buf, H5P_DEFAULT); + if (jgid_task < 0) + fatal("Failed to open %s", buf); + } else + fatal("Illegal task name %s",buf); + task_id[itx] = get_int_attribute(jgid_task, ATTR_TASKID); + task_node_name[itx] = get_string_attribute(jgid_task, + ATTR_NODENAME); + H5Gclose(jgid_task); + } + H5Gclose(jgid_tasks); + + nnodes = get_int_attribute(jgid_step, ATTR_NNODES); + // allocate node arrays + series_smp = (uint64_t*) xmalloc(ntasks*(sizeof(uint64_t))); + if (series_smp == NULL) + fatal("Failed to get memory for node_samples"); + series_name = (char**) xmalloc(ntasks*(sizeof(char*))); + if (series_name == NULL) + fatal("Failed to get memory for series_name"); + all_series = (double**) xmalloc(ntasks*(sizeof(double*))); + if (all_series == NULL) + fatal("Failed to get memory for all_series"); + + for (ndx=0; ndx<nnodes; ndx++) { + + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, ndx, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) + fatal("Invalid node name=%s", jgrp_node_name); + jgid_node = get_group(jgid_nodes, jgrp_node_name); + + if (jgid_node < 0) + fatal("Failed to open group %s", jgrp_node_name); + for (itx = 0; itx<ntasks; itx++) { + if (strcmp(jgrp_node_name, task_node_name[itx]) != 0) + continue; + tid = task_id[itx]; + series_name[itx] = xstrdup_printf("%s_%d %s", + GRP_TASK,tid,jgrp_node_name); + sprintf(jgrp_task_name,"%s_%d",GRP_TASK, tid); + + ops = NULL; + nitem = 0; 
+ series_data = _get_series_data(jgid_node, + jgrp_task_name, &ops, &nitem); + if (series_data==NULL || nitem==0 || ops==NULL) { + if (ops != NULL) + xfree(ops); + /* Skipping this task: release its data buffer as well. */ + xfree(series_data); + continue; + } + all_series[itx] = ops->get_series_values( + params.data_item, series_data, nitem); + /* Check the slot just written; it is indexed by task (itx), not node. */ + if (!all_series[itx]) + fatal("No data item %s",params.data_item); + series_smp[itx] = nitem; + if (nsmp == 0) { + nsmp = nitem; + tod = ops->get_series_tod(series_data, nitem); + et = ops->get_series_values("time", + series_data, nitem); + } else { + if (nitem > nsmp) { + // new largest number of samples + _delete_string_list(tod, nsmp); + xfree(et); + nsmp = nitem; + tod = ops->get_series_tod(series_data, + nitem); + et = ops->get_series_values("time", + series_data, nitem); + } + } + xfree(ops); + xfree(series_data); + } + H5Gclose(jgid_node); + } + if (nsmp == 0) { + // May be bad series name + info("No values %s for series %s found in step %d", + params.data_item,params.series, + stepx); + } else { + _series_analysis(fp, hd, stepx, ntasks, nsmp, + series_name, tod, et, all_series, series_smp); + } + for (itx=0; itx<ntasks; itx++) { + /* Each series name is a separately allocated string. */ + xfree(series_name[itx]); + xfree(all_series[itx]); + } + xfree(series_name); + xfree(all_series); + xfree(series_smp); + _delete_string_list(tod, nsmp); + xfree(et); + _delete_string_list(task_node_name, ntasks); + xfree(task_id); + + H5Gclose(jgid_nodes); +} + +static int _series_data(void) +{ + FILE *fp; + bool hd = false; + hid_t fid_job; + hid_t jgid_root; + hid_t jgid_step; + int nsteps; + int stepx; + char jgrp_step_name[MAX_GROUP_NAME + 1]; + + fp = fopen(params.output, "w"); + if (fp == NULL) { + error("Failed open file %s -- %m", params.output); + return -1; + } + + fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_job < 0) { + fclose(fp); + error("Failed to open %s", params.input); + return -1; + } + + jgid_root = H5Gopen(fid_job, "/", H5P_DEFAULT); + if (jgid_root < 0) { + fclose(fp); + H5Fclose(fid_job); + error("Failed to open root"); + return -1; + } + + nsteps
= get_int_attribute(jgid_root, ATTR_NSTEPS); + for (stepx = 0; stepx < nsteps; stepx++) { + + if ((params.step_id != -1) && (stepx != params.step_id)) + continue; + + sprintf(jgrp_step_name, "%s_%d", GRP_STEP, stepx); + jgid_step = get_group(jgid_root, jgrp_step_name); + if (jgid_step < 0) { + error("Failed to open group %s", jgrp_step_name); + return -1; + } + + if (strncmp(params.series,GRP_TASK,strlen(GRP_TASK)) == 0) + _get_all_task_series(fp,hd,jgid_step, stepx); + else + _get_all_node_series(fp,hd,jgid_step, stepx); + + hd = true; + H5Gclose(jgid_step); + } + + H5Gclose(jgid_root); + H5Fclose(fid_job); + fclose(fp); + + return 0; +} diff --git a/src/plugins/acct_gather_profile/none/Makefile.am b/src/plugins/acct_gather_profile/none/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..cc5d507772b1ddcba5a764bfe4df4a9613cb48b9 --- /dev/null +++ b/src/plugins/acct_gather_profile/none/Makefile.am @@ -0,0 +1,15 @@ +# Makefile for acct_gather_profile/none plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +pkglib_LTLIBRARIES = acct_gather_profile_none.la + +# Null job completion logging plugin. +acct_gather_profile_none_la_SOURCES = acct_gather_profile_none.c + +acct_gather_profile_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) + diff --git a/src/plugins/acct_gather_profile/none/Makefile.in b/src/plugins/acct_gather_profile/none/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..f5c1fb5cef30a447179f3f456ed3a5535b973afd --- /dev/null +++ b/src/plugins/acct_gather_profile/none/Makefile.in @@ -0,0 +1,717 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for acct_gather_profile/none plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_profile/none +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + 
$(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e 
"s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +acct_gather_profile_none_la_LIBADD = +am_acct_gather_profile_none_la_OBJECTS = acct_gather_profile_none.lo +acct_gather_profile_none_la_OBJECTS = \ + $(am_acct_gather_profile_none_la_OBJECTS) +acct_gather_profile_none_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(acct_gather_profile_none_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(acct_gather_profile_none_la_SOURCES) +DIST_SOURCES = $(acct_gather_profile_none_la_SOURCES) +am__can_run_installinfo = \ 
+ case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = 
@HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ 
+SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = 
@pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common +pkglib_LTLIBRARIES = acct_gather_profile_none.la + +# Null job completion logging plugin. +acct_gather_profile_none_la_SOURCES = acct_gather_profile_none.c +acct_gather_profile_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/none/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/none/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +acct_gather_profile_none.la: $(acct_gather_profile_none_la_OBJECTS) $(acct_gather_profile_none_la_DEPENDENCIES) $(EXTRA_acct_gather_profile_none_la_DEPENDENCIES) + $(acct_gather_profile_none_la_LINK) -rpath $(pkglibdir) $(acct_gather_profile_none_la_OBJECTS) $(acct_gather_profile_none_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/acct_gather_profile_none.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo 
$(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + 
for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c b/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c new file mode 100644 index 0000000000000000000000000000000000000000..b5459d817d67f18a27c8aec8149d933bcf7bb1b5 --- /dev/null +++ b/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c @@ -0,0 +1,156 @@ +/*****************************************************************************\ + * acct_gather_profile_none.c - slurm profile accounting plugin for none. + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +\*****************************************************************************/ + +/* acct_gather_profile_none + * This plugin does not initiate a node-level thread. + * It is the acct_gather_profile stub. + */ + +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" + +#include <fcntl.h> +#include <signal.h> + +#define _DEBUG 1 +#define _DEBUG_STATS 1 + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method> + * is a description of how this plugin satisfies that application. 
SLURM will
+ * only load acct_gather_profile plugins if the plugin_type string has a
+ * prefix of "acct_gather_profile/".
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin. If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000. Various SLURM versions will likely require a certain
+ * minimum version for their plugins as the job accounting API
+ * matures.
+ */
+const char plugin_name[] = "AcctGatherProfile NONE plugin";
+/* Must be all lower case so it matches the plugin's directory/library name. */
+const char plugin_type[] = "acct_gather_profile/none";
+const uint32_t plugin_version = 100;
+
+/*
+ * init() is called when the plugin is loaded, before any other functions
+ * are called. Put global initialization here.
+ *
+ * This stub only logs the plugin name at verbose level.
+ * RET SLURM_SUCCESS, always.
+ */
+extern int init(void)
+{
+	verbose("%s loaded", plugin_name);
+	return SLURM_SUCCESS;
+}
+
+/*
+ * fini() - counterpart of init(). This stub keeps no state, so there is
+ * nothing to release.
+ * RET SLURM_SUCCESS, always.
+ */
+extern int fini(void)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * No-op: the "none" plugin reads no settings from the parsed
+ * configuration table. tbl is ignored.
+ */
+extern void acct_gather_profile_p_conf_set(s_p_hashtbl_t *tbl)
+{
+}
+
+/*
+ * No-op: the "none" plugin contributes no configuration options.
+ * full_options and full_options_cnt are left untouched.
+ */
+extern void acct_gather_profile_p_conf_options(s_p_options_t **full_options,
+					       int *full_options_cnt)
+{
+}
+
+/*
+ * Report profiling settings for the "none" plugin.
+ * IN  info_type - which value is requested
+ * OUT data      - for ACCT_GATHER_PROFILE_DEFAULT and
+ *                 ACCT_GATHER_PROFILE_RUNNING, treated as a uint32_t * and
+ *                 set to ACCT_GATHER_PROFILE_NONE; left untouched for any
+ *                 other info_type. A NULL data pointer is ignored.
+ */
+extern void acct_gather_profile_p_get(enum acct_gather_profile_info info_type,
+				      void *data)
+{
+	uint32_t *uint32 = data;
+
+	switch (info_type) {
+	case ACCT_GATHER_PROFILE_DEFAULT:
+	case ACCT_GATHER_PROFILE_RUNNING:
+		/* Guard the only write path against a NULL output pointer. */
+		if (uint32)
+			*uint32 = ACCT_GATHER_PROFILE_NONE;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * No-op stub; job is ignored.
+ * RET SLURM_SUCCESS, always.
+ */
+extern int acct_gather_profile_p_node_step_start(slurmd_job_t *job)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * No-op stub.
+ * RET SLURM_SUCCESS, always.
+ */
+extern int acct_gather_profile_p_node_step_end(void)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * No-op stub; taskid is ignored.
+ * RET SLURM_SUCCESS, always.
+ */
+extern int acct_gather_profile_p_task_start(uint32_t taskid)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * No-op stub; taskpid is ignored.
+ * RET SLURM_SUCCESS, always.
+ */
+extern int acct_gather_profile_p_task_end(pid_t taskpid)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * No-op stub; the sample is discarded (type and data are ignored).
+ * RET SLURM_SUCCESS, always.
+ */
+extern int acct_gather_profile_p_add_sample_data(uint32_t type, void *data)
+{
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/auth/Makefile.in
b/src/plugins/auth/Makefile.in index ba94740db8721e4cbbf1159668be592427a94782..e9e1c7da901e2a041ce1f0d083d065a0da287364 100644 --- a/src/plugins/auth/Makefile.in +++ b/src/plugins/auth/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/auth DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ 
GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/auth/authd/Makefile.in b/src/plugins/auth/authd/Makefile.in index ba94cec7a9d868fb15c9bc67b60013b49c18f19b..a2840601dc65fcdaaaea16be5e38fd27d1bd0da3 100644 --- a/src/plugins/auth/authd/Makefile.in +++ b/src/plugins/auth/authd/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/auth/authd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = 
$(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = 
@HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/auth/authd/auth_authd.c b/src/plugins/auth/authd/auth_authd.c index 76865ae385724713c5ed0a588782426f5f729641..e48a6a63ea1da86746bba9c114e897cb4b3d2f0b 100644 --- a/src/plugins/auth/authd/auth_authd.c +++ b/src/plugins/auth/authd/auth_authd.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -306,7 +306,7 @@ slurm_auth_unpack( Buf buf ) } safe_unpack32( &version, buf ); - if( version < min_plug_version ) { + if ( version < min_plug_version ) { plugin_errno = SLURM_AUTH_VERSION; return NULL; } diff --git a/src/plugins/auth/munge/Makefile.in b/src/plugins/auth/munge/Makefile.in index 2785924e5c8a3069425802e7da2fe0a4e016ba2d..214e1b38b825be217ec9ee95241cc4966d5492c4 100644 --- a/src/plugins/auth/munge/Makefile.in +++ b/src/plugins/auth/munge/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/auth/munge DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ 
CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c index c0abf562d91f4361f739e2e7e774b451696faf4d..c4589301ab6d06a51392660b580630a79d3f73ef 100644 --- a/src/plugins/auth/munge/auth_munge.c +++ b/src/plugins/auth/munge/auth_munge.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. 
All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/auth/none/Makefile.in b/src/plugins/auth/none/Makefile.in index f36a43ac930cd636211371cfab01d90f237fa9c5..c677d98add8b2baafabcbeb1b1abb3b6a4209aef 100644 --- a/src/plugins/auth/none/Makefile.in +++ b/src/plugins/auth/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/auth/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = 
@CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/auth/none/auth_none.c b/src/plugins/auth/none/auth_none.c index dfd2d9525b1d9d0138f244c64f5a89207926108c..20ac3cb55ab844c36e5d5def864dd871fa324930 100644 --- a/src/plugins/auth/none/auth_none.c +++ b/src/plugins/auth/none/auth_none.c @@ -8,7 +8,7 
@@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/checkpoint/Makefile.in b/src/plugins/checkpoint/Makefile.in index 4d93aab295468043703058dc39447c8564a81f41..4fbacd808dd1d0d2db5d973bf469ed5749997b5a 100644 --- a/src/plugins/checkpoint/Makefile.in +++ b/src/plugins/checkpoint/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/checkpoint DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ 
+CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/checkpoint/aix/Makefile.in b/src/plugins/checkpoint/aix/Makefile.in index 53a8c290fd3c9a7973085913b6f28b0d33ca6ca2..0a658b15c6278f9cff787ee298d2c1b3e81ee697 100644 --- a/src/plugins/checkpoint/aix/Makefile.in +++ 
b/src/plugins/checkpoint/aix/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/checkpoint/aix DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -185,6 +189,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -205,6 +211,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -214,6 +223,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = 
@H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -221,6 +232,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -255,6 +275,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -282,6 +305,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/checkpoint/aix/checkpoint_aix.c b/src/plugins/checkpoint/aix/checkpoint_aix.c index 62e7889b3a3c3956deadb0800be219dcbea3ddb6..b964739de9d0a216f0fd23922d7ef98e3a4aaf38 100644 --- a/src/plugins/checkpoint/aix/checkpoint_aix.c +++ b/src/plugins/checkpoint/aix/checkpoint_aix.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -359,6 +359,16 @@ extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer, return SLURM_ERROR; } +extern check_jobinfo_t slurm_ckpt_copy_job(check_jobinfo_t jobinfo) +{ + struct check_job_info *jobinfo_src, *jobinfo_dest; + + jobinfo_src = (struct check_job_info *)jobinfo; + jobinfo_dest = xmalloc(sizeof(struct check_job_info)); + memcpy(jobinfo_dest, jobinfo_src, sizeof(struct check_job_info)); + return (check_jobinfo_t) jobinfo_dest; +} + /* Send a signal RPC to a specific node */ static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal, char *node_name, slurm_addr_t node_addr) diff --git a/src/plugins/checkpoint/blcr/Makefile.in b/src/plugins/checkpoint/blcr/Makefile.in index 53c6b78b1d7fe69f664dc6f6aae20a661ca1473a..4a5d8632e678ca62a8925dc77ca716e215251d99 100644 --- a/src/plugins/checkpoint/blcr/Makefile.in +++ b/src/plugins/checkpoint/blcr/Makefile.in @@ -61,6 +61,7 @@ DIST_COMMON = $(am__dist_pkglibexec_SCRIPTS_DIST) \ $(srcdir)/cr_checkpoint.sh.in $(srcdir)/cr_restart.sh.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -78,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -86,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4
\ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -191,6 +195,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -211,6 +217,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -220,6 +229,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -227,6 +238,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -261,6 +281,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -288,6 +311,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ 
+RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/checkpoint/blcr/checkpoint_blcr.c b/src/plugins/checkpoint/blcr/checkpoint_blcr.c index 663b45d7ad159ffddf83afc09f1584b14778ca60..010e1a77e993f81e812637d5617ddb38b54f1ee9 100644 --- a/src/plugins/checkpoint/blcr/checkpoint_blcr.c +++ b/src/plugins/checkpoint/blcr/checkpoint_blcr.c @@ -380,6 +380,16 @@ extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer, return SLURM_ERROR; } +extern check_jobinfo_t slurm_ckpt_copy_job(check_jobinfo_t jobinfo) +{ + struct check_job_info *jobinfo_src, *jobinfo_dest; + + jobinfo_src = (struct check_job_info *)jobinfo; + jobinfo_dest = xmalloc(sizeof(struct check_job_info)); + memcpy(jobinfo_dest, jobinfo_src, sizeof(struct check_job_info)); + return (check_jobinfo_t) jobinfo_dest; +} + extern int slurm_ckpt_stepd_prefork(slurmd_job_t *job) { char *old_env = NULL, *new_env = NULL, *ptr = NULL, *save_ptr = NULL; diff --git a/src/plugins/checkpoint/none/Makefile.in b/src/plugins/checkpoint/none/Makefile.in index 86803f4f2ab55748b71ba84d970d832fd20288d4..843f9e7fc3a532c50847e45f37bc595fd91048cd 100644 --- a/src/plugins/checkpoint/none/Makefile.in +++ b/src/plugins/checkpoint/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/checkpoint/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ 
$(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ 
+OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/checkpoint/none/checkpoint_none.c b/src/plugins/checkpoint/none/checkpoint_none.c index fc977e031cf3c7c65408d191f2c9e754588a8135..987245b9ab41cc3961386196c15c2c77e70cd060 100644 --- a/src/plugins/checkpoint/none/checkpoint_none.c +++ b/src/plugins/checkpoint/none/checkpoint_none.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -141,6 +141,11 @@ extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer, return SLURM_SUCCESS; } +extern check_jobinfo_t slurm_ckpt_copy_job(check_jobinfo_t jobinfo) +{ + return NULL; +} + extern int slurm_ckpt_task_comp (struct step_record * step_ptr, uint32_t task_id, time_t event_time, uint32_t error_code, char *error_msg ) diff --git a/src/plugins/checkpoint/ompi/Makefile.in b/src/plugins/checkpoint/ompi/Makefile.in index 26f0170123f6089a894e74ae713a2fbe6e4e5156..1ebb17f9ede6bfb85e70202a2707a3071236ddfc 100644 --- a/src/plugins/checkpoint/ompi/Makefile.in +++ b/src/plugins/checkpoint/ompi/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/checkpoint/ompi DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ 
-180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/checkpoint/ompi/checkpoint_ompi.c b/src/plugins/checkpoint/ompi/checkpoint_ompi.c index 
27b5991cd16f0f9ca8ba70d4301a70d53a168d5c..ef2419f8828d80fd3914da6db703e35571c95dc9 100644 --- a/src/plugins/checkpoint/ompi/checkpoint_ompi.c +++ b/src/plugins/checkpoint/ompi/checkpoint_ompi.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -289,6 +289,16 @@ extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer, return SLURM_ERROR; } +extern check_jobinfo_t slurm_ckpt_copy_job(check_jobinfo_t jobinfo) +{ + struct check_job_info *jobinfo_src, *jobinfo_dest; + + jobinfo_src = (struct check_job_info *)jobinfo; + jobinfo_dest = xmalloc(sizeof(struct check_job_info)); + memcpy(jobinfo_dest, jobinfo_src, sizeof(struct check_job_info)); + return (check_jobinfo_t) jobinfo_dest; +} + static int _ckpt_step(struct step_record * step_ptr, uint16_t wait, int vacate) { struct check_job_info *check_ptr; diff --git a/src/plugins/checkpoint/poe/Makefile.in b/src/plugins/checkpoint/poe/Makefile.in index 86be037bafc8cb87508e8ad2c71fc84d3084a031..1857c347d12494aafd2b7d1002e1a112224ac165 100644 --- a/src/plugins/checkpoint/poe/Makefile.in +++ b/src/plugins/checkpoint/poe/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/checkpoint/poe DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -185,6 +189,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -205,6 +211,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -214,6 +223,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -221,6 +232,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -255,6 +275,9 @@ NRT_CPPFLAGS = 
@NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -282,6 +305,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/checkpoint/poe/checkpoint_poe.c b/src/plugins/checkpoint/poe/checkpoint_poe.c index d3b16fd6eada0fa4de7895d066301febf016940c..b1a95734e49d09f853bdf3912a18945c9794097c 100644 --- a/src/plugins/checkpoint/poe/checkpoint_poe.c +++ b/src/plugins/checkpoint/poe/checkpoint_poe.c @@ -14,7 +14,7 @@ * Written by Morris Jette <jette1@llnl.gov> and <jette@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -364,6 +364,17 @@ extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer, return SLURM_ERROR; } +extern check_jobinfo_t slurm_ckpt_copy_job(check_jobinfo_t jobinfo) +{ + struct check_job_info *jobinfo_src, *jobinfo_dest; + + jobinfo_src = (struct check_job_info *)jobinfo; + jobinfo_dest = xmalloc(sizeof(struct check_job_info)); + memcpy(jobinfo_dest, jobinfo_src, sizeof(struct check_job_info)); + jobinfo_dest->error_msg = xstrdup(jobinfo_src->error_msg); + return (check_jobinfo_t) jobinfo_dest; +} + /* Send a signal RPC to a specific node */ static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal, char *node_name, slurm_addr_t node_addr) diff --git a/src/plugins/crypto/Makefile.in b/src/plugins/crypto/Makefile.in index 6f6edd887ac16a7eaf287a7e9e8ab53deac0f66f..ae61785c566967d940abb535ec9b64a35e0fd4f5 100644 --- a/src/plugins/crypto/Makefile.in +++ b/src/plugins/crypto/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/crypto DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \
$(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ 
+RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/crypto/munge/Makefile.in b/src/plugins/crypto/munge/Makefile.in index fdd65b29375e5ce520733f738c41243551f91146..da61d42c4ef191718e1fc34434b10b7b52efde5a 100644 --- a/src/plugins/crypto/munge/Makefile.in +++ b/src/plugins/crypto/munge/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/crypto/munge DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ 
ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/crypto/munge/crypto_munge.c b/src/plugins/crypto/munge/crypto_munge.c index 604f8beedbb2ae27b2090eea58dd7bfd33352235..a3be4669026e59cbc71ec391aa576cb360040a5f 100644 --- a/src/plugins/crypto/munge/crypto_munge.c +++ b/src/plugins/crypto/munge/crypto_munge.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/crypto/openssl/Makefile.in b/src/plugins/crypto/openssl/Makefile.in index 9e168693b2ab5073ab0c6c31e00f3bc9bf2415f8..2538153851a8625f57b6e67a00f8d0a32b67c898 100644 --- a/src/plugins/crypto/openssl/Makefile.in +++ b/src/plugins/crypto/openssl/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/crypto/openssl DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -187,6 +191,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = 
@CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -207,6 +213,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -216,6 +225,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -223,6 +234,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -257,6 +277,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -284,6 +307,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/crypto/openssl/crypto_openssl.c b/src/plugins/crypto/openssl/crypto_openssl.c index fa2a2836a938723fe5b4c557ecc431f3686e690e..f110a6250d4089815cfa474fab7dd60fb4cb1161 100644 --- a/src/plugins/crypto/openssl/crypto_openssl.c +++ 
b/src/plugins/crypto/openssl/crypto_openssl.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/ext_sensors/Makefile.am b/src/plugins/ext_sensors/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..8beb1a7a50bad8344e9bfa24b5ea93f310b5a4a9 --- /dev/null +++ b/src/plugins/ext_sensors/Makefile.am @@ -0,0 +1,3 @@ +# Makefile for ext sensors plugins + +SUBDIRS = rrd none diff --git a/src/plugins/ext_sensors/Makefile.in b/src/plugins/ext_sensors/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..cfc6495d3d85c31bd167b88691a188a0469fbb7e --- /dev/null +++ b/src/plugins/ext_sensors/Makefile.in @@ -0,0 +1,735 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for ext sensors plugins +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/ext_sensors +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = 
$(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ 
+GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = 
@PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ 
+builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = rrd none +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/plugins/ext_sensors/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/plugins/ext_sensors/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ 
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/ext_sensors/none/Makefile.am b/src/plugins/ext_sensors/none/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..1c88177d5657849bd2786a51ff4273b849d71ecf --- /dev/null +++ b/src/plugins/ext_sensors/none/Makefile.am @@ -0,0 +1,14 @@ +# Makefile for ext_sensors/none plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +pkglib_LTLIBRARIES = ext_sensors_none.la + +# Null job completion logging plugin. +ext_sensors_none_la_SOURCES = ext_sensors_none.c + +ext_sensors_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/priority/multifactor2/Makefile.in b/src/plugins/ext_sensors/none/Makefile.in similarity index 91% rename from src/plugins/priority/multifactor2/Makefile.in rename to src/plugins/ext_sensors/none/Makefile.in index 768f2152e05b3109a635d97f4d2ffed6c676f188..0493c233e964f5935e965d164e26f2b1ff7ae767 100644 --- a/src/plugins/priority/multifactor2/Makefile.in +++ b/src/plugins/ext_sensors/none/Makefile.in @@ -15,7 +15,7 @@ @SET_MAKE@ -# Makefile for priority/multifactor2 plugin +# Makefile for ext_sensors/none plugin VPATH = @srcdir@ am__make_dryrun = \ @@ -54,10 +54,11 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -subdir = src/plugins/priority/multifactor2 +subdir = src/plugins/ext_sensors/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ 
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -129,13 +133,12 @@ am__uninstall_files_from_dir = { \ } am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) -priority_multifactor2_la_DEPENDENCIES = -am_priority_multifactor2_la_OBJECTS = priority_multifactor2.lo -priority_multifactor2_la_OBJECTS = \ - $(am_priority_multifactor2_la_OBJECTS) -priority_multifactor2_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ +ext_sensors_none_la_LIBADD = +am_ext_sensors_none_la_OBJECTS = ext_sensors_none.lo +ext_sensors_none_la_OBJECTS = $(am_ext_sensors_none_la_OBJECTS) +ext_sensors_none_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(priority_multifactor2_la_LDFLAGS) $(LDFLAGS) -o $@ + $(ext_sensors_none_la_LDFLAGS) $(LDFLAGS) -o $@ DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -149,8 +152,8 @@ CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) 
$(AM_LDFLAGS) \ $(LDFLAGS) -o $@ -SOURCES = $(priority_multifactor2_la_SOURCES) -DIST_SOURCES = $(priority_multifactor2_la_SOURCES) +SOURCES = $(ext_sensors_none_la_SOURCES) +DIST_SOURCES = $(ext_sensors_none_la_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -181,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = 
@RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -367,12 +392,13 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign -PLUGIN_FLAGS = -module -avoid-version -export-dynamic +PLUGIN_FLAGS = -module -avoid-version --export-dynamic INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -pkglib_LTLIBRARIES = priority_multifactor2.la -priority_multifactor2_la_SOURCES = priority_multifactor2.c -priority_multifactor2_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -priority_multifactor2_la_LIBADD = -lm +pkglib_LTLIBRARIES = ext_sensors_none.la + +# Null job completion logging plugin. +ext_sensors_none_la_SOURCES = ext_sensors_none.c +ext_sensors_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) all: all-am .SUFFIXES: @@ -386,9 +412,9 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi exit 1;; \ esac; \ done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/priority/multifactor2/Makefile'; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/ext_sensors/none/Makefile'; \ $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --foreign src/plugins/priority/multifactor2/Makefile + $(AUTOMAKE) --foreign src/plugins/ext_sensors/none/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ @@ -439,8 +465,8 @@ clean-pkglibLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done -priority_multifactor2.la: $(priority_multifactor2_la_OBJECTS) $(priority_multifactor2_la_DEPENDENCIES) $(EXTRA_priority_multifactor2_la_DEPENDENCIES) - $(priority_multifactor2_la_LINK) -rpath $(pkglibdir) $(priority_multifactor2_la_OBJECTS) $(priority_multifactor2_la_LIBADD) $(LIBS) +ext_sensors_none.la: $(ext_sensors_none_la_OBJECTS) $(ext_sensors_none_la_DEPENDENCIES) $(EXTRA_ext_sensors_none_la_DEPENDENCIES) + $(ext_sensors_none_la_LINK) -rpath $(pkglibdir) $(ext_sensors_none_la_OBJECTS) $(ext_sensors_none_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -448,7 +474,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_multifactor2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ext_sensors_none.Plo@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< diff --git a/src/plugins/ext_sensors/none/ext_sensors_none.c b/src/plugins/ext_sensors/none/ext_sensors_none.c new file mode 100644 index 0000000000000000000000000000000000000000..37f08b100fbcfa18b77bc066b0194e64fff5da68 --- /dev/null +++ b/src/plugins/ext_sensors/none/ext_sensors_none.c @@ -0,0 +1,131 @@ +/*****************************************************************************\ + * ext_sensors_none.c - slurm external sensors plugin for none. + ***************************************************************************** + * Copyright (C) 2013 + * Written by Bull- Thomas Cadeau/Martin Perry/Yiannis Georgiou + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + \*****************************************************************************/ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <math.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +/* slurm_xlator.h must be first */ +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_ext_sensors.h" +#include "src/common/fd.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method> + * is a description of how this plugin satisfies that application. SLURM will + * only load job completion logging plugins if the plugin_type string has a + * prefix of "jobacct/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. 
Various SLURM versions will likely require a certain + * minimum version for their plugins as the job accounting API + * matures. + */ +const char plugin_name[] = "ExtSensors NONE plugin"; +const char plugin_type[] = "ext_sensors/none"; +const uint32_t plugin_version = 100; + + +extern int ext_sensors_read_conf(void) +{ + return SLURM_SUCCESS; +} + +extern int ext_sensors_free_conf(void) +{ + return SLURM_SUCCESS; +} + +extern int ext_sensors_p_update_component_data(void) +{ + return SLURM_SUCCESS; +} + +extern int ext_sensors_p_get_stepstartdata(struct step_record *step_rec) +{ + return SLURM_SUCCESS; +} + +extern int ext_sensors_p_get_stependdata(struct step_record *step_rec) +{ + return SLURM_SUCCESS; +} + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. Put global initialization here. + */ +extern int init(void) +{ + verbose("%s loaded", plugin_name); + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + return SLURM_SUCCESS; +} + diff --git a/src/plugins/ext_sensors/rrd/Makefile.am b/src/plugins/ext_sensors/rrd/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..36a5c1fbf4712a4554093f00f64a79b86b9241ec --- /dev/null +++ b/src/plugins/ext_sensors/rrd/Makefile.am @@ -0,0 +1,24 @@ +# Makefile for ext_sensors/rrd plugin + +AUTOMAKE_OPTIONS = foreign + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(RRDTOOL_CPPFLAGS) + +if BUILD_RRD +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +pkglib_LTLIBRARIES = ext_sensors_rrd.la + +# external sensors plugin. 
+ext_sensors_rrd_la_SOURCES = ext_sensors_rrd.c \ + ext_sensors_rrd.h + +ext_sensors_rrd_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(RRDTOOL_LDFLAGS) +ext_sensors_rrd_la_LIBADD = $(RRDTOOL_LIBS) + +else + +EXTRA_ext_sensors_rrd_la_SOURCES = ext_sensors_rrd.c \ + ext_sensors_rrd.h + +endif diff --git a/src/plugins/ext_sensors/rrd/Makefile.in b/src/plugins/ext_sensors/rrd/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..834ef9ed0cd4b228cd8818be8dff48c8fa871f04 --- /dev/null +++ b/src/plugins/ext_sensors/rrd/Makefile.in @@ -0,0 +1,730 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for ext_sensors/rrd plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/ext_sensors/rrd +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed 
'$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +am__DEPENDENCIES_1 = +@BUILD_RRD_TRUE@ext_sensors_rrd_la_DEPENDENCIES = \ +@BUILD_RRD_TRUE@ $(am__DEPENDENCIES_1) +am__ext_sensors_rrd_la_SOURCES_DIST = ext_sensors_rrd.c \ + ext_sensors_rrd.h +@BUILD_RRD_TRUE@am_ext_sensors_rrd_la_OBJECTS = ext_sensors_rrd.lo +am__EXTRA_ext_sensors_rrd_la_SOURCES_DIST = ext_sensors_rrd.c \ + ext_sensors_rrd.h +ext_sensors_rrd_la_OBJECTS = $(am_ext_sensors_rrd_la_OBJECTS) +ext_sensors_rrd_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(ext_sensors_rrd_la_LDFLAGS) $(LDFLAGS) -o $@ +@BUILD_RRD_TRUE@am_ext_sensors_rrd_la_rpath = -rpath $(pkglibdir) +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(ext_sensors_rrd_la_SOURCES) \ + $(EXTRA_ext_sensors_rrd_la_SOURCES) +DIST_SOURCES = $(am__ext_sensors_rrd_la_SOURCES_DIST) \ + $(am__EXTRA_ext_sensors_rrd_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; 
\ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ 
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE 
= @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ 
+sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(RRDTOOL_CPPFLAGS) +@BUILD_RRD_TRUE@PLUGIN_FLAGS = -module -avoid-version --export-dynamic +@BUILD_RRD_TRUE@pkglib_LTLIBRARIES = ext_sensors_rrd.la + +# external sensors plugin. +@BUILD_RRD_TRUE@ext_sensors_rrd_la_SOURCES = ext_sensors_rrd.c \ +@BUILD_RRD_TRUE@ ext_sensors_rrd.h + +@BUILD_RRD_TRUE@ext_sensors_rrd_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(RRDTOOL_LDFLAGS) +@BUILD_RRD_TRUE@ext_sensors_rrd_la_LIBADD = $(RRDTOOL_LIBS) +@BUILD_RRD_FALSE@EXTRA_ext_sensors_rrd_la_SOURCES = ext_sensors_rrd.c \ +@BUILD_RRD_FALSE@ ext_sensors_rrd.h + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/ext_sensors/rrd/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/ext_sensors/rrd/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +ext_sensors_rrd.la: $(ext_sensors_rrd_la_OBJECTS) $(ext_sensors_rrd_la_DEPENDENCIES) $(EXTRA_ext_sensors_rrd_la_DEPENDENCIES) + $(ext_sensors_rrd_la_LINK) $(am_ext_sensors_rrd_la_rpath) $(ext_sensors_rrd_la_OBJECTS) $(ext_sensors_rrd_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ext_sensors_rrd.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 
1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; 
then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/ext_sensors/rrd/ext_sensors_rrd.c b/src/plugins/ext_sensors/rrd/ext_sensors_rrd.c new file mode 100644 index 0000000000000000000000000000000000000000..7cac474dd7756336dfc4ce96cdaa5b654cd2de42 --- /dev/null +++ b/src/plugins/ext_sensors/rrd/ext_sensors_rrd.c @@ -0,0 +1,708 @@ +/*****************************************************************************\ + * ext_sensors_rrd.c - slurm external sensors plugin for rrd. + ***************************************************************************** + * Copyright (C) 2013 + * Written by Bull- Thomas Cadeau/Martin Perry/Yiannis Georgiou + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + \*****************************************************************************/ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <math.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <unistd.h> + +/* slurm_xlator.h must be first */ +#include "src/common/slurm_xlator.h" +#include "ext_sensors_rrd.h" +#include "src/common/fd.h" +#include "src/common/read_config.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/common/slurm_ext_sensors.h" +#include "src/slurmd/common/proctrack.h" + +#include <rrd.h> + +enum ext_sensors_value_type { + EXT_SENSORS_VALUE_ENERGY, + EXT_SENSORS_VALUE_TEMPERATURE, +}; + +#define _WATT_MIN 10 +#define _WATT_MAX 500 +#define _TEMP_MIN 1 +#define _TEMP_MAX 300 +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. 
SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method> + * is a description of how this plugin satisfies that application. SLURM will + * only load job completion logging plugins if the plugin_type string has a + * prefix of "jobacct/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the job accounting API + * matures. + */ +const char plugin_name[] = "ExtSensors rrd plugin"; +const char plugin_type[] = "ext_sensors/rrd"; +const uint32_t plugin_version = 100; + +static uint32_t debug_flags = 0; +static ext_sensors_conf_t ext_sensors_conf; +static ext_sensors_conf_t *ext_sensors_cnf = &ext_sensors_conf; +static time_t last_valid_time; +static rrd_value_t last_valid_watt; + +/* Local plugin functions */ +static int _update_node_data(void); +static int _update_switch_data(void); +static int _update_door_data(void); +extern int _ext_sensors_read_conf(void); +static void _ext_sensors_clear_free_conf(void); + +/* Local RRD functions */ +static rrd_value_t _get_additional_consumption(time_t time0, time_t time1, + rrd_value_t watt0, + rrd_value_t watt1); +static rrd_value_t _validate_watt(rrd_value_t *v); +static char* _get_node_rrd_path(char* component_name, + enum ext_sensors_value_type sensor_type); +static uint32_t _rrd_get_last_one(char* filename, char* rra_name); +static uint32_t _rrd_consolidate_one(time_t t0, time_t t1, + char* filename, char* rra_name, + bool flag_approximate); + + +static rrd_value_t _get_additional_consumption(time_t time0, time_t time1, + 
rrd_value_t watt0, + rrd_value_t watt1) +{ + rrd_value_t consumption = (time1 - time0)*(watt1 + watt0)/2; + return consumption; +} + +static rrd_value_t _validate_watt(rrd_value_t *v) +{ + rrd_value_t r = (rrd_value_t)NO_VAL; + if (v != NULL && + *v > ext_sensors_cnf->min_watt && + *v < ext_sensors_cnf->max_watt) + r = *v; + return r; +} + +static char* _get_node_rrd_path(char* component_name, + enum ext_sensors_value_type sensor_type) +{ + char *p; + char *rrd_file; + struct stat buf; + + switch (sensor_type) { + case EXT_SENSORS_VALUE_ENERGY: + rrd_file = ext_sensors_cnf->energy_rrd_file; + break; + case EXT_SENSORS_VALUE_TEMPERATURE: + rrd_file = ext_sensors_cnf->temp_rrd_file; + break; + default: + error("ext_sensors: _get_node_rrd_path: unknown enum %d", + sensor_type); + return NULL; + } + + if (!component_name || !strlen(component_name) || !rrd_file) + return NULL; + + p = xstrdup(rrd_file); + xstrsubstitute(p, "%n", component_name); + + if (!strcmp(p, rrd_file)) { + xfree(p); + return NULL; + } + + if (stat(p, &buf) == -1) { + xfree(p); + return NULL; + } + + return p; +} + +static uint32_t _rrd_get_last_one(char* filename, char* rra_name) +{ + /* RRD library usage notes: + * do not use the following lines for compatibility: + * 1.3.8-6 : (argv={lastupdate, filename} + * status = rrd_lastupdate(argc, argv, &time, &ds_count, + * &ds_names, &last_ds); + * 1.4.7 : + * status = rrd_lastupdate_r(filename, &time, &ds_count, + * &ds_names, &last_ds); + */ + rrd_info_t *data, *data_p; + char line[] = "ds[%s].last_ds", *p, *rra = NULL; + char *argv[] = {"info", filename, NULL}; + uint32_t temperature = NO_VAL; + + p = xstrdup(line); + + data = rrd_info(2, argv); + data_p = data; + + if (rra_name == NULL) { + while (data_p) { + if (!strncmp(line, data_p->key, 3)) { + rra = xstrdup(data_p->key + 3); + xstrsubstitute(rra, strchr(rra, ']'), ""); + break; + } + data_p = data_p->next; + } + } else + rra = rra_name; + + if (rra != NULL) { + xstrsubstitute(p, "%s", 
rra_name); + if (rra_name == NULL) + xfree(rra); + if (strcmp(p,line) == 0) { + xfree(p); + rrd_info_free(data); + return temperature; + } + } else { + xfree(p); + rrd_info_free(data); + return temperature; + } + + while (data_p) { + if (!strcmp(p, data_p->key)) { + if (!sscanf(data_p->value.u_str, "%d", &temperature)) + temperature = 1; + break; + } + data_p = data_p->next; + } + + xfree(p); + rrd_info_free(data); + + return temperature; +} + +static uint32_t _rrd_consolidate_one(time_t t0, time_t t1, + char* filename, char* rra_name, + bool flag_approximate) +{ + int status, rra_nb = -1; + unsigned long step = 1, ds_count, ii; + char cf[] = "AVERAGE"; + char **ds_names; + time_t ti, start = t0-1, end = t1+1; + uint32_t nb_miss = 0, nb_values = 0; + rrd_value_t *rrd_data, *rrd_data_p; + rrd_value_t current_watt = (rrd_value_t)NO_VAL; + rrd_value_t temp_energy = 0, consumed_energy = 0; + + last_valid_time = 0; + last_valid_watt = (rrd_value_t)NO_VAL; + + status = rrd_fetch_r(filename, cf, + &start, &end, &step, + &ds_count, &ds_names, + &rrd_data); + + if (status != 0){ + if (debug_flags & DEBUG_FLAG_EXT_SENSORS) + info("ext_sensors: error rrd_fetch %s",filename); + return NO_VAL; + } + + rrd_data_p = rrd_data; + + do { + if (start == end) { + consumed_energy = (rrd_value_t)NO_VAL; + break; + } + if (ds_count == 0) { + if (debug_flags & DEBUG_FLAG_EXT_SENSORS) + info("ext_sensors: error ds_count==0 in RRD %s", + filename); + consumed_energy = (rrd_value_t)NO_VAL; + break; + } else if (ds_count == 1 || rra_name == NULL) + rra_nb = 0; + else { + for (ii = 0; ii < ds_count; ii++){ + if (!strcmp(ds_names[ii],rra_name)) { + rra_nb = ii; + break; + } + } + if (rra_nb == -1) { + if (debug_flags & DEBUG_FLAG_EXT_SENSORS) + info("ext_sensors: error RRA %s not " + "found in RRD %s", + rra_name, filename); + consumed_energy = (rrd_value_t)NO_VAL; + break; + } + } + ti = start; + do { + for (ii = 0; ii < rra_nb; ii++) + rrd_data_p++; + last_valid_watt = 
_validate_watt(rrd_data_p); + if (last_valid_watt != (rrd_value_t)NO_VAL) + last_valid_time = ti; + for (ii = rra_nb; ii < ds_count; ii++) + rrd_data_p++; + ti += step; + } while (ti < t0 && ti < end); + + if (ti != t0 && ti < end) { + for (ii = 0; ii < rra_nb; ii++) + rrd_data_p++; + current_watt = _validate_watt(rrd_data_p); + + if (current_watt != (rrd_value_t)NO_VAL) { + temp_energy = _get_additional_consumption( + t0, ti < t1 ? ti : t1, + current_watt, current_watt); + last_valid_watt = current_watt; + last_valid_time = ti; + consumed_energy += temp_energy; + nb_values += 1; + } else { + nb_miss += 10001; + } + + for (ii = rra_nb; ii < ds_count; ii++) + rrd_data_p++; + } else if ((ti == t0) && (ti < end)) { + for (ii = 0; ii < rra_nb; ii++) + rrd_data_p++; + current_watt = _validate_watt(rrd_data_p); + if (current_watt != (rrd_value_t)NO_VAL) { + last_valid_watt = current_watt; + last_valid_time = ti; + } + for (ii = rra_nb; ii < ds_count; ii++) + rrd_data_p++; + ti += step; + } + while (((ti += step) <= t1) && (ti < end)) { + for (ii = 0; ii < rra_nb; ii++) + rrd_data_p++; + current_watt = _validate_watt(rrd_data_p); + if (current_watt != (rrd_value_t)NO_VAL && + last_valid_watt != (rrd_value_t)NO_VAL) { + temp_energy = _get_additional_consumption( + ti-step, ti, + last_valid_watt, current_watt); + last_valid_watt = current_watt; + last_valid_time = ti; + consumed_energy += temp_energy; + nb_values += 1; + } else { + nb_miss += 1; + } + for (ii = rra_nb; ii < ds_count; ii++) + rrd_data_p++; + } + if ((ti > t1) && (t1 > (t0 + step)) && (ti-step < t1)) { + if (current_watt != (rrd_value_t)NO_VAL) { + temp_energy = _get_additional_consumption( + ti-step, t1, + current_watt, current_watt); + consumed_energy += temp_energy; + nb_values += 1; + } else { + nb_miss += 1; + } + } + } while(0); + + if (nb_miss >= 10000) { + if (debug_flags & DEBUG_FLAG_EXT_SENSORS) + info("ext_sensors: RRD: no first value"); + nb_miss -= 10000; + } + if (debug_flags & 
DEBUG_FLAG_EXT_SENSORS) + info("ext_sensors: RRD: have %d values and miss %d values", + nb_values, nb_miss); + + if (flag_approximate && + current_watt == (rrd_value_t)NO_VAL && + last_valid_watt != (rrd_value_t)NO_VAL) { + temp_energy = _get_additional_consumption( + last_valid_time, t1, + last_valid_watt, last_valid_watt); + consumed_energy += temp_energy; + } + + for (ii = 0; ii < ds_count; ii++) + free(ds_names[ii]); + + free(ds_names); + free(rrd_data); + + return (uint32_t)consumed_energy; +} + +extern uint32_t RRD_consolidate(time_t step_starttime, time_t step_endtime, + bitstr_t* bitmap_of_nodes) +{ + uint32_t consumed_energy = 0; + uint32_t tmp; + char *node_name = NULL; + hostlist_t hl; + char* path; + + node_name = bitmap2node_name(bitmap_of_nodes); + hl = hostlist_create(node_name); + xfree(node_name); + while ((node_name = hostlist_shift(hl))) { + if (!(path = _get_node_rrd_path(node_name, + EXT_SENSORS_VALUE_ENERGY))) + consumed_energy = NO_VAL; + free(node_name); + if ((tmp = _rrd_consolidate_one( + step_starttime, step_endtime, path, + ext_sensors_cnf->energy_rra_name, true)) == NO_VAL) + consumed_energy = NO_VAL; + xfree(path); + if (consumed_energy == NO_VAL) + break; + consumed_energy += tmp; + } + hostlist_destroy(hl); + + return consumed_energy; +} + +static int _update_node_data(void) +{ + int i; + char* path; + uint32_t tmp; + ext_sensors_data_t *ext_sensors; + time_t now = time(NULL); + + if (ext_sensors_cnf->dataopts & EXT_SENSORS_OPT_NODE_ENERGY) { + for (i=0; i < node_record_count; i++) { + ext_sensors = node_record_table_ptr[i].ext_sensors; + if (ext_sensors->energy_update_time == 0) { + ext_sensors->energy_update_time = now; + ext_sensors->consumed_energy = 0; + ext_sensors->current_watts = 0; + continue; + } + if (!(path = _get_node_rrd_path( + node_record_table_ptr[i].name, + EXT_SENSORS_VALUE_ENERGY))) { + ext_sensors->consumed_energy = NO_VAL; + ext_sensors->current_watts = NO_VAL; + continue; + } + tmp = _rrd_consolidate_one( + 
ext_sensors->energy_update_time, + now, path, + ext_sensors_cnf->energy_rra_name, + false); + xfree(path); + if ((tmp != NO_VAL) && (tmp != 0) && + (last_valid_time != 0) && + (last_valid_watt != (rrd_value_t)NO_VAL)) { + if ((ext_sensors->consumed_energy <= 0) || + (ext_sensors->consumed_energy == NO_VAL)) { + ext_sensors->consumed_energy = tmp; + } else { + ext_sensors->consumed_energy += tmp; + } + ext_sensors->energy_update_time = + last_valid_time; + ext_sensors->current_watts = + (uint32_t)last_valid_watt; + } + } + } + + if (ext_sensors_cnf->dataopts & EXT_SENSORS_OPT_NODE_TEMP) { + for (i=0; i < node_record_count; i++) { + ext_sensors = node_record_table_ptr[i].ext_sensors; + if (!(path = _get_node_rrd_path( + node_record_table_ptr[i].name, + EXT_SENSORS_VALUE_TEMPERATURE))) { + ext_sensors->temperature = NO_VAL; + continue; + } + tmp = _rrd_get_last_one(path, + ext_sensors_cnf->temp_rra_name); + xfree(path); + if (tmp != NO_VAL && + tmp > ext_sensors_cnf->min_temp && + tmp < ext_sensors_cnf->max_temp) { + ext_sensors->temperature = tmp; + } else { + ext_sensors->temperature = NO_VAL; + } + } + } + return SLURM_SUCCESS; +} + +static int _update_switch_data(void) +{ + /* TODO: insert code here to do the following: + * If SwitchData is configured in ext_sensors_cnf->dataopts: + * for each switch, update data in switch_record from RRD database */ + return SLURM_SUCCESS; +} + +static int _update_door_data(void) +{ + /* TODO: insert code here to do the following: + * If ColdDoorData is configured in ext_sensors_cnf->dataopts: + * for each door, update data in door_record from RRD database */ + return SLURM_SUCCESS; +} + +extern int _ext_sensors_read_conf(void) +{ + s_p_options_t options[] = { + {"JobData", S_P_STRING}, + {"NodeData", S_P_STRING}, + {"SwitchData", S_P_STRING}, + {"ColdDoorData", S_P_STRING}, + {"MinWatt", S_P_UINT32}, + {"MaxWatt", S_P_UINT32}, + {"MinTemp", S_P_UINT32}, + {"MaxTemp", S_P_UINT32}, + {"EnergyRRA", S_P_STRING}, + {"TempRRA", 
S_P_STRING}, + {"EnergyPathRRD", S_P_STRING}, + {"TempPathRRD", S_P_STRING}, + {NULL} }; + s_p_hashtbl_t *tbl = NULL; + char *conf_path = NULL; + struct stat buf; + char *temp_str = NULL; + + /* Set initial values */ + if (ext_sensors_cnf == NULL) { + return SLURM_ERROR; + } + _ext_sensors_clear_free_conf(); + /* Get the ext_sensors.conf path and validate the file */ + conf_path = get_extra_conf_path("ext_sensors.conf"); + if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) { + fatal("ext_sensors: No ext_sensors file (%s)", conf_path); + } else { + debug2("ext_sensors: Reading ext_sensors file %s", conf_path); + tbl = s_p_hashtbl_create(options); + if (s_p_parse_file(tbl, NULL, conf_path, false) == + SLURM_ERROR) { + fatal("ext_sensors: Could not open/read/parse " + "ext_sensors file %s", conf_path); + } + /* ext_sensors initialization parameters */ + if (s_p_get_string(&temp_str, "JobData", tbl)) { + if (strstr(temp_str, "energy")) + ext_sensors_cnf->dataopts + |= EXT_SENSORS_OPT_JOB_ENERGY; + } + xfree(temp_str); + if (s_p_get_string(&temp_str, "NodeData", tbl)) { + if (strstr(temp_str, "energy")) + ext_sensors_cnf->dataopts + |= EXT_SENSORS_OPT_NODE_ENERGY; + if (strstr(temp_str, "temp")) + ext_sensors_cnf->dataopts + |= EXT_SENSORS_OPT_NODE_TEMP; + } + xfree(temp_str); + if (s_p_get_string(&temp_str, "SwitchData", tbl)) { + if (strstr(temp_str, "energy")) + ext_sensors_cnf->dataopts + |= EXT_SENSORS_OPT_SWITCH_ENERGY; + if (strstr(temp_str, "temp")) + ext_sensors_cnf->dataopts + |= EXT_SENSORS_OPT_SWITCH_TEMP; + } + xfree(temp_str); + if (s_p_get_string(&temp_str, "ColdDoorData", tbl)) { + if (strstr(temp_str, "temp")) + ext_sensors_cnf->dataopts + |= EXT_SENSORS_OPT_COLDDOOR_TEMP; + } + xfree(temp_str); + + + s_p_get_uint32(&ext_sensors_cnf->min_watt,"MinWatt", tbl); + s_p_get_uint32(&ext_sensors_cnf->max_watt,"MaxWatt", tbl); + s_p_get_uint32(&ext_sensors_cnf->min_temp,"MinTemp", tbl); + s_p_get_uint32(&ext_sensors_cnf->max_temp,"MaxTemp", tbl); + if 
(!s_p_get_string(&ext_sensors_cnf->energy_rra_name, + "EnergyRRA", tbl)) { + if (ext_sensors_cnf->dataopts + & EXT_SENSORS_OPT_JOB_ENERGY) + fatal("ext_sensors/rrd: EnergyRRA " + "must be set to gather JobData=energy. " + "Please set this value in your " + "ext_sensors.conf file."); + } + + if (!s_p_get_string(&ext_sensors_cnf->temp_rra_name, + "TempRRA", tbl)) { + if (ext_sensors_cnf->dataopts + & EXT_SENSORS_OPT_NODE_TEMP) + fatal("ext_sensors/rrd: TempRRA " + "must be set to gather NodeData=temp. " + "Please set this value in your " + "ext_sensors.conf file."); + } + s_p_get_string(&ext_sensors_cnf->energy_rrd_file, + "EnergyPathRRD", tbl); + s_p_get_string(&ext_sensors_cnf->temp_rrd_file, + "TempPathRRD", tbl); + + s_p_hashtbl_destroy(tbl); + } + xfree(conf_path); + return SLURM_SUCCESS; +} + +static void _ext_sensors_clear_free_conf(void) +{ + ext_sensors_cnf->dataopts = 0; + ext_sensors_cnf->min_watt = _WATT_MIN; + ext_sensors_cnf->max_watt = _WATT_MAX; + ext_sensors_cnf->min_temp = _TEMP_MIN; + ext_sensors_cnf->max_temp = _TEMP_MAX; + xfree(ext_sensors_cnf->energy_rra_name); + xfree(ext_sensors_cnf->temp_rra_name); + xfree(ext_sensors_cnf->energy_rrd_file); + xfree(ext_sensors_cnf->temp_rrd_file); +} + +extern int ext_sensors_p_update_component_data(void) +{ + int rc_node, rc_switch, rc_door; + + rc_node = _update_node_data(); + rc_switch = _update_switch_data(); + rc_door = _update_door_data(); + if ((rc_node == SLURM_SUCCESS) && + (rc_switch == SLURM_SUCCESS) && + (rc_door == SLURM_SUCCESS)) + return SLURM_SUCCESS; + return SLURM_ERROR; +} + +extern int ext_sensors_p_get_stepstartdata(struct step_record *step_rec) +{ + /* Nothing to do here for ext_sensors/rrd plugin */ + int rc = SLURM_SUCCESS; + return rc; +} + +extern int ext_sensors_p_get_stependdata(struct step_record *step_rec) +{ + time_t step_endtime = time(NULL); + int rc = SLURM_SUCCESS; + + if (ext_sensors_cnf->dataopts & EXT_SENSORS_OPT_JOB_ENERGY) { + step_rec->ext_sensors->consumed_energy = + 
RRD_consolidate(step_rec->start_time, step_endtime, + step_rec->step_node_bitmap); + if (step_rec->jobacct && + (!step_rec->jobacct->energy.consumed_energy + || (step_rec->jobacct->energy.consumed_energy == NO_VAL))) { + step_rec->jobacct->energy.consumed_energy = + step_rec->ext_sensors->consumed_energy; + } + } + + return rc; +} + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. Put global initialization here. + */ +extern int init(void) +{ + /* read ext_sensors configuration */ + if (_ext_sensors_read_conf()) + return SLURM_ERROR; + + debug_flags = slurm_get_debug_flags(); + verbose("%s loaded", plugin_name); + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + _ext_sensors_clear_free_conf(); + return SLURM_SUCCESS; +} + diff --git a/src/plugins/ext_sensors/rrd/ext_sensors_rrd.h b/src/plugins/ext_sensors/rrd/ext_sensors_rrd.h new file mode 100644 index 0000000000000000000000000000000000000000..145260fb7cfada2837796d1f63fb612d691c1586 --- /dev/null +++ b/src/plugins/ext_sensors/rrd/ext_sensors_rrd.h @@ -0,0 +1,86 @@ +/*****************************************************************************\ + * ext_sensors_rrd.h - slurm external sensors plugin for rrd. + ***************************************************************************** + * Copyright (C) 2013 + * Written by Bull- Thomas Cadeau/Martin Perry/Yiannis Georgiou + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * +\*****************************************************************************/ + +#include "src/slurmctld/slurmctld.h" + +#ifndef _EXT_SENSORS_RRD_H_ +#define _EXT_SENSORS_RRD_H_ + +/* ext_sensors data collection option flags */ +#define EXT_SENSORS_OPT_JOB_ENERGY 0x00000001 +#define EXT_SENSORS_OPT_NODE_ENERGY 0x00000002 +#define EXT_SENSORS_OPT_NODE_TEMP 0x00000004 +#define EXT_SENSORS_OPT_SWITCH_ENERGY 0x00000008 +#define EXT_SENSORS_OPT_SWITCH_TEMP 0x00000010 +#define EXT_SENSORS_OPT_COLDDOOR_TEMP 0x00000020 + +/* ext_sensors plugins configuration parameters */ +typedef struct ext_sensors_config { + uint64_t dataopts; + uint32_t min_watt; + uint32_t max_watt; + uint32_t min_temp; + uint32_t max_temp; + char *energy_rra_name; + char *temp_rra_name; + char *energy_rrd_file; + char *temp_rrd_file; +} ext_sensors_conf_t; + +/* read external sensors configuration file */ +extern int ext_sensors_read_conf(void); + +/* clear and free external sensors configuration structures */ +extern void ext_sensors_free_conf(void); + +/* update external sensors data for hardware components */ +extern int ext_sensors_p_update_component_data(void); + +/* get external sensors data at start of jobstep */ +extern int ext_sensors_p_get_stepstartdata(struct step_record *step_rec); + +/* get external sensors data at end of jobstep */ +extern int ext_sensors_p_get_stependdata(struct step_record *step_rec); + +/* consolidate RRD data */ +extern uint32_t RRD_consolidate(time_t step_starttime, time_t step_endtime, + bitstr_t* bitmap_of_nodes); + +extern int init(void); +extern int fini(void); + +#endif diff --git a/src/plugins/gres/Makefile.in b/src/plugins/gres/Makefile.in index 7b4c4b8fa9d9603b2bab7dbcbae448da575e6902..124cd1f7ec35e813f00a28835ac1960940b05b6b 100644 --- a/src/plugins/gres/Makefile.in +++ b/src/plugins/gres/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/gres DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ 
HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/gres/gpu/Makefile.in b/src/plugins/gres/gpu/Makefile.in index 6404fb0d7387b0e4662bb3d85a67e639e3408476..4bc2a40fe07701323cd84a3c7538bec1e227b8e5 100644 --- a/src/plugins/gres/gpu/Makefile.in +++ b/src/plugins/gres/gpu/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/gres/gpu DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = 
$(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = 
@OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c index 86d2fc1069c23ec6b9f25d667d209da26081f0cc..bc7b1bfc2509e15d53da2cddce2aff2b26a2a70f 100644 --- a/src/plugins/gres/gpu/gres_gpu.c +++ b/src/plugins/gres/gpu/gres_gpu.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -113,6 +113,19 @@ static char gres_name[] = "gpu"; static int *gpu_devices = NULL; static int nb_available_files; +extern int init(void) +{ + debug("%s: %s loaded", __func__, plugin_name); + + return SLURM_SUCCESS; +} +extern int fini(void) +{ + debug("%s: unloading %s", __func__, plugin_name); + xfree(gpu_devices); + + return SLURM_SUCCESS; +} /* * We could load gres state or validate it using various mechanisms here. * This only validates that the configuration was specified in gres.conf. 
@@ -120,7 +133,7 @@ static int nb_available_files; */ extern int node_config_load(List gres_conf_list) { - int i, rc = SLURM_ERROR; + int i, rc = SLURM_SUCCESS; ListIterator iter; gres_slurmd_conf_t *gres_slurmd_conf; int nb_gpu = 0; /* Number of GPUs in the list */ @@ -128,21 +141,17 @@ extern int node_config_load(List gres_conf_list) xassert(gres_conf_list); iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = list_next(iter))) { if (strcmp(gres_slurmd_conf->name, gres_name)) continue; - rc = SLURM_SUCCESS; if (gres_slurmd_conf->file) nb_gpu++; } list_iterator_destroy(iter); gpu_devices = NULL; nb_available_files = -1; - /* (Re-)Allocate memory if number of files changed */ - if (nb_gpu != nb_available_files) { + if (nb_gpu > nb_available_files) { xfree(gpu_devices); /* No-op if NULL */ gpu_devices = (int *) xmalloc(sizeof(int) * nb_gpu); nb_available_files = nb_gpu; @@ -151,21 +160,43 @@ extern int node_config_load(List gres_conf_list) } iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = list_next(iter))) { if ((strcmp(gres_slurmd_conf->name, gres_name) == 0) && gres_slurmd_conf->file) { /* Populate gpu_devices array with number * at end of the file name */ - for (i = 0; gres_slurmd_conf->file[i]; i++) { - if (!isdigit(gres_slurmd_conf->file[i])) - continue; - gpu_devices[available_files_index] = - atoi(gres_slurmd_conf->file + i); + char *bracket, *fname, *tmp_name; + hostlist_t hl; + bracket = strrchr(gres_slurmd_conf->file, '['); + if (bracket) + tmp_name = xstrdup(bracket); + else + tmp_name = xstrdup(gres_slurmd_conf->file); + hl = hostlist_create(tmp_name); + xfree(tmp_name); + if (!hl) { + rc = EINVAL; break; } - available_files_index++; + while ((fname = hostlist_shift(hl))) { + if (available_files_index == + nb_available_files) { + nb_available_files++; + xrealloc(gpu_devices, 
sizeof(int) * + nb_available_files); + gpu_devices[available_files_index] = -1; + } + for (i = 0; fname[i]; i++) { + if (!isdigit(fname[i])) + continue; + gpu_devices[available_files_index] = + atoi(fname + i); + break; + } + available_files_index++; + free(fname); + } + hostlist_destroy(hl); } } list_iterator_destroy(iter); @@ -265,7 +296,7 @@ extern void step_set_env(char ***job_env_ptr, void *gres_ptr) } } -/* Send GRES information to slurmstepd on the specified file descriptor*/ +/* Send GRES information to slurmstepd on the specified file descriptor */ extern void send_stepd(int fd) { int i; @@ -278,7 +309,7 @@ extern void send_stepd(int fd) rwfail: error("gres_plugin_send_stepd failed"); } -/* Receive GRES information from slurmd on the specified file descriptor*/ +/* Receive GRES information from slurmd on the specified file descriptor */ extern void recv_stepd(int fd) { int i; diff --git a/src/plugins/gres/mic/Makefile.in b/src/plugins/gres/mic/Makefile.in index 872a3837b72bc8e48e5e3b3dc9fe6b47043972c8..c3e44e3ee04acd6f308c98980ae889e9e72620ac 100644 --- a/src/plugins/gres/mic/Makefile.in +++ b/src/plugins/gres/mic/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/gres/mic DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 
\ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = 
@READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/gres/mic/gres_mic.c b/src/plugins/gres/mic/gres_mic.c index f8c143c9ae84c6e3fc3b415d8f78681c22607413..577b42c689569e78a197dde00cfccc3b6962916f 100644 --- a/src/plugins/gres/mic/gres_mic.c +++ b/src/plugins/gres/mic/gres_mic.c @@ -1,290 +1,285 @@ -/*****************************************************************************\ - * gres_mic.c - Support MICs as a generic resources. - ***************************************************************************** - * Copyright (C) 2012 CSC-IT Center for Science Ltd. - * Written by Olli-Pekka Lehto - * Based upon gres_gpu.c with the copyright notice shown below: - * Copyright (C) 2010 Lawrence Livermore National Security. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Morris Jette <jette1@llnl.gov> - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. - * Please also read the included file: DISCLAIMER. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. 
If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do - * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in - * the program, then also delete it here. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -\*****************************************************************************/ - -#if HAVE_CONFIG_H -# include "config.h" -# if STDC_HEADERS -# include <string.h> -# endif -# if HAVE_SYS_TYPES_H -# include <sys/types.h> -# endif /* HAVE_SYS_TYPES_H */ -# if HAVE_UNISTD_H -# include <unistd.h> -# endif -# if HAVE_INTTYPES_H -# include <inttypes.h> -# else /* ! HAVE_INTTYPES_H */ -# if HAVE_STDINT_H -# include <stdint.h> -# endif -# endif /* HAVE_INTTYPES_H */ -#else /* ! HAVE_CONFIG_H */ -# include <sys/types.h> -# include <unistd.h> -# include <stdint.h> -# include <string.h> -#endif /* HAVE_CONFIG_H */ - -#ifdef HAVE_HWLOC -# include <hwloc.h> -#endif /* HAVE_HWLOC */ - -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> - -#include "slurm/slurm.h" -#include "slurm/slurm_errno.h" - -#include "src/common/slurm_xlator.h" -#include "src/common/bitstring.h" -#include "src/common/env.h" -#include "src/common/gres.h" -#include "src/common/list.h" -#include "src/common/xstring.h" - -/* - * These variables are required by the generic plugin interface. If they - * are not found in the plugin, the plugin loader will ignore it. 
- * - * plugin_name - A string giving a human-readable description of the - * plugin. There is no maximum length, but the symbol must refer to - * a valid string. - * - * plugin_type - A string suggesting the type of the plugin or its - * applicability to a particular form of data or method of data handling. - * If the low-level plugin API is used, the contents of this string are - * unimportant and may be anything. SLURM uses the higher-level plugin - * interface which requires this string to be of the form - * - * <application>/<method> - * - * where <application> is a description of the intended application of - * the plugin (e.g., "auth" for SLURM authentication) and <method> is a - * description of how this plugin satisfies that application. SLURM will - * only load authentication plugins if the plugin_type string has a prefix - * of "auth/". - * - * plugin_version - Specifies the version number of the plugin. This would - * typically be the same for all plugins. - */ -const char plugin_name[] = "Gres MIC plugin"; -const char plugin_type[] = "gres/mic"; -const uint32_t plugin_version = 110; - -static char gres_name[] = "mic"; - -static int *mic_devices = NULL; -static int nb_available_files; - -/* - * We could load gres state or validate it using various mechanisms here. - * This only validates that the configuration was specified in gres.conf. - * In the general case, no code would need to be changed. 
- */ -extern int node_config_load(List gres_conf_list) -{ - int i, rc = SLURM_ERROR; - ListIterator iter; - gres_slurmd_conf_t *gres_slurmd_conf; - int nb_mic = 0; /* Number of MICs in the list */ - int available_files_index = 0; - - xassert(gres_conf_list); - iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); - while ((gres_slurmd_conf = list_next(iter))) { - if (strcmp(gres_slurmd_conf->name, gres_name)) - continue; - rc = SLURM_SUCCESS; - if (gres_slurmd_conf->file) - nb_mic++; - } - list_iterator_destroy(iter); - mic_devices = NULL; - nb_available_files = -1; - - /* (Re-)Allocate memory if number of files changed */ - if (nb_mic != nb_available_files) { - xfree(mic_devices); /* No-op if NULL */ - mic_devices = (int *) xmalloc(sizeof(int) * nb_mic); - nb_available_files = nb_mic; - for (i = 0; i < nb_available_files; i++) - mic_devices[i] = -1; - } - - iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); - while ((gres_slurmd_conf = list_next(iter))) { - if ((strcmp(gres_slurmd_conf->name, gres_name) == 0) && - gres_slurmd_conf->file) { - /* Populate mic_devices array with number - * at end of the file name */ - for (i = 0; gres_slurmd_conf->file[i]; i++) { - if (!isdigit(gres_slurmd_conf->file[i])) - continue; - mic_devices[available_files_index] = - atoi(gres_slurmd_conf->file + i); - break; - } - available_files_index++; - } - } - list_iterator_destroy(iter); - - if (rc != SLURM_SUCCESS) - fatal("%s failed to load configuration", plugin_name); - - for (i = 0; i < nb_available_files; i++) - info("mic %d is device number %d", i, mic_devices[i]); - - return rc; -} - -/* - * Set environment variables as appropriate for a job (i.e. all tasks) based - * upon the job's GRES state. 
- */ -extern void job_set_env(char ***job_env_ptr, void *gres_ptr) -{ - int i, len; - char *dev_list = NULL; - gres_job_state_t *gres_job_ptr = (gres_job_state_t *) gres_ptr; - - if ((gres_job_ptr != NULL) && - (gres_job_ptr->node_cnt == 1) && - (gres_job_ptr->gres_bit_alloc != NULL) && - (gres_job_ptr->gres_bit_alloc[0] != NULL)) { - len = bit_size(gres_job_ptr->gres_bit_alloc[0]); - for (i=0; i<len; i++) { - if (!bit_test(gres_job_ptr->gres_bit_alloc[0], i)) - continue; - if (!dev_list) - dev_list = xmalloc(128); - else - xstrcat(dev_list, ","); - if (mic_devices && (mic_devices[i] >= 0)) - xstrfmtcat(dev_list, "%d", mic_devices[i]); - else - xstrfmtcat(dev_list, "%d", i); - } - } - if (dev_list) { - env_array_overwrite(job_env_ptr,"OFFLOAD_DEVICES", - dev_list); - xfree(dev_list); - } else { - /* The gres.conf file must identify specific device files - * in order to set the OFFLOAD_DEVICES env var */ - error("gres/mic unable to set OFFLOAD_DEVICES, " - "no device files configured"); - } -} - -/* - * Set environment variables as appropriate for a job (i.e. all tasks) based - * upon the job step's GRES state. 
- */ -extern void step_set_env(char ***job_env_ptr, void *gres_ptr) -{ - int i, len; - char *dev_list = NULL; - gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr; - - if ((gres_step_ptr != NULL) && - (gres_step_ptr->node_cnt == 1) && - (gres_step_ptr->gres_bit_alloc != NULL) && - (gres_step_ptr->gres_bit_alloc[0] != NULL)) { - len = bit_size(gres_step_ptr->gres_bit_alloc[0]); - for (i=0; i<len; i++) { - if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i)) - continue; - if (!dev_list) - dev_list = xmalloc(128); - else - xstrcat(dev_list, ","); - if (mic_devices && (mic_devices[i] >= 0)) - xstrfmtcat(dev_list, "%d", mic_devices[i]); - else - xstrfmtcat(dev_list, "%d", i); - } - } - if (dev_list) { - env_array_overwrite(job_env_ptr,"OFFLOAD_DEVICES", - dev_list); - xfree(dev_list); - } else { - /* The gres.conf file must identify specific device files - * in order to set the OFFLOAD_DEVICES env var */ - error("gres/mic unable to set OFFLOAD_DEVICES, " - "no device files configured"); - } -} - -/* Send GRES information to slurmstepd on the specified file descriptor*/ -extern void send_stepd(int fd) -{ - int i; - - safe_write(fd, &nb_available_files, sizeof(int)); - for (i = 0; i < nb_available_files; i++) - safe_write(fd, &mic_devices[i], sizeof(int)); - return; - -rwfail: error("gres_plugin_send_stepd failed"); -} - -/* Receive GRES information from slurmd on the specified file descriptor*/ -extern void recv_stepd(int fd) -{ - int i; - - safe_read(fd, &nb_available_files, sizeof(int)); - if (nb_available_files > 0) - mic_devices = xmalloc(sizeof(int) * nb_available_files); - for (i = 0; i < nb_available_files; i++) - safe_read(fd, &mic_devices[i], sizeof(int)); - return; - -rwfail: error("gres_plugin_recv_stepd failed"); -} +/*****************************************************************************\ + * gres_mic.c - Support MICs as a generic resources. 
+ ***************************************************************************** + * Copyright (C) 2012 CSC-IT Center for Science Ltd. + * Written by Olli-Pekka Lehto + * Based upon gres_gpu.c with the copyright notice shown below: + * Copyright (C) 2010 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Morris Jette <jette1@llnl.gov> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include "config.h" +# if STDC_HEADERS +# include <string.h> +# endif +# if HAVE_SYS_TYPES_H +# include <sys/types.h> +# endif /* HAVE_SYS_TYPES_H */ +# if HAVE_UNISTD_H +# include <unistd.h> +# endif +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else /* ! HAVE_INTTYPES_H */ +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* ! HAVE_CONFIG_H */ +# include <sys/types.h> +# include <unistd.h> +# include <stdint.h> +# include <string.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_HWLOC +# include <hwloc.h> +#endif /* HAVE_HWLOC */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + +#include "slurm/slurm.h" +#include "slurm/slurm_errno.h" + +#include "src/common/slurm_xlator.h" +#include "src/common/bitstring.h" +#include "src/common/env.h" +#include "src/common/gres.h" +#include "src/common/list.h" +#include "src/common/xstring.h" + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - A string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - A string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. 
SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "auth" for SLURM authentication) and <method> is a + * description of how this plugin satisfies that application. SLURM will + * only load authentication plugins if the plugin_type string has a prefix + * of "auth/". + * + * plugin_version - Specifies the version number of the plugin. This would + * typically be the same for all plugins. + */ +const char plugin_name[] = "Gres MIC plugin"; +const char plugin_type[] = "gres/mic"; +const uint32_t plugin_version = 110; + +static char gres_name[] = "mic"; + +static int *mic_devices = NULL; +static int nb_available_files; + +/* + * We could load gres state or validate it using various mechanisms here. + * This only validates that the configuration was specified in gres.conf. + * In the general case, no code would need to be changed. 
+ */ +extern int node_config_load(List gres_conf_list) +{ + int i, rc = SLURM_SUCCESS; + ListIterator iter; + gres_slurmd_conf_t *gres_slurmd_conf; + int nb_mic = 0; /* Number of MICs in the list */ + int available_files_index = 0; + + xassert(gres_conf_list); + iter = list_iterator_create(gres_conf_list); + while ((gres_slurmd_conf = list_next(iter))) { + if (strcmp(gres_slurmd_conf->name, gres_name)) + continue; + if (gres_slurmd_conf->file) + nb_mic++; + } + list_iterator_destroy(iter); + mic_devices = NULL; + nb_available_files = -1; + + /* (Re-)Allocate memory if number of files changed */ + if (nb_mic != nb_available_files) { + xfree(mic_devices); /* No-op if NULL */ + mic_devices = (int *) xmalloc(sizeof(int) * nb_mic); + nb_available_files = nb_mic; + for (i = 0; i < nb_available_files; i++) + mic_devices[i] = -1; + } + + iter = list_iterator_create(gres_conf_list); + while ((gres_slurmd_conf = list_next(iter))) { + if ((strcmp(gres_slurmd_conf->name, gres_name) == 0) && + gres_slurmd_conf->file) { + /* Populate mic_devices array with number + * at end of the file name */ + for (i = 0; gres_slurmd_conf->file[i]; i++) { + if (!isdigit(gres_slurmd_conf->file[i])) + continue; + mic_devices[available_files_index] = + atoi(gres_slurmd_conf->file + i); + break; + } + available_files_index++; + } + } + list_iterator_destroy(iter); + + if (rc != SLURM_SUCCESS) + fatal("%s failed to load configuration", plugin_name); + + for (i = 0; i < nb_available_files; i++) + info("mic %d is device number %d", i, mic_devices[i]); + + return rc; +} + +/* + * Set environment variables as appropriate for a job (i.e. all tasks) based + * upon the job's GRES state. 
+ */ +extern void job_set_env(char ***job_env_ptr, void *gres_ptr) +{ + int i, len; + char *dev_list = NULL; + gres_job_state_t *gres_job_ptr = (gres_job_state_t *) gres_ptr; + + if ((gres_job_ptr != NULL) && + (gres_job_ptr->node_cnt == 1) && + (gres_job_ptr->gres_bit_alloc != NULL) && + (gres_job_ptr->gres_bit_alloc[0] != NULL)) { + len = bit_size(gres_job_ptr->gres_bit_alloc[0]); + for (i=0; i<len; i++) { + if (!bit_test(gres_job_ptr->gres_bit_alloc[0], i)) + continue; + if (!dev_list) + dev_list = xmalloc(128); + else + xstrcat(dev_list, ","); + if (mic_devices && (mic_devices[i] >= 0)) + xstrfmtcat(dev_list, "%d", mic_devices[i]); + else + xstrfmtcat(dev_list, "%d", i); + } + } + if (dev_list) { + env_array_overwrite(job_env_ptr,"OFFLOAD_DEVICES", + dev_list); + xfree(dev_list); + } else { + /* The gres.conf file must identify specific device files + * in order to set the OFFLOAD_DEVICES env var */ + error("gres/mic unable to set OFFLOAD_DEVICES, " + "no device files configured"); + } +} + +/* + * Set environment variables as appropriate for a job (i.e. all tasks) based + * upon the job step's GRES state. 
+ */ +extern void step_set_env(char ***job_env_ptr, void *gres_ptr) +{ + int i, len; + char *dev_list = NULL; + gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr; + + if ((gres_step_ptr != NULL) && + (gres_step_ptr->node_cnt == 1) && + (gres_step_ptr->gres_bit_alloc != NULL) && + (gres_step_ptr->gres_bit_alloc[0] != NULL)) { + len = bit_size(gres_step_ptr->gres_bit_alloc[0]); + for (i=0; i<len; i++) { + if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i)) + continue; + if (!dev_list) + dev_list = xmalloc(128); + else + xstrcat(dev_list, ","); + if (mic_devices && (mic_devices[i] >= 0)) + xstrfmtcat(dev_list, "%d", mic_devices[i]); + else + xstrfmtcat(dev_list, "%d", i); + } + } + if (dev_list) { + env_array_overwrite(job_env_ptr,"OFFLOAD_DEVICES", + dev_list); + xfree(dev_list); + } else { + /* The gres.conf file must identify specific device files + * in order to set the OFFLOAD_DEVICES env var */ + error("gres/mic unable to set OFFLOAD_DEVICES, " + "no device files configured"); + } +} + +/* Send GRES information to slurmstepd on the specified file descriptor */ +extern void send_stepd(int fd) +{ + int i; + + safe_write(fd, &nb_available_files, sizeof(int)); + for (i = 0; i < nb_available_files; i++) + safe_write(fd, &mic_devices[i], sizeof(int)); + return; + +rwfail: error("gres_plugin_send_stepd failed"); +} + +/* Receive GRES information from slurmd on the specified file descriptor */ +extern void recv_stepd(int fd) +{ + int i; + + safe_read(fd, &nb_available_files, sizeof(int)); + if (nb_available_files > 0) + mic_devices = xmalloc(sizeof(int) * nb_available_files); + for (i = 0; i < nb_available_files; i++) + safe_read(fd, &mic_devices[i], sizeof(int)); + return; + +rwfail: error("gres_plugin_recv_stepd failed"); +} diff --git a/src/plugins/gres/nic/Makefile.in b/src/plugins/gres/nic/Makefile.in index 15b44172ee87aa04073fabd05768e9c6bae204ac..fb85f9891bcf4ec02bc957afb0ce7829577e4147 100644 --- a/src/plugins/gres/nic/Makefile.in +++ 
b/src/plugins/gres/nic/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/gres/nic DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = 
@H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c index ed93dea58f21804ffa6edec31fe5e466fcb8df03..31fd808ca435b8ad9b6d29f4b0971726711aec5a 100644 --- a/src/plugins/gres/nic/gres_nic.c +++ b/src/plugins/gres/nic/gres_nic.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -120,8 +120,6 @@ extern int node_config_load(List gres_conf_list) xassert(gres_conf_list); iter = list_iterator_create(gres_conf_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((gres_slurmd_conf = list_next(iter))) { if (strcmp(gres_slurmd_conf->name, gres_name) == 0) { rc = SLURM_SUCCESS; diff --git a/src/plugins/job_submit/Makefile.am b/src/plugins/job_submit/Makefile.am index e35d4fe750a7d9d16ad9bc0cdce91f0567f3b4c0..4bd72f1c087f75af09012ec329c38876971516ed 100644 --- a/src/plugins/job_submit/Makefile.am +++ b/src/plugins/job_submit/Makefile.am @@ -1,3 +1,3 @@ # Makefile for job_submit plugins -SUBDIRS = all_partitions cnode defaults logging lua partition +SUBDIRS = all_partitions cnode defaults logging lua partition pbs require_timelimit diff --git a/src/plugins/job_submit/Makefile.in b/src/plugins/job_submit/Makefile.in index aa2b5a6e614401e0c71ca2afb509a2099a64a9c8..faa1d12a6d6024df81db05faa963665ef20ec717 100644 --- a/src/plugins/job_submit/Makefile.in +++ b/src/plugins/job_submit/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/job_submit DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ 
$(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = 
@READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -354,7 +380,7 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = all_partitions cnode defaults logging lua partition +SUBDIRS = all_partitions cnode defaults logging lua partition pbs require_timelimit all: all-recursive .SUFFIXES: diff --git a/src/plugins/job_submit/all_partitions/Makefile.in b/src/plugins/job_submit/all_partitions/Makefile.in index dfd584344bc6aef028dc26ca4d31bb1c8f7fdf9c..4fb490d01f3515f22202e99b7e979968e5f0a8b0 100644 --- a/src/plugins/job_submit/all_partitions/Makefile.in +++ b/src/plugins/job_submit/all_partitions/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/job_submit/all_partitions DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ 
$(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -183,6 +187,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -203,6 +209,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -212,6 +221,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -219,6 +230,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -253,6 +273,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -280,6 +303,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ 
+RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/job_submit/all_partitions/job_submit_all_partitions.c b/src/plugins/job_submit/all_partitions/job_submit_all_partitions.c index 2e26dfd4e79f1f042617a15f0322aac9bd098896..affe03a76f42bdd5aeb011ba6f5084f782de55c5 100644 --- a/src/plugins/job_submit/all_partitions/job_submit_all_partitions.c +++ b/src/plugins/job_submit/all_partitions/job_submit_all_partitions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/job_submit/cnode/Makefile.in b/src/plugins/job_submit/cnode/Makefile.in index ef5475072fdc7bca092a4bfc461da93b185659e7..494911883eeb1055e5be1ba5f76d279573cb0a86 100644 --- a/src/plugins/job_submit/cnode/Makefile.in +++ b/src/plugins/job_submit/cnode/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/job_submit/cnode DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 
+300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/job_submit/cnode/job_submit_cnode.c b/src/plugins/job_submit/cnode/job_submit_cnode.c index 42a78d421eb501067811f23019df9ecb9203b009..0cc707440c79e8f7ffdd5a8d283600ad4fd0261a 100644 --- a/src/plugins/job_submit/cnode/job_submit_cnode.c +++ b/src/plugins/job_submit/cnode/job_submit_cnode.c @@ -12,7 +12,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/job_submit/defaults/Makefile.in b/src/plugins/job_submit/defaults/Makefile.in index 4190bc085ce31baff07966b3ca4ae7892e2901e5..7c68856c4b7afe7cf42d1eeedc6a2bc338918cc5 100644 --- a/src/plugins/job_submit/defaults/Makefile.in +++ b/src/plugins/job_submit/defaults/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/job_submit/defaults DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ 
@@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ 
+OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/job_submit/defaults/job_submit_defaults.c b/src/plugins/job_submit/defaults/job_submit_defaults.c index 138440c8cc9437a77b3c5a5a646d7ba1d631bfba..802b3efbcaa9e4c189ae5f6cdf93a98fd10d0221 100644 --- a/src/plugins/job_submit/defaults/job_submit_defaults.c +++ b/src/plugins/job_submit/defaults/job_submit_defaults.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -67,7 +67,7 @@ #include "src/common/slurm_xlator.h" #include "src/slurmctld/slurmctld.h" -#define MIN_ACCTG_FREQUENCY 30 +#define MAX_ACCTG_FREQUENCY 30 /* * These variables are required by the generic plugin interface. If they @@ -103,20 +103,24 @@ const uint32_t min_plug_version = 100; /*****************************************************************************\ * We've provided a simple example of the type of things you can do with this * plugin. If you develop another plugin that may be of interest to others - * please post it to slurm-dev@lists.llnl.gov Thanks! + * please post it to slurm-dev@schedmd.com Thanks! 
\*****************************************************************************/ extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid) { #if 0 + uint16_t acctg_freq = 0; + if (job_desc->acctg_freq) + acctg_freq = atoi(job_desc->acctg_freq); /* This example code will prevent users from setting an accounting * frequency of less than 30 seconds in order to insure more precise * accounting. Also remove any QOS value set by the user in order * to use the default value from the database. */ - if (job_desc->acctg_freq < MIN_ACCTG_FREQUENCY) { + if (acctg_freq < MIN_ACCTG_FREQUENCY) { info("Changing accounting frequency of submitted job " "from %u to %u", - job_desc->acctg_freq, MIN_ACCTG_FREQUENCY); - job_desc->acctg_freq = MIN_ACCTG_FREQUENCY; + acctg_freq, MIN_ACCTG_FREQUENCY); + job_desc->acctg_freq = xstrdup_printf( + "%d", MIN_ACCTG_FREQUENCY); } if (job_desc->qos) { @@ -131,15 +135,19 @@ extern int job_modify(struct job_descriptor *job_desc, struct job_record *job_ptr, uint32_t submit_uid) { #if 0 + uint16_t acctg_freq = 0; + if (job_desc->acctg_freq) + acctg_freq = atoi(job_desc->acctg_freq); /* This example code will prevent users from setting an accounting * frequency of less than 30 seconds in order to insure more precise * accounting. Also remove any QOS value set by the user in order * to use the default value from the database. 
*/ - if (job_desc->acctg_freq < MIN_ACCTG_FREQUENCY) { + if (acctg_freq < MIN_ACCTG_FREQUENCY) { info("Changing accounting frequency of modify job %u " "from %u to %u", job_ptr->job_id, job_desc->acctg_freq, MIN_ACCTG_FREQUENCY); - job_desc->acctg_freq = MIN_ACCTG_FREQUENCY; + job_desc->acctg_freq = xstrdup_printf( + "%d", MIN_ACCTG_FREQUENCY); } if (job_desc->qos) { diff --git a/src/plugins/job_submit/logging/Makefile.in b/src/plugins/job_submit/logging/Makefile.in index cc1cf0ea5951b2dbc06ac84b35033c80f4f9cefa..bb20f86eb0b6af808512db0c55abee69336906f0 100644 --- a/src/plugins/job_submit/logging/Makefile.in +++ b/src/plugins/job_submit/logging/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/job_submit/logging DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = 
@BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/job_submit/logging/job_submit_logging.c b/src/plugins/job_submit/logging/job_submit_logging.c index 
5a8e09d4aa7163a559fbdf438ba86443856d14be..9033c0ebede6abd5b92dd34350676a807f3612b6 100644 --- a/src/plugins/job_submit/logging/job_submit_logging.c +++ b/src/plugins/job_submit/logging/job_submit_logging.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -103,7 +103,7 @@ const uint32_t min_plug_version = 100; /*****************************************************************************\ * We've provided a simple example of the type of things you can do with this * plugin. If you develop another plugin that may be of interest to others - * please post it to slurm-dev@lists.llnl.gov Thanks! + * please post it to slurm-dev@schedmd.com Thanks! \*****************************************************************************/ extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid) diff --git a/src/plugins/job_submit/lua/Makefile.in b/src/plugins/job_submit/lua/Makefile.in index b1de453bcb5913e9bcf3eaa660a6ca645acd8749..e6b168a5cdca4f06ee9b8166c252121fe7f4e9c6 100644 --- a/src/plugins/job_submit/lua/Makefile.in +++ b/src/plugins/job_submit/lua/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/job_submit/lua DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -185,6 +189,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -205,6 +211,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -214,6 +223,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -221,6 +232,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -255,6 +275,9 @@ NRT_CPPFLAGS = 
@NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -282,6 +305,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c index c908daac7754555865c2d2a5ec87c36030da35ff..b82ad78d37b2e27fada06a8d078b52ecf6a47eed 100644 --- a/src/plugins/job_submit/lua/job_submit_lua.c +++ b/src/plugins/job_submit/lua/job_submit_lua.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -115,7 +115,7 @@ static pthread_mutex_t lua_lock = PTHREAD_MUTEX_INITIALIZER; /*****************************************************************************\ * We've provided a simple example of the type of things you can do with this * plugin. If you develop another plugin that may be of interest to others - * please post it to slurm-dev@lists.llnl.gov Thanks! + * please post it to slurm-dev@schedmd.com Thanks! \*****************************************************************************/ /* Generic stack dump function for debugging purposes */ @@ -285,7 +285,7 @@ static char *_get_default_account(uint32_t user_id) /* Get fields in an existing slurmctld job record * NOTE: This is an incomplete list of job record fields. 
- * Add more as needed and send patches to slurm-dev@llnl.gov */ + * Add more as needed and send patches to slurm-dev@schedmd.com */ static int _get_job_rec_field (lua_State *L) { const struct job_record *job_ptr = lua_touserdata(L, 1); @@ -366,7 +366,7 @@ static int _get_job_req_field (lua_State *L) } else if (!strcmp(name, "account")) { lua_pushstring (L, job_desc->account); } else if (!strcmp(name, "acctg_freq")) { - lua_pushnumber (L, job_desc->acctg_freq); + lua_pushstring (L, job_desc->acctg_freq); } else if (!strcmp(name, "begin_time")) { lua_pushnumber (L, job_desc->begin_time); } else if (!strcmp(name, "comment")) { @@ -431,6 +431,12 @@ static int _get_job_req_field (lua_State *L) lua_pushnumber (L, job_desc->shared); } else if (!strcmp(name, "sockets_per_node")) { lua_pushnumber (L, job_desc->sockets_per_node); + } else if (!strcmp(name, "std_err")) { + lua_pushstring (L, job_desc->std_err); + } else if (!strcmp(name, "std_in")) { + lua_pushstring (L, job_desc->std_in); + } else if (!strcmp(name, "std_out")) { + lua_pushstring (L, job_desc->std_out); } else if (!strcmp(name, "threads_per_core")) { lua_pushnumber (L, job_desc->threads_per_core); } else if (!strcmp(name, "time_limit")) { @@ -441,8 +447,21 @@ static int _get_job_req_field (lua_State *L) lua_pushnumber (L, job_desc->user_id); } else if (!strcmp(name, "wait4switch")) { lua_pushnumber (L, job_desc->wait4switch); + } else if (!strcmp(name, "work_dir")) { + lua_pushstring (L, job_desc->work_dir); } else if (!strcmp(name, "wckey")) { lua_pushstring (L, job_desc->wckey); + } else if (!strcmp(name, "ntasks_per_core")) { + lua_pushnumber (L, job_desc->ntasks_per_core); + } else if (!strcmp(name, "boards_per_node")) { + lua_pushnumber (L, job_desc->boards_per_node); + } else if (!strcmp(name, "ntasks_per_board")) { + lua_pushnumber (L, job_desc->ntasks_per_board); + } else if (!strcmp(name, "ntasks_per_socket")) { + lua_pushnumber (L, job_desc->ntasks_per_socket); + } else if (!strcmp(name, 
"sockets_per_board")) { + lua_pushnumber (L, job_desc->sockets_per_board); + } else { lua_pushnil (L); } @@ -465,7 +484,10 @@ static int _set_job_req_field (lua_State *L) if (strlen(value_str)) job_desc->account = xstrdup(value_str); } else if (!strcmp(name, "acctg_freq")) { - job_desc->acctg_freq = luaL_checknumber(L, 3); + value_str = luaL_checkstring(L, 3); + xfree(job_desc->acctg_freq); + if (strlen(value_str)) + job_desc->acctg_freq = xstrdup(value_str); } else if (!strcmp(name, "begin_time")) { job_desc->begin_time = luaL_checknumber(L, 3); } else if (!strcmp(name, "comment")) { @@ -559,6 +581,21 @@ static int _set_job_req_field (lua_State *L) job_desc->shared = luaL_checknumber(L, 3); } else if (!strcmp(name, "sockets_per_node")) { job_desc->sockets_per_node = luaL_checknumber(L, 3); + } else if (!strcmp(name, "std_err")) { + value_str = luaL_checkstring(L, 3); + xfree(job_desc->std_err); + if (strlen(value_str)) + job_desc->std_err = xstrdup(value_str); + } else if (!strcmp(name, "std_in")) { + value_str = luaL_checkstring(L, 3); + xfree(job_desc->std_in); + if (strlen(value_str)) + job_desc->std_in = xstrdup(value_str); + } else if (!strcmp(name, "std_out")) { + value_str = luaL_checkstring(L, 3); + xfree(job_desc->std_out); + if (strlen(value_str)) + job_desc->std_out = xstrdup(value_str); } else if (!strcmp(name, "threads_per_core")) { job_desc->threads_per_core = luaL_checknumber(L, 3); } else if (!strcmp(name, "time_limit")) { @@ -572,6 +609,11 @@ static int _set_job_req_field (lua_State *L) xfree(job_desc->wckey); if (strlen(value_str)) job_desc->wckey = xstrdup(value_str); + } else if (!strcmp(name, "work_dir")) { + value_str = luaL_checkstring(L, 3); + xfree(job_desc->work_dir); + if (strlen(value_str)) + job_desc->work_dir = xstrdup(value_str); } else { error("_set_job_field: unrecognized field: %s", name); } @@ -581,7 +623,7 @@ static int _set_job_req_field (lua_State *L) /* Get fields in an existing slurmctld partition record * NOTE: This is an 
incomplete list of partition record fields. - * Add more as needed and send patches to slurm-dev@llnl.gov */ + * Add more as needed and send patches to slurm-dev@schedmd.com */ static int _get_part_rec_field (lua_State *L) { const struct part_record *part_ptr = lua_touserdata(L, 1); @@ -712,8 +754,6 @@ static void _push_partition_list(uint32_t user_id, uint32_t submit_uid) lua_newtable(L); part_iterator = list_iterator_create(part_list); - if (!part_iterator) - fatal("list_iterator_create malloc"); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { if (!_user_can_use_part(user_id, submit_uid, part_ptr)) continue; diff --git a/src/plugins/job_submit/partition/Makefile.in b/src/plugins/job_submit/partition/Makefile.in index 35e46fc0011f7a0d0d9326aaf3439ffca7aa3dd5..54b6e1890e49486f66d19a625afa3575496e1750 100644 --- a/src/plugins/job_submit/partition/Makefile.in +++ b/src/plugins/job_submit/partition/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/job_submit/partition DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ 
$(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ 
+RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/job_submit/partition/job_submit_partition.c b/src/plugins/job_submit/partition/job_submit_partition.c index e4a25ee2c606d410ea45ca8f9c5af1b0644ef6a8..c1e88230e633c969a4f9891bd8120182db59877d 100644 --- a/src/plugins/job_submit/partition/job_submit_partition.c +++ b/src/plugins/job_submit/partition/job_submit_partition.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -102,7 +102,7 @@ const uint32_t min_plug_version = 100; /*****************************************************************************\ * We've provided a simple example of the type of things you can do with this * plugin. If you develop another plugin that may be of interest to others - * please post it to slurm-dev@lists.llnl.gov Thanks! + * please post it to slurm-dev@schedmd.com Thanks! 
\*****************************************************************************/ /* Test if this user can run jobs in the selected partition based upon @@ -144,8 +144,6 @@ extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid) return SLURM_SUCCESS; part_iterator = list_iterator_create(part_list); - if (!part_iterator) - fatal("list_iterator_create malloc"); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { if (!(part_ptr->state_up & PARTITION_SUBMIT)) continue; /* nobody can submit jobs here */ diff --git a/src/plugins/job_submit/pbs/Makefile.am b/src/plugins/job_submit/pbs/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..5ea428671bdcbd925cdb7ef9fd02e7a1749dd9b3 --- /dev/null +++ b/src/plugins/job_submit/pbs/Makefile.am @@ -0,0 +1,17 @@ +# Makefile for job_submit/pbs and spank/pbs plugins + +AUTOMAKE_OPTIONS = foreign + +AM_CPPFLAGS = -DDEFAULT_SCRIPT_DIR=\"$(sysconfdir)\" + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +# Job submit PBS plugin. +pkglib_LTLIBRARIES = job_submit_pbs.la spank_pbs.la +job_submit_pbs_la_SOURCES = job_submit_pbs.c +job_submit_pbs_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) + +# SPANK PBS plugin. +spank_pbs_la_SOURCES = spank_pbs.c +spank_pbs_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/job_submit/pbs/Makefile.in b/src/plugins/job_submit/pbs/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..20aa4d21a93578ba935c39dc3f351c012e90b71e --- /dev/null +++ b/src/plugins/job_submit/pbs/Makefile.in @@ -0,0 +1,729 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for job_submit/pbs and spank/pbs plugins + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/job_submit/pbs +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + 
$(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e 
"s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +job_submit_pbs_la_LIBADD = +am_job_submit_pbs_la_OBJECTS = job_submit_pbs.lo +job_submit_pbs_la_OBJECTS = $(am_job_submit_pbs_la_OBJECTS) +job_submit_pbs_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(job_submit_pbs_la_LDFLAGS) $(LDFLAGS) -o $@ +spank_pbs_la_LIBADD = +am_spank_pbs_la_OBJECTS = spank_pbs.lo +spank_pbs_la_OBJECTS = $(am_spank_pbs_la_OBJECTS) +spank_pbs_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(spank_pbs_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + 
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(job_submit_pbs_la_SOURCES) $(spank_pbs_la_SOURCES) +DIST_SOURCES = $(job_submit_pbs_la_SOURCES) $(spank_pbs_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ 
+HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ 
+REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = 
@install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +AM_CPPFLAGS = -DDEFAULT_SCRIPT_DIR=\"$(sysconfdir)\" +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +# Job submit PBS plugin. +pkglib_LTLIBRARIES = job_submit_pbs.la spank_pbs.la +job_submit_pbs_la_SOURCES = job_submit_pbs.c +job_submit_pbs_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) + +# SPANK PBS plugin. +spank_pbs_la_SOURCES = spank_pbs.c +spank_pbs_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/job_submit/pbs/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/job_submit/pbs/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +job_submit_pbs.la: $(job_submit_pbs_la_OBJECTS) $(job_submit_pbs_la_DEPENDENCIES) $(EXTRA_job_submit_pbs_la_DEPENDENCIES) + $(job_submit_pbs_la_LINK) -rpath $(pkglibdir) $(job_submit_pbs_la_OBJECTS) $(job_submit_pbs_la_LIBADD) $(LIBS) +spank_pbs.la: $(spank_pbs_la_OBJECTS) $(spank_pbs_la_DEPENDENCIES) $(EXTRA_spank_pbs_la_DEPENDENCIES) + $(spank_pbs_la_LINK) -rpath $(pkglibdir) $(spank_pbs_la_OBJECTS) $(spank_pbs_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/job_submit_pbs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spank_pbs.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs 
+ +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e 
"s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + 
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/job_submit/pbs/job_submit_pbs.c b/src/plugins/job_submit/pbs/job_submit_pbs.c new file mode 100644 index 0000000000000000000000000000000000000000..d01c8fe0ab9473695420a9ed3c19fe27ca541902 --- /dev/null +++ b/src/plugins/job_submit/pbs/job_submit_pbs.c @@ -0,0 +1,394 @@ +/*****************************************************************************\ + * job_submit_pbs.c - Translate PBS job options specifications to the Slurm + * equivalents, particularly job dependencies. + ***************************************************************************** + * Copyright (C) 2013 SchedMD LLC. + * Written by Morris Jette <jette@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. 
If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#if HAVE_STDINT_H +# include <stdint.h> +#endif + +#if HAVE_INTTYPES_H +# include <inttypes.h> +#endif + +#include <stdio.h> + +#include <sys/types.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <dlfcn.h> +#include <pthread.h> + +#include "slurm/slurm.h" +#include "slurm/slurm_errno.h" + +#include "src/common/slurm_xlator.h" +#include "src/slurmctld/job_scheduler.h" +#include "src/slurmctld/locks.h" +#include "src/slurmctld/slurmctld.h" + +#define _DEBUG 0 + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. 
SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "auth" for SLURM authentication) and <method> is a + * description of how this plugin satisfies that application. SLURM will + * only load authentication plugins if the plugin_type string has a prefix + * of "auth/". + * + * plugin_version - specifies the version number of the plugin. + * min_plug_version - specifies the minumum version number of incoming + * messages that this plugin can accept + */ +const char plugin_name[] = "Job submit PBS plugin"; +const char plugin_type[] = "job_submit/pbs"; +const uint32_t plugin_version = 100; +const uint32_t min_plug_version = 100; + +int init (void) +{ + return SLURM_SUCCESS; +} + +int fini (void) +{ + return SLURM_SUCCESS; +} + +static void _add_env(struct job_descriptor *job_desc, char *new_env) +{ + if (!job_desc->environment || !new_env) + return; /* Nothing we can do for interactive jobs */ + + xrealloc(job_desc->environment, + sizeof(char *) * (job_desc->env_size + 2)); + job_desc->environment[job_desc->env_size] = xstrdup(new_env); + job_desc->env_size++; +} + +static void _add_env2(struct job_descriptor *job_desc, char *key, char *val) +{ + int len; + char *new_env; + + if (!job_desc->environment || !key || !val) + return; /* Nothing we can do for interactive jobs */ + + len = strlen(key) + strlen(val) + 2; + new_env = xmalloc(len); + snprintf(new_env, len, "%s=%s", key, val); + _add_env(job_desc, new_env); + xfree(new_env); +} + +static void _decr_depend_cnt(struct job_record *job_ptr) +{ + char buf[16], *end_ptr = NULL, *tok = NULL; + int cnt, width; + + if (job_ptr->comment) + tok = strstr(job_ptr->comment, "on:"); + if (!tok) { + info("%s: invalid job depend before option on job %u", + plugin_type, job_ptr->job_id); + return; + } + + cnt = strtol(tok + 3, &end_ptr, 10); + if (cnt > 0) + 
cnt--; + width = MIN(sizeof(buf) - 1, (end_ptr - tok - 3)); + sprintf(buf, "%*d", width, cnt); + memcpy(tok + 3, buf, width); +} + +/* We can not invoke update_job_dependency() until the new job record has + * been created, hence this sleeping thread modifies the dependent job + * later. */ +static void *_dep_agent(void *args) +{ + struct job_record *job_ptr = (struct job_record *) args; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK}; + char *end_ptr = NULL, *tok; + int cnt = 0; + + usleep(100000); + lock_slurmctld(job_write_lock); + if (job_ptr && job_ptr->details && (job_ptr->magic == JOB_MAGIC) && + job_ptr->comment && strstr(job_ptr->comment, "on:")) { + char *new_depend = job_ptr->details->dependency; + job_ptr->details->dependency = NULL; + update_job_dependency(job_ptr, new_depend); + xfree(new_depend); + tok = strstr(job_ptr->comment, "on:"); + cnt = strtol(tok + 3, &end_ptr, 10); + } + if (cnt == 0) + set_job_prio(job_ptr); + unlock_slurmctld(job_write_lock); + return NULL; +} + +static void _xlate_before(char *depend, uint32_t submit_uid, uint32_t my_job_id) +{ + uint32_t job_id; + char *last_ptr = NULL, *new_dep = NULL, *tok, *type; + struct job_record *job_ptr; + pthread_attr_t attr; + pthread_t dep_thread; + + + tok = strtok_r(depend, ":", &last_ptr); + if (!strcmp(tok, "before")) + type = "after"; + else if (!strcmp(tok, "beforeany")) + type = "afterany"; + else if (!strcmp(tok, "beforenotok")) + type = "afternotok"; + else if (!strcmp(tok, "beforeok")) + type = "afterok"; + else { + info("%s: discarding invalid job dependency option %s", + plugin_type, tok); + return; + } + + tok = strtok_r(NULL, ":", &last_ptr); + while (tok) { + job_id = atoi(tok); + job_ptr = find_job_record(job_id); + if (!job_ptr) { + info("%s: discarding invalid job dependency before %s", + plugin_type, tok); + } else if ((submit_uid != job_ptr->user_id) && + !validate_super_user(submit_uid)) { + error("%s: Security violation: uid %u trying to 
alter " + "job %u belonging to uid %u", + plugin_type, submit_uid, job_ptr->job_id, + job_ptr->user_id); + } else if ((!IS_JOB_PENDING(job_ptr)) || + (job_ptr->details == NULL)) { + info("%s: discarding job before dependency on " + "non-pending job %u", + plugin_type, job_ptr->job_id); + } else { + if (job_ptr->details->dependency) { + xstrcat(new_dep, job_ptr->details->dependency); + xstrcat(new_dep, ","); + } + xstrfmtcat(new_dep, "%s:%u", type, my_job_id); + xfree(job_ptr->details->dependency); + job_ptr->details->dependency = new_dep; + new_dep = NULL; + _decr_depend_cnt(job_ptr); + + slurm_attr_init(&attr); + pthread_attr_setdetachstate(&attr, + PTHREAD_CREATE_DETACHED); + pthread_create(&dep_thread, &attr, _dep_agent, job_ptr); + slurm_attr_destroy(&attr); + } + tok = strtok_r(NULL, ":", &last_ptr); + } +} + +/* Translate PBS job dependencies to Slurm equivalents to the exptned possible + * + * PBS option Slurm nearest equivalent + * =========== ======================== + * after after + * afterok afterok + * afternotok afternotok + * afterany after + * before (set after in referenced job and release as needed) + * beforeok (set afterok in referenced job and release as needed) + * beforenotok (set afternotok in referenced job and release as needed) + * beforeany (set afterany in referenced job and release as needed) + * N/A expand + * on (store value in job comment and hold it) + * N/A singleton + */ +static void _xlate_dependency(struct job_descriptor *job_desc, + uint32_t submit_uid, uint32_t my_job_id) +{ + char *result = NULL; + char *last_ptr = NULL, *tok; + + if (!job_desc->dependency) + return; + +#if _DEBUG + info("dependency in:%s", job_desc->dependency); +#endif + + tok = strtok_r(job_desc->dependency, ",", &last_ptr); + while (tok) { + if (!strncmp(tok, "after", 5) || + !strncmp(tok, "expand", 6) || + !strncmp(tok, "singleton", 9)) { + if (result) + xstrcat(result, ","); + xstrcat(result, tok); + } else if (!strncmp(tok, "on:", 3)) { + 
job_desc->priority = 0; /* Job is held */ + if (job_desc->comment) + xstrcat(job_desc->comment, ","); + xstrcat(job_desc->comment, tok); + } else if (!strncmp(tok, "before", 6)) { + _xlate_before(tok, submit_uid, my_job_id); + } else { + info("%s: discarding unknown job dependency option %s", + plugin_type, tok); + } + tok = strtok_r(NULL, ",", &last_ptr); + } +#if _DEBUG + info("dependency out:%s", result); +#endif + xfree(job_desc->dependency); + job_desc->dependency = result; +} + +extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid) +{ + char *std_out, *tok; + uint32_t my_job_id = get_next_job_id(); + + _xlate_dependency(job_desc, submit_uid, my_job_id); + + if (job_desc->account) + _add_env2(job_desc, "PBS_ACCOUNT", job_desc->account); + + if (job_desc->script) { + /* Setting PBS_ENVIRONMENT causes Intel MPI to believe that + * it is running on a PBS system, which isn't the case here. */ + /* _add_env(job_desc, "PBS_ENVIRONMENT=PBS_BATCH"); */ + } else { + /* Interactive jobs lack an environment in the job submit + * RPC, so it needs to be handled by a SPANK plugin */ + /* _add_env(job_desc, "PBS_ENVIRONMENT=PBS_INTERACTIVE"); */ + } + + if (job_desc->partition) + _add_env2(job_desc, "PBS_QUEUE", job_desc->partition); + + if (job_desc->std_out) + std_out = job_desc->std_out; + else + std_out = "slurm-%j.out"; + if (job_desc->comment) + xstrcat(job_desc->comment, ","); + xstrcat(job_desc->comment, "stdout="); + if (std_out && (std_out[0] != '/') && job_desc->work_dir) { + xstrcat(job_desc->comment, job_desc->work_dir); + xstrcat(job_desc->comment, "/"); + } + tok = strstr(std_out, "%j"); + if (tok) { + char buf[16], *tok2; + char *tmp = xstrdup(std_out); + tok2 = strstr(tmp, "%j"); + tok2[0] = '\0'; + snprintf(buf, sizeof(buf), "%u", my_job_id); + xstrcat(tmp, buf); + xstrcat(tmp, tok + 2); + xstrcat(job_desc->comment, tmp); + xfree(tmp); + } else { + xstrcat(job_desc->comment, std_out); + } + + return SLURM_SUCCESS; +} + +/* Lua script 
hook called for "modify job" event. */ +extern int job_modify(struct job_descriptor *job_desc, + struct job_record *job_ptr, uint32_t submit_uid) +{ + char *tok; + + xassert(job_ptr); + + _xlate_dependency(job_desc, submit_uid, job_ptr->job_id); + + if (job_desc->std_out) { + if (job_ptr->comment) + xstrcat(job_ptr->comment, ","); + xstrcat(job_ptr->comment, "stdout="); + if ((job_desc->std_out[0] != '/') && job_ptr->details && + job_ptr->details->work_dir) { + xstrcat(job_ptr->comment, job_ptr->details->work_dir); + xstrcat(job_ptr->comment, "/"); + } + tok = strstr(job_desc->std_out, "%j"); + if (tok) { + char buf[16], *tok2; + char *tmp = xstrdup(job_desc->std_out); + tok2 = strstr(tmp, "%j"); + tok2[0] = '\0'; + snprintf(buf, sizeof(buf), "%u", job_ptr->job_id); + xstrcat(tmp, buf); + xstrcat(tmp, tok + 2); + xstrcat(job_ptr->comment, tmp); + xfree(tmp); + } else { + xstrcat(job_ptr->comment, job_desc->std_out); + } + xfree(job_desc->std_out); + } + + return SLURM_SUCCESS; +} diff --git a/src/plugins/job_submit/pbs/spank_pbs.c b/src/plugins/job_submit/pbs/spank_pbs.c new file mode 100644 index 0000000000000000000000000000000000000000..249586fb8a37487edc9eaf21652ceb9531faac60 --- /dev/null +++ b/src/plugins/job_submit/pbs/spank_pbs.c @@ -0,0 +1,124 @@ +/****************************************************************************\ + * spank_pbs.c - SPANK plugin to set PBS environment variables. + * + * Note: The job_submit/pbs plugin establishes some environment + * variables for batch jobs to complement those configured here. + ***************************************************************************** + * Copyright (C) 2013 SchedMD <http://www.schedmd.com>. + * Written by Morris Jette <jette@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "slurm/spank.h" + +SPANK_PLUGIN(pbs, 1); + +int slurm_spank_task_init(spank_t sp, int ac, char **av) +{ + char val[30000]; + + /* PBS_ACCOUNT is set in the job_submit/pbs plugin, but only for + * batch jobs that specify the job's account at job submit time. */ + + /* Setting PBS_ENVIRONMENT causes Intel MPI to believe that + * it is running on a PBS system, which isn't the case here. 
*/ +#if 0 + /* PBS_ENVIRONMENT is set to PBS_BATCH in the job_submit/pbs plugin. + * Interactive jobs get PBS_ENVIRONMENT set here since it's environment + * never passes through the slurmctld daemon. */ + if (spank_getenv(sp, "PBS_ENVIRONMENT", val, sizeof(val)) != + ESPANK_SUCCESS) + spank_setenv(sp, "PBS_ENVIRONMENT", "PBS_INTERACTIVE", 1); +#endif + + if (getcwd(val, sizeof(val))) + spank_setenv(sp, "PBS_JOBDIR", val, 1); + + if (spank_getenv(sp, "SLURM_JOB_ID", val, sizeof(val)) == + ESPANK_SUCCESS) + spank_setenv(sp, "PBS_JOBID", val, 1); + + if (spank_getenv(sp, "SLURM_JOB_NAME", val, sizeof(val)) == + ESPANK_SUCCESS) + spank_setenv(sp, "PBS_JOBNAME", val, 1); + + /* PBS_NODEFILE is not currently available, although such a file might + * be build based upon the SLURM_JOB_NODELIST environment variable */ + + if (spank_getenv(sp, "SLURM_NODEID", val, sizeof(val)) == + ESPANK_SUCCESS) + spank_setenv(sp, "PBS_NODENUM", val, 1); + + if (spank_getenv(sp, "HOME", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_HOME", val, 1); + + if (spank_getenv(sp, "HOST", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_HOST", val, 1); + + if (spank_getenv(sp, "LANG", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_LANG", val, 1); + + if (spank_getenv(sp, "LOGNAME", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_LOGNAME", val, 1); + + if (spank_getenv(sp, "MAIL", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_MAIL", val, 1); + + if (spank_getenv(sp, "PATH", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_PATH", val, 1); + + if (spank_getenv(sp, "QUEUE", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_QUEUE", val, 1); + + if (spank_getenv(sp, "SHELL", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_SHELL", val, 1); + + if (spank_getenv(sp, "SYSTEM", val, sizeof(val)) == ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_SYSTEM", val, 1); + + if (spank_getenv(sp, 
"SLURM_SUBMIT_DIR", val, sizeof(val)) == + ESPANK_SUCCESS) + spank_setenv(sp, "PBS_O_WORKDIR", val, 1); + + /* PBS_QUEUE is set in the job_submit/pbs plugin, but only for + * batch jobs that specify the job's partition at job submit time. */ + + if (spank_getenv(sp, "SLURM_PROCID", val, sizeof(val)) == + ESPANK_SUCCESS) { + int i = atoi(val) + 1; + snprintf(val, sizeof(val), "%d", i); + spank_setenv(sp, "PBS_TASKNUM", val, 1); + } + + return 0; +} diff --git a/src/plugins/job_submit/require_timelimit/Makefile.am b/src/plugins/job_submit/require_timelimit/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..b3c872e1cddda42af36d20ab831d4d772eb2010b --- /dev/null +++ b/src/plugins/job_submit/require_timelimit/Makefile.am @@ -0,0 +1,13 @@ +# Makefile for job_submit/require_timelimit plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +pkglib_LTLIBRARIES = job_submit_require_timelimit.la + +# Job submit require_timelimit plugin. +job_submit_require_timelimit_la_SOURCES = job_submit_require_timelimit.c +job_submit_require_timelimit_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/job_submit/require_timelimit/Makefile.in b/src/plugins/job_submit/require_timelimit/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..cd3a112f6c28c5bfa6ff9b352cb93b28e3f6d5f5 --- /dev/null +++ b/src/plugins/job_submit/require_timelimit/Makefile.in @@ -0,0 +1,718 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for job_submit/require_timelimit plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/job_submit/require_timelimit +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 
\ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN 
{ files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +job_submit_require_timelimit_la_LIBADD = +am_job_submit_require_timelimit_la_OBJECTS = \ + job_submit_require_timelimit.lo +job_submit_require_timelimit_la_OBJECTS = \ + $(am_job_submit_require_timelimit_la_OBJECTS) +job_submit_require_timelimit_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) \ + $(job_submit_require_timelimit_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(job_submit_require_timelimit_la_SOURCES) +DIST_SOURCES = $(job_submit_require_timelimit_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) 
$(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ 
+HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ 
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = 
@srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common +pkglib_LTLIBRARIES = job_submit_require_timelimit.la + +# Job submit require_timelimit plugin. +job_submit_require_timelimit_la_SOURCES = job_submit_require_timelimit.c +job_submit_require_timelimit_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/job_submit/require_timelimit/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/job_submit/require_timelimit/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f 
\"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +job_submit_require_timelimit.la: $(job_submit_require_timelimit_la_OBJECTS) $(job_submit_require_timelimit_la_DEPENDENCIES) $(EXTRA_job_submit_require_timelimit_la_DEPENDENCIES) + $(job_submit_require_timelimit_la_LINK) -rpath $(pkglibdir) $(job_submit_require_timelimit_la_OBJECTS) $(job_submit_require_timelimit_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/job_submit_require_timelimit.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then 
echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort 
-u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/job_submit/require_timelimit/job_submit_require_timelimit.c b/src/plugins/job_submit/require_timelimit/job_submit_require_timelimit.c new file mode 100644 index 0000000000000000000000000000000000000000..4c8a782ab8877b33be73b7f01add964a063e4574 --- /dev/null +++ b/src/plugins/job_submit/require_timelimit/job_submit_require_timelimit.c @@ -0,0 +1,70 @@ +/*****************************************************************************\ + * job_submit_require_timelimit.c - Force job requests to include time limit + ***************************************************************************** + * Copyright (C) 2013 Rensselaer Polytechnic Institute + * Written by Daniel M. Weeks. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ +#include <slurm/slurm.h> +#include <slurm/slurm_errno.h> + +#include "src/slurmctld/slurmctld.h" + +const char plugin_name[]="Require time limit jobsubmit plugin"; +const char plugin_type[]="job_submit/require_timelimit"; +const uint32_t plugin_version = 100; +const uint32_t min_plug_version = 100; + +int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid) +{ + // NOTE: no job id actually exists yet (=NO_VAL) + + if (job_desc->time_limit == NO_VAL) { + info("Missing time limit for job by uid:%u", submit_uid); + return ESLURM_MISSING_TIME_LIMIT; + } else if (job_desc->time_limit == INFINITE) { + info("Bad time limit for job by uid:%u", submit_uid); + return ESLURM_INVALID_TIME_LIMIT; + } + + return SLURM_SUCCESS; +} + +int job_modify(struct job_descriptor *job_desc, struct job_record *job_ptr, + uint32_t submit_uid) +{ + if (job_desc->time_limit == INFINITE) { + info("Bad replacement time limit for %u", job_desc->job_id); + return ESLURM_INVALID_TIME_LIMIT; + } + + return SLURM_SUCCESS; +} diff --git a/src/plugins/jobacct_gather/Makefile.in b/src/plugins/jobacct_gather/Makefile.in index d3f7053f61db324f180ea8731095d0eb1562b815..05a3f9286ab05b8d168636ac7d21e7027c5c93b8 100644 --- a/src/plugins/jobacct_gather/Makefile.in +++ b/src/plugins/jobacct_gather/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/jobacct_gather DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ 
HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobacct_gather/aix/Makefile.in b/src/plugins/jobacct_gather/aix/Makefile.in index 64ff216c29c5d3745db4aac81bb11fd6a7ade7fa..06cd277dbcae4e5e14e8b5adcce1a53d60549b52 100644 --- a/src/plugins/jobacct_gather/aix/Makefile.in +++ b/src/plugins/jobacct_gather/aix/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobacct_gather/aix DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ 
@@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ 
+OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c index c467366882c2760875ce95c34ef7c272ab5dc141..16ebc280db54a92215aacff8a0def12393dc657b 100644 --- a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c +++ b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -196,7 +196,7 @@ extern void jobacct_gather_p_poll_data( return; } - if(processing) { + if (processing) { debug("already running, returning"); return; } @@ -204,7 +204,7 @@ extern void jobacct_gather_p_poll_data( processing = 1; prec_list = list_create(_destroy_prec); - if(!pgid_plugin) { + if (!pgid_plugin) { /* get only the processes in the proctrack container */ slurm_container_get_pids(cont_id, &pids, &npids); if (!npids) { diff --git a/src/plugins/jobacct_gather/cgroup/Makefile.in b/src/plugins/jobacct_gather/cgroup/Makefile.in index 5eebff9e78cd3e08ece2351e590f7239d6732c66..4495fff824d035f092adfb040e88a15c070d82ad 100644 --- a/src/plugins/jobacct_gather/cgroup/Makefile.in +++ b/src/plugins/jobacct_gather/cgroup/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobacct_gather/cgroup DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ 
$(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -183,6 +187,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -203,6 +209,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -212,6 +221,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -219,6 +230,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -253,6 +273,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -280,6 +303,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git 
a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c index df831dae68681b878014cae95ee9c8b74a84449b..16c48facb6d471d02844f78fc8afc704ac3a7f53 100644 --- a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c +++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h index 4d0b45d2c20c6eaea530b5c8c431ceadb6f7388a..8d163378a15f04fdc456ab40a00c98a678fa7259 100644 --- a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h +++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c index 8a4497f0dd5dccf29db59555787143b2af9b74ca..0ab128b1730396325356ecbdf5b181df99b3ac33 100644 --- a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c +++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c @@ -7,7 +7,7 @@ * Matthieu Hautreux * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c index fa4ce6d13aac4788c4c8cd9ac308bdd364406162..55cfafb56349d1ea6487b1f34ffaa5d65ff551f7 100644 --- a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c +++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c @@ -7,7 +7,7 @@ * Matthieu Hautreux * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobacct_gather/linux/Makefile.in b/src/plugins/jobacct_gather/linux/Makefile.in index e4e56667e6b8ce99011557eedc4b6cf254357517..552046f558a8ca4622e8aa90fbdf070a03d439d3 100644 --- a/src/plugins/jobacct_gather/linux/Makefile.in +++ b/src/plugins/jobacct_gather/linux/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobacct_gather/linux DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 
+301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c index 76cf2c5c894f3bcad039da8d7e7049cbae833373..2015a0d3f53bb687092d4616f06a1717e9cc588e 100644 --- a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c +++ b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -47,6 +47,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_acct_gather_infiniband.h" #include "src/slurmd/common/proctrack.h" #define _DEBUG 0 @@ -96,6 +97,8 @@ typedef struct prec { /* process record */ int vsize; /* virtual size */ int act_cpufreq; /* actual average cpu frequency */ int last_cpu; /* last cpu */ + double disk_read; /* local disk read */ + double disk_write; /* local disk write */ } prec_t; static int pagesize = 0; @@ -110,10 +113,11 @@ static int _is_a_lwp(uint32_t pid); static void _get_offspring_data(List prec_list, prec_t *ancestor, pid_t pid); static int _get_process_data_line(int in, prec_t *prec); static int _get_sys_interface_freq_line(uint32_t cpu, char *filename, - char *sbuf ); + char *sbuf); static uint32_t _update_weighted_freq(struct jobacctinfo *jobacct, char * sbuf); + /* * _get_offspring_data() -- 
collect memory usage data for the offspring * @@ -154,6 +158,8 @@ _get_offspring_data(List prec_list, prec_t *ancestor, pid_t pid) ancestor->pages += prec->pages; ancestor->rss += prec->rss; ancestor->vsize += prec->vsize; + ancestor->disk_read += prec->disk_read; + ancestor->disk_write += prec->disk_write; } } list_iterator_destroy(itr); @@ -173,8 +179,8 @@ static uint32_t _update_weighted_freq(struct jobacctinfo *jobacct, sscanf(sbuf, "%d", &thisfreq); jobacct->current_weighted_freq = - jobacct->current_weighted_freq + - jobacct->this_sampled_cputime * thisfreq; + jobacct->current_weighted_freq + + jobacct->this_sampled_cputime * thisfreq; if (jobacct->last_total_cputime) { return (jobacct->current_weighted_freq / jobacct->last_total_cputime); @@ -197,7 +203,7 @@ static char * skipdot (char *str) } static int _get_sys_interface_freq_line(uint32_t cpu, char *filename, - char * sbuf ) + char * sbuf) { int num_read, fd; FILE *sys_fp = NULL; @@ -213,8 +219,8 @@ static int _get_sys_interface_freq_line(uint32_t cpu, char *filename, "/sys/devices/system/cpu/cpu%d/cpufreq/%s", cpu, filename); debug2("_get_sys_interface_freq_line: " - "filename = %s ", - freq_file); + "filename = %s ", + freq_file); if ((sys_fp = fopen(freq_file, "r"))!= NULL) { /* frequency scaling enabled */ fd = fileno(sys_fp); @@ -229,13 +235,13 @@ static int _get_sys_interface_freq_line(uint32_t cpu, char *filename, /* frequency scaling not enabled */ if (!cpunfo_frequency){ snprintf(freq_file, 14, - "/proc/cpuinfo"); + "/proc/cpuinfo"); debug2("_get_sys_interface_freq_line: " - "filename = %s ", - freq_file); + "filename = %s ", + freq_file); if ((sys_fp = fopen(freq_file, "r")) != NULL) { while (fgets(cpunfo_line, sizeof cpunfo_line, - sys_fp ) != NULL) { + sys_fp) != NULL) { if (strstr(cpunfo_line, "cpu MHz") || strstr(cpunfo_line, "cpu GHz")) { break; @@ -261,8 +267,8 @@ static int _is_a_lwp(uint32_t pid) { uint32_t tgid; int rc; - if ( snprintf(proc_status_file, 256, - "/proc/%d/status",pid) > 
256 ) { + if (snprintf(proc_status_file, 256, + "/proc/%d/status",pid) > 256) { debug("jobacct_gather_linux: unable to build proc_status " "fpath"); return -1; @@ -278,18 +284,18 @@ static int _is_a_lwp(uint32_t pid) { rc = fscanf(status_fp, "Name:\t%*s\n%*[ \ta-zA-Z0-9:()]\nTgid:\t%d\n", &tgid); - } while ( rc < 0 && errno == EINTR ); + } while (rc < 0 && errno == EINTR); fclose(status_fp); /* unable to read /proc/[pid]/status content */ - if ( rc != 1 ) { + if (rc != 1) { debug3("jobacct_gather_linux: unable to read requested " "pattern in %s",proc_status_file); return -1; } /* if tgid differs from pid, this is a LWP (Thread POSIX) */ - if ( (uint32_t) tgid != (uint32_t) pid ) { + if ((uint32_t) tgid != (uint32_t) pid) { debug3("jobacct_gather_linux: pid=%d is a lightweight process", tgid); return 1; @@ -307,7 +313,7 @@ static int _is_a_lwp(uint32_t pid) { * !=0 - data are valid * * Based upon stat2proc() from the ps command. It can handle arbitrary - * executable file basenames for `cmd', i.e. those with embedded whitespace or + * executable file basenames for `cmd', i.e. those with embedded whitespace or * embedded ')'s. Such names confuse %s (see scanf(3)), so the string is split * and %39c is used instead. (except for embedded ')' "(%[^)]c)" would work. */ @@ -368,6 +374,45 @@ static int _get_process_data_line(int in, prec_t *prec) { return 1; } +/* _get_process_io_data_line() - get line of data from /proc/<pid>/io + * + * IN: in - input file descriptor + * OUT: prec - the destination for the data + * + * RETVAL: ==0 - no valid data + * !=0 - data are valid + * + * /proc/<pid>/io content format is: + * rchar: <# of characters read> + * wrchar: <# of characters written> + * . . . 
+ */ +static int _get_process_io_data_line(int in, prec_t *prec) { + char sbuf[256]; + char f1[7], f3[7]; + int num_read, nvals; + uint64_t rchar, wchar; + + num_read = read(in, sbuf, (sizeof(sbuf) - 1)); + if (num_read <= 0) + return 0; + sbuf[num_read] = '\0'; + + nvals = sscanf(sbuf, "%s %"PRIu64" %s %"PRIu64"", + f1, &rchar, f3, &wchar); + if (nvals < 4) + return 0; + + if (_is_a_lwp(prec->pid) > 0) + return 0; + + /* Copy the values that slurm records into our data structure */ + prec->disk_read = (double)rchar / (double)1048576; + prec->disk_write = (double)wchar / (double)1048576; + + return 1; +} + static void _destroy_prec(void *object) { prec_t *prec = (prec_t *)object; @@ -375,11 +420,48 @@ static void _destroy_prec(void *object) return; } +static void _handle_stats( + List prec_list, char *proc_stat_file, char *proc_io_file) +{ + FILE *stat_fp = NULL; + FILE *io_fp = NULL; + int fd, fd2; + prec_t *prec = NULL; + + if (!(stat_fp = fopen(proc_stat_file, "r"))) + return; /* Assume the process went away */ + /* + * Close the file on exec() of user tasks. + * + * NOTE: If we fork() slurmstepd after the + * fopen() above and before the fcntl() below, + * then the user task may have this extra file + * open, which can cause problems for + * checkpoint/restart, but this should be a very rare + * problem in practice. + */ + fd = fileno(stat_fp); + fcntl(fd, F_SETFD, FD_CLOEXEC); + + prec = xmalloc(sizeof(prec_t)); + if (_get_process_data_line(fd, prec)) { + list_append(prec_list, prec); + if ((io_fp = fopen(proc_io_file, "r"))) { + fd2 = fileno(io_fp); + fcntl(fd2, F_SETFD, FD_CLOEXEC); + _get_process_io_data_line(fd2, prec); + fclose(io_fp); + } + } else + xfree(prec); + fclose(stat_fp); + +} /* * init() is called when the plugin is loaded, before any other functions * are called. Put global initialization here. 
*/ -extern int init ( void ) +extern int init (void) { pagesize = getpagesize()/1024; @@ -388,7 +470,7 @@ extern int init ( void ) return SLURM_SUCCESS; } -extern int fini ( void ) +extern int fini (void) { /* just to make sure it closes things up since we call it * from here */ @@ -419,14 +501,14 @@ extern void jobacct_gather_p_poll_data( static int slash_proc_open = 0; struct dirent *slash_proc_entry; - char *iptr = NULL, *optr = NULL; - FILE *stat_fp = NULL; + char *iptr = NULL, *optr = NULL, *optr2 = NULL; char proc_stat_file[256]; /* Allow ~20x extra length */ + char proc_io_file[256]; /* Allow ~20x extra length */ List prec_list = NULL; pid_t *pids = NULL; int npids = 0; uint32_t total_job_mem = 0, total_job_vsize = 0; - int i, fd; + int i; ListIterator itr; ListIterator itr2; prec_t *prec = NULL; @@ -435,18 +517,30 @@ extern void jobacct_gather_p_poll_data( long hertz; char sbuf[72]; int energy_counted = 0; + static int first = 1; + static int energy_profile = ENERGY_DATA_JOULES_TASK; if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) { debug("cont_id hasn't been set yet not running poll"); return; } - if(processing) { + if (processing) { debug("already running, returning"); return; } processing = 1; prec_list = list_create(_destroy_prec); + if (first) { + uint32_t profile_opt; + acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, + &profile_opt); + /* If we are profiling energy it will be checked at a + different rate, so just grab the last one. 
+ */ + if (profile_opt & ACCT_GATHER_PROFILE_ENERGY) + energy_profile = ENERGY_DATA_STRUCT; + } hertz = sysconf(_SC_CLK_TCK); if (hertz < 1) { @@ -462,7 +556,7 @@ extern void jobacct_gather_p_poll_data( itr = list_iterator_create(task_list); if ((jobacct = list_next(itr))) { acct_gather_energy_g_get_data( - ENERGY_DATA_JOULES_TASK, + energy_profile, &jobacct->energy); debug2("getjoules_task energy = %u", jobacct->energy.consumed_energy); @@ -473,30 +567,11 @@ extern void jobacct_gather_p_poll_data( goto finished; } for (i = 0; i < npids; i++) { - snprintf(proc_stat_file, 256, - "/proc/%d/stat", pids[i]); - if ((stat_fp = fopen(proc_stat_file, "r"))==NULL) - continue; /* Assume the process went away */ - /* - * Close the file on exec() of user tasks. - * - * NOTE: If we fork() slurmstepd after the - * fopen() above and before the fcntl() below, - * then the user task may have this extra file - * open, which can cause problems for - * checkpoint/restart, but this should be a very rare - * problem in practice. - */ - fd = fileno(stat_fp); - fcntl(fd, F_SETFD, FD_CLOEXEC); - - prec = xmalloc(sizeof(prec_t)); - if (_get_process_data_line(fd, prec)) - list_append(prec_list, prec); - else - xfree(prec); - fclose(stat_fp); + snprintf(proc_stat_file, 256, "/proc/%d/stat", pids[i]); + snprintf(proc_io_file, 256, "/proc/%d/io", pids[i]); + _handle_stats(prec_list, proc_stat_file, proc_io_file); } + xfree(pids); } else { slurm_mutex_lock(&reading_mutex); @@ -512,6 +587,7 @@ extern void jobacct_gather_p_poll_data( slash_proc_open=1; } strcpy(proc_stat_file, "/proc/"); + strcpy(proc_io_file, "/proc/"); while ((slash_proc_entry = readdir(slash_proc))) { @@ -519,7 +595,8 @@ extern void jobacct_gather_p_poll_data( * strcat(statFileName, slash_proc_entry->d_name); * strcat(statFileName, "/stat"); * while checking for a numeric filename (which really - * should be a pid). + * should be a pid). Then do the same for the + * /proc/<pid>/io file name. 
*/ optr = proc_stat_file + sizeof("/proc"); iptr = slash_proc_entry->d_name; @@ -540,28 +617,26 @@ extern void jobacct_gather_p_poll_data( *optr++ = *iptr++; } while (*iptr); *optr = 0; + optr2 = proc_io_file + sizeof("/proc"); + iptr = slash_proc_entry->d_name; + i = 0; + do { + if ((*iptr < '0') || + ((*optr2++ = *iptr++) > '9')) { + i = -1; + break; + } + } while (*iptr); + if (i == -1) + continue; + iptr = (char*)"/io"; - if ((stat_fp = fopen(proc_stat_file,"r"))==NULL) - continue; /* Assume the process went away */ - /* - * Close the file on exec() of user tasks. - * - * NOTE: If we fork() slurmstepd after the - * fopen() above and before the fcntl() below, - * then the user task may have this extra file - * open, which can cause problems for - * checkpoint/restart, but this should be a very rare - * problem in practice. - */ - fd = fileno(stat_fp); - fcntl(fd, F_SETFD, FD_CLOEXEC); - - prec = xmalloc(sizeof(prec_t)); - if (_get_process_data_line(fd, prec)) - list_append(prec_list, prec); - else - xfree(prec); - fclose(stat_fp); + do { + *optr2++ = *iptr++; + } while (*iptr); + *optr2 = 0; + + _handle_stats(prec_list, proc_stat_file, proc_io_file); } slurm_mutex_unlock(&reading_mutex); @@ -596,6 +671,14 @@ extern void jobacct_gather_p_poll_data( jobacct->max_pages = MAX(jobacct->max_pages, prec->pages); jobacct->tot_pages = prec->pages; + jobacct->max_disk_read = MAX( + jobacct->max_disk_read, + prec->disk_read); + jobacct->tot_disk_read = prec->disk_read; + jobacct->max_disk_write = MAX( + jobacct->max_disk_write, + prec->disk_write); + jobacct->tot_disk_write = prec->disk_write; jobacct->min_cpu = MAX(jobacct->min_cpu, cpu_calc); jobacct->last_total_cputime = jobacct->tot_cpu; @@ -612,24 +695,29 @@ extern void jobacct_gather_p_poll_data( "cpuinfo_cur_freq", sbuf); jobacct->act_cpufreq = _update_weighted_freq(jobacct, sbuf); - debug2("Task average frequency = %u", - jobacct->act_cpufreq); - debug2(" pid %d mem size %u %u time %u(%u+%u)", + debug2("Task 
average frequency = %u " + "pid %d mem size %u %u time %u(%u+%u)", + jobacct->act_cpufreq, jobacct->pid, jobacct->max_rss, jobacct->max_vsize, jobacct->tot_cpu, prec->usec, prec->ssec); /* get energy consumption * only once is enough since we * report per node energy consumption */ - debug2("energycounted= %d", energy_counted); + debug2("energycounted = %d", energy_counted); if (energy_counted == 0) { acct_gather_energy_g_get_data( - ENERGY_DATA_JOULES_TASK, + energy_profile, &jobacct->energy); debug2("getjoules_task energy = %u", jobacct->energy.consumed_energy); energy_counted = 1; } + /* We only profile on after the first poll. */ + if (!first) + acct_gather_profile_g_add_sample_data( + ACCT_GATHER_PROFILE_TASK, + jobacct); break; } } @@ -642,6 +730,7 @@ extern void jobacct_gather_p_poll_data( finished: list_destroy(prec_list); processing = 0; + first = 0; return; } diff --git a/src/plugins/jobacct_gather/none/Makefile.in b/src/plugins/jobacct_gather/none/Makefile.in index e7318e69e42234c29da4b2ccae6a46d37b0b1aec..0a9181762450b46d030d304225291165d6458f81 100644 --- a/src/plugins/jobacct_gather/none/Makefile.in +++ b/src/plugins/jobacct_gather/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobacct_gather/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 
\ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ 
-277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobacct_gather/none/jobacct_gather_none.c b/src/plugins/jobacct_gather/none/jobacct_gather_none.c index 7ad9c520a0e4b57996064fa2aa44258b47f42bd0..354d7862cce1c76e727f1493503518642cbdf948 100644 --- a/src/plugins/jobacct_gather/none/jobacct_gather_none.c +++ b/src/plugins/jobacct_gather/none/jobacct_gather_none.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -146,11 +146,6 @@ extern int jobacct_gather_p_endpoll(void) return SLURM_SUCCESS; } -extern void jobacct_gather_p_change_poll(uint16_t frequency) -{ - return; -} - extern void jobacct_gather_p_suspend_poll(void) { return; diff --git a/src/plugins/jobcomp/Makefile.in b/src/plugins/jobcomp/Makefile.in index 0f10ddee1ab5241e148145957d4634d8b96f1fdb..513077c701968f1deb3fbe76c0b019784d92988a 100644 --- a/src/plugins/jobcomp/Makefile.in +++ b/src/plugins/jobcomp/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/jobcomp DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 
\ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS 
= @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobcomp/filetxt/Makefile.in b/src/plugins/jobcomp/filetxt/Makefile.in index a3e39997c9da13556f4186621eb2b74c55b0d746..429794bd726f77745508515186def820dc9cbe1e 100644 --- a/src/plugins/jobcomp/filetxt/Makefile.in +++ b/src/plugins/jobcomp/filetxt/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobcomp/filetxt DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ 
$(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ 
+RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c index 22e6972c00a09f092601c7ebcc29bc3cd30278fd..6cc65681a70369dbfb1861034286d8257b45d065 100644 --- a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c +++ b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -44,8 +44,8 @@ #include <ctype.h> #include <sys/stat.h> -#include "src/common/xmalloc.h" #include "src/common/slurm_jobcomp.h" +#include "src/common/xmalloc.h" #include "filetxt_jobcomp_process.h" #define BUFFER_SIZE 4096 @@ -60,7 +60,7 @@ static void _destroy_filetxt_jobcomp_info(void *object) { filetxt_jobcomp_info_t *jobcomp_info = (filetxt_jobcomp_info_t *)object; - if(jobcomp_info) { + if (jobcomp_info) { xfree(jobcomp_info); } } @@ -86,90 +86,68 @@ static FILE *_open_log_file(char *logfile) return fd; } -static void _do_fdump(List job_info_list, int lc) -{ - filetxt_jobcomp_info_t *jobcomp_info = NULL; - ListIterator itr = list_iterator_create(job_info_list); - - printf("\n------- Line %d -------\n", lc); - while((jobcomp_info = list_next(itr))) { - printf("%12s: %s\n", jobcomp_info->name, jobcomp_info->val); - } -} - static jobcomp_job_rec_t *_parse_line(List job_info_list) { ListIterator itr = NULL; filetxt_jobcomp_info_t *jobcomp_info = NULL; jobcomp_job_rec_t *job = xmalloc(sizeof(jobcomp_job_rec_t)); char *temp = NULL; - char *temp2 = NULL; itr = list_iterator_create(job_info_list); while((jobcomp_info = list_next(itr))) { - if(!strcasecmp("JobID", 
jobcomp_info->name)) { + if (!strcasecmp("JobID", jobcomp_info->name)) { job->jobid = atoi(jobcomp_info->val); - } else if(!strcasecmp("Partition", jobcomp_info->name)) { + } else if (!strcasecmp("Partition", jobcomp_info->name)) { job->partition = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("StartTime", jobcomp_info->name)) { + } else if (!strcasecmp("StartTime", jobcomp_info->name)) { job->start_time = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("EndTime", jobcomp_info->name)) { + } else if (!strcasecmp("EndTime", jobcomp_info->name)) { job->end_time = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("Userid", jobcomp_info->name)) { + } else if (!strcasecmp("Userid", jobcomp_info->name)) { temp = strstr(jobcomp_info->val, "("); - if(!temp) + if (!temp) { job->uid = atoi(jobcomp_info->val); - *temp++ = 0; - temp2 = temp; - temp = strstr(temp, ")"); - if(!temp) { error("problem getting correct uid from %s", jobcomp_info->val); } else { - *temp = 0; - job->uid = atoi(temp2); + job->uid = atoi(temp + 1); job->uid_name = xstrdup(jobcomp_info->val); } - } else if(!strcasecmp("GroupId", jobcomp_info->name)) { + } else if (!strcasecmp("GroupId", jobcomp_info->name)) { temp = strstr(jobcomp_info->val, "("); - if(!temp) + if (!temp) { job->gid = atoi(jobcomp_info->val); - *temp++ = 0; - temp2 = temp; - temp = strstr(temp, ")"); - if(!temp) { error("problem getting correct gid from %s", jobcomp_info->val); } else { - *temp = 0; - job->gid = atoi(temp2); + job->gid = atoi(temp + 1); job->gid_name = xstrdup(jobcomp_info->val); } - } else if(!strcasecmp("Name", jobcomp_info->name)) { + } else if (!strcasecmp("Name", jobcomp_info->name)) { job->jobname = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("NodeList", jobcomp_info->name)) { + } else if (!strcasecmp("NodeList", jobcomp_info->name)) { job->nodelist = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("NodeCnt", jobcomp_info->name)) { + } else if (!strcasecmp("NodeCnt", jobcomp_info->name)) { 
job->node_cnt = atoi(jobcomp_info->val); - } else if(!strcasecmp("JobState", jobcomp_info->name)) { + } else if (!strcasecmp("JobState", jobcomp_info->name)) { job->state = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("Timelimit", jobcomp_info->name)) { + } else if (!strcasecmp("Timelimit", jobcomp_info->name)) { job->timelimit = xstrdup(jobcomp_info->val); } #ifdef HAVE_BG - else if(!strcasecmp("MaxProcs", jobcomp_info->name)) { + else if (!strcasecmp("MaxProcs", jobcomp_info->name)) { job->max_procs = atoi(jobcomp_info->val); - } else if(!strcasecmp("Block_Id", jobcomp_info->name)) { + } else if (!strcasecmp("Block_Id", jobcomp_info->name)) { job->blockid = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("Connection", jobcomp_info->name)) { + } else if (!strcasecmp("Connection", jobcomp_info->name)) { job->connection = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("reboot", jobcomp_info->name)) { + } else if (!strcasecmp("reboot", jobcomp_info->name)) { job->reboot = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("rotate", jobcomp_info->name)) { + } else if (!strcasecmp("rotate", jobcomp_info->name)) { job->rotate = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("geometry", jobcomp_info->name)) { + } else if (!strcasecmp("geometry", jobcomp_info->name)) { job->geo = xstrdup(jobcomp_info->val); - } else if(!strcasecmp("start", jobcomp_info->name)) { + } else if (!strcasecmp("start", jobcomp_info->name)) { job->bg_start_point = xstrdup(jobcomp_info->val); } #endif @@ -198,17 +176,6 @@ extern List filetxt_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) List job_info_list = NULL; filetxt_jobcomp_info_t *jobcomp_info = NULL; List job_list = list_create(jobcomp_destroy_job); - int fdump_flag = 0; - - /* we grab the fdump only for the filetxt plug through the - FDUMP_FLAG on the job_cond->duplicates variable. We didn't - add this extra field to the structure since it only applies - to this plugin. 
- */ - if(job_cond) { - fdump_flag = job_cond->duplicates & FDUMP_FLAG; - job_cond->duplicates &= (~FDUMP_FLAG); - } filein = slurm_get_jobcomp_loc(); fd = _open_log_file(filein); @@ -217,7 +184,7 @@ extern List filetxt_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) lc++; fptr = line; /* break the record into NULL- terminated strings */ - if(job_info_list) + if (job_info_list) list_destroy(job_info_list); jobid = 0; partition = NULL; @@ -231,21 +198,21 @@ extern List filetxt_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) *fptr++ = 0; jobcomp_info->val = fptr; fptr = strstr(fptr, " "); - if(!strcasecmp("JobId", jobcomp_info->name)) + if (!strcasecmp("JobId", jobcomp_info->name)) jobid = atoi(jobcomp_info->val); - else if(!strcasecmp("Partition", + else if (!strcasecmp("Partition", jobcomp_info->name)) partition = jobcomp_info->val; - if(!fptr) { + if (!fptr) { fptr = strstr(jobcomp_info->val, "\n"); if (fptr) *fptr = 0; break; } else { *fptr++ = 0; - if(*fptr == '\n') { + if (*fptr == '\n') { *fptr = 0; break; } @@ -253,7 +220,7 @@ extern List filetxt_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) } if (job_cond->step_list && list_count(job_cond->step_list)) { - if(!jobid) + if (!jobid) continue; itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { @@ -270,7 +237,7 @@ extern List filetxt_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) if (job_cond->partition_list && list_count(job_cond->partition_list)) { - if(!partition) + if (!partition) continue; itr = list_iterator_create(job_cond->partition_list); while((selected_part = list_next(itr))) @@ -283,19 +250,13 @@ extern List filetxt_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) } foundp: - if (fdump_flag) { - _do_fdump(job_info_list, lc); - continue; - } - - job = _parse_line(job_info_list); - if(job) + if (job) list_append(job_list, job); } - if(job_info_list) + if (job_info_list) list_destroy(job_info_list); if (ferror(fd)) { diff --git 
a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.h b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.h index 76e69c40bdd650c7e6b401c0bd4cdc8a401a3feb..0589bad7b3198c8c78f5f0b11d6fbf85c2da2561 100644 --- a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.h +++ b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c b/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c index 014fac7b03fb3c2550b37d335932e999f0643e6a..0d13abdeeab875dfadf910811db818f6a39e5da9 100644 --- a/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c +++ b/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobcomp/mysql/Makefile.in b/src/plugins/jobcomp/mysql/Makefile.in index 70554249192c117b414a6af0d0e0653ced84b669..169bd7add7a3adcfa162da8bb8e1be1c7db52e41 100644 --- a/src/plugins/jobcomp/mysql/Makefile.in +++ b/src/plugins/jobcomp/mysql/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobcomp/mysql DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -189,6 +193,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -209,6 +215,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -218,6 +227,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -225,6 +236,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -259,6 +279,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -286,6 +309,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobcomp/mysql/jobcomp_mysql.c b/src/plugins/jobcomp/mysql/jobcomp_mysql.c index ca04612bda1925be61e61f671db52430756d15ed..13c1dcc4af68fe3fc3e4fa381cbdf2d60b7c0931 100644 --- a/src/plugins/jobcomp/mysql/jobcomp_mysql.c +++ b/src/plugins/jobcomp/mysql/jobcomp_mysql.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -128,7 +128,7 @@ static pthread_mutex_t jobcomp_lock = PTHREAD_MUTEX_INITIALIZER; static int _mysql_jobcomp_check_tables() { - if(mysql_db_create_table(jobcomp_mysql_conn, jobcomp_table, + if (mysql_db_create_table(jobcomp_mysql_conn, jobcomp_table, jobcomp_table_fields, ")") == SLURM_ERROR) return SLURM_ERROR; @@ -202,7 +202,7 @@ extern int init ( void ) { static int first = 1; - if(first) { + if (first) { /* since this can be loaded from many different places only tell us once. */ verbose("%s loaded", plugin_name); @@ -233,11 +233,11 @@ extern int slurm_jobcomp_set_location(char *location) if (jobcomp_mysql_conn && mysql_db_ping(jobcomp_mysql_conn) == 0) return SLURM_SUCCESS; - if(!location) + if (!location) db_name = slurm_get_jobcomp_loc(); else { while(location[i]) { - if(location[i] == '.' || location[i] == '/') { + if (location[i] == '.' 
|| location[i] == '/') { debug("%s doesn't look like a database " "name using %s", location, DEFAULT_JOB_COMP_DB); @@ -245,7 +245,7 @@ extern int slurm_jobcomp_set_location(char *location) } i++; } - if(location[i]) + if (location[i]) db_name = xstrdup(DEFAULT_JOB_COMP_DB); else db_name = xstrdup(location); @@ -261,7 +261,7 @@ extern int slurm_jobcomp_set_location(char *location) destroy_mysql_db_info(db_info); - if(rc == SLURM_SUCCESS) + if (rc == SLURM_SUCCESS) debug("Jobcomp database init finished"); else debug("Jobcomp database init failed"); @@ -279,9 +279,9 @@ extern int slurm_jobcomp_log_record(struct job_record *job_ptr) char *query = NULL; uint32_t time_limit, start_time, end_time; - if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { + if (!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { char *loc = slurm_get_jobcomp_loc(); - if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { + if (slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return SLURM_ERROR; } @@ -348,21 +348,21 @@ extern int slurm_jobcomp_log_record(struct job_record *job_ptr) "starttime, endtime, nodecnt", jobcomp_table); - if(job_ptr->nodes) + if (job_ptr->nodes) xstrcat(query, ", nodelist"); - if(connect_type) + if (connect_type) xstrcat(query, ", connect_type"); - if(reboot) + if (reboot) xstrcat(query, ", reboot"); - if(rotate) + if (rotate) xstrcat(query, ", rotate"); - if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) + if (job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) xstrcat(query, ", maxprocs"); - if(geometry) + if (geometry) xstrcat(query, ", geometry"); - if(start) + if (start) xstrcat(query, ", start"); - if(blockid) + if (blockid) xstrcat(query, ", blockid"); xstrfmtcat(query, ") values (%u, %u, '%s', %u, '%s', \"%s\", %d, %u, " "'%s', \"%s\", %u, %u, %u", @@ -371,33 +371,33 @@ extern int slurm_jobcomp_log_record(struct job_record *job_ptr) job_state, job_ptr->total_cpus, job_ptr->partition, lim_str, 
start_time, end_time, job_ptr->node_cnt); - if(job_ptr->nodes) + if (job_ptr->nodes) xstrfmtcat(query, ", '%s'", job_ptr->nodes); - if(connect_type) { + if (connect_type) { xstrfmtcat(query, ", '%s'", connect_type); xfree(connect_type); } - if(reboot) { + if (reboot) { xstrfmtcat(query, ", '%s'", reboot); xfree(reboot); } - if(rotate) { + if (rotate) { xstrfmtcat(query, ", '%s'", rotate); xfree(rotate); } - if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) + if (job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) xstrfmtcat(query, ", '%u'", job_ptr->details->max_cpus); - if(geometry) { + if (geometry) { xstrfmtcat(query, ", '%s'", geometry); xfree(geometry); } - if(start) { + if (start) { xstrfmtcat(query, ", '%s'", start); xfree(start); } - if(blockid) { + if (blockid) { xstrfmtcat(query, ", '%s'", blockid); xfree(blockid); } @@ -430,9 +430,9 @@ extern List slurm_jobcomp_get_jobs(slurmdb_job_cond_t *job_cond) { List job_list = NULL; - if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { + if (!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { char *loc = slurm_get_jobcomp_loc(); - if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { + if (slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return job_list; } @@ -449,9 +449,9 @@ extern List slurm_jobcomp_get_jobs(slurmdb_job_cond_t *job_cond) */ extern int slurm_jobcomp_archive(slurmdb_archive_cond_t *arch_cond) { - if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { + if (!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { char *loc = slurm_get_jobcomp_loc(); - if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { + if (slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return SLURM_ERROR; } diff --git a/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c b/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c index f15980632ad24635202d8c9f4a4bfec2c80be187..05cfa83df504098d068e83a8f6b89c6d92eebcf0 100644 --- 
a/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c +++ b/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -46,18 +46,6 @@ #include "src/common/xstring.h" #include "mysql_jobcomp_process.h" -static void _do_fdump(MYSQL_ROW row, int lc) -{ - int i = 0; - printf("\n------- Line %d -------\n", lc); - while(jobcomp_table_fields[i].name) { - printf("%12s: %s\n", jobcomp_table_fields[i].name, row[i]); - i++; - } - - return; -} - extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) { @@ -76,24 +64,13 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) char time_str[32]; time_t temp_time; List job_list = list_create(jobcomp_destroy_job); - int fdump_flag = 0; - - /* we grab the fdump only for the filetxt plug through the - FDUMP_FLAG on the job_cond->duplicates variable. We didn't - add this extra field to the structure since it only applies - to this plugin. 
- */ - if(job_cond) { - fdump_flag = job_cond->duplicates & FDUMP_FLAG; - job_cond->duplicates &= (~FDUMP_FLAG); - } - if(job_cond->step_list && list_count(job_cond->step_list)) { + if (job_cond->step_list && list_count(job_cond->step_list)) { set = 0; xstrcat(extra, " where ("); itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if(set) + if (set) xstrcat(extra, " || "); tmp = xstrdup_printf("jobid=%d", selected_step->jobid); @@ -105,16 +82,16 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) xstrcat(extra, ")"); } - if(job_cond->partition_list && list_count(job_cond->partition_list)) { + if (job_cond->partition_list && list_count(job_cond->partition_list)) { set = 0; - if(extra) + if (extra) xstrcat(extra, " && ("); else xstrcat(extra, " where ("); itr = list_iterator_create(job_cond->partition_list); while((selected_part = list_next(itr))) { - if(set) + if (set) xstrcat(extra, " || "); tmp = xstrdup_printf("partition='%s'", selected_part); @@ -128,7 +105,7 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) i = 0; while(jobcomp_table_fields[i].name) { - if(i) + if (i) xstrcat(tmp, ", "); xstrcat(tmp, jobcomp_table_fields[i].name); i++; @@ -137,13 +114,13 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) query = xstrdup_printf("select %s from %s", tmp, jobcomp_table); xfree(tmp); - if(extra) { + if (extra) { xstrcat(query, extra); xfree(extra); } //info("query = %s", query); - if(!(result = + if (!(result = mysql_db_query_ret(jobcomp_mysql_conn, query, 0))) { xfree(query); list_destroy(job_list); @@ -154,12 +131,8 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) while((row = mysql_fetch_row(result))) { lc++; - if (fdump_flag) { - _do_fdump(row, lc); - continue; - } job = xmalloc(sizeof(jobcomp_job_rec_t)); - if(row[JOBCOMP_REQ_JOBID]) + if (row[JOBCOMP_REQ_JOBID]) job->jobid = atoi(row[JOBCOMP_REQ_JOBID]); job->partition 
= xstrdup(row[JOBCOMP_REQ_PARTITION]); temp_time = atoi(row[JOBCOMP_REQ_STARTTIME]); @@ -174,22 +147,22 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) sizeof(time_str)); job->end_time = xstrdup(time_str); - if(row[JOBCOMP_REQ_UID]) + if (row[JOBCOMP_REQ_UID]) job->uid = atoi(row[JOBCOMP_REQ_UID]); job->uid_name = xstrdup(row[JOBCOMP_REQ_USER_NAME]); - if(row[JOBCOMP_REQ_GID]) + if (row[JOBCOMP_REQ_GID]) job->gid = atoi(row[JOBCOMP_REQ_GID]); job->gid_name = xstrdup(row[JOBCOMP_REQ_GROUP_NAME]); job->jobname = xstrdup(row[JOBCOMP_REQ_NAME]); job->nodelist = xstrdup(row[JOBCOMP_REQ_NODELIST]); - if(row[JOBCOMP_REQ_NODECNT]) + if (row[JOBCOMP_REQ_NODECNT]) job->node_cnt = atoi(row[JOBCOMP_REQ_NODECNT]); - if(row[JOBCOMP_REQ_STATE]) { + if (row[JOBCOMP_REQ_STATE]) { i = atoi(row[JOBCOMP_REQ_STATE]); job->state = xstrdup(job_state_string(i)); } job->timelimit = xstrdup(row[JOBCOMP_REQ_TIMELIMIT]); - if(row[JOBCOMP_REQ_MAXPROCS]) + if (row[JOBCOMP_REQ_MAXPROCS]) job->max_procs = atoi(row[JOBCOMP_REQ_MAXPROCS]); job->connection = xstrdup(row[JOBCOMP_REQ_CONNECTION]); job->reboot = xstrdup(row[JOBCOMP_REQ_REBOOT]); diff --git a/src/plugins/jobcomp/mysql/mysql_jobcomp_process.h b/src/plugins/jobcomp/mysql/mysql_jobcomp_process.h index c99b758d42e5cb85288cce548503b3311acc5b42..cf19ed492f97faba0f9a8eb99d15f403448219aa 100644 --- a/src/plugins/jobcomp/mysql/mysql_jobcomp_process.h +++ b/src/plugins/jobcomp/mysql/mysql_jobcomp_process.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobcomp/none/Makefile.in b/src/plugins/jobcomp/none/Makefile.in index b5c495e08e83a529c7ffc8ebc3ed54672ccc23ab..a626e2cc657f8c410661e70180cad0048a1ecc03 100644 --- a/src/plugins/jobcomp/none/Makefile.in +++ b/src/plugins/jobcomp/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobcomp/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobcomp/none/jobcomp_none.c b/src/plugins/jobcomp/none/jobcomp_none.c index 6994d4f8f8581fcb5afebf3a5971734b8f9eb477..44f52a31399336d6cc0f4d349b2c794cd44620d2 100644 --- a/src/plugins/jobcomp/none/jobcomp_none.c +++ b/src/plugins/jobcomp/none/jobcomp_none.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobcomp/pgsql/Makefile.in b/src/plugins/jobcomp/pgsql/Makefile.in index dece9367a712aabcbba218736ce59168a7a5f791..84b8ed5b3b251a8f53b41d87395e6153e02a45af 100644 --- a/src/plugins/jobcomp/pgsql/Makefile.in +++ b/src/plugins/jobcomp/pgsql/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobcomp/pgsql DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -189,6 +193,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -209,6 +215,9 @@ ECHO_T = 
@ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -218,6 +227,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -225,6 +236,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -259,6 +279,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -286,6 +309,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c b/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c index c9a215d9a4c5e1bd1760509cfbdd0c18a0a2c1c4..e48d80b276c3899d6d7727dc8397af5b9cd9d37d 100644 --- a/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c +++ b/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management 
program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -130,7 +130,7 @@ static pgsql_db_info_t *_pgsql_jobcomp_create_db_info() db_info->port = slurm_get_jobcomp_port(); /* it turns out it is better if using defaults to let postgres handle them on it's own terms */ - if(!db_info->port) { + if (!db_info->port) { db_info->port = DEFAULT_PGSQL_PORT; slurm_set_jobcomp_port(db_info->port); } @@ -149,7 +149,7 @@ static int _pgsql_jobcomp_check_tables(char *user) "where tableowner='%s' " "and tablename !~ '^pg_+'", user); - if(!(result = + if (!(result = pgsql_db_query_ret(jobcomp_pgsql_db, query))) { xfree(query); return SLURM_ERROR; @@ -157,14 +157,14 @@ static int _pgsql_jobcomp_check_tables(char *user) xfree(query); for (i = 0; i < PQntuples(result); i++) { - if(!job_found + if (!job_found && !strcmp(jobcomp_table, PQgetvalue(result, i, 0))) job_found = 1; } PQclear(result); - if(!job_found) - if(pgsql_db_create_table(jobcomp_pgsql_db, "public", jobcomp_table, + if (!job_found) + if (pgsql_db_create_table(jobcomp_pgsql_db, "public", jobcomp_table, jobcomp_table_fields, ")") == SLURM_ERROR) return SLURM_ERROR; @@ -239,10 +239,13 @@ extern int init ( void ) { static int first = 1; - if(first) { + if (first) { /* since this can be loaded from many different places only tell us once. */ verbose("%s loaded", plugin_name); + error("jobcomp/pgsql will be removed in the next version of " + "Slurm. 
Please make plans to switch to a different " + "method of storing data."); first = 0; } else { debug4("%s loaded", plugin_name); @@ -267,14 +270,14 @@ extern int slurm_jobcomp_set_location(char *location) char *db_name = NULL; int i = 0; - if(jobcomp_pgsql_db && PQstatus(jobcomp_pgsql_db) == CONNECTION_OK) + if (jobcomp_pgsql_db && PQstatus(jobcomp_pgsql_db) == CONNECTION_OK) return SLURM_SUCCESS; - if(!location) + if (!location) db_name = slurm_get_jobcomp_loc(); else { while(location[i]) { - if(location[i] == '.' || location[i] == '/') { + if (location[i] == '.' || location[i] == '/') { debug("%s doesn't look like a database " "name using %s", location, DEFAULT_JOB_COMP_DB); @@ -282,7 +285,7 @@ extern int slurm_jobcomp_set_location(char *location) } i++; } - if(location[i]) + if (location[i]) db_name = xstrdup(DEFAULT_JOB_COMP_DB); else db_name = xstrdup(location); @@ -297,7 +300,7 @@ extern int slurm_jobcomp_set_location(char *location) destroy_pgsql_db_info(db_info); - if(rc == SLURM_SUCCESS) + if (rc == SLURM_SUCCESS) debug("Jobcomp database init finished"); else debug("Jobcomp database init failed"); @@ -315,9 +318,9 @@ extern int slurm_jobcomp_log_record(struct job_record *job_ptr) char *query = NULL; uint32_t time_limit, start_time, end_time; - if(!jobcomp_pgsql_db || PQstatus(jobcomp_pgsql_db) != CONNECTION_OK) { + if (!jobcomp_pgsql_db || PQstatus(jobcomp_pgsql_db) != CONNECTION_OK) { char *loc = slurm_get_jobcomp_loc(); - if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { + if (slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return SLURM_ERROR; } @@ -384,21 +387,21 @@ extern int slurm_jobcomp_log_record(struct job_record *job_ptr) "starttime, endtime, nodecnt", jobcomp_table); - if(job_ptr->nodes) + if (job_ptr->nodes) xstrcat(query, ", nodelist"); - if(connect_type) + if (connect_type) xstrcat(query, ", connect_type"); - if(reboot) + if (reboot) xstrcat(query, ", reboot"); - if(rotate) + if (rotate) xstrcat(query, ", rotate"); - 
if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) + if (job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) xstrcat(query, ", maxprocs"); - if(geometry) + if (geometry) xstrcat(query, ", geometry"); - if(start) + if (start) xstrcat(query, ", start"); - if(blockid) + if (blockid) xstrcat(query, ", blockid"); xstrfmtcat(query, ") values (%u, %u, '%s', %u, '%s', \"%s\", %d, %u, " @@ -408,32 +411,32 @@ extern int slurm_jobcomp_log_record(struct job_record *job_ptr) job_state, job_ptr->total_cpus, job_ptr->partition, lim_str, start_time, end_time, job_ptr->node_cnt); - if(job_ptr->nodes) + if (job_ptr->nodes) xstrfmtcat(query, ", '%s'", job_ptr->nodes); - if(connect_type) { + if (connect_type) { xstrfmtcat(query, ", '%s'", connect_type); xfree(connect_type); } - if(reboot) { + if (reboot) { xstrfmtcat(query, ", '%s'", reboot); xfree(reboot); } - if(rotate) { + if (rotate) { xstrfmtcat(query, ", '%s'", rotate); xfree(rotate); } - if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) + if (job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) xstrfmtcat(query, ", '%u'", job_ptr->details->max_cpus); - if(geometry) { + if (geometry) { xstrfmtcat(query, ", '%s'", geometry); xfree(geometry); } - if(start) { + if (start) { xstrfmtcat(query, ", '%s'", start); xfree(start); } - if(blockid) { + if (blockid) { xstrfmtcat(query, ", '%s'", blockid); xfree(blockid); } @@ -466,9 +469,9 @@ extern List slurm_jobcomp_get_jobs(slurmdb_job_cond_t *job_cond) { List job_list = NULL; - if(!jobcomp_pgsql_db || PQstatus(jobcomp_pgsql_db) != CONNECTION_OK) { + if (!jobcomp_pgsql_db || PQstatus(jobcomp_pgsql_db) != CONNECTION_OK) { char *loc = slurm_get_jobcomp_loc(); - if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { + if (slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return NULL; } @@ -485,9 +488,9 @@ extern List slurm_jobcomp_get_jobs(slurmdb_job_cond_t *job_cond) */ extern int slurm_jobcomp_archive(slurmdb_archive_cond_t *arch_cond) { - 
if(!jobcomp_pgsql_db || PQstatus(jobcomp_pgsql_db) != CONNECTION_OK) { + if (!jobcomp_pgsql_db || PQstatus(jobcomp_pgsql_db) != CONNECTION_OK) { char *loc = slurm_get_jobcomp_loc(); - if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { + if (slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return SLURM_ERROR; } diff --git a/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.c b/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.c index faffe6933e2576b43c0e14daebb2be65e76dc8d8..1fd1f991010457b0155d2b1dcff007e44af35312 100644 --- a/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.c +++ b/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -46,19 +46,6 @@ #include "src/common/xstring.h" #include "pgsql_jobcomp_process.h" -static void _do_fdump(PGresult *result, int lc) -{ - int i = 0; - printf("\n------- Line %d -------\n", lc); - while(jobcomp_table_fields[i].name) { - printf("%12s: %s\n", jobcomp_table_fields[i].name, - PQgetvalue(result, lc, i)); - i++; - } - - return; -} - extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) { @@ -75,24 +62,13 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) char time_str[32]; time_t temp_time; List job_list = NULL; - int fdump_flag = 0; - - /* we grab the fdump only for the filetxt plug through the - FDUMP_FLAG on the job_cond->duplicates variable. We didn't - add this extra field to the structure since it only applies - to this plugin. 
- */ - if(job_cond) { - fdump_flag = job_cond->duplicates & FDUMP_FLAG; - job_cond->duplicates &= (~FDUMP_FLAG); - } - if(job_cond->step_list && list_count(job_cond->step_list)) { + if (job_cond->step_list && list_count(job_cond->step_list)) { set = 0; xstrcat(extra, " where ("); itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if(set) + if (set) xstrcat(extra, " || "); tmp = xstrdup_printf("jobid=%d", selected_step->jobid); @@ -104,16 +80,16 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) xstrcat(extra, ")"); } - if(job_cond->partition_list && list_count(job_cond->partition_list)) { + if (job_cond->partition_list && list_count(job_cond->partition_list)) { set = 0; - if(extra) + if (extra) xstrcat(extra, " && ("); else xstrcat(extra, " where ("); itr = list_iterator_create(job_cond->partition_list); while((selected_part = list_next(itr))) { - if(set) + if (set) xstrcat(extra, " || "); tmp = xstrdup_printf("partition='%s'", selected_part); @@ -127,7 +103,7 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) i = 0; while(jobcomp_table_fields[i].name) { - if(i) + if (i) xstrcat(tmp, ", "); xstrcat(tmp, jobcomp_table_fields[i].name); i++; @@ -136,13 +112,13 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) query = xstrdup_printf("select %s from %s", tmp, jobcomp_table); xfree(tmp); - if(extra) { + if (extra) { xstrcat(query, extra); xfree(extra); } //info("query = %s", query); - if(!(result = + if (!(result = pgsql_db_query_ret(jobcomp_pgsql_db, query))) { xfree(query); return NULL; @@ -151,13 +127,8 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) job_list = list_create(jobcomp_destroy_job); for (i = 0; i < PQntuples(result); i++) { - - if (fdump_flag) { - _do_fdump(result, i); - continue; - } job = xmalloc(sizeof(jobcomp_job_rec_t)); - if(PQgetvalue(result, i, JOBCOMP_REQ_JOBID)) + if (PQgetvalue(result, i, 
JOBCOMP_REQ_JOBID)) job->jobid = atoi(PQgetvalue(result, i, JOBCOMP_REQ_JOBID)); job->partition = @@ -174,12 +145,12 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) sizeof(time_str)); job->end_time = xstrdup(time_str); - if(PQgetvalue(result, i, JOBCOMP_REQ_UID)) + if (PQgetvalue(result, i, JOBCOMP_REQ_UID)) job->uid = atoi(PQgetvalue(result, i, JOBCOMP_REQ_UID)); job->uid_name = xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_USER_NAME)); - if(PQgetvalue(result, i, JOBCOMP_REQ_GID)) + if (PQgetvalue(result, i, JOBCOMP_REQ_GID)) job->gid = atoi(PQgetvalue(result, i, JOBCOMP_REQ_GID)); job->gid_name = @@ -188,16 +159,16 @@ extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_NAME)); job->nodelist = xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_NODELIST)); - if(PQgetvalue(result, i, JOBCOMP_REQ_NODECNT)) + if (PQgetvalue(result, i, JOBCOMP_REQ_NODECNT)) job->node_cnt = atoi(PQgetvalue(result, i, JOBCOMP_REQ_NODECNT)); - if(PQgetvalue(result, i, JOBCOMP_REQ_STATE)) { + if (PQgetvalue(result, i, JOBCOMP_REQ_STATE)) { int j = atoi(PQgetvalue(result, i, JOBCOMP_REQ_STATE)); job->state = xstrdup(job_state_string(j)); } job->timelimit = xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_TIMELIMIT)); - if(PQgetvalue(result, i, JOBCOMP_REQ_MAXPROCS)) + if (PQgetvalue(result, i, JOBCOMP_REQ_MAXPROCS)) job->max_procs = atoi(PQgetvalue(result, i, JOBCOMP_REQ_MAXPROCS)); diff --git a/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.h b/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.h index f70f46aba77c81657d26287518e82566ff2cfa43..869f8741dd54271b15569563d641668143a3365b 100644 --- a/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.h +++ b/src/plugins/jobcomp/pgsql/pgsql_jobcomp_process.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/jobcomp/script/Makefile.in b/src/plugins/jobcomp/script/Makefile.in index a805ba99edb4265a9e8a9dec95ec0c6229c991ef..d048b9849e9157fdac9b11280cd76d9582251ba1 100644 --- a/src/plugins/jobcomp/script/Makefile.in +++ b/src/plugins/jobcomp/script/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/jobcomp/script DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = 
@EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/jobcomp/script/jobcomp_script.c b/src/plugins/jobcomp/script/jobcomp_script.c index 12ed94b40b9a593aa173b1fa8ad35efc2e383620..367f38afac4aedf836452010f7123c52ed163e71 100644 --- a/src/plugins/jobcomp/script/jobcomp_script.c +++ b/src/plugins/jobcomp/script/jobcomp_script.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -50,6 +50,9 @@ * PARTITION Partition name used to run job * PROCS Count of allocated CPUs * START Time of job start, UTS + * STDERR Job's stderr file name (if any) + * STDIN Job's stdin file name (if any) + * STDOUT Job's stdout file name (if any) * SUBMIT Time of job submission, UTS * UID User ID of job owner * WORK_DIR Job's working directory @@ -189,6 +192,9 @@ struct jobcomp_info { char *jobstate; char *account; char *work_dir; + char *std_in; + char *std_out; + char *std_err; #ifdef HAVE_BG char *connect_type; char *geometry; @@ -246,6 +252,15 @@ static struct jobcomp_info * _jobcomp_info_create (struct job_record *job) j->work_dir = xstrdup(job->details->work_dir); else j->work_dir = xstrdup("unknown"); + if (job->details) { + if (job->details->std_in) + j->std_in = xstrdup(job->details->std_in); + if (job->details->std_out) + j->std_out = xstrdup(job->details->std_out); + if (job->details->std_err) + j->std_err = xstrdup(job->details->std_err); + } + #ifdef HAVE_BG j->connect_type = select_g_select_jobinfo_xstrdup(job->select_jobinfo, SELECT_PRINT_CONNECTION); @@ -267,6 +282,9 @@ static void _jobcomp_info_destroy (struct jobcomp_info *j) xfree (j->jobstate); xfree (j->account); xfree (j->work_dir); + xfree (j->std_in); + xfree (j->std_out); + xfree (j->std_err); #ifdef HAVE_BG xfree (j->connect_type); xfree (j->geometry); @@ -379,6 +397,12 @@ static char ** _create_environment (struct jobcomp_info *job) _env_append (&env, "JOBSTATE", job->jobstate); _env_append (&env, "PARTITION", job->partition); _env_append (&env, "WORK_DIR", job->work_dir); + if (job->std_in) + _env_append (&env, "STDIN", job->std_in); + if (job->std_out) + _env_append (&env, "STDOUT", job->std_out); + if (job->std_err) + _env_append (&env, "STDERR", 
job->std_err); #ifdef HAVE_BG _env_append (&env, "BLOCKID", job->blockid); diff --git a/src/plugins/launch/Makefile.in b/src/plugins/launch/Makefile.in index ea9e372a4ed7b8f9828d22855cbf43345175f23c..ee3d43f75a7ceaea28bea00557e041e2f956a517 100644 --- a/src/plugins/launch/Makefile.in +++ b/src/plugins/launch/Makefile.in @@ -60,6 +60,7 @@ subdir = src/plugins/launch DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -172,6 +176,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -192,6 +198,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = 
@FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -201,6 +210,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -208,6 +219,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -242,6 +262,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -269,6 +292,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/launch/aprun/Makefile.in b/src/plugins/launch/aprun/Makefile.in index 10867a48eff186d64704457e17278280662a22c3..cc128e2640b74874721e6cf606cd042a028beed9 100644 --- a/src/plugins/launch/aprun/Makefile.in +++ b/src/plugins/launch/aprun/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/launch/aprun DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ 
$(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = 
@HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/launch/aprun/launch_aprun.c b/src/plugins/launch/aprun/launch_aprun.c index 1a88d2aaba09e81a602e85c1de6d01a062ef521a..4502557a3b829c3c841ecf7c5c83524336f97bf2 100644 --- a/src/plugins/launch/aprun/launch_aprun.c +++ b/src/plugins/launch/aprun/launch_aprun.c @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -387,7 +387,8 @@ static void _handle_msg(slurm_msg_t *msg) case SRUN_STEP_SIGNAL: ss = msg->data; debug("received step signal %u RPC", ss->signal); - launch_p_fwd_signal(ss->signal); + if (ss->signal) + launch_p_fwd_signal(ss->signal); slurm_free_job_step_kill_msg(msg->data); break; default: @@ -581,7 +582,7 @@ extern int launch_p_setup_srun_opt(char **rest) "%u", opt.sockets_per_node); } - if (opt.mem_bind && strstr(opt.mem_bind, "local")) { + if (opt.mem_bind_type & MEM_BIND_LOCAL) { opt.argc += 1; xrealloc(opt.argv, opt.argc * sizeof(char *)); opt.argv[command_pos++] = xstrdup("-ss"); diff --git a/src/plugins/launch/poe/Makefile.in b/src/plugins/launch/poe/Makefile.in index 78162577bca1ec214664b31124c397aaf84da5d2..253f95599e6a3c7b7ac191f2094edaa5ef9c24be 100644 --- a/src/plugins/launch/poe/Makefile.in +++ b/src/plugins/launch/poe/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/launch/poe DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ 
$(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ 
+RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/launch/poe/launch_poe.c b/src/plugins/launch/poe/launch_poe.c index 9a68e9e2c6c572b9abcf7d82321a123e327db4a7..9364e51cd8892417d722c08983bd458d1d1fdb38 100644 --- a/src/plugins/launch/poe/launch_poe.c +++ b/src/plugins/launch/poe/launch_poe.c @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -32,6 +32,12 @@ * You should have received a copy of the GNU General Public License along * with SLURM; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + ***************************************************************************** + * POE DEBUGGING NOTES: + * + * MP_INFOLEVEL=4 Verbose POE logging + * MP_PMDLOG=yes Write log files to /tmp/mplog.* + * SCI_DEBUG_FANOUT=# Fanout of pmdv12 in launching tasks \*****************************************************************************/ #ifdef HAVE_CONFIG_H @@ -178,8 +184,8 @@ static void _propagate_srun_opts(uint32_t nnodes, uint32_t ntasks) if (opt.account) setenv("SLURM_ACCOUNT", opt.account, 1); - if (opt.acctg_freq >= 0) { - snprintf(value, sizeof(value), "%d", opt.acctg_freq); + if (opt.acctg_freq) { + snprintf(value, sizeof(value), "%s", opt.acctg_freq); setenv("SLURM_ACCTG_FREQ", value, 1); } if (opt.ckpt_dir) diff --git a/src/plugins/launch/runjob/Makefile.in b/src/plugins/launch/runjob/Makefile.in index eeb33dba82fe9ac17632c641957fcb15460b85c8..01f53f6f9495749f1f0ac42157c431287924d175 100644 --- a/src/plugins/launch/runjob/Makefile.in +++ b/src/plugins/launch/runjob/Makefile.in @@ -59,6 +59,7 @@ subdir =
src/plugins/launch/runjob DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -193,6 +197,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -213,6 +219,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -222,6 +231,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG 
= @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -229,6 +240,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -263,6 +283,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -290,6 +313,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/launch/runjob/launch_runjob.c b/src/plugins/launch/runjob/launch_runjob.c index 0b78f553d62ca06f50d761e81cb8199afc359d90..af437aca517de5de7d8e96514edfff760c4f2fc0 100644 --- a/src/plugins/launch/runjob/launch_runjob.c +++ b/src/plugins/launch/runjob/launch_runjob.c @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -175,7 +175,8 @@ _handle_msg(slurm_msg_t *msg) case SRUN_STEP_SIGNAL: ss = msg->data; debug("received step signal %u RPC", ss->signal); - runjob_signal(ss->signal); + if (ss->signal) + runjob_signal(ss->signal); slurm_free_job_step_kill_msg(msg->data); break; default: diff --git a/src/plugins/launch/runjob/runjob_interface.cc b/src/plugins/launch/runjob/runjob_interface.cc index 059a4586a300568fd72eddc5a2c8c8dfe6d9b54c..cc65a8b6bf908c65649e6e2203a63128413efe0d 100644 --- a/src/plugins/launch/runjob/runjob_interface.cc +++ b/src/plugins/launch/runjob/runjob_interface.cc @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/launch/runjob/runjob_interface.h b/src/plugins/launch/runjob/runjob_interface.h index e151a1494270d595d08aa9ebd31a2707bbb67ed1..43b97faca20fcaa8dce5001a7edf6fd14433ce1b 100644 --- a/src/plugins/launch/runjob/runjob_interface.h +++ b/src/plugins/launch/runjob/runjob_interface.h @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/launch/slurm/Makefile.in b/src/plugins/launch/slurm/Makefile.in index 038076911fc7e772b2184644df3c5ec40dd5c0f1..ea02c2d239194dd18a26d7c9c564dcc12eb6db97 100644 --- a/src/plugins/launch/slurm/Makefile.in +++ b/src/plugins/launch/slurm/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/launch/slurm DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/launch/slurm/launch_slurm.c b/src/plugins/launch/slurm/launch_slurm.c index 29478635843e3eba271ac689f0c0cb670b2d0119..a42b8392d13701515f8c75390ce233206fad01d6 100644 --- a/src/plugins/launch/slurm/launch_slurm.c +++ b/src/plugins/launch/slurm/launch_slurm.c @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -288,9 +288,19 @@ static void _task_finish(task_exit_msg_t *msg) char *hosts; uint32_t rc = 0; int normal_exit = 0; + static int reduce_task_exit_msg = -1; + static int msg_printed = 0, last_task_exit_rc; const char *task_str = _taskstr(msg->num_tasks); + if (reduce_task_exit_msg == -1) { + char *ptr = getenv("SLURM_SRUN_REDUCE_TASK_EXIT_MSG"); + if (ptr && atoi(ptr) != 0) + reduce_task_exit_msg = 1; + else + reduce_task_exit_msg = 0; + } + verbose("Received task exit notification for %d %s (status=0x%04x).", msg->num_tasks, task_str, msg->return_code); @@ -306,8 +316,13 @@ static void _task_finish(task_exit_msg_t *msg) _handle_openmpi_port_error(tasks, hosts, local_srun_job->step_ctx); } else { - error("%s: %s %s: Exited with exit code %d", - hosts, task_str, tasks, rc); + if (reduce_task_exit_msg == 0 || + msg_printed == 0 || + msg->return_code != last_task_exit_rc) { + error("%s: %s %s: Exited with exit code %d", + hosts, task_str, tasks, rc); + msg_printed = 1; + } } if (!WIFEXITED(*local_global_rc) || (rc > WEXITSTATUS(*local_global_rc))) @@ -325,8 +340,14 @@ static void _task_finish(task_exit_msg_t *msg) hosts, task_str, tasks, signal_str, core_str); } else { rc = msg->return_code; - error("%s: %s %s: %s%s", - hosts, task_str, tasks, signal_str, core_str); + if (reduce_task_exit_msg == 0 || + msg_printed == 0 || + msg->return_code != last_task_exit_rc) { + error("%s: %s %s: %s%s", + hosts, task_str, tasks, signal_str, + core_str); + msg_printed = 1; + } } if (*local_global_rc == 0) *local_global_rc = msg->return_code; @@ -344,6 +365,8 @@ static void _task_finish(task_exit_msg_t *msg) if (task_state_first_exit(task_state) && (opt.max_wait > 0)) _setup_max_wait_timer(); + + last_task_exit_rc = msg->return_code; } /* Load the multi_prog config file into argv, pass the entire file 
contents @@ -482,6 +505,7 @@ extern int launch_p_step_launch( launch_params.remote_output_filename =fname_remote_string(job->ofname); launch_params.remote_input_filename = fname_remote_string(job->ifname); launch_params.remote_error_filename = fname_remote_string(job->efname); + launch_params.profile = opt.profile; launch_params.task_prolog = opt.task_prolog; launch_params.task_epilog = opt.task_epilog; launch_params.cpu_bind = opt.cpu_bind; diff --git a/src/plugins/launch/slurm/task_state.c b/src/plugins/launch/slurm/task_state.c index 4c896cf09f28a765461dfa52e9d2a1d92f39fcf1..00b463a418b9b119c02eaaf60e26e4de956f6785 100644 --- a/src/plugins/launch/slurm/task_state.c +++ b/src/plugins/launch/slurm/task_state.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/launch/slurm/task_state.h b/src/plugins/launch/slurm/task_state.h index eb6b5d2f342bf44d4cc46fbb5f9dcec35e2224ff..5256d7bfaae0822c6b937bbeef6e5c2d012fadb6 100644 --- a/src/plugins/launch/slurm/task_state.h +++ b/src/plugins/launch/slurm/task_state.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/Makefile.am b/src/plugins/mpi/Makefile.am index dda7bbadcb426df1aaa62cd6c47007fb1892a607..ea65f7e2a479a09878633ffb701495bbe4f52389 100644 --- a/src/plugins/mpi/Makefile.am +++ b/src/plugins/mpi/Makefile.am @@ -1,3 +1,8 @@ # Makefile for mpi plugins +if REAL_BGQ_LOADED +#on a real BGQ do not compile any mpi plugins +SUBDIRS = none +else SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi pmi2 +endif diff --git a/src/plugins/mpi/Makefile.in b/src/plugins/mpi/Makefile.in index 9b071a1b6ef12b58c4dbb493b1fa8a8b69ad507a..fc24d7f94dc3c5df885c38d64d8f06e1e4dc7f52 100644 --- a/src/plugins/mpi/Makefile.in +++ b/src/plugins/mpi/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/mpi DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ 
$(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -120,7 +124,8 @@ AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ distdir ETAGS = etags CTAGS = ctags -DIST_SUBDIRS = $(SUBDIRS) +DIST_SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam \ + openmpi pmi2 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ @@ -169,6 +174,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +196,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +208,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +217,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +260,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +290,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = 
@REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -354,7 +381,10 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi pmi2 +@REAL_BGQ_LOADED_FALSE@SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi pmi2 + +#on a real BGQ do not compile any mpi plugins +@REAL_BGQ_LOADED_TRUE@SUBDIRS = none all: all-recursive .SUFFIXES: diff --git a/src/plugins/mpi/lam/Makefile.in b/src/plugins/mpi/lam/Makefile.in index b24b6f04155f83c1ae302d7d1d839f5ad69991bf..529aacd01459059972ab5bb359886926911fea52 100644 --- a/src/plugins/mpi/lam/Makefile.in +++ b/src/plugins/mpi/lam/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/lam DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 
\ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ 
+RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/lam/lam.h b/src/plugins/mpi/lam/lam.h index d1863802bc3f964ea496b9d5eff25bf40e5fe5cc..13c3cd43217c719ac9ee2e2688e4424dc8b63ba3 100644 --- a/src/plugins/mpi/lam/lam.h +++ b/src/plugins/mpi/lam/lam.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/lam/mpi_lam.c b/src/plugins/mpi/lam/mpi_lam.c index b631242e49a96bebb5dd67af484f8b75c0a189a2..5a976e088bf5fc2d83e9f5692bde9352f1e3fad2 100644 --- a/src/plugins/mpi/lam/mpi_lam.c +++ b/src/plugins/mpi/lam/mpi_lam.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpich1_p4/Makefile.in b/src/plugins/mpi/mpich1_p4/Makefile.in index 8a1385b2b9a8668cd41a99645814d6063f145aa5..a3420ad24cee5948128785b26d74aa80428a05c9 100644 --- a/src/plugins/mpi/mpich1_p4/Makefile.in +++ b/src/plugins/mpi/mpich1_p4/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/mpich1_p4 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/mpich1_p4/mpich1_p4.c b/src/plugins/mpi/mpich1_p4/mpich1_p4.c index e6b0dca5348266b86e78592094f261d9c3a398b7..a6175120a64bad8954e66acbbb97b928d50e7e1a 100644 --- a/src/plugins/mpi/mpich1_p4/mpich1_p4.c +++ b/src/plugins/mpi/mpich1_p4/mpich1_p4.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpich1_shmem/Makefile.in b/src/plugins/mpi/mpich1_shmem/Makefile.in index 7f02a4bb40d6b58bda80f388503a8384a554516b..cf17fc00e299cfea6293a0cc52f5cd5d8e9f4164 100644 --- a/src/plugins/mpi/mpich1_shmem/Makefile.in +++ b/src/plugins/mpi/mpich1_shmem/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/mpich1_shmem DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = 
@EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c b/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c index d2626df6db9287268436ab545b55543869c672ad..4cc7c6fd1ed21209f016522f4af1bece0637d847 100644 --- a/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c +++ b/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpichgm/Makefile.in b/src/plugins/mpi/mpichgm/Makefile.in index 8488e4478e51b0a10001651a494ab1bcf22f9464..252435f693304f893cad342634d6811331c1f183 100644 --- a/src/plugins/mpi/mpichgm/Makefile.in +++ b/src/plugins/mpi/mpichgm/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/mpichgm DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ 
CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/mpichgm/mpi_mpichgm.c b/src/plugins/mpi/mpichgm/mpi_mpichgm.c index b0f77347c886da350cb8b761c67fe97c9b2a9713..75e207561e04272c66ff255a6e685d7eb43ae9b2 100644 --- a/src/plugins/mpi/mpichgm/mpi_mpichgm.c +++ b/src/plugins/mpi/mpichgm/mpi_mpichgm.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpichgm/mpichgm.c b/src/plugins/mpi/mpichgm/mpichgm.c index 5067a5fe5b313e6381603745bba1206ecb6a9a96..e7bdd32b32dcf43f8b34edec0b2152de528b7f29 100644 --- a/src/plugins/mpi/mpichgm/mpichgm.c +++ b/src/plugins/mpi/mpichgm/mpichgm.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpichgm/mpichgm.h b/src/plugins/mpi/mpichgm/mpichgm.h index 1cce496b3c43c29eafe23794021ccc1615f1c63e..b60c2b6974be1c33ee7cc8dd107c3e185f730be9 100644 --- a/src/plugins/mpi/mpichgm/mpichgm.h +++ b/src/plugins/mpi/mpichgm/mpichgm.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpichmx/Makefile.in b/src/plugins/mpi/mpichmx/Makefile.in index 128ff6be26a6a6370428478131bbf0593a7b7230..d79e88d5e9aa376740f03617fa033597acbde20a 100644 --- a/src/plugins/mpi/mpichmx/Makefile.in +++ b/src/plugins/mpi/mpichmx/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/mpichmx DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/mpichmx/mpi_mpichmx.c b/src/plugins/mpi/mpichmx/mpi_mpichmx.c index 2df42bf66bf9a0275e6f68809e247c9e7a83fedd..413d9c3950338302aedf92c78efd93b636fccbc2 100644 --- a/src/plugins/mpi/mpichmx/mpi_mpichmx.c +++ b/src/plugins/mpi/mpichmx/mpi_mpichmx.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpichmx/mpichmx.c b/src/plugins/mpi/mpichmx/mpichmx.c index 0d4ae97addc9c142f7f18b9683fc7756faeb8db9..640406d840db91d5c2c19add0dc78b8f5b624007 100644 --- a/src/plugins/mpi/mpichmx/mpichmx.c +++ b/src/plugins/mpi/mpichmx/mpichmx.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mpichmx/mpichmx.h b/src/plugins/mpi/mpichmx/mpichmx.h index dee9fcf97983a76c94e6fec9fa8b3e9be2d6320d..b897b8d5fc27c76fea0ae38570a6d5e4fe303483 100644 --- a/src/plugins/mpi/mpichmx/mpichmx.h +++ b/src/plugins/mpi/mpichmx/mpichmx.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mvapich/Makefile.in b/src/plugins/mpi/mvapich/Makefile.in index df19b61730c4a8b1b3a6b5d482cf90a1da9fdead..637f982dbc43db1bdab3acb9fb1a205c07c419ea 100644 --- a/src/plugins/mpi/mvapich/Makefile.in +++ b/src/plugins/mpi/mvapich/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/mvapich DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/mvapich/mpi_mvapich.c b/src/plugins/mpi/mvapich/mpi_mvapich.c index b6125d6b906c38dd8f9651907f2aa70ea39b3308..0092b3d5ad764ff239518e08a844187a78fe1ca7 100644 --- a/src/plugins/mpi/mvapich/mpi_mvapich.c +++ b/src/plugins/mpi/mvapich/mpi_mvapich.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -101,14 +101,18 @@ int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job, env_array_overwrite_fmt(env, "MPIRUN_MPD", "0"); debug2("init for mpi rank %u", job->gtaskid); - /* - * Fake MPIRUN_PROCESSES env var -- we don't need this for - * SLURM at this time. (what a waste) - */ - for (i = 0; i < job->ntasks; i++) - xstrcat (processes, "x:"); - env_array_overwrite_fmt(env, "MPIRUN_PROCESSES", "%s", processes); + if (getenvp (*env, "SLURM_NEED_MVAPICH_MPIRUN_PROCESSES")) { + /* + * Fake MPIRUN_PROCESSES env var -- we don't need this for + * SLURM at this time. (what a waste) + */ + for (i = 0; i < job->ntasks; i++) + xstrcat (processes, "x:"); + + env_array_overwrite_fmt(env, "MPIRUN_PROCESSES", "%s", + processes); + } /* * Some mvapich versions will ignore MPIRUN_PROCESSES If diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index a434cbac23db29f1ff100169f3773dc9767f0ba6..d580aece6b8b7b74b05349cb6269cddca8c95d8d 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/mvapich/mvapich.h b/src/plugins/mpi/mvapich/mvapich.h index 63ba7d3fd4d2304eec931fd63e2a38bb8d9d687a..22481bb6b7dba1eca3bc62a7396c8eaa71919675 100644 --- a/src/plugins/mpi/mvapich/mvapich.h +++ b/src/plugins/mpi/mvapich/mvapich.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/none/Makefile.in b/src/plugins/mpi/none/Makefile.in index eefb66fecbc5b77ece435ca0875ffd75fdc72e66..70ffdc670faa2abbf16c4be346beb4066e7ba1c9 100644 --- a/src/plugins/mpi/none/Makefile.in +++ b/src/plugins/mpi/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ 
EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/none/mpi_none.c b/src/plugins/mpi/none/mpi_none.c index c73b1debf92b0bc0bd4624c81c55f70167e65d10..d9e94d2dad8363aa1eb9fa3395ec4853053898af 100644 --- a/src/plugins/mpi/none/mpi_none.c +++ b/src/plugins/mpi/none/mpi_none.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/openmpi/Makefile.in b/src/plugins/mpi/openmpi/Makefile.in index 2668b3298be2225f6cb69083d4cdd1ede516b9f0..b615d6be02e03d892e02d501c95d4f95f8318ec8 100644 --- a/src/plugins/mpi/openmpi/Makefile.in +++ b/src/plugins/mpi/openmpi/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/openmpi DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ 
EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/mpi/openmpi/mpi_openmpi.c b/src/plugins/mpi/openmpi/mpi_openmpi.c index 579633a4370d41ed9575614e91ca01ad48710dc0..2039d982caff1ff67f5cd9b497fc6cceff0c61d6 100644 --- a/src/plugins/mpi/openmpi/mpi_openmpi.c +++ b/src/plugins/mpi/openmpi/mpi_openmpi.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/pmi2/Makefile.am b/src/plugins/mpi/pmi2/Makefile.am index 30663b48ec0cb3e30e360b17d04d19747bc735eb..f14adb673f9d175272edfdafca4603622418c9d8 100644 --- a/src/plugins/mpi/pmi2/Makefile.am +++ b/src/plugins/mpi/pmi2/Makefile.am @@ -17,6 +17,7 @@ mpi_pmi2_la_SOURCES = mpi_pmi2.c \ setup.c setup.h \ spawn.c spawn.h \ tree.c tree.h \ + nameserv.c nameserv.h \ $(top_srcdir)/src/common/mpi.h \ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c \ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.h diff --git a/src/plugins/mpi/pmi2/Makefile.in b/src/plugins/mpi/pmi2/Makefile.in index cde4a8e467df0c45e9f7fb7f28368f6f7547f368..ab6e29f329ce9cff160474897c3c6edfbe173c5f 100644 --- a/src/plugins/mpi/pmi2/Makefile.in +++ b/src/plugins/mpi/pmi2/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/mpi/pmi2 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ 
$(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -131,7 +135,8 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) mpi_pmi2_la_LIBADD = am_mpi_pmi2_la_OBJECTS = mpi_pmi2.lo agent.lo client.lo kvs.lo info.lo \ - pmi1.lo pmi2.lo setup.lo spawn.lo tree.lo reverse_tree_math.lo + pmi1.lo pmi2.lo setup.lo spawn.lo tree.lo nameserv.lo \ + reverse_tree_math.lo mpi_pmi2_la_OBJECTS = $(am_mpi_pmi2_la_OBJECTS) mpi_pmi2_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -181,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = 
@HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -379,6 +406,7 @@ mpi_pmi2_la_SOURCES = mpi_pmi2.c \ setup.c setup.h \ spawn.c spawn.h \ tree.c tree.h \ + nameserv.c nameserv.h \ $(top_srcdir)/src/common/mpi.h \ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c \ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.h @@ -464,6 +492,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kvs.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_pmi2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nameserv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pmi1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pmi2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reverse_tree_math.Plo@am__quote@ diff --git a/src/plugins/mpi/pmi2/agent.c b/src/plugins/mpi/pmi2/agent.c index 6b5fe0a317dff0fef4cb3d524ff8ab3f414c35e4..1cf7278f3a3646cb72e60fb816755712fc2a7e8b 100644 --- a/src/plugins/mpi/pmi2/agent.c +++ b/src/plugins/mpi/pmi2/agent.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/pmi2/client.c b/src/plugins/mpi/pmi2/client.c index 7b8dc9ba843ac728fe436532646757596aa8d2d1..cf309d6264c74119292353d447d6b95bacc594d0 100644 --- a/src/plugins/mpi/pmi2/client.c +++ b/src/plugins/mpi/pmi2/client.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -46,6 +46,7 @@ #include "src/common/xstring.h" #include "client.h" +#include "setup.h" #include "pmi.h" #define KEY_INDEX(i) (i * 2) @@ -563,3 +564,59 @@ client_resp_free(client_resp_t *resp) xfree(resp); } } + +/* caller must free the result */ +static char * +_str_replace(char *str, char src, char dst) +{ + char *res, *ptr; + + res = xstrdup(str); + ptr = res; + while (*ptr) { + if (*ptr == src) + *ptr = dst; + ptr ++; + } + return res; +} +/* send fence_resp/barrier_out to tasks */ +extern int +send_kvs_fence_resp_to_clients(int rc, char *errmsg) +{ + int i = 0; + client_resp_t *resp; + char *msg; + + resp = client_resp_new(); + if ( is_pmi11() ) { + if (rc != 0 && errmsg != NULL) { + // XXX: pmi1.1 does not check the rc + msg = _str_replace(errmsg, ' ', '_'); + client_resp_append(resp, CMD_KEY"="BARRIEROUT_CMD" " + RC_KEY"=%d "MSG_KEY"=%s\n", + rc, msg); + xfree(msg); + } else { + client_resp_append(resp, CMD_KEY"="BARRIEROUT_CMD" " + RC_KEY"=%d\n", rc); + } + } else if (is_pmi20()) { + if (rc != 0 && errmsg != NULL) { + // TODO: pmi2.0 accept escaped ';' (";;") + msg = _str_replace(errmsg, ';', '_'); + client_resp_append(resp, CMD_KEY"="KVSFENCERESP_CMD";" + RC_KEY"=%d;"ERRMSG_KEY"=%s;", + rc, msg); + xfree(msg); + } else { + client_resp_append(resp, 
CMD_KEY"="KVSFENCERESP_CMD";" + RC_KEY"=%d;", rc); + } + } + for (i = 0; i < job_info.ltasks; i ++) { + rc = client_resp_send(resp, STEPD_PMI_SOCK(i)); + } + client_resp_free(resp); + return rc; +} diff --git a/src/plugins/mpi/pmi2/client.h b/src/plugins/mpi/pmi2/client.h index 09bad9683b3450635f3c4a9661d44a80e79aba8c..85bbe77bcb9d732dc518256bdce4d32effb22ff4 100644 --- a/src/plugins/mpi/pmi2/client.h +++ b/src/plugins/mpi/pmi2/client.h @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -100,4 +100,6 @@ extern void client_resp_free(client_resp_t *resp); } while (0) +extern int send_kvs_fence_resp_to_clients(int rc, char *errmsg); + #endif /* _CLIENT_H */ diff --git a/src/plugins/mpi/pmi2/info.c b/src/plugins/mpi/pmi2/info.c index 839334bc084e86bdb8bbebe738db8796bc3ac5f3..b248b3ede071b312c92705911063d02386dbc74a 100644 --- a/src/plugins/mpi/pmi2/info.c +++ b/src/plugins/mpi/pmi2/info.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/pmi2/info.h b/src/plugins/mpi/pmi2/info.h index 05f2fe68a2266472a2b3ca4364da7701a24f1131..095f955f0571ff82a3358e19a263d0c265e2f9ee 100644 --- a/src/plugins/mpi/pmi2/info.h +++ b/src/plugins/mpi/pmi2/info.h @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/pmi2/kvs.c b/src/plugins/mpi/pmi2/kvs.c index b03524a292149479d8074bce3a73780f8bf87abc..c7d4e426c959b0edf6ce1956f7d94c3807e53168 100644 --- a/src/plugins/mpi/pmi2/kvs.c +++ b/src/plugins/mpi/pmi2/kvs.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -36,15 +36,20 @@ \*****************************************************************************/ #include <stdlib.h> +#include <unistd.h> #include "kvs.h" #include "setup.h" #include "tree.h" #include "pmi.h" +#define MAX_RETRIES 5 + /* for fence */ int tasks_to_wait = 0; int children_to_wait = 0; +int kvs_seq = 1; /* starting from 1 */ +int waiting_kvs_resp = 0; /* bucket of key-value pairs */ @@ -114,6 +119,9 @@ temp_kvs_init(void) pack32((uint32_t)nodeid, buf); /* from_nodeid */ packstr(tree_info.this_node, buf); /* from_node */ pack32((uint32_t)num_children, buf); /* num_children */ + pack32(kvs_seq, buf); + } else { + pack32(kvs_seq, buf); } size = get_buf_offset(buf); if (temp_kvs_cnt + size > temp_kvs_size) { @@ -180,23 +188,37 @@ temp_kvs_merge(Buf buf) extern int temp_kvs_send(void) { - int rc; + int rc = SLURM_ERROR, retry = 0; + unsigned int delay = 1; /* cmd included in temp_kvs_buf */ + kvs_seq ++; /* expecting new kvs after now */ - if (! 
in_stepd()) { /* srun */ - rc = tree_msg_to_stepds(job_info.step_nodelist, - temp_kvs_cnt, - temp_kvs_buf); - } else if (tree_info.parent_node != NULL) { - /* non-first-level stepds */ - rc = tree_msg_to_stepds(tree_info.parent_node, - temp_kvs_cnt, - temp_kvs_buf); - } else { /* first level stepds */ - rc = tree_msg_to_srun(temp_kvs_cnt, temp_kvs_buf); + while (1) { + if (retry == 1) { + verbose("failed to send temp kvs, rc=%d, retrying", rc); + } + if (! in_stepd()) { /* srun */ + rc = tree_msg_to_stepds(job_info.step_nodelist, + temp_kvs_cnt, + temp_kvs_buf); + } else if (tree_info.parent_node != NULL) { + /* non-first-level stepds */ + rc = tree_msg_to_stepds(tree_info.parent_node, + temp_kvs_cnt, + temp_kvs_buf); + } else { /* first level stepds */ + rc = tree_msg_to_srun(temp_kvs_cnt, temp_kvs_buf); + } + if (rc == SLURM_SUCCESS) + break; + retry ++; + if (retry >= MAX_RETRIES) + break; + /* wait, in case parent stepd / srun not ready */ + sleep(delay); + delay *= 2; } - temp_kvs_init(); /* clear old temp kvs */ return rc; } diff --git a/src/plugins/mpi/pmi2/kvs.h b/src/plugins/mpi/pmi2/kvs.h index 09156df62d3a2ff6cd8b1828b43fc08589799b56..f2e3f4225b73967aa8f4440cd21bf93f4c76b409 100644 --- a/src/plugins/mpi/pmi2/kvs.h +++ b/src/plugins/mpi/pmi2/kvs.h @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -50,6 +50,8 @@ extern int tasks_to_wait; extern int children_to_wait; +extern int kvs_seq; +extern int waiting_kvs_resp; extern int temp_kvs_init(void); extern int temp_kvs_add(char *key, char *val); diff --git a/src/plugins/mpi/pmi2/mpi_pmi2.c b/src/plugins/mpi/pmi2/mpi_pmi2.c index 0946e39ab8fefb0e67e9b40f7de39679af6c72f7..e50a8bd8746606958f50d22923ada2252cbe8534 100644 --- a/src/plugins/mpi/pmi2/mpi_pmi2.c +++ b/src/plugins/mpi/pmi2/mpi_pmi2.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/mpi/pmi2/nameserv.c b/src/plugins/mpi/pmi2/nameserv.c new file mode 100644 index 0000000000000000000000000000000000000000..afe70dc9987bd89061d13193bc5f8fd3b36a43e2 --- /dev/null +++ b/src/plugins/mpi/pmi2/nameserv.c @@ -0,0 +1,191 @@ +/*****************************************************************************\ + ** nameserv.c - name publish/unpublish/lookup functions + ***************************************************************************** + * Copyright (C) 2013 National University of Defense Technology. + * Written by Hongjia Cao <hjcao@nudt.edu.cn>. + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + + +#include "pmi.h" +#include "setup.h" +#include "client.h" + +typedef struct name_port { + char *name; + char *port; + struct name_port *next; +} name_port_t; + +/* + * just a list for now. + * a db or directory is more useful. + * or execute a script to pub/unpub/lookup. + */ +static name_port_t *local_name_list = NULL; + +extern char * +name_lookup_local (char *name) +{ + name_port_t *np; + + np = local_name_list; + while (np && strcmp(np->name, name)) + np = np->next; + + return np ? 
xstrdup(np->port) : NULL; +} + +extern int +name_publish_local (char *name, char *port) +{ + name_port_t *np; + + np = local_name_list; + while (np && strcmp(np->name, name)) + np = np->next; + if (np) { + xfree(np->port); + np->port = xstrdup(port); + } else { + np = xmalloc(sizeof(name_port_t)); + np->name = xstrdup(name); + np->port = xstrdup(port); + np->next = local_name_list; + local_name_list = np; + } + return SLURM_SUCCESS; +} + +extern int +name_unpublish_local (char *name) +{ + name_port_t *np, **pprev; + + pprev = &local_name_list; + np = *pprev; + while (np) { + if (strcmp(np->name, name)) { + pprev = &np->next; + np = np->next; + } else { + *pprev = np->next; + xfree(np->name); + xfree(np->port); + xfree(np); + np = *pprev; + break; + } + } + return SLURM_SUCCESS; +} + +extern int +name_publish_up(char *name, char *port) +{ + Buf buf = NULL, resp_buf = NULL; + uint32_t size, tmp_32; + int rc; + + buf = init_buf(1024); + pack16((uint16_t)TREE_CMD_NAME_PUBLISH, buf); + packstr(name, buf); + packstr(port, buf); + size = get_buf_offset(buf); + + rc = tree_msg_to_srun_with_resp(size, get_buf_data(buf), &resp_buf); + free_buf(buf); + + if (rc == SLURM_SUCCESS) { + safe_unpack32(&tmp_32, resp_buf); + rc = (int) tmp_32; + } + +unpack_error: + if (resp_buf) + free_buf(resp_buf); + + return rc; +} + +extern int +name_unpublish_up(char *name) +{ + Buf buf = NULL, resp_buf = NULL; + uint32_t size, tmp_32; + int rc; + + buf = init_buf(1024); + pack16((uint16_t)TREE_CMD_NAME_UNPUBLISH, buf); + packstr(name, buf); + size = get_buf_offset(buf); + + rc = tree_msg_to_srun_with_resp(size, get_buf_data(buf), &resp_buf); + free_buf(buf); + + if (rc == SLURM_SUCCESS) { + safe_unpack32(&tmp_32, resp_buf); + rc = (int) tmp_32; + } + +unpack_error: + if (resp_buf) + free_buf(resp_buf); + + return rc; +} + + +extern char * +name_lookup_up(char *name) +{ + Buf buf = NULL, resp_buf = NULL; + uint32_t size; + char * port = NULL; + int rc; + + buf = init_buf(1024); + 
pack16((uint16_t)TREE_CMD_NAME_LOOKUP, buf); + packstr(name, buf); + size = get_buf_offset(buf); + + rc = tree_msg_to_srun_with_resp(size, get_buf_data(buf), &resp_buf); + free_buf(buf); + + if (rc == SLURM_SUCCESS) + safe_unpackstr_xmalloc(&port, (uint32_t *)&size, resp_buf); +unpack_error: + if (resp_buf) + free_buf(resp_buf); + + return port; +} diff --git a/src/plugins/mpi/pmi2/nameserv.h b/src/plugins/mpi/pmi2/nameserv.h new file mode 100644 index 0000000000000000000000000000000000000000..428c9dcc09568f38b7e31345a5d4604bcafd3c26 --- /dev/null +++ b/src/plugins/mpi/pmi2/nameserv.h @@ -0,0 +1,49 @@ +/*****************************************************************************\ + ** nameserv.h - name publish/unpublish/lookup functions + ***************************************************************************** + * Copyright (C) 2013 National University of Defense Technology. + * Written by Hongjia Cao <hjcao@nudt.edu.cn>. + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. 
If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef _NAME_SERV_H +#define _NAME_SERV_H + +extern int name_publish_local (char *name, char *port); +extern int name_unpublish_local (char *name); +extern char * name_lookup_local (char *name); + +extern int name_publish_up (char *name, char *port); +extern int name_unpublish_up (char *name); +extern char * name_lookup_up (char *name); + +#endif diff --git a/src/plugins/mpi/pmi2/pmi.h b/src/plugins/mpi/pmi2/pmi.h index 7cf05110fc24e2e17e944ccc1e7d1d9e08a97fa2..2d95941beeb5f1499147f8735b0535ec6d90030d 100644 --- a/src/plugins/mpi/pmi2/pmi.h +++ b/src/plugins/mpi/pmi2/pmi.h @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -160,6 +160,9 @@ #define PUBLISHNAME_CMD "publish_name" #define UNPUBLISHNAME_CMD "unpublish_name" #define LOOKUPNAME_CMD "lookup_name" +#define PUBLISHRESULT_CMD "publish_result" +#define UNPUBLISHRESULT_CMD "unpublish_result" +#define LOOKUPRESULT_CMD "lookup_result" #define MCMD_CMD "mcmd" @@ -203,6 +206,8 @@ #define ARGV_KEY "argv" #define INFOKEYCOUNT_KEY "infokeycount" #define ERRCODES_KEY "errcodes" +#define SERVICE_KEY "service" +#define INFO_KEY "info" #define TRUE_VAL "TRUE" #define FALSE_VAL "FALSE" diff --git a/src/plugins/mpi/pmi2/pmi1.c b/src/plugins/mpi/pmi2/pmi1.c index 515f2d8d9cadc7dfb7f9bd9402fa704cddef2cbd..721903f45d1b4e77c6201a70f030bbf3e4554d0e 100644 --- a/src/plugins/mpi/pmi2/pmi1.c +++ b/src/plugins/mpi/pmi2/pmi1.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -61,6 +61,7 @@ #include "setup.h" #include "kvs.h" #include "agent.h" +#include "nameserv.h" /* client command handlers */ static int _handle_get_maxes(int fd, int lrank, client_req_t *req); @@ -118,7 +119,7 @@ _handle_get_maxes(int fd, int lrank, client_req_t *req) KVSNAMEMAX_KEY"=%d " KEYLENMAX_KEY"=%d " VALLENMAX_KEY"=%d\n", rc, MAXKVSNAME, MAXKEYLEN, MAXVALLEN); - rc = client_resp_send(resp, fd); + (void) client_resp_send(resp, fd); client_resp_free(resp); debug3("mpi/pmi2: out _handle_get_maxes"); @@ -137,7 +138,7 @@ _handle_get_universe_size(int fd, int lrank, client_req_t *req) client_resp_append(resp, CMD_KEY"="UNIVSIZE_CMD" " RC_KEY"=%d " SIZE_KEY"=%d\n", rc, job_info.ntasks); - rc = client_resp_send(resp, fd); + (void) client_resp_send(resp, fd); client_resp_free(resp); debug3("mpi/pmi2: out _handle_get_universe_size"); @@ -161,7 +162,7 @@ _handle_get_appnum(int fd, int lrank, client_req_t *req) */ client_resp_append(resp, CMD_KEY"="APPNUM_CMD" " RC_KEY"=%d " APPNUM_KEY"=-1\n", rc); - rc = client_resp_send(resp, fd); + (void) client_resp_send(resp, fd); client_resp_free(resp); debug3("mpi/pmi2: out _handle_get_appnum"); @@ -184,6 +185,18 @@ _handle_barrier_in(int fd, int lrank, client_req_t *req) /* mutex protection is not required */ if (tasks_to_wait == 0 && children_to_wait == 0) { rc = temp_kvs_send(); + if (rc != SLURM_SUCCESS) { + error("mpi/pmi2: failed to send temp kvs to %s", + tree_info.parent_node ?: "srun"); + send_kvs_fence_resp_to_clients( + rc, + "mpi/pmi2: failed to send temp kvs"); + /* cancel the step to avoid tasks hang */ + slurm_kill_job_step(job_info.jobid, job_info.stepid, + SIGKILL); + } else { + waiting_kvs_resp = 1; + } } debug3("mpi/pmi2: out _handle_barrier_in, tasks_to_wait=%d, " "children_to_wait=%d", tasks_to_wait, children_to_wait); @@ -267,9 +280,12 @@ _handle_put(int fd, int lrank, client_req_t *req) client_req_get_str(req, KVSNAME_KEY, 
&kvsname); /* not used */ client_req_get_str(req, KEY_KEY, &key); client_req_get_str(req, VALUE_KEY, &val); - + xfree(kvsname); + /* no need to add k-v to hash. just get it ready to be up-forward */ rc = temp_kvs_add(key, val); + xfree(key); + xfree(val); if (rc == SLURM_SUCCESS) rc = 0; else @@ -296,9 +312,11 @@ _handle_get(int fd, int lrank, client_req_t *req) client_req_parse_body(req); client_req_get_str(req, KVSNAME_KEY, &kvsname); /* not used */ client_req_get_str(req, KEY_KEY, &key); - + xfree(kvsname); + val = kvs_get(key); - + xfree(key); + resp = client_resp_new(); if (val != NULL) { client_resp_append(resp, CMD_KEY"="GETRESULT_CMD" " @@ -320,28 +338,98 @@ _handle_getbyidx(int fd, int lrank, client_req_t *req) { /* not used in MPICH2 */ error("mpi/pmi2: PMI1 request of '" GETBYIDX_CMD "' not supported"); + return SLURM_ERROR; } static int _handle_publish_name(int fd, int lrank, client_req_t *req) { - error("mpi/pmi2: PMI1 request of '" PUBLISHNAME_CMD "' not supported"); - return SLURM_ERROR; + int rc; + client_resp_t *resp; + char *service = NULL, *port = NULL; + + debug3("mpi/pmi2: in _handle_publish_name"); + + client_req_parse_body(req); + client_req_get_str(req, SERVICE_KEY, &service); + client_req_get_str(req, PORT_KEY, &port); + + rc = name_publish_up(service, port); + xfree(service); + xfree(port); + + resp = client_resp_new(); + client_resp_append(resp, CMD_KEY"="PUBLISHRESULT_CMD" " + INFO_KEY"=%s\n", + rc == SLURM_SUCCESS ? 
"ok" : "fail"); + rc = client_resp_send(resp, fd); + client_resp_free(resp); + + debug3("mpi/pmi2: out _handle_publish_name"); + return rc; } static int _handle_unpublish_name(int fd, int lrank, client_req_t *req) { - error("mpi/pmi2: PMI1 request of '" UNPUBLISHNAME_CMD "' not supported"); - return SLURM_ERROR; + int rc; + client_resp_t *resp; + char *service = NULL; + + debug3("mpi/pmi2: in _handle_unpublish_name"); + + client_req_parse_body(req); + client_req_get_str(req, SERVICE_KEY, &service); + + rc = name_unpublish_up(service); + xfree(service); + + resp = client_resp_new(); + client_resp_append(resp, CMD_KEY"="UNPUBLISHRESULT_CMD" " + INFO_KEY"=%s\n", + rc == SLURM_SUCCESS ? "ok" : "fail"); + rc = client_resp_send(resp, fd); + client_resp_free(resp); + + debug3("mpi/pmi2: out _handle_unpublish_name"); + return rc; } +/* + * this design is not scalable: each task that calls MPI_Lookup_name() + * will generate a RPC to srun. + */ static int _handle_lookup_name(int fd, int lrank, client_req_t *req) { - error("mpi/pmi2: PMI1 request of '" LOOKUPNAME_CMD "' not supported"); - return SLURM_ERROR; + int rc; + client_resp_t *resp; + char *service = NULL, *port = NULL; + + debug3("mpi/pmi2: in _handle_lookup_name"); + + client_req_parse_body(req); + client_req_get_str(req, SERVICE_KEY, &service); + + port = name_lookup_up(service); + + resp = client_resp_new(); + client_resp_append(resp, CMD_KEY"="LOOKUPRESULT_CMD" "); + if (port == NULL) { + client_resp_append(resp, INFO_KEY"=fail\n"); + } else { + client_resp_append(resp, INFO_KEY"=ok "PORT_KEY"=%s\n", + port); + } + rc = client_resp_send(resp, fd); + client_resp_free(resp); + + xfree(service); + xfree(port); + + debug3("mpi/pmi2: out _handle_lookup_name"); + return rc; } static int @@ -494,8 +582,10 @@ _handle_pmi1_mcmd_buf(int fd, int lrank, int buf_size, int buf_len, char **pbuf) tmp_ptr = NULL; while (tmp_buf[0] != '\0') { tmp_ptr = strstr(tmp_buf, ENDCMD_KEY"\n"); - if ( tmp_ptr == NULL) { + if (tmp_ptr == 
NULL) { error("mpi/pmi2: this is impossible"); + rc = SLURM_ERROR; + break; } *tmp_ptr = '\0'; n = tmp_ptr - tmp_buf; diff --git a/src/plugins/mpi/pmi2/pmi2.c b/src/plugins/mpi/pmi2/pmi2.c index 28ad8818717fff07b493012aeb7eb0cc1d9567ad..d5ed26b8b4e6e2f34c3b9f7c4da2a5af268dff00 100644 --- a/src/plugins/mpi/pmi2/pmi2.c +++ b/src/plugins/mpi/pmi2/pmi2.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -59,6 +59,7 @@ #include "info.h" #include "setup.h" #include "agent.h" +#include "nameserv.h" /* PMI2 command handlers */ static int _handle_fullinit(int fd, int lrank, client_req_t *req); @@ -261,6 +262,18 @@ _handle_kvs_fence(int fd, int lrank, client_req_t *req) /* mutex protection is not required */ if (tasks_to_wait == 0 && children_to_wait == 0) { rc = temp_kvs_send(); + if (rc != SLURM_SUCCESS) { + error("mpi/pmi2: failed to send temp kvs to %s", + tree_info.parent_node ?: "srun"); + send_kvs_fence_resp_to_clients( + rc, + "mpi/pmi2: failed to send temp kvs"); + /* cancel the step to avoid tasks hang */ + slurm_kill_job_step(job_info.jobid, job_info.stepid, + SIGKILL); + } else { + waiting_kvs_resp = 1; + } } debug3("mpi/pmi2: out _handle_kvs_fence, tasks_to_wait=%d, " "children_to_wait=%d", tasks_to_wait, children_to_wait); @@ -390,20 +403,85 @@ _handle_info_getjobattr(int fd, int lrank, client_req_t *req) static int _handle_name_publish(int fd, int lrank, client_req_t *req) { - error("mpi/pmi2: name publish not implemented"); - return SLURM_ERROR; + int rc; + client_resp_t *resp; + char *name = NULL, *port = NULL; + + debug3("mpi/pmi2: in _handle_publish_name"); + + client_req_parse_body(req); + client_req_get_str(req, NAME_KEY, &name); + client_req_get_str(req, PORT_KEY, 
&port); + + rc = name_publish_up(name, port); + xfree(name); + xfree(port); + + resp = client_resp_new(); + client_resp_append(resp, CMD_KEY"="NAMEPUBLISHRESP_CMD";" + RC_KEY"=%d;", rc); + rc = client_resp_send(resp, fd); + client_resp_free(resp); + + debug3("mpi/pmi2: out _handle_publish_name"); + return rc; } static int _handle_name_unpublish(int fd, int lrank, client_req_t *req) { - return SLURM_ERROR; + int rc; + client_resp_t *resp; + char *name = NULL; + + debug3("mpi/pmi2: in _handle_unpublish_name"); + + client_req_parse_body(req); + client_req_get_str(req, NAME_KEY, &name); + + rc = name_unpublish_up(name); + xfree(name); + + resp = client_resp_new(); + client_resp_append(resp, CMD_KEY"="NAMEUNPUBLISHRESP_CMD";" + RC_KEY"=%d;", rc); + rc = client_resp_send(resp, fd); + client_resp_free(resp); + + debug3("mpi/pmi2: out _handle_unpublish_name"); + return rc; } static int _handle_name_lookup(int fd, int lrank, client_req_t *req) { - return SLURM_ERROR; + int rc; + client_resp_t *resp; + char *name = NULL, *port = NULL; + + debug3("mpi/pmi2: in _handle_lookup_name"); + + client_req_parse_body(req); + client_req_get_str(req, NAME_KEY, &name); + + port = name_lookup_up(name); + + resp = client_resp_new(); + client_resp_append(resp, CMD_KEY"="NAMELOOKUPRESP_CMD";"); + if (port == NULL) { + client_resp_append(resp, RC_KEY"=1;"); + } else { + client_resp_append(resp, RC_KEY"=0;"VALUE_KEY"=%s;", + port); + } + rc = client_resp_send(resp, fd); + client_resp_free(resp); + + xfree(name); + xfree(port); + + debug3("mpi/pmi2: out _handle_lookup_name"); + return rc; } static int diff --git a/src/plugins/mpi/pmi2/setup.c b/src/plugins/mpi/pmi2/setup.c index e56d453554dcefc1aec81e95d86dd3c7a0baa5bf..3652b59d68b41f8d91a8322d7027ac77a3650704 100644 --- a/src/plugins/mpi/pmi2/setup.c +++ b/src/plugins/mpi/pmi2/setup.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -160,7 +160,7 @@ _setup_stepd_job_info(const slurmd_job_t *job, char ***env) job_info.job_env = env_array_copy((const char **)*env); - job_info.srun_job = NULL; + job_info.MPIR_proctable = NULL; job_info.srun_opt = NULL; return SLURM_SUCCESS; @@ -237,6 +237,10 @@ _setup_stepd_tree_info(const slurmd_job_t *job, char ***env) tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t)); slurm_set_addr(tree_info.srun_addr, port, srun_host); + /* init kvs seq to 0. TODO: reduce array size */ + tree_info.children_kvs_seq = xmalloc(sizeof(uint32_t) * + job_info.nnodes); + return SLURM_SUCCESS; } @@ -551,13 +555,13 @@ _setup_srun_job_info(const mpi_plugin_client_info_t *job) error("mpi/pmi2: failed to dlopen()"); return SLURM_ERROR; } - sym = dlsym(handle, "job"); + sym = dlsym(handle, "MPIR_proctable"); if (sym == NULL) { /* if called directly in API, there may be no symbol available */ - verbose ("mpi/pmi2: failed to find symbol 'job'"); - job_info.srun_job = NULL; + verbose ("mpi/pmi2: failed to find symbol 'MPIR_proctable'"); + job_info.MPIR_proctable = NULL; } else { - job_info.srun_job = *(srun_job_t **)sym; + job_info.MPIR_proctable = *(MPIR_PROCDESC **)sym; } sym = dlsym(handle, "opt"); if (sym == NULL) { @@ -598,6 +602,10 @@ _setup_srun_tree_info(const mpi_plugin_client_info_t *job) snprintf(tree_sock_addr, 128, PMI2_SOCK_ADDR_FMT, job->jobid, job->stepid); + /* init kvs seq to 0. 
TODO: reduce array size */ + tree_info.children_kvs_seq = xmalloc(sizeof(uint32_t) * + job_info.nnodes); + return SLURM_SUCCESS; } @@ -636,23 +644,40 @@ _setup_srun_environ(const mpi_plugin_client_info_t *job, char ***env) return SLURM_SUCCESS; } +inline static int +_tasks_launched (void) +{ + int i, all_launched = 1; + if (job_info.MPIR_proctable == NULL) + return 1; + + for (i = 0; i < job_info.ntasks; i ++) { + if (job_info.MPIR_proctable[i].pid == 0) { + all_launched = 0; + break; + } + } + return all_launched; +} + static void * _task_launch_detection(void *unused) { spawn_resp_t *resp; - srun_job_state_t state; + time_t start; + int rc = 0; - if (job_info.srun_job) { - while (1) { - state = job_state(job_info.srun_job); - if (state >= SRUN_JOB_RUNNING) { - break; - } - usleep(1000*50); + /* + * mpir_init() is called in plugins/launch/slurm/launch_slurm.c before + * mpi_hook_client_prelaunch() is called in api/step_launch.c + */ + start = time(NULL); + while (_tasks_launched() == 0) { + usleep(1000*50); + if (time(NULL) - start > 600) { + rc = 1; + break; } - } else { - /* take the tasks launched successfully */ - state = SRUN_JOB_RUNNING; } /* send a resp to spawner srun */ @@ -660,11 +685,9 @@ _task_launch_detection(void *unused) resp->seq = job_info.spawn_seq; resp->jobid = xstrdup(job_info.pmi_jobid); resp->error_cnt = 0; /* TODO */ - if (state == SRUN_JOB_RUNNING) { - resp->rc = 0; - } else { - resp->rc = 1; - } + resp->rc = rc; + resp->pmi_port = tree_info.pmi_port; + spawn_resp_send_to_srun(resp); spawn_resp_free(resp); return NULL; diff --git a/src/plugins/mpi/pmi2/setup.h b/src/plugins/mpi/pmi2/setup.h index 51f0b5f4bbe14dbda1159c3cf35252b286ab587d..7a08d96e2df34c612222223c29076ea41d64e196 100644 --- a/src/plugins/mpi/pmi2/setup.h +++ b/src/plugins/mpi/pmi2/setup.h @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -59,7 +59,7 @@ #include "src/slurmd/slurmstepd/slurmstepd_job.h" -#include "src/srun/libsrun/srun_job.h" +#include "src/srun/libsrun/debugger.h" #include "src/srun/libsrun/opt.h" #include "tree.h" @@ -81,7 +81,7 @@ typedef struct pmi2_job_info { char *spawner_jobid; /* spawner pmi job id */ char **job_env; /* environment of job. use in stepd */ - srun_job_t *srun_job; /* used only in srun */ + MPIR_PROCDESC *MPIR_proctable; /* used only in srun */ opt_t *srun_opt; /* used only in srun */ } pmi2_job_info_t; @@ -93,7 +93,8 @@ typedef struct pmi2_tree_info { int depth; /* depth in tree */ int max_depth; /* max depth of the tree */ uint16_t pmi_port; /* PMI2 comm port of this srun */ - slurm_addr_t *srun_addr;/* PMI2 comm address parent srun */ + slurm_addr_t *srun_addr; /* PMI2 comm address parent srun */ + uint32_t *children_kvs_seq; /* sequence number of children nodes */ } pmi2_tree_info_t; diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c index dac2c2361455f9ecff7c743d17b5d50a1c79c32a..7553b02c80f14b3535f818c60792abe46db2d3b4 100644 --- a/src/plugins/mpi/pmi2/spawn.c +++ b/src/plugins/mpi/pmi2/spawn.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -314,6 +314,7 @@ spawn_resp_pack(spawn_resp_t *resp, Buf buf) pack32(resp->seq, buf); pack32((uint32_t)resp->rc, buf); + pack16((uint16_t)resp->pmi_port, buf); packstr(resp->jobid, buf); pack32(resp->error_cnt, buf); for (i = 0; i < resp->error_cnt; i ++) { @@ -332,6 +333,7 @@ spawn_resp_unpack(spawn_resp_t **resp_ptr, Buf buf) safe_unpack32(&resp->seq, buf); safe_unpack32((uint32_t *)&resp->rc, buf); + safe_unpack16((uint16_t *)&resp->pmi_port, buf); safe_unpackstr_xmalloc(&resp->jobid, &temp32, buf); safe_unpack32(&resp->error_cnt, buf); if (resp->error_cnt > 0) { @@ -465,6 +467,12 @@ _exec_srun_single(spawn_req_t *req, char **env) j = 0; argv[j ++] = "srun"; argv[j ++] = "--mpi=pmi2"; + if (job_info.srun_opt && job_info.srun_opt->no_alloc) { + argv[j ++] = "--no-alloc"; + xstrfmtcat(argv[j ++], "--nodelist=%s", + job_info.srun_opt->nodelist); + } + xstrfmtcat(argv[j ++], "--ntasks=%d", subcmd->max_procs); /* TODO: inherit options from srun_opt. */ for (i = 0; i < subcmd->info_cnt; i ++) { @@ -517,17 +525,17 @@ static int _exec_srun_multiple(spawn_req_t *req, char **env) { int argc, ntasks, i, j, spawn_cnt, fd; - char **argv = NULL, *multi_prog = NULL, *buf = NULL; + char **argv = NULL, *buf = NULL; spawn_subcmd_t *subcmd = NULL; + char fbuf[128]; debug3("mpi/pmi2: in _exec_srun_multiple"); /* create a tmp multi_prog file */ /* TODO: how to delete the file? 
*/ - multi_prog = tempnam(NULL, NULL); - fd = open(multi_prog, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + sprintf(fbuf, "/tmp/%d.XXXXXX", getpid()); + fd = mkstemp(fbuf); if (fd < 0) { - error("mpi/pmi2: failed to open multi-prog file %s: %m", - multi_prog); + error("mpi/pmi2: failed to open multi-prog file %s: %m", fbuf); return SLURM_ERROR; } ntasks = 0; @@ -549,8 +557,10 @@ _exec_srun_multiple(spawn_req_t *req, char **env) xstrcat(buf, "\n"); ntasks += subcmd->max_procs; } - safe_write(fd, buf, strlen(buf)); - xfree(buf); + if (buf) { + safe_write(fd, buf, strlen(buf)); + xfree(buf); + } close(fd); argc = 7; @@ -560,8 +570,13 @@ _exec_srun_multiple(spawn_req_t *req, char **env) argv[j ++] = "srun"; argv[j ++] = "--mpi=pmi2"; xstrfmtcat(argv[j ++], "--ntasks=%d", ntasks); + if (job_info.srun_opt && job_info.srun_opt->no_alloc) { + argv[j ++] = "--no-alloc"; + xstrfmtcat(argv[j ++], "--nodelist=%s", + job_info.srun_opt->nodelist); + } argv[j ++] = "--multi-prog"; - argv[j ++] = multi_prog; + argv[j ++] = fbuf; argv[j ++] = NULL; debug3("mpi/mpi2: to execve"); diff --git a/src/plugins/mpi/pmi2/spawn.h b/src/plugins/mpi/pmi2/spawn.h index e2abe5f1440f660c9d43720a27718d005728b368..4e534666bce4dbf8358b7cbfd5c11facb568ed16 100644 --- a/src/plugins/mpi/pmi2/spawn.h +++ b/src/plugins/mpi/pmi2/spawn.h @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -78,6 +78,7 @@ typedef struct spawn_resp { uint32_t seq; int rc; char *jobid; + uint16_t pmi_port; uint32_t error_cnt; int *error_codes; } spawn_resp_t; diff --git a/src/plugins/mpi/pmi2/tree.c b/src/plugins/mpi/pmi2/tree.c index c5cd9e764be7f3fc642ae4442a16c739342c41a1..f619780108778b6867a61f5317ffc36f67d93dee 100644 --- a/src/plugins/mpi/pmi2/tree.c +++ b/src/plugins/mpi/pmi2/tree.c @@ -6,7 +6,7 @@ * All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -45,23 +45,35 @@ #include "src/common/slurm_xlator.h" #include "src/common/slurm_protocol_interface.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" #include "kvs.h" #include "spawn.h" #include "client.h" #include "setup.h" #include "pmi.h" +#include "nameserv.h" static int _handle_kvs_fence(int fd, Buf buf); static int _handle_kvs_fence_resp(int fd, Buf buf); static int _handle_spawn(int fd, Buf buf); static int _handle_spawn_resp(int fd, Buf buf); +static int _handle_name_publish(int fd, Buf buf); +static int _handle_name_unpublish(int fd, Buf buf); +static int _handle_name_lookup(int fd, Buf buf); + +static uint32_t spawned_srun_ports_size = 0; +static uint16_t *spawned_srun_ports = NULL; + static int (*tree_cmd_handlers[]) (int fd, Buf buf) = { _handle_kvs_fence, _handle_kvs_fence_resp, _handle_spawn, _handle_spawn_resp, + _handle_name_publish, + _handle_name_unpublish, + _handle_name_lookup, NULL }; @@ -70,22 +82,40 @@ static char *tree_cmd_names[] = { "TREE_CMD_KVS_FENCE_RESP", "TREE_CMD_SPAWN", "TREE_CMD_SPAWN_RESP", + "TREE_CMD_NAME_PUBLISH", + "TREE_CMD_NAME_UNPUBLISH", + "TREE_CMD_NAME_LOOKUP", NULL, }; static int _handle_kvs_fence(int 
fd, Buf buf) { - uint32_t from_nodeid, num_children, temp32; + uint32_t from_nodeid, num_children, temp32, seq; char *from_node = NULL; + int rc = SLURM_SUCCESS; safe_unpack32(&from_nodeid, buf); safe_unpackstr_xmalloc(&from_node, &temp32, buf); safe_unpack32(&num_children, buf); + safe_unpack32(&seq, buf); debug3("mpi/pmi2: in _handle_kvs_fence, from node %u(%s) representing" - " %u offspring", from_nodeid, from_node, num_children); - + " %u offspring, seq=%u", from_nodeid, from_node, num_children, + seq); + if (seq != kvs_seq) { + error("mpi/pmi2: invalid kvs seq from node %u(%s) ignored, " + "expect %u got %u", + from_nodeid, from_node, kvs_seq, seq); + goto out; + } + if (seq == tree_info.children_kvs_seq[from_nodeid]) { + info("mpi/pmi2: duplicate KVS_FENCE request from node %u(%s) " + "ignored, seq=%u", from_nodeid, from_node, seq); + goto out; + } + tree_info.children_kvs_seq[from_nodeid] = seq; + if (tasks_to_wait == 0 && children_to_wait == 0) { tasks_to_wait = job_info.ltasks; children_to_wait = tree_info.num_children; @@ -95,26 +125,63 @@ _handle_kvs_fence(int fd, Buf buf) temp_kvs_merge(buf); if (children_to_wait == 0 && tasks_to_wait == 0) { - temp_kvs_send(); + rc = temp_kvs_send(); + if (rc != SLURM_SUCCESS) { + if (in_stepd()) { + error("mpi/pmi2: failed to send temp kvs" + " to %s", + tree_info.parent_node ?: "srun"); + send_kvs_fence_resp_to_clients( + rc, + "mpi/pmi2: failed to send temp kvs"); + } else { + error("mpi/pmi2: failed to send temp kvs" + " to compute nodes"); + } + /* cancel the step to avoid tasks hang */ + slurm_kill_job_step(job_info.jobid, job_info.stepid, + SIGKILL); + } else { + if (in_stepd()) + waiting_kvs_resp = 1; + } } debug3("mpi/pmi2: out _handle_kvs_fence, tasks_to_wait=%d, " "children_to_wait=%d", tasks_to_wait, children_to_wait); - return SLURM_SUCCESS; +out: + xfree(from_node); + return rc; unpack_error: error("mpi/pmi2: failed to unpack kvs fence message"); - return SLURM_ERROR; + rc = SLURM_ERROR; + goto out; } 
static int _handle_kvs_fence_resp(int fd, Buf buf) { - char *key, *val; - int rc = 0, i = 0; - client_resp_t *resp; - uint32_t temp32; + char *key, *val, *errmsg = NULL; + int rc = SLURM_SUCCESS; + uint32_t temp32, seq; debug3("mpi/pmi2: in _handle_kvs_fence_resp"); + + safe_unpack32(&seq, buf); + if (seq != kvs_seq - 1) { + error("mpi/pmi2: invalid kvs seq from srun, expect %u" + " got %u", kvs_seq - 1, seq); + rc = SLURM_ERROR;; + errmsg = "mpi/pmi2: invalid kvs seq from srun"; + goto resp; + } + if (! waiting_kvs_resp) { + debug("mpi/pmi2: duplicate KVS_FENCE_RESP from srun ignored"); + return rc; + } else { + waiting_kvs_resp = 0; + } + temp32 = remaining_buf(buf); debug3("mpi/pmi2: buf length: %u", temp32); /* put kvs into local hash */ @@ -128,24 +195,16 @@ _handle_kvs_fence_resp(int fd, Buf buf) } resp: - /* send fence_resp/barrier_out to tasks */ - resp = client_resp_new(); - if ( is_pmi11() ) { - client_resp_append(resp, CMD_KEY"="BARRIEROUT_CMD" " - RC_KEY"=%d\n", rc); - } else if (is_pmi20()) { - client_resp_append(resp, CMD_KEY"="KVSFENCERESP_CMD";" - RC_KEY"=%d;", rc); - } - for (i = 0; i < job_info.ltasks; i ++) { - client_resp_send(resp, STEPD_PMI_SOCK(i)); + send_kvs_fence_resp_to_clients(rc, errmsg); + if (rc != SLURM_SUCCESS) { + slurm_kill_job_step(job_info.jobid, job_info.stepid, SIGKILL); } - client_resp_free(resp); return rc; unpack_error: error("mpi/pmi2: unpack kvs error in fence resp"); rc = SLURM_ERROR; + errmsg = "mpi/pmi2: unpack kvs error in fence resp"; goto resp; } @@ -315,6 +374,11 @@ _handle_spawn_resp(int fd, Buf buf) } else { /* srun */ debug3("mpi/pmi2: spawned tasks of %s launched", spawn_resp->jobid); + spawned_srun_ports = xrealloc(spawned_srun_ports, + spawn_resp->seq * + sizeof(uint16_t)); + spawned_srun_ports_size = spawn_resp->seq; /* seq start from 1 */ + spawned_srun_ports[spawn_resp->seq - 1] = spawn_resp->pmi_port; /* forward resp to stepd */ spawn_resp_send_to_stepd(spawn_resp, from_node); xfree(from_node); @@ -324,6 
+388,110 @@ _handle_spawn_resp(int fd, Buf buf) return rc; } + +/* name serv handlers called only in srun */ +static int +_handle_name_publish(int fd, Buf buf) +{ + int rc; + uint32_t tmp32; + char *name = NULL, *port = NULL; + Buf resp_buf = NULL; + + debug3("mpi/pmi2: in _handle_name_publish"); + + safe_unpackstr_xmalloc(&name, &tmp32, buf); + safe_unpackstr_xmalloc(&port, &tmp32, buf); + + if (tree_info.srun_addr) + rc = name_publish_up(name, port); + else + rc = name_publish_local(name, port); +out: + xfree(name); + xfree(port); + resp_buf = init_buf(32); + pack32((uint32_t) rc, resp_buf); + rc = _slurm_msg_sendto(fd, get_buf_data(resp_buf), + get_buf_offset(resp_buf), + SLURM_PROTOCOL_NO_SEND_RECV_FLAGS); + free_buf(resp_buf); + + debug3("mpi/pmi2: out _handle_name_publish"); + return rc; + +unpack_error: + rc = SLURM_ERROR; + goto out; +} + +static int +_handle_name_unpublish(int fd, Buf buf) +{ + int rc; + uint32_t tmp32; + char *name = NULL; + Buf resp_buf = NULL; + + debug3("mpi/pmi2: in _handle_name_unpublish"); + + safe_unpackstr_xmalloc(&name, &tmp32, buf); + + if (tree_info.srun_addr) + rc = name_unpublish_up(name); + else + rc = name_unpublish_local(name); +out: + xfree(name); + resp_buf = init_buf(32); + pack32((uint32_t) rc, resp_buf); + rc = _slurm_msg_sendto(fd, get_buf_data(resp_buf), + get_buf_offset(resp_buf), + SLURM_PROTOCOL_NO_SEND_RECV_FLAGS); + free_buf(resp_buf); + + debug3("mpi/pmi2: out _handle_name_unpublish"); + return rc; + +unpack_error: + rc = SLURM_ERROR; + goto out; +} + +static int +_handle_name_lookup(int fd, Buf buf) +{ + int rc; + uint32_t tmp32; + char *name = NULL, *port = NULL; + Buf resp_buf = NULL; + + debug3("mpi/pmi2: in _handle_name_lookup"); + + safe_unpackstr_xmalloc(&name, &tmp32, buf); + + if (tree_info.srun_addr) + port = name_lookup_up(name); + else + port = name_lookup_local(name); +out: + resp_buf = init_buf(1024); + packstr(port, resp_buf); + rc = _slurm_msg_sendto(fd, get_buf_data(resp_buf), + 
get_buf_offset(resp_buf), + SLURM_PROTOCOL_NO_SEND_RECV_FLAGS); + free_buf(resp_buf); + xfree(name); + xfree(port); + + debug3("mpi/pmi2: out _handle_name_lookup"); + return rc; + +unpack_error: + rc = SLURM_ERROR; + goto out; +} + /**************************************************************/ extern int handle_tree_cmd(int fd) @@ -368,7 +536,13 @@ tree_msg_to_srun(uint32_t len, char *msg) int fd, rc; fd = _slurm_open_stream(tree_info.srun_addr, true); + if (fd < 0) + return SLURM_ERROR; rc = _slurm_msg_sendto(fd, msg, len, SLURM_PROTOCOL_NO_SEND_RECV_FLAGS); + if (rc == len) /* all data sent */ + rc = SLURM_SUCCESS; + else + rc = SLURM_ERROR; close(fd); return rc; } @@ -383,6 +557,8 @@ tree_msg_to_srun_with_resp(uint32_t len, char *msg, Buf *resp_ptr) xassert(resp_ptr != NULL); fd = _slurm_open_stream(tree_info.srun_addr, true); + if (fd < 0) + return SLURM_ERROR; rc = _slurm_msg_sendto(fd, msg, len, SLURM_PROTOCOL_NO_SEND_RECV_FLAGS); if (rc == len) { /* all data sent */ safe_read(fd, &len, sizeof(len)); @@ -414,3 +590,26 @@ tree_msg_to_stepds(char *nodelist, uint32_t len, char *msg) msg); return rc; } + +extern int +tree_msg_to_spawned_sruns(uint32_t len, char *msg) +{ + int i = 0, rc = SLURM_SUCCESS, fd = -1, sent=0; + slurm_addr_t srun_addr; + + for (i = 0; i < spawned_srun_ports_size; i ++) { + if (spawned_srun_ports[i] == 0) + continue; + + slurm_set_addr(&srun_addr, spawned_srun_ports[i], "127.0.0.1"); + fd = _slurm_open_stream(&srun_addr, true); + if (fd < 0) + return SLURM_ERROR; + sent = _slurm_msg_sendto(fd, msg, len, + SLURM_PROTOCOL_NO_SEND_RECV_FLAGS); + if (sent != len) + rc = SLURM_ERROR; + close(fd); + } + return rc; +} diff --git a/src/plugins/mpi/pmi2/tree.h b/src/plugins/mpi/pmi2/tree.h index e885acb562722bb4436b73944bb0113dd554a001..e58ec8b2dd881ee9d2e033a3c8b7c1d52db4676c 100644 --- a/src/plugins/mpi/pmi2/tree.h +++ b/src/plugins/mpi/pmi2/tree.h @@ -6,7 +6,7 @@ * All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -47,6 +47,9 @@ enum { TREE_CMD_KVS_FENCE_RESP, TREE_CMD_SPAWN, TREE_CMD_SPAWN_RESP, + TREE_CMD_NAME_PUBLISH, + TREE_CMD_NAME_UNPUBLISH, + TREE_CMD_NAME_LOOKUP, TREE_CMD_COUNT }; @@ -55,7 +58,7 @@ extern int handle_tree_cmd(int fd); extern int tree_msg_to_srun(uint32_t len, char *msg); extern int tree_msg_to_srun_with_resp(uint32_t len, char *msg, Buf *resp_ptr); extern int tree_msg_to_stepds(char *nodelist, uint32_t len, char *msg); - +extern int tree_msg_to_spawned_sruns(uint32_t len, char *msg); diff --git a/src/plugins/preempt/Makefile.in b/src/plugins/preempt/Makefile.in index 4535da82e446395f6d27be54859eb526afc29f7d..d93c5636c68dd501913e5409cf967760bfeb9220 100644 --- a/src/plugins/preempt/Makefile.in +++ b/src/plugins/preempt/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/preempt DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ 
$(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = 
@REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/preempt/none/Makefile.in b/src/plugins/preempt/none/Makefile.in index 540559e857951b66dea48b5f9c30ae0cb777c575..4a04c0e9b31f798f45203152dc0737511ce0c9d5 100644 --- a/src/plugins/preempt/none/Makefile.in +++ b/src/plugins/preempt/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/preempt/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ 
CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/preempt/none/preempt_none.c b/src/plugins/preempt/none/preempt_none.c index 72976f8cd7c34ebb6ed33a0053648f653b8db940..23c0e69dbd8ceab7a0092d9d2f4f4b160e0bd180 100644 --- a/src/plugins/preempt/none/preempt_none.c +++ b/src/plugins/preempt/none/preempt_none.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/preempt/partition_prio/Makefile.in b/src/plugins/preempt/partition_prio/Makefile.in index 5bef89ac798f2c23720433d21df9dd8f828fb4a2..5b0370a613d3fb62ae2309aed213a6d05c932517 100644 --- a/src/plugins/preempt/partition_prio/Makefile.in +++ b/src/plugins/preempt/partition_prio/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/preempt/partition_prio DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ 
CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/preempt/partition_prio/preempt_partition_prio.c b/src/plugins/preempt/partition_prio/preempt_partition_prio.c index b676428df19c1f3361ab38e6da2d1a75d6a4d847..300a5c9160a320a927cf5b45fb54fc7cd9480fb3 100644 --- 
a/src/plugins/preempt/partition_prio/preempt_partition_prio.c +++ b/src/plugins/preempt/partition_prio/preempt_partition_prio.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -122,8 +122,6 @@ extern List find_preemptable_jobs(struct job_record *job_ptr) /* This job is a preemption candidate */ if (preemptee_job_list == NULL) { preemptee_job_list = list_create(NULL); - if (preemptee_job_list == NULL) - fatal("list_create malloc failure"); } list_append(preemptee_job_list, job_p); } diff --git a/src/plugins/preempt/qos/Makefile.in b/src/plugins/preempt/qos/Makefile.in index 9be45943091ea1bf7ed8dee9dd4f5c200a1a9190..68f4c51f27f14de2d5ceb21f3415e6acae1bb30b 100644 --- a/src/plugins/preempt/qos/Makefile.in +++ b/src/plugins/preempt/qos/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/preempt/qos DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ 
$(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = 
@REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/preempt/qos/preempt_qos.c b/src/plugins/preempt/qos/preempt_qos.c index 94047ab026836b42db85e82b929793e6cd2d70f9..af01632b1c2eeb9053106e3ff194e240471efa01 100644 --- a/src/plugins/preempt/qos/preempt_qos.c +++ b/src/plugins/preempt/qos/preempt_qos.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -124,8 +124,6 @@ extern List find_preemptable_jobs(struct job_record *job_ptr) /* This job is a preemption candidate */ if (preemptee_job_list == NULL) { preemptee_job_list = list_create(NULL); - if (preemptee_job_list == NULL) - fatal("list_create malloc failure"); } list_append(preemptee_job_list, job_p); } diff --git a/src/plugins/priority/Makefile.am b/src/plugins/priority/Makefile.am index 974a990357d89025472173fd5434fe26379c0601..0b4761ffbaad3e2f0d99a6a11b53a75498015b9f 100644 --- a/src/plugins/priority/Makefile.am +++ b/src/plugins/priority/Makefile.am @@ -1 +1 @@ -SUBDIRS = basic multifactor multifactor2 +SUBDIRS = basic multifactor diff --git a/src/plugins/priority/Makefile.in b/src/plugins/priority/Makefile.in index d4726497ac783ab01897785cfd0e6e1cbcf9625a..22fbccf04703bc560a3a9718ea023d2d6302b0c9 100644 --- a/src/plugins/priority/Makefile.in +++ b/src/plugins/priority/Makefile.in @@ -55,6 +55,7 @@ subdir = src/plugins/priority DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + 
$(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ 
+HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -352,7 +378,7 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = basic multifactor multifactor2 +SUBDIRS = basic multifactor all: all-recursive .SUFFIXES: diff --git a/src/plugins/priority/basic/Makefile.in b/src/plugins/priority/basic/Makefile.in index a9e8052ba95550c3dc70857864a15ace4be842e8..fbeafb85f66482193f3a9084df5e09e2440c99bf 100644 --- a/src/plugins/priority/basic/Makefile.in +++ b/src/plugins/priority/basic/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/priority/basic DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ 
+ $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = 
@HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/priority/basic/priority_basic.c b/src/plugins/priority/basic/priority_basic.c index e9a52ad959afc4229d3884b5f61b9da397958dee..f0e3380ab88589a1807c53a5367ef01a425f0377 100644 --- a/src/plugins/priority/basic/priority_basic.c +++ b/src/plugins/priority/basic/priority_basic.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -53,6 +53,7 @@ #include "slurm/slurm_errno.h" #include "src/common/slurm_priority.h" +#include "src/common/assoc_mgr.h" /* * These variables are required by the generic plugin interface. 
If they @@ -126,7 +127,7 @@ extern uint32_t priority_p_set(uint32_t last_prio, struct job_record *job_ptr) return new_prio; } -extern void priority_p_reconfig(void) +extern void priority_p_reconfig(bool assoc_clear) { return; } @@ -160,3 +161,61 @@ extern List priority_p_get_priority_factors_list( { return(list_create(NULL)); } + +extern void priority_p_job_end(struct job_record *job_ptr) +{ + uint64_t unused_cpu_run_secs = 0; + uint64_t time_limit_secs = (uint64_t)job_ptr->time_limit * 60; + slurmdb_association_rec_t *assoc_ptr; + assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, + WRITE_LOCK, NO_LOCK, NO_LOCK }; + + /* No unused cpu_run_secs if job ran past its time limit */ + if (job_ptr->end_time >= job_ptr->start_time + time_limit_secs) + return; + + unused_cpu_run_secs = job_ptr->total_cpus * + (job_ptr->start_time + time_limit_secs - job_ptr->end_time); + + assoc_mgr_lock(&locks); + if (job_ptr->qos_ptr) { + slurmdb_qos_rec_t *qos_ptr = + (slurmdb_qos_rec_t *)job_ptr->qos_ptr; + if (unused_cpu_run_secs > + qos_ptr->usage->grp_used_cpu_run_secs) { + qos_ptr->usage->grp_used_cpu_run_secs = 0; + debug2("acct_policy_job_fini: " + "grp_used_cpu_run_secs " + "underflow for qos %s", qos_ptr->name); + } else + qos_ptr->usage->grp_used_cpu_run_secs -= + unused_cpu_run_secs; + } + assoc_ptr = (slurmdb_association_rec_t *)job_ptr->assoc_ptr; + while (assoc_ptr) { + /* If the job finished early remove the extra time now. */ + if (unused_cpu_run_secs > + assoc_ptr->usage->grp_used_cpu_run_secs) { + assoc_ptr->usage->grp_used_cpu_run_secs = 0; + debug2("acct_policy_job_fini: " + "grp_used_cpu_run_secs " + "underflow for account %s", + assoc_ptr->acct); + } else { + assoc_ptr->usage->grp_used_cpu_run_secs -= + unused_cpu_run_secs; + debug4("acct_policy_job_fini: job %u. 
" + "Removed %"PRIu64" unused seconds " + "from assoc %s " + "grp_used_cpu_run_secs = %"PRIu64"", + job_ptr->job_id, unused_cpu_run_secs, + assoc_ptr->acct, + assoc_ptr->usage->grp_used_cpu_run_secs); + } + /* now handle all the group limits of the parents */ + assoc_ptr = assoc_ptr->usage->parent_assoc_ptr; + } + assoc_mgr_unlock(&locks); + + return; +} diff --git a/src/plugins/priority/multifactor/Makefile.in b/src/plugins/priority/multifactor/Makefile.in index 10e56f46e12215b54227aa77f7ae41eb53365531..94a914e10f1b0ee445312bff3eeaa5a3df85a688 100644 --- a/src/plugins/priority/multifactor/Makefile.in +++ b/src/plugins/priority/multifactor/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/priority/multifactor DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = 
@BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c index 
344d3405349ef23cb0aa9aee72ebe7fb0050b65f..e4f83c06f7616c36ee7d65c87bc9e62334e9b83c 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.c +++ b/src/plugins/priority/multifactor/priority_multifactor.c @@ -1,13 +1,19 @@ /*****************************************************************************\ * priority_multifactor.c - slurm multifactor priority plugin. ***************************************************************************** + * Copyright (C) 2012 Aalto University + * Written by Janne Blomqvist <janne.blomqvist@aalto.fi> + * + * Based on priority_multifactor.c, whose copyright information is + * reproduced below: + * * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Danny Auble <da@llnl.gov> * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -72,6 +78,9 @@ #define SECS_PER_DAY (24 * 60 * 60) #define SECS_PER_WEEK (7 * SECS_PER_DAY) + +#define MIN_USAGE_FACTOR 0.01 + /* These are defined here so when we link with something other than * the slurmctld we will have these symbols defined. They will get * overwritten when linking with the slurmctld. 
@@ -80,13 +89,15 @@ void *acct_db_conn __attribute__((weak_import)) = NULL; uint32_t cluster_cpus __attribute__((weak_import)) = NO_VAL; List job_list __attribute__((weak_import)) = NULL; -time_t last_job_update __attribute__((weak_import)); +time_t last_job_update __attribute__((weak_import)) = (time_t) 0; +uint16_t part_max_priority __attribute__((weak_import)) = 0; slurm_ctl_conf_t slurmctld_conf __attribute__((weak_import)); #else void *acct_db_conn = NULL; uint32_t cluster_cpus = NO_VAL; List job_list = NULL; -time_t last_job_update; +time_t last_job_update = (time_t) 0; +uint16_t part_max_priority = 0; slurm_ctl_conf_t slurmctld_conf; #endif @@ -131,25 +142,32 @@ static bool running_decay = 0, reconfig = 0, static bool favor_small; /* favor small jobs over large */ static uint32_t max_age; /* time when not to add any more * priority to a job if reached */ -static uint32_t weight_age; /* weight for age factor */ -static uint32_t weight_fs; /* weight for Fairshare factor */ -static uint32_t weight_js; /* weight for Job Size factor */ +static uint16_t enforce; /* AccountingStorageEnforce */ +static uint32_t weight_age; /* weight for age factor */ +static uint32_t weight_fs; /* weight for Fairshare factor */ +static uint32_t weight_js; /* weight for Job Size factor */ static uint32_t weight_part; /* weight for Partition factor */ -static uint32_t weight_qos; /* weight for QOS factor */ -static uint32_t flags; /* Priority Flags */ +static uint32_t weight_qos; /* weight for QOS factor */ +static uint32_t flags; /* Priority Flags */ +static uint32_t max_tickets; /* Maximum number of tickets given to a + * user. Protected by assoc_mgr lock. */ +static time_t g_last_ran = 0; /* when the last poll ran */ +static double decay_factor = 1; /* The decay factor when decaying time. 
*/ extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc); extern double priority_p_calc_fs_factor(long double usage_efctv, long double shares_norm); +extern uint16_t part_max_priority; + /* * apply decay factor to all associations usage_raw - * IN: decay_factor - decay to be applied to each associations' used + * IN: real_decay - decay to be applied to each associations' used * shares. This should already be modified with the amount of delta * time from last application.. * RET: SLURM_SUCCESS on SUCCESS, SLURM_ERROR else. */ -static int _apply_decay(double decay_factor) +static int _apply_decay(double real_decay) { ListIterator itr = NULL; slurmdb_association_rec_t *assoc = NULL; @@ -157,12 +175,12 @@ static int _apply_decay(double decay_factor) assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; - /* continue if decay_factor is 0 or 1 since that doesn't help + /* continue if real_decay is 0 or 1 since that doesn't help us at all. 1 means no decay and 0 will just zero everything out so don't waste time doing it */ - if (!decay_factor) + if (!real_decay) return SLURM_ERROR; - else if (!calc_fairshare || (decay_factor == 1)) + else if (!calc_fairshare || (real_decay == 1)) return SLURM_SUCCESS; assoc_mgr_lock(&locks); @@ -175,15 +193,15 @@ static int _apply_decay(double decay_factor) root. All usage_raws are calculated from the bottom up. 
*/ while ((assoc = list_next(itr))) { - assoc->usage->usage_raw *= decay_factor; - assoc->usage->grp_used_wall *= decay_factor; + assoc->usage->usage_raw *= real_decay; + assoc->usage->grp_used_wall *= real_decay; } list_iterator_destroy(itr); itr = list_iterator_create(assoc_mgr_qos_list); while ((qos = list_next(itr))) { - qos->usage->usage_raw *= decay_factor; - qos->usage->grp_used_wall *= decay_factor; + qos->usage->usage_raw *= real_decay; + qos->usage->grp_used_wall *= real_decay; } list_iterator_destroy(itr); assoc_mgr_unlock(&locks); @@ -371,6 +389,32 @@ static int _write_last_decay_ran(time_t last_ran, time_t last_reset) return error_code; } + +/* Set the effective usage of a node. */ +static void _set_usage_efctv(slurmdb_association_rec_t *assoc) +{ + long double min_shares_norm; + + if ((assoc->shares_raw == SLURMDB_FS_USE_PARENT) + && assoc->usage->parent_assoc_ptr) { + assoc->usage->shares_norm = + assoc->usage->parent_assoc_ptr->usage->shares_norm; + assoc->usage->usage_norm = + assoc->usage->parent_assoc_ptr->usage->usage_norm; + } + + if (assoc->usage->level_shares) { + min_shares_norm = (long double) MIN_USAGE_FACTOR + * assoc->shares_raw / assoc->usage->level_shares; + if (assoc->usage->usage_norm > min_shares_norm) + assoc->usage->usage_efctv = assoc->usage->usage_norm; + else + assoc->usage->usage_efctv = min_shares_norm; + } else + assoc->usage->usage_efctv = assoc->usage->usage_norm; +} + + /* This should initially get the childern list from * assoc_mgr_root_assoc. Since our algorythm goes from top down we * calculate all the non-user associations now. When a user submits a @@ -401,6 +445,62 @@ static int _set_children_usage_efctv(List childern_list) return SLURM_SUCCESS; } + +/* Distribute the tickets to child nodes recursively. + * + * NOTE: acct_mgr_association_lock must be locked before this is called. 
+ */ +static int _distribute_tickets(List childern_list, uint32_t tickets) +{ + ListIterator itr; + slurmdb_association_rec_t *assoc; + double sfsum = 0, fs; + + if (!childern_list || !list_count(childern_list)) + return SLURM_SUCCESS; + + itr = list_iterator_create(childern_list); + while ((assoc = list_next(itr))) { + if (assoc->usage->active_seqno + != assoc_mgr_root_assoc->usage->active_seqno) + continue; + if (fuzzy_equal(assoc->usage->usage_efctv, NO_VAL)) + priority_p_set_assoc_usage(assoc); + fs = priority_p_calc_fs_factor(assoc->usage->usage_efctv, + assoc->usage->shares_norm); + sfsum += assoc->usage->shares_norm * fs; + } + list_iterator_destroy(itr); + + itr = list_iterator_create(childern_list); + while ((assoc = list_next(itr))) { + if (assoc->usage->active_seqno + != assoc_mgr_root_assoc->usage->active_seqno) + continue; + fs = priority_p_calc_fs_factor(assoc->usage->usage_efctv, + assoc->usage->shares_norm); + assoc->usage->tickets = tickets * assoc->usage->shares_norm + * fs / sfsum; + if (priority_debug) { + if (assoc->user) + info("User %s in account %s gets %u tickets", + assoc->user, assoc->acct, + assoc->usage->tickets); + else + info("Account %s gets %u tickets", + assoc->acct, assoc->usage->tickets); + } + if (assoc->user && assoc->usage->tickets > max_tickets) + max_tickets = assoc->usage->tickets; + _distribute_tickets(assoc->usage->childern_list, + assoc->usage->tickets); + } + list_iterator_destroy(itr); + + return SLURM_SUCCESS; +} + + /* job_ptr should already have the partition priority and such added * here before had we will be adding to it */ @@ -437,17 +537,31 @@ static double _get_fairshare_priority( struct job_record *job_ptr) priority_p_set_assoc_usage(fs_assoc); /* Priority is 0 -> 1 */ - priority_fs = priority_p_calc_fs_factor( - fs_assoc->usage->usage_efctv, - (long double)fs_assoc->usage->shares_norm); - if (priority_debug) { - info("Fairshare priority of job %u for user %s in acct" - " %s is 2**(-%Lf/%f) = %f", - 
job_ptr->job_id, job_assoc->user, job_assoc->acct, - fs_assoc->usage->usage_efctv, - fs_assoc->usage->shares_norm, priority_fs); + priority_fs = 0; + if (flags & PRIORITY_FLAGS_TICKET_BASED) { + if (fs_assoc->usage->active_seqno == + assoc_mgr_root_assoc->usage->active_seqno && max_tickets) { + priority_fs = (double) fs_assoc->usage->tickets / + max_tickets; + } + if (priority_debug) { + info("Fairshare priority of job %u for user %s in acct" + " %s is %f", + job_ptr->job_id, job_assoc->user, job_assoc->acct, + priority_fs); + } + } else { + priority_fs = priority_p_calc_fs_factor( + fs_assoc->usage->usage_efctv, + (long double)fs_assoc->usage->shares_norm); + if (priority_debug) { + info("Fairshare priority of job %u for user %s in acct" + " %s is 2**(-%Lf/%f) = %f", + job_ptr->job_id, job_assoc->user, job_assoc->acct, + fs_assoc->usage->usage_efctv, + fs_assoc->usage->shares_norm, priority_fs); + } } - assoc_mgr_unlock(&locks); return priority_fs; @@ -504,7 +618,7 @@ static void _get_priority_factors(time_t start_time, struct job_record *job_ptr) } if (weight_js) { - uint32_t cpu_cnt = 0; + uint32_t cpu_cnt = 0, min_nodes = 1; /* On the initial run of this we don't have total_cpus so go off the requesting. After the first shot total_cpus should be filled in. 
@@ -516,11 +630,36 @@ static void _get_priority_factors(time_t start_time, struct job_record *job_ptr) cpu_cnt = job_ptr->details->max_cpus; else if (job_ptr->details && job_ptr->details->min_cpus) cpu_cnt = job_ptr->details->min_cpus; + if (job_ptr->details) + min_nodes = job_ptr->details->min_nodes; - if (favor_small) { + if (flags & PRIORITY_FLAGS_SIZE_RELATIVE) { + uint32_t time_limit = 1; + /* Job size in CPUs (based upon average CPUs/Node */ + job_ptr->prio_factors->priority_js = + (double)job_ptr->details->min_nodes * + (double)cluster_cpus / + (double)node_record_count; + if (cpu_cnt > job_ptr->prio_factors->priority_js) { + job_ptr->prio_factors->priority_js = + (double)cpu_cnt; + } + /* Divide by job time limit */ + if (job_ptr->time_limit != NO_VAL) + time_limit = job_ptr->time_limit; + else if (job_ptr->part_ptr) + time_limit = job_ptr->part_ptr->max_time; + job_ptr->prio_factors->priority_js /= time_limit; + /* Normalize to max value of 1.0 */ + job_ptr->prio_factors->priority_js /= cluster_cpus; + if (favor_small) { + job_ptr->prio_factors->priority_js = + (double) 1.0 - + job_ptr->prio_factors->priority_js; + } + } else if (favor_small) { job_ptr->prio_factors->priority_js = - (double)(node_record_count - - job_ptr->details->min_nodes) + (double)(node_record_count - min_nodes) / (double)node_record_count; if (cpu_cnt) { job_ptr->prio_factors->priority_js += @@ -528,10 +667,9 @@ static void _get_priority_factors(time_t start_time, struct job_record *job_ptr) / (double)cluster_cpus; job_ptr->prio_factors->priority_js /= 2; } - } else { + } else { /* favor large */ job_ptr->prio_factors->priority_js = - (double)job_ptr->details->min_nodes - / (double)node_record_count; + (double)min_nodes / (double)node_record_count; if (cpu_cnt) { job_ptr->prio_factors->priority_js += (double)cpu_cnt / (double)cluster_cpus; @@ -554,7 +692,10 @@ static void _get_priority_factors(time_t start_time, struct job_record *job_ptr) qos_ptr->usage->norm_priority; } - 
job_ptr->prio_factors->nice = job_ptr->details->nice; + if (job_ptr->details) + job_ptr->prio_factors->nice = job_ptr->details->nice; + else + job_ptr->prio_factors->nice = NICE_OFFSET; } static uint32_t _get_priority_internal(time_t start_time, @@ -591,6 +732,36 @@ static uint32_t _get_priority_internal(time_t start_time, + job_ptr->prio_factors->priority_qos - (double)(job_ptr->prio_factors->nice - NICE_OFFSET); + if (job_ptr->part_ptr_list) { + struct part_record *part_ptr; + double priority_part; + ListIterator part_iterator; + int i = 0; + + if (!job_ptr->priority_array) { + job_ptr->priority_array = xmalloc(sizeof(uint32_t) * + (list_count(job_ptr->part_ptr_list) + 1)); + } + part_iterator = list_iterator_create(job_ptr->part_ptr_list); + while ((part_ptr = (struct part_record *) + list_next(part_iterator))) { + priority_part = part_ptr->priority / + (double)part_max_priority * + (double)weight_part; + job_ptr->priority_array[i] = (uint32_t) + (job_ptr->prio_factors->priority_age + + job_ptr->prio_factors->priority_fs + + job_ptr->prio_factors->priority_js + + priority_part + + job_ptr->prio_factors->priority_qos + - (double)(job_ptr->prio_factors->nice + - NICE_OFFSET)); + debug("Job %u has more than one partition (%s)(%u)", + job_ptr->job_id, part_ptr->name, + job_ptr->priority_array[i]); + i++; + } + } /* Priority 0 is reserved for held jobs */ if (priority < 1) priority = 1; @@ -624,6 +795,34 @@ static uint32_t _get_priority_internal(time_t start_time, return (uint32_t)priority; } + +/* Mark an association and its parents as active (i.e. it may be given + * tickets) during the current scheduling cycle. The association + * manager lock should be held on entry. */ +static bool _mark_assoc_active(struct job_record *job_ptr) +{ + slurmdb_association_rec_t *job_assoc = + (slurmdb_association_rec_t *)job_ptr->assoc_ptr, + *assoc; + + if (!job_assoc) { + error("Job %u has no association. 
Unable to " + "mark assiciation as active.", job_ptr->job_id); + return false; + } + + for (assoc = job_assoc; assoc != assoc_mgr_root_assoc; + assoc = assoc->usage->parent_assoc_ptr) { + if (assoc->usage->active_seqno + == assoc_mgr_root_assoc->usage->active_seqno) + break; + assoc->usage->active_seqno + = assoc_mgr_root_assoc->usage->active_seqno; + } + return true; +} + + /* based upon the last reset time, compute when the next reset should be */ static time_t _next_reset(uint16_t reset_period, time_t last_reset) { @@ -686,18 +885,17 @@ static time_t _next_reset(uint16_t reset_period, time_t last_reset) } /* - Remove previously used time from qos and assocs - grp_used_cpu_run_secs. - - When restarting slurmctld acct_policy_job_begin() is called for all - running jobs. There every jobs total requested cputime (total_cpus * - time_limit) is added to grp_used_cpu_run_secs of assocs and qos. - - This function will subtract all cputime that was used until the - decay thread last ran. This kludge is necessary as the decay thread - last_ran variable can't be accessed from acct_policy_job_begin(). -*/ -void _init_grp_used_cpu_run_secs(time_t last_ran) + * Remove previously used time from qos and assocs grp_used_cpu_run_secs. + * + * When restarting slurmctld acct_policy_job_begin() is called for all + * running jobs. There every jobs total requested cputime (total_cpus * + * time_limit) is added to grp_used_cpu_run_secs of assocs and qos. + * + * This function will subtract all cputime that was used until the + * decay thread last ran. This kludge is necessary as the decay thread + * last_ran variable can't be accessed from acct_policy_job_begin(). 
+ */ +static void _init_grp_used_cpu_run_secs(time_t last_ran) { struct job_record *job_ptr = NULL; ListIterator itr; @@ -709,21 +907,21 @@ void _init_grp_used_cpu_run_secs(time_t last_ran) slurmdb_qos_rec_t *qos; slurmdb_association_rec_t *assoc; - if(priority_debug) + if (priority_debug) info("Initializing grp_used_cpu_run_secs"); + if (!(enforce & ACCOUNTING_ENFORCE_LIMITS)) + return; if (!(job_list && list_count(job_list))) return; lock_slurmctld(job_read_lock); itr = list_iterator_create(job_list); - if (itr == NULL) - fatal("list_iterator_create: malloc failure"); assoc_mgr_lock(&locks); while ((job_ptr = list_next(itr))) { if (priority_debug) - debug2("job: %u",job_ptr->job_id); + debug2("job: %u", job_ptr->job_id); qos = NULL; assoc = NULL; delta = 0; @@ -739,20 +937,28 @@ void _init_grp_used_cpu_run_secs(time_t last_ran) qos = (slurmdb_qos_rec_t *) job_ptr->qos_ptr; assoc = (slurmdb_association_rec_t *) job_ptr->assoc_ptr; - if(qos) { - if (priority_debug) + if (qos) { + if (priority_debug) { info("Subtracting %"PRIu64" from qos " - "%u grp_used_cpu_run_secs " + "%s grp_used_cpu_run_secs " "%"PRIu64" = %"PRIu64"", delta, - qos->id, + qos->name, qos->usage->grp_used_cpu_run_secs, qos->usage->grp_used_cpu_run_secs - delta); - qos->usage->grp_used_cpu_run_secs -= delta; + } + if (qos->usage->grp_used_cpu_run_secs >= delta) { + qos->usage->grp_used_cpu_run_secs -= delta; + } else { + error("qos %s grp_used_cpu_run_secs underflow", + qos->name); + qos->usage->grp_used_cpu_run_secs = 0; + } } + while (assoc) { - if (priority_debug) + if (priority_debug) { info("Subtracting %"PRIu64" from assoc %u " "grp_used_cpu_run_secs " "%"PRIu64" = %"PRIu64"", @@ -761,7 +967,14 @@ void _init_grp_used_cpu_run_secs(time_t last_ran) assoc->usage->grp_used_cpu_run_secs, assoc->usage->grp_used_cpu_run_secs - delta); - assoc->usage->grp_used_cpu_run_secs -= delta; + } + if (assoc->usage->grp_used_cpu_run_secs >= delta) { + assoc->usage->grp_used_cpu_run_secs -= delta; + } else { + 
error("assoc %u grp_used_cpu_run_secs " + "underflow", assoc->id); + assoc->usage->grp_used_cpu_run_secs = 0; + } assoc = assoc->usage->parent_assoc_ptr; } } @@ -775,7 +988,7 @@ void _init_grp_used_cpu_run_secs(time_t last_ran) * Return 0 if we don't need to process the job any further, 1 if * futher processing is needed. */ -static int _apply_new_usage(struct job_record *job_ptr, double decay_factor, +static int _apply_new_usage(struct job_record *job_ptr, time_t start_period, time_t end_period) { slurmdb_qos_rec_t *qos; @@ -785,19 +998,11 @@ static int _apply_new_usage(struct job_record *job_ptr, double decay_factor, uint64_t job_time_limit_ends = 0; assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; - assoc_mgr_lock_t qos_read_lock = { NO_LOCK, NO_LOCK, - READ_LOCK, NO_LOCK, NO_LOCK }; - /* If usage_factor is 0 just skip this - since we don't add the usage. - */ - assoc_mgr_lock(&qos_read_lock); - qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr; - if (qos && !qos->usage_factor) { - assoc_mgr_unlock(&qos_read_lock); - return 0; - } - assoc_mgr_unlock(&qos_read_lock); + /* Even if job_ptr->qos_ptr->usage_factor is 0 we need to + * handle other non-usage variables here + * (grp_used_cpu_run_secs), so don't return. + */ if (job_ptr->start_time > start_period) start_period = job_ptr->start_time; @@ -808,30 +1013,35 @@ static int _apply_new_usage(struct job_record *job_ptr, double decay_factor, run_delta = difftime(end_period, start_period); - /* job already has been accounted for - go to next */ - if (run_delta < 1) - return 0; + /* Even if run_delta is 0 we need to + * handle other non-usage variables here + * (grp_used_cpu_run_secs), so don't return. + */ + if (run_delta < 0) + run_delta = 0; /* cpu_run_delta will is used to - decrease qos and assocs - grp_used_cpu_run_secs values. 
When - a job is started only seconds until - start_time+time_limit is added, so - for jobs running over their - timelimit we should only subtract - the used time until the time - limit. */ + * decrease qos and assocs + * grp_used_cpu_run_secs values. When + * a job is started only seconds until + * start_time+time_limit is added, so + * for jobs running over their + * timelimit we should only subtract + * the used time until the time limit. */ job_time_limit_ends = (uint64_t)job_ptr->start_time + (uint64_t)job_ptr->time_limit * 60; - if ((uint64_t)start_period >= job_time_limit_ends) + if ((uint64_t)start_period >= job_time_limit_ends) cpu_run_delta = 0; - else if (end_period > job_time_limit_ends) + else if (IS_JOB_FINISHED(job_ptr) || IS_JOB_COMPLETING(job_ptr)) { + /* If a job is being requeued sometimes the state will + be pending + completing so handle that the same as + finished so we don't leave time in the mix. + */ cpu_run_delta = job_ptr->total_cpus * (job_time_limit_ends - (uint64_t)start_period); - else + } else cpu_run_delta = job_ptr->total_cpus * run_delta; if (priority_debug) @@ -862,8 +1072,9 @@ static int _apply_new_usage(struct job_record *job_ptr, double decay_factor, qos->usage->usage_raw += (long double)real_decay; if (qos->usage->grp_used_cpu_run_secs >= cpu_run_delta) { if (priority_debug) - info("grp_used_cpu_run_secs is %"PRIu64", " - "will subtract %"PRIu64"", + info("QOS %s has grp_used_cpu_run_secs " + "of %"PRIu64", will subtract %"PRIu64"", + qos->name, qos->usage->grp_used_cpu_run_secs, cpu_run_delta); qos->usage->grp_used_cpu_run_secs -= cpu_run_delta; @@ -880,16 +1091,17 @@ static int _apply_new_usage(struct job_record *job_ptr, double decay_factor, } /* We want to do this all the way up - to and including root. This way we - can keep track of how much usage - has occured on the entire system - and use that to normalize against. - */ + * to and including root. 
This way we + * can keep track of how much usage + * has occured on the entire system + * and use that to normalize against. */ while (assoc) { if (assoc->usage->grp_used_cpu_run_secs >= cpu_run_delta) { - if(priority_debug) - info("grp_used_cpu_run_secs is %"PRIu64", " - "will subtract %"PRIu64"", + if (priority_debug) + info("assoc %u (user='%s' " + "acct='%s') has grp_used_cpu_run_secs " + "of %"PRIu64", will subtract %"PRIu64"", + assoc->id, assoc->user, assoc->acct, assoc->usage->grp_used_cpu_run_secs, cpu_run_delta); assoc->usage->grp_used_cpu_run_secs -= cpu_run_delta; @@ -928,11 +1140,9 @@ static void *_decay_thread(void *no_data) struct job_record *job_ptr = NULL; ListIterator itr; time_t start_time = time(NULL); - time_t last_ran = 0; time_t last_reset = 0, next_reset = 0; uint32_t calc_period = slurm_get_priority_calc_period(); double decay_hl = (double)slurm_get_priority_decay_hl(); - double decay_factor = 1; uint16_t reset_period = slurm_get_priority_reset_period(); /* Write lock on jobs, read lock on nodes and partitions */ @@ -941,17 +1151,50 @@ static void *_decay_thread(void *no_data) assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; + /* + * DECAY_FACTOR DESCRIPTION: + * + * The decay thread applies an exponential decay over the past + * consumptions using a rolling approach. + * Every calc period p in seconds, the already computed usage is + * computed again applying the decay factor of that slice : + * decay_factor_slice. + * + * To ease the computation, the notion of decay_factor + * is introduced and corresponds to the decay factor + * required for a slice of 1 second. 
Thus, for any given + * slice of time of n seconds, decay_factor_slice will be + * defined as : df_slice = pow(df,n) + * + * For a slice corresponding to the defined half life 'decay_hl' and + * a usage x, we will therefore have : + * >> x * pow(decay_factor,decay_hl) = 1/2 x << + * + * This expression helps to define the value of decay_factor that + * is necessary to apply the previously described logic. + * + * The expression is equivalent to : + * >> decay_hl * ln(decay_factor) = ln(1/2) + * >> ln(decay_factor) = ln(1/2) / decay_hl + * >> decay_factor = e( ln(1/2) / decay_hl ) + * + * Applying the power series e(x) = sum(x^n/n!) for n from 0 to infinity + * >> decay_factor ~= 1 + ln(1/2)/decay_hl (first-order terms only) + * >> decay_factor ~= 1 - ( 0.693 / decay_hl) + * + * This explains the following declaration. + */ if (decay_hl > 0) decay_factor = 1 - (0.693 / decay_hl); (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - _read_last_decay_ran(&last_ran, &last_reset); + _read_last_decay_ran(&g_last_ran, &last_reset); if (last_reset == 0) last_reset = start_time; - _init_grp_used_cpu_run_secs(last_ran); + _init_grp_used_cpu_run_secs(g_last_ran); while (1) { time_t now = start_time; @@ -1014,10 +1257,10 @@ static void *_decay_thread(void *no_data) assoc_mgr_root_assoc->usage->childern_list); assoc_mgr_unlock(&locks); - if (!last_ran) + if (!g_last_ran) goto get_usage; else - run_delta = difftime(start_time, last_ran); + run_delta = difftime(start_time, g_last_ran); if (run_delta <= 0) goto get_usage; @@ -1038,38 +1281,134 @@ static void *_decay_thread(void *no_data) slurm_mutex_unlock(&decay_lock); break; } - lock_slurmctld(job_write_lock); - itr = list_iterator_create(job_list); - while ((job_ptr = list_next(itr))) { - /* apply new usage */ - if (!IS_JOB_PENDING(job_ptr) && - job_ptr->start_time && job_ptr->assoc_ptr) { - if (!_apply_new_usage(job_ptr, decay_factor, - last_ran, start_time)) + + if (!(flags &
PRIORITY_FLAGS_TICKET_BASED)) { + lock_slurmctld(job_write_lock); + itr = list_iterator_create(job_list); + while ((job_ptr = list_next(itr))) { + /* Don't need to handle finished jobs. */ + if (IS_JOB_FINISHED(job_ptr) + || IS_JOB_COMPLETING(job_ptr)) + continue; + /* apply new usage */ + if (!IS_JOB_PENDING(job_ptr) && + job_ptr->start_time && job_ptr->assoc_ptr) { + if (!_apply_new_usage( + job_ptr, + g_last_ran, start_time)) + continue; + } + + /* + * Priority 0 is reserved for held + * jobs. Also skip priority + * calculation for non-pending jobs. + */ + if ((job_ptr->priority == 0) + || !IS_JOB_PENDING(job_ptr)) continue; + + job_ptr->priority = _get_priority_internal( + start_time, job_ptr); + last_job_update = time(NULL); + debug2("priority for job %u is now %u", + job_ptr->job_id, job_ptr->priority); } + list_iterator_destroy(itr); + unlock_slurmctld(job_write_lock); + } - /* - * Priority 0 is reserved for held jobs. Also skip - * priority calculation for non-pending jobs. + get_usage: + if (flags & PRIORITY_FLAGS_TICKET_BASED) { + /* Read lock on jobs, nodes, and partitions */ + slurmctld_lock_t job_read_lock = + { NO_LOCK, READ_LOCK, READ_LOCK, READ_LOCK }; + + /* Multifactor Ticket Based core algo + * 1/3. Iterate through all jobs, mark parent + * associations with the current + * sequence id, so that we know which + * associations/users are active. At the same time as + * we're looping through all the jobs anyway, apply + * the new usage of running jobs too. */ - if ((job_ptr->priority == 0) - || !IS_JOB_PENDING(job_ptr)) - continue; - job_ptr->priority = - _get_priority_internal(start_time, job_ptr); - last_job_update = time(NULL); - debug2("priority for job %u is now %u", - job_ptr->job_id, job_ptr->priority); + lock_slurmctld(job_read_lock); + assoc_mgr_lock(&locks); + /* seqno 0 is a special invalid value. 
*/ + assoc_mgr_root_assoc->usage->active_seqno++; + if (!assoc_mgr_root_assoc->usage->active_seqno) + assoc_mgr_root_assoc->usage->active_seqno++; + assoc_mgr_unlock(&locks); + itr = list_iterator_create(job_list); + while ((job_ptr = list_next(itr))) { + /* Don't need to handle finished jobs. */ + if (IS_JOB_FINISHED(job_ptr) + || IS_JOB_COMPLETING(job_ptr)) + continue; + /* apply new usage */ + if (!IS_JOB_PENDING(job_ptr) && + job_ptr->start_time && job_ptr->assoc_ptr + && g_last_ran) + _apply_new_usage(job_ptr, + g_last_ran, + start_time); + + if (IS_JOB_PENDING(job_ptr) + && job_ptr->assoc_ptr) { + assoc_mgr_lock(&locks); + _mark_assoc_active(job_ptr); + assoc_mgr_unlock(&locks); + } + } + list_iterator_destroy(itr); + unlock_slurmctld(job_read_lock); + + /* Multifactor Ticket Based core algo + * 2/3. Start from the root, + * distribute tickets to active child associations + * proportional to the fair share (s*F). We start with + * UINT32_MAX tickets at the root. + */ + assoc_mgr_lock(&locks); + max_tickets = 0; + assoc_mgr_root_assoc->usage->tickets = (uint32_t) -1; + _distribute_tickets( + assoc_mgr_root_assoc->usage->childern_list, + (uint32_t) -1); + assoc_mgr_unlock(&locks); + + /* Multifactor Ticket Based core algo + * 3/3. Iterate through the job + * list again, give priorities proportional to the + * maximum number of tickets given to any user. + */ + lock_slurmctld(job_write_lock); + itr = list_iterator_create(job_list); + while ((job_ptr = list_next(itr))) { + /* + * Priority 0 is reserved for held + * jobs. Also skip priority + * calculation for non-pending jobs. 
+ */ + if ((job_ptr->priority == 0) + || !IS_JOB_PENDING(job_ptr)) + continue; + + job_ptr->priority = _get_priority_internal( + start_time, job_ptr); + last_job_update = time(NULL); + debug2("priority for job %u is now %u", + job_ptr->job_id, job_ptr->priority); + } + list_iterator_destroy(itr); + unlock_slurmctld(job_write_lock); + } - list_iterator_destroy(itr); - unlock_slurmctld(job_write_lock); - get_usage: - last_ran = start_time; + g_last_ran = start_time; - _write_last_decay_ran(last_ran, last_reset); + _write_last_decay_ran(g_last_ran, last_reset); running_decay = 0; slurm_mutex_unlock(&decay_lock); @@ -1145,6 +1484,7 @@ static void _internal_setup(void) favor_small = slurm_get_priority_favor_small(); + enforce = slurm_get_accounting_storage_enforce(); max_age = slurm_get_priority_max_age(); weight_age = slurm_get_priority_weight_age(); weight_fs = slurm_get_priority_weight_fairshare(); @@ -1154,6 +1494,7 @@ static void _internal_setup(void) flags = slurmctld_conf.priority_flags; if (priority_debug) { + info("priority: AccountingStorageEnforce is %u", enforce); info("priority: Max Age is %u", max_age); info("priority: Weight Age is %u", weight_age); info("priority: Weight Fairshare is %u", weight_fs); @@ -1205,9 +1546,8 @@ int init ( void ) fatal("pthread_create error %m"); /* This is here to join the decay thread so we don't core - dump if in the sleep, since there is no other place to join - we have to create another thread to do it. - */ + * dump if in the sleep, since there is no other place to join + * we have to create another thread to do it. */ slurm_attr_init(&thread_attr); if (pthread_create(&cleanup_handler_thread, &thread_attr, _cleanup_thread, NULL)) @@ -1215,13 +1555,13 @@ int init ( void ) slurm_attr_destroy(&thread_attr); } else { - if (weight_fs) + if (weight_fs) { fatal("It appears you don't have any association " "data from your database. " "The priority/multifactor plugin requires " "this information to run correctly. 
Please " + "check your database connection and try again."); - + } calc_fairshare = 0; } @@ -1259,10 +1599,19 @@ extern uint32_t priority_p_set(uint32_t last_prio, struct job_record *job_ptr) return priority; } -extern void priority_p_reconfig(void) +extern void priority_p_reconfig(bool assoc_clear) { reconfig = 1; _internal_setup(); + + /* Since the used_cpu_run_secs has been reset by the reconfig, + * we need to remove the time that has passed since the last + * poll. We can't just do the correct calculation in the + * first place because it will mess up everything in the poll + * since it is based off the g_last_ran time. + */ + if (assoc_clear) + _init_grp_used_cpu_run_secs(g_last_ran); debug2("%s reconfigured", plugin_name); return; @@ -1286,25 +1635,26 @@ extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc) child_str = assoc->acct; } - if (assoc_mgr_root_assoc->usage->usage_raw) + if (assoc_mgr_root_assoc->usage->usage_raw) { assoc->usage->usage_norm = assoc->usage->usage_raw / assoc_mgr_root_assoc->usage->usage_raw; - else + } else { /* This should only happen when no usage has occured - at all so no big deal, the other usage should be 0 - as well here. - */ + * at all so no big deal, the other usage should be 0 + * as well here.
*/ assoc->usage->usage_norm = 0; + } - if (priority_debug) + if (priority_debug) { info("Normalized usage for %s %s off %s %Lf / %Lf = %Lf", child, child_str, assoc->usage->parent_assoc_ptr->acct, assoc->usage->usage_raw, assoc_mgr_root_assoc->usage->usage_raw, assoc->usage->usage_norm); + } /* This is needed in case someone changes the half-life on the - fly and now we have used more time than is available under - the new config */ + * fly and now we have used more time than is available under + * the new config */ if (assoc->usage->usage_norm > 1.0) assoc->usage->usage_norm = 1.0; @@ -1316,6 +1666,14 @@ extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc) assoc->usage->parent_assoc_ptr->acct, assoc->usage->usage_efctv, assoc->usage->usage_norm); + } else if (flags & PRIORITY_FLAGS_TICKET_BASED) { + _set_usage_efctv(assoc); + if (priority_debug) { + info("Effective usage for %s %s off %s = %Lf", + child, child_str, + assoc->usage->parent_assoc_ptr->acct, + assoc->usage->usage_efctv); + } } else { assoc->usage->usage_efctv = assoc->usage->usage_norm + ((assoc->usage->parent_assoc_ptr->usage->usage_efctv - @@ -1342,14 +1700,21 @@ extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc) extern double priority_p_calc_fs_factor(long double usage_efctv, long double shares_norm) { - double priority_fs; + double priority_fs = 0.0; - xassert(!fuzzy_equal(usage_efctv, NO_VAL)); + if (fuzzy_equal(usage_efctv, NO_VAL)) + return priority_fs; - if (shares_norm > 0.0) + if (shares_norm <= 0) + return priority_fs; + + if (flags & PRIORITY_FLAGS_TICKET_BASED) { + if (usage_efctv < MIN_USAGE_FACTOR * shares_norm) + usage_efctv = MIN_USAGE_FACTOR * shares_norm; + priority_fs = shares_norm / usage_efctv; + } else { priority_fs = pow(2.0, -(usage_efctv / shares_norm)); - else - priority_fs = 0.0; + } return priority_fs; } @@ -1373,12 +1738,10 @@ extern List priority_p_get_priority_factors_list( slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, 
READ_LOCK, READ_LOCK }; + lock_slurmctld(job_read_lock); if (job_list && list_count(job_list)) { ret_list = list_create(slurm_destroy_priority_factors_object); - lock_slurmctld(job_read_lock); itr = list_iterator_create(job_list); - if (itr == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = list_next(itr))) { /* * We are only looking for pending jobs @@ -1424,12 +1787,22 @@ extern List priority_p_get_priority_factors_list( list_append(ret_list, obj); } list_iterator_destroy(itr); - unlock_slurmctld(job_read_lock); if (!list_count(ret_list)) { list_destroy(ret_list); ret_list = NULL; } } + unlock_slurmctld(job_read_lock); return ret_list; } + +/* at least slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, + * READ_LOCK, READ_LOCK }; should be locked before calling this */ +extern void priority_p_job_end(struct job_record *job_ptr) +{ + if (priority_debug) + info("priority_p_job_end: called for job %u", job_ptr->job_id); + + _apply_new_usage(job_ptr, g_last_ran, time(NULL)); +} diff --git a/src/plugins/priority/multifactor2/Makefile.am b/src/plugins/priority/multifactor2/Makefile.am deleted file mode 100644 index e880a24858c7b36a888ae6215fa0468791c705d0..0000000000000000000000000000000000000000 --- a/src/plugins/priority/multifactor2/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Makefile for priority/multifactor2 plugin - -AUTOMAKE_OPTIONS = foreign - -PLUGIN_FLAGS = -module -avoid-version -export-dynamic - -INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common - -pkglib_LTLIBRARIES = priority_multifactor2.la - -priority_multifactor2_la_SOURCES = priority_multifactor2.c -priority_multifactor2_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -priority_multifactor2_la_LIBADD = -lm diff --git a/src/plugins/priority/multifactor2/priority_multifactor2.c b/src/plugins/priority/multifactor2/priority_multifactor2.c deleted file mode 100644 index da4d0c80ea124a75a0ae212f4b2b5e18512de541..0000000000000000000000000000000000000000 --- 
a/src/plugins/priority/multifactor2/priority_multifactor2.c +++ /dev/null @@ -1,1569 +0,0 @@ -/*****************************************************************************\ - * priority_multifactor2.c - slurm multifactor priority plugin version 2. - ***************************************************************************** - * Copyright (C) 2012 Aalto University - * Written by Janne Blomqvist <janne.blomqvist@aalto.fi> - * - * Based on priority_multifactor.c, whose copyright information is - * reproduced below: - * - * Copyright (C) 2008-2009 Lawrence Livermore National Security. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Danny Auble <da@llnl.gov> - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. - * Please also read the included file: DISCLAIMER. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do - * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in - * the program, then also delete it here. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -\*****************************************************************************/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#if HAVE_STDINT_H -# include <stdint.h> -#endif -#if HAVE_INTTYPES_H -# include <inttypes.h> -#endif -#ifdef WITH_PTHREADS -# include <pthread.h> -#endif /* WITH_PTHREADS */ - -#include <sys/stat.h> -#include <stdio.h> -#include <fcntl.h> - -#include <math.h> -#include "slurm/slurm_errno.h" - -#include "src/common/slurm_priority.h" -#include "src/common/xstring.h" -#include "src/common/assoc_mgr.h" -#include "src/common/parse_time.h" - -#include "src/slurmctld/locks.h" - -#define SECS_PER_DAY (24 * 60 * 60) -#define SECS_PER_WEEK (7 * SECS_PER_DAY) - -#define MIN_USAGE_FACTOR 0.01 - -/* These are defined here so when we link with something other than - * the slurmctld we will have these symbols defined. They will get - * overwritten when linking with the slurmctld. - */ -#if defined (__APPLE__) -void *acct_db_conn __attribute__((weak_import)) = NULL; -uint32_t cluster_cpus __attribute__((weak_import)) = NO_VAL; -List job_list __attribute__((weak_import)) = NULL; -time_t last_job_update __attribute__((weak_import)); -#else -void *acct_db_conn = NULL; -uint32_t cluster_cpus = NO_VAL; -List job_list = NULL; -time_t last_job_update; -#endif - -/* - * These variables are required by the generic plugin interface. If they - * are not found in the plugin, the plugin loader will ignore it. - * - * plugin_name - a string giving a human-readable description of the - * plugin. 
There is no maximum length, but the symbol must refer to - * a valid string. - * - * plugin_type - a string suggesting the type of the plugin or its - * applicability to a particular form of data or method of data handling. - * If the low-level plugin API is used, the contents of this string are - * unimportant and may be anything. SLURM uses the higher-level plugin - * interface which requires this string to be of the form - * - * <application>/<method> - * - * where <application> is a description of the intended application of - * the plugin (e.g., "jobcomp" for SLURM job completion logging) and <method> - * is a description of how this plugin satisfies that application. SLURM will - * only load job completion logging plugins if the plugin_type string has a - * prefix of "jobcomp/". - * - * plugin_version - an unsigned 32-bit integer giving the version number - * of the plugin. If major and minor revisions are desired, the major - * version number may be multiplied by a suitable magnitude constant such - * as 100 or 1000. Various SLURM versions will likely require a certain - * minimum version for their plugins as the job completion logging API - * matures. 
- */ -const char plugin_name[] = "Priority MULTIFACTOR 2 plugin"; -const char plugin_type[] = "priority/multifactor2"; -const uint32_t plugin_version = 100; - -static pthread_t decay_handler_thread; -static pthread_mutex_t decay_lock = PTHREAD_MUTEX_INITIALIZER; -static bool running_decay = 0, reconfig = 0, - calc_fairshare = 1, priority_debug = 0; -static bool favor_small; /* favor small jobs over large */ -static uint32_t max_age; /* time when not to add any more - * priority to a job if reached */ -static uint32_t weight_age; /* weight for age factor */ -static uint32_t weight_fs; /* weight for Fairshare factor */ -static uint32_t weight_js; /* weight for Job Size factor */ -static uint32_t weight_part; /* weight for Partition factor */ -static uint32_t weight_qos; /* weight for QOS factor */ -static uint32_t flags; /* Priority Flags */ - -static uint32_t max_tickets; /* Maximum number of tickets given to a - * user. Protected by assoc_mgr lock. */ - -extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc); -extern double priority_p_calc_fs_factor(long double usage_efctv, - long double shares_norm); - -/* - * apply decay factor to all associations usage_raw - * IN: decay_factor - decay to be applied to each associations' used - * shares. This should already be modified with the amount of delta - * time from last application.. - * RET: SLURM_SUCCESS on SUCCESS, SLURM_ERROR else. - */ -static int _apply_decay(double decay_factor) -{ - ListIterator itr = NULL; - slurmdb_association_rec_t *assoc = NULL; - slurmdb_qos_rec_t *qos = NULL; - assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, - WRITE_LOCK, NO_LOCK, NO_LOCK }; - - /* continue if decay_factor is 0 or 1 since that doesn't help - * us at all. 
1 means no decay and 0 will just zero - * everything out so don't waste time doing it */ - if (!decay_factor) - return SLURM_ERROR; - else if (!calc_fairshare) - return SLURM_SUCCESS; - - assoc_mgr_lock(&locks); - - xassert(assoc_mgr_association_list); - xassert(assoc_mgr_qos_list); - - itr = list_iterator_create(assoc_mgr_association_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); - /* We want to do this to all associations including - * root. All usage_raws are calculated from the bottom up. */ - while ((assoc = list_next(itr))) { - assoc->usage->usage_raw *= decay_factor; - assoc->usage->grp_used_wall *= decay_factor; - } - list_iterator_destroy(itr); - - itr = list_iterator_create(assoc_mgr_qos_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); - while ((qos = list_next(itr))) { - qos->usage->usage_raw *= decay_factor; - qos->usage->grp_used_wall *= decay_factor; - } - list_iterator_destroy(itr); - assoc_mgr_unlock(&locks); - - return SLURM_SUCCESS; -} - -/* - * reset usage_raw, and grp_used_wall on all associations - * This should be called every PriorityUsageResetPeriod - * RET: SLURM_SUCCESS on SUCCESS, SLURM_ERROR else. - */ -static int _reset_usage(void) -{ - ListIterator itr = NULL; - slurmdb_association_rec_t *assoc = NULL; - slurmdb_qos_rec_t *qos = NULL; - assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, - WRITE_LOCK, NO_LOCK, NO_LOCK }; - - if (!calc_fairshare) - return SLURM_SUCCESS; - - assoc_mgr_lock(&locks); - - xassert(assoc_mgr_association_list); - - itr = list_iterator_create(assoc_mgr_association_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); - /* We want to do this to all associations including - * root. All usage_raws are calculated from the bottom up. 
*/ - while ((assoc = list_next(itr))) { - assoc->usage->usage_raw = 0; - assoc->usage->grp_used_wall = 0; - } - list_iterator_destroy(itr); - - itr = list_iterator_create(assoc_mgr_qos_list); - while ((qos = list_next(itr))) { - qos->usage->usage_raw = 0; - qos->usage->grp_used_wall = 0; - } - list_iterator_destroy(itr); - assoc_mgr_unlock(&locks); - - return SLURM_SUCCESS; -} - -static void _read_last_decay_ran(time_t *last_ran, time_t *last_reset) -{ - int data_allocated, data_read = 0; - uint32_t data_size = 0; - int state_fd; - char *data = NULL, *state_file; - Buf buffer; - - xassert(last_ran); - xassert(last_reset); - - (*last_ran) = 0; - (*last_reset) = 0; - - /* read the file */ - state_file = xstrdup(slurmctld_conf.state_save_location); - xstrcat(state_file, "/priority_last_decay_ran"); - lock_state_files(); - state_fd = open(state_file, O_RDONLY); - if (state_fd < 0) { - info("No last decay (%s) to recover", state_file); - unlock_state_files(); - return; - } else { - data_allocated = BUF_SIZE; - data = xmalloc(data_allocated); - while (1) { - data_read = read(state_fd, &data[data_size], - BUF_SIZE); - if (data_read < 0) { - if (errno == EINTR) - continue; - else { - error("Read error on %s: %m", - state_file); - break; - } - } else if (data_read == 0) /* eof */ - break; - data_size += data_read; - data_allocated += data_read; - xrealloc(data, data_allocated); - } - close(state_fd); - } - xfree(state_file); - unlock_state_files(); - - buffer = create_buf(data, data_size); - safe_unpack_time(last_ran, buffer); - safe_unpack_time(last_reset, buffer); - free_buf(buffer); - if (priority_debug) - info("Last ran decay on jobs at %ld", (long)*last_ran); - - return; - -unpack_error: - error("Incomplete priority last decay file returning"); - free_buf(buffer); - return; - -} - -static int _write_last_decay_ran(time_t last_ran, time_t last_reset) -{ - /* Save high-water mark to avoid buffer growth with copies */ - static int high_buffer_size = BUF_SIZE; - int 
error_code = SLURM_SUCCESS; - int state_fd; - char *old_file, *new_file, *state_file; - Buf buffer; - - if (!strcmp(slurmctld_conf.state_save_location, "/dev/null")) { - error("Can not save priority state information, " - "StateSaveLocation is /dev/null"); - return error_code; - } - - buffer = init_buf(high_buffer_size); - pack_time(last_ran, buffer); - pack_time(last_reset, buffer); - - /* read the file */ - old_file = xstrdup(slurmctld_conf.state_save_location); - xstrcat(old_file, "/priority_last_decay_ran.old"); - state_file = xstrdup(slurmctld_conf.state_save_location); - xstrcat(state_file, "/priority_last_decay_ran"); - new_file = xstrdup(slurmctld_conf.state_save_location); - xstrcat(new_file, "/priority_last_decay_ran.new"); - - lock_state_files(); - state_fd = creat(new_file, 0600); - if (state_fd < 0) { - error("Can't save decay state, create file %s error %m", - new_file); - error_code = errno; - } else { - int pos = 0, nwrite = get_buf_offset(buffer), amount; - char *data = (char *)get_buf_data(buffer); - high_buffer_size = MAX(nwrite, high_buffer_size); - while (nwrite > 0) { - amount = write(state_fd, &data[pos], nwrite); - if ((amount < 0) && (errno != EINTR)) { - error("Error writing file %s, %m", new_file); - error_code = errno; - break; - } - nwrite -= amount; - pos += amount; - } - fsync(state_fd); - close(state_fd); - } - - if (error_code != SLURM_SUCCESS) - (void) unlink(new_file); - else { /* file shuffle */ - (void) unlink(old_file); - if (link(state_file, old_file)) - debug3("unable to create link for %s -> %s: %m", - state_file, old_file); - (void) unlink(state_file); - if (link(new_file, state_file)) - debug3("unable to create link for %s -> %s: %m", - new_file, state_file); - (void) unlink(new_file); - } - xfree(old_file); - xfree(state_file); - xfree(new_file); - - unlock_state_files(); - debug4("done writing time %ld", (long)last_ran); - free_buf(buffer); - - return error_code; -} - - -/* Set the effective usage of a node. 
*/ -static void _set_usage_efctv(slurmdb_association_rec_t *assoc) -{ - long double min_shares_norm; - - if ((assoc->shares_raw == SLURMDB_FS_USE_PARENT) - && assoc->usage->parent_assoc_ptr) { - assoc->usage->shares_norm = - assoc->usage->parent_assoc_ptr->usage->shares_norm; - assoc->usage->usage_norm = - assoc->usage->parent_assoc_ptr->usage->usage_norm; - } - - if (assoc->usage->level_shares) { - min_shares_norm = (long double) MIN_USAGE_FACTOR - * assoc->shares_raw / assoc->usage->level_shares; - if (assoc->usage->usage_norm > min_shares_norm) - assoc->usage->usage_efctv = assoc->usage->usage_norm; - else - assoc->usage->usage_efctv = min_shares_norm; - } else - assoc->usage->usage_efctv = assoc->usage->usage_norm; -} - - -/* This should initially get the childern list from - * assoc_mgr_root_assoc. Since our algorythm goes from top down we - * calculate all the non-user associations now. When a user submits a - * job, that norm_fairshare is calculated. Here we will set the - * usage_efctv to NO_VAL for users to not have to calculate a bunch - * of things that will never be used. - * - * NOTE: acct_mgr_association_lock must be locked before this is called. - */ -static int _set_children_usage_efctv(List childern_list) -{ - slurmdb_association_rec_t *assoc = NULL; - ListIterator itr = NULL; - - if (!childern_list || !list_count(childern_list)) - return SLURM_SUCCESS; - - itr = list_iterator_create(childern_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); - while ((assoc = list_next(itr))) { - if (assoc->user) { - assoc->usage->usage_efctv = (long double)NO_VAL; - continue; - } - priority_p_set_assoc_usage(assoc); - _set_children_usage_efctv(assoc->usage->childern_list); - } - list_iterator_destroy(itr); - return SLURM_SUCCESS; -} - - -/* Distribute the tickets to child nodes recursively. - * - * NOTE: acct_mgr_association_lock must be locked before this is called. 
- */ -static int _distribute_tickets(List childern_list, uint32_t tickets) -{ - ListIterator itr; - slurmdb_association_rec_t *assoc; - double sfsum = 0, fs; - - if (!childern_list || !list_count(childern_list)) - return SLURM_SUCCESS; - - itr = list_iterator_create(childern_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); - while ((assoc = list_next(itr))) { - if (assoc->usage->active_seqno - != assoc_mgr_root_assoc->usage->active_seqno) - continue; - if (fuzzy_equal(assoc->usage->usage_efctv, NO_VAL)) - priority_p_set_assoc_usage(assoc); - fs = priority_p_calc_fs_factor(assoc->usage->usage_efctv, - assoc->usage->shares_norm); - sfsum += assoc->usage->shares_norm * fs; - } - list_iterator_destroy(itr); - - itr = list_iterator_create(childern_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); - while ((assoc = list_next(itr))) { - if (assoc->usage->active_seqno - != assoc_mgr_root_assoc->usage->active_seqno) - continue; - fs = priority_p_calc_fs_factor(assoc->usage->usage_efctv, - assoc->usage->shares_norm); - assoc->usage->tickets = tickets * assoc->usage->shares_norm - * fs / sfsum; - if (priority_debug) { - if (assoc->user) - info("User %s in account %s gets %u tickets", - assoc->user, assoc->acct, - assoc->usage->tickets); - else - info("Account %s gets %u tickets", - assoc->acct, assoc->usage->tickets); - } - if (assoc->user && assoc->usage->tickets > max_tickets) - max_tickets = assoc->usage->tickets; - _distribute_tickets(assoc->usage->childern_list, - assoc->usage->tickets); - } - list_iterator_destroy(itr); - - return SLURM_SUCCESS; -} - - -/* job_ptr should already have the partition priority and such added - * here before had we will be adding to it - */ -static double _get_fairshare_priority( struct job_record *job_ptr) -{ - slurmdb_association_rec_t *job_assoc = - (slurmdb_association_rec_t *)job_ptr->assoc_ptr; - slurmdb_association_rec_t *fs_assoc = NULL; - double priority_fs = 0.0; - assoc_mgr_lock_t locks = { 
READ_LOCK, NO_LOCK, - NO_LOCK, NO_LOCK, NO_LOCK }; - - if (!calc_fairshare) - return 0; - - if (!job_assoc) { - error("Job %u has no association. Unable to " - "compute fairshare.", job_ptr->job_id); - return 0; - } - - fs_assoc = job_assoc; - - assoc_mgr_lock(&locks); - - /* Use values from parent when FairShare=SLURMDB_FS_USE_PARENT */ - while ((fs_assoc->shares_raw == SLURMDB_FS_USE_PARENT) - && fs_assoc->usage->parent_assoc_ptr - && (fs_assoc != assoc_mgr_root_assoc)) { - fs_assoc = fs_assoc->usage->parent_assoc_ptr; - } - - if (fuzzy_equal(fs_assoc->usage->usage_efctv, NO_VAL)) - priority_p_set_assoc_usage(fs_assoc); - - /* Priority is 0 -> 1 */ - if (fs_assoc->usage->active_seqno - == assoc_mgr_root_assoc->usage->active_seqno && max_tickets) - priority_fs = (double) fs_assoc->usage->tickets / max_tickets; - else - priority_fs = 0; - if (priority_debug) { - info("Fairshare priority of job %u for user %s in acct" - " %s is %f", - job_ptr->job_id, job_assoc->user, job_assoc->acct, - priority_fs); - } - - assoc_mgr_unlock(&locks); - - return priority_fs; -} - -static void _get_priority_factors(time_t start_time, struct job_record *job_ptr) -{ - slurmdb_qos_rec_t *qos_ptr = NULL; - - xassert(job_ptr); - - if (!job_ptr->prio_factors) { - job_ptr->prio_factors = - xmalloc(sizeof(priority_factors_object_t)); - } else { - memset(job_ptr->prio_factors, 0, - sizeof(priority_factors_object_t)); - } - - qos_ptr = (slurmdb_qos_rec_t *)job_ptr->qos_ptr; - - if (weight_age) { - uint32_t diff = 0; - time_t use_time; - - if (flags & PRIORITY_FLAGS_ACCRUE_ALWAYS) - use_time = job_ptr->details->submit_time; - else - use_time = job_ptr->details->begin_time; - - /* Only really add an age priority if the use_time is - * past the start_time. 
*/ - if (start_time > use_time) - diff = start_time - use_time; - - if (job_ptr->details->begin_time) { - if (diff < max_age) { - job_ptr->prio_factors->priority_age = - (double)diff / (double)max_age; - } else - job_ptr->prio_factors->priority_age = 1.0; - } else if (flags & PRIORITY_FLAGS_ACCRUE_ALWAYS) { - if (diff < max_age) { - job_ptr->prio_factors->priority_age = - (double)diff / (double)max_age; - } else - job_ptr->prio_factors->priority_age = 1.0; - } - } - - if (job_ptr->assoc_ptr && weight_fs) { - job_ptr->prio_factors->priority_fs = - _get_fairshare_priority(job_ptr); - } - - if (weight_js) { - uint32_t cpu_cnt = 0; - /* On the initial run of this we don't have total_cpus - * so go off the requesting. After the first shot - * total_cpus should be filled in. */ - if (job_ptr->total_cpus) - cpu_cnt = job_ptr->total_cpus; - else if (job_ptr->details - && (job_ptr->details->max_cpus != NO_VAL)) - cpu_cnt = job_ptr->details->max_cpus; - else if (job_ptr->details && job_ptr->details->min_cpus) - cpu_cnt = job_ptr->details->min_cpus; - - if (favor_small) { - job_ptr->prio_factors->priority_js = - (double)(node_record_count - - job_ptr->details->min_nodes) - / (double)node_record_count; - if (cpu_cnt) { - job_ptr->prio_factors->priority_js += - (double)(cluster_cpus - cpu_cnt) - / (double)cluster_cpus; - job_ptr->prio_factors->priority_js /= 2; - } - } else { - job_ptr->prio_factors->priority_js = - (double)job_ptr->details->min_nodes - / (double)node_record_count; - if (cpu_cnt) { - job_ptr->prio_factors->priority_js += - (double)cpu_cnt / (double)cluster_cpus; - job_ptr->prio_factors->priority_js /= 2; - } - } - if (job_ptr->prio_factors->priority_js < .0) - job_ptr->prio_factors->priority_js = 0.0; - else if (job_ptr->prio_factors->priority_js > 1.0) - job_ptr->prio_factors->priority_js = 1.0; - } - - if (job_ptr->part_ptr && job_ptr->part_ptr->priority && weight_part) { - job_ptr->prio_factors->priority_part = - job_ptr->part_ptr->norm_priority; - } - - if 
(qos_ptr && qos_ptr->priority && weight_qos) { - job_ptr->prio_factors->priority_qos = - qos_ptr->usage->norm_priority; - } - - job_ptr->prio_factors->nice = job_ptr->details->nice; -} - -static uint32_t _get_priority_internal(time_t start_time, - struct job_record *job_ptr) -{ - double priority = 0.0; - priority_factors_object_t pre_factors; - - if (job_ptr->direct_set_prio && (job_ptr->priority > 0)) - return job_ptr->priority; - - if (!job_ptr->details) { - error("_get_priority_internal: job %u does not have a " - "details symbol set, can't set priority", - job_ptr->job_id); - return 0; - } - - /* figure out the priority */ - _get_priority_factors(start_time, job_ptr); - memcpy(&pre_factors, job_ptr->prio_factors, - sizeof(priority_factors_object_t)); - - job_ptr->prio_factors->priority_age *= (double)weight_age; - job_ptr->prio_factors->priority_fs *= (double)weight_fs; - job_ptr->prio_factors->priority_js *= (double)weight_js; - job_ptr->prio_factors->priority_part *= (double)weight_part; - job_ptr->prio_factors->priority_qos *= (double)weight_qos; - - priority = job_ptr->prio_factors->priority_age - + job_ptr->prio_factors->priority_fs - + job_ptr->prio_factors->priority_js - + job_ptr->prio_factors->priority_part - + job_ptr->prio_factors->priority_qos - - (double)(job_ptr->prio_factors->nice - NICE_OFFSET); - - /* Priority 0 is reserved for held jobs */ - if (priority < 1) - priority = 1; - - if (priority_debug) { - info("Weighted Age priority is %f * %u = %.2f", - pre_factors.priority_age, weight_age, - job_ptr->prio_factors->priority_age); - info("Weighted Fairshare priority is %f * %u = %.2f", - pre_factors.priority_fs, weight_fs, - job_ptr->prio_factors->priority_fs); - info("Weighted JobSize priority is %f * %u = %.2f", - pre_factors.priority_js, weight_js, - job_ptr->prio_factors->priority_js); - info("Weighted Partition priority is %f * %u = %.2f", - pre_factors.priority_part, weight_part, - job_ptr->prio_factors->priority_part); - info("Weighted QOS 
priority is %f * %u = %.2f", - pre_factors.priority_qos, weight_qos, - job_ptr->prio_factors->priority_qos); - info("Job %u priority: %.2f + %.2f + %.2f + %.2f + %.2f - %d " - "= %.2f", - job_ptr->job_id, job_ptr->prio_factors->priority_age, - job_ptr->prio_factors->priority_fs, - job_ptr->prio_factors->priority_js, - job_ptr->prio_factors->priority_part, - job_ptr->prio_factors->priority_qos, - (job_ptr->prio_factors->nice - NICE_OFFSET), - priority); - } - return (uint32_t)priority; -} - - -/* Mark an association and its parents as active (i.e. it may be given - * tickets) during the current scheduling cycle. The association - * manager lock should be held on entry. */ -static bool _mark_assoc_active(struct job_record *job_ptr) -{ - slurmdb_association_rec_t *job_assoc = - (slurmdb_association_rec_t *)job_ptr->assoc_ptr, - *assoc; - - if (!job_assoc) { - error("Job %u has no association. Unable to " - "mark assiciation as active.", job_ptr->job_id); - return false; - } - - for (assoc = job_assoc; assoc != assoc_mgr_root_assoc; - assoc = assoc->usage->parent_assoc_ptr) { - if (assoc->usage->active_seqno - == assoc_mgr_root_assoc->usage->active_seqno) - break; - assoc->usage->active_seqno - = assoc_mgr_root_assoc->usage->active_seqno; - } - return true; -} - - -/* based upon the last reset time, compute when the next reset should be */ -static time_t _next_reset(uint16_t reset_period, time_t last_reset) -{ - struct tm last_tm; - time_t tmp_time, now = time(NULL); - - if (localtime_r(&last_reset, &last_tm) == NULL) - return (time_t) 0; - - last_tm.tm_sec = 0; - last_tm.tm_min = 0; - last_tm.tm_hour = 0; -/* last_tm.tm_wday = 0 ignored */ -/* last_tm.tm_yday = 0; ignored */ - last_tm.tm_isdst = -1; - switch (reset_period) { - case PRIORITY_RESET_DAILY: - tmp_time = mktime(&last_tm); - tmp_time += SECS_PER_DAY; - while ((tmp_time + SECS_PER_DAY) < now) - tmp_time += SECS_PER_DAY; - return tmp_time; - case PRIORITY_RESET_WEEKLY: - tmp_time = mktime(&last_tm); - 
tmp_time += (SECS_PER_DAY * (7 - last_tm.tm_wday)); - while ((tmp_time + SECS_PER_WEEK) < now) - tmp_time += SECS_PER_WEEK; - return tmp_time; - case PRIORITY_RESET_MONTHLY: - last_tm.tm_mday = 1; - if (last_tm.tm_mon < 11) - last_tm.tm_mon++; - else { - last_tm.tm_mon = 0; - last_tm.tm_year++; - } - break; - case PRIORITY_RESET_QUARTERLY: - last_tm.tm_mday = 1; - if (last_tm.tm_mon < 3) - last_tm.tm_mon = 3; - else if (last_tm.tm_mon < 6) - last_tm.tm_mon = 6; - else if (last_tm.tm_mon < 9) - last_tm.tm_mon = 9; - else { - last_tm.tm_mon = 0; - last_tm.tm_year++; - } - break; - case PRIORITY_RESET_YEARLY: - last_tm.tm_mday = 1; - last_tm.tm_mon = 0; - last_tm.tm_year++; - break; - default: - return (time_t) 0; - } - return mktime(&last_tm); -} - -/* - * Remove previously used time from qos and assocs grp_used_cpu_run_secs. - * - * When restarting slurmctld acct_policy_job_begin() is called for all - * running jobs. There every jobs total requested cputime (total_cpus * - * time_limit) is added to grp_used_cpu_run_secs of assocs and qos. - * - * This function will subtract all cputime that was used until the - * decay thread last ran. This kludge is necessary as the decay thread - * last_ran variable can't be accessed from acct_policy_job_begin(). 
- */ -static void _init_grp_used_cpu_run_secs(time_t last_ran) -{ - struct job_record *job_ptr = NULL; - ListIterator itr; - assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, - WRITE_LOCK, NO_LOCK, NO_LOCK }; - slurmctld_lock_t job_read_lock = - { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - uint64_t delta; - slurmdb_qos_rec_t *qos; - slurmdb_association_rec_t *assoc; - - if (priority_debug) - info("Initializing grp_used_cpu_run_secs"); - - if (!(job_list && list_count(job_list))) - return; - - lock_slurmctld(job_read_lock); - itr = list_iterator_create(job_list); - if (itr == NULL) - fatal("list_iterator_create: malloc failure"); - - assoc_mgr_lock(&locks); - while ((job_ptr = list_next(itr))) { - if (priority_debug) - debug2("job: %u",job_ptr->job_id); - qos = NULL; - assoc = NULL; - delta = 0; - - if (!IS_JOB_RUNNING(job_ptr)) - continue; - - if (job_ptr->start_time > last_ran) - continue; - - delta = job_ptr->total_cpus * (last_ran - job_ptr->start_time); - - qos = (slurmdb_qos_rec_t *) job_ptr->qos_ptr; - assoc = (slurmdb_association_rec_t *) job_ptr->assoc_ptr; - - if (qos) { - if (priority_debug) - info("Subtracting %"PRIu64" from qos " - "%u grp_used_cpu_run_secs " - "%"PRIu64" = %"PRIu64"", - delta, - qos->id, - qos->usage->grp_used_cpu_run_secs, - qos->usage->grp_used_cpu_run_secs - - delta); - qos->usage->grp_used_cpu_run_secs -= delta; - } - while (assoc) { - if (priority_debug) - info("Subtracting %"PRIu64" from assoc %u " - "grp_used_cpu_run_secs " - "%"PRIu64" = %"PRIu64"", - delta, - assoc->id, - assoc->usage->grp_used_cpu_run_secs, - assoc->usage->grp_used_cpu_run_secs - - delta); - assoc->usage->grp_used_cpu_run_secs -= delta; - assoc = assoc->usage->parent_assoc_ptr; - } - } - assoc_mgr_unlock(&locks); - list_iterator_destroy(itr); - unlock_slurmctld(job_read_lock); -} - -/* If the job is running then apply decay to the job. - * - * Return 0 if we don't need to process the job any further, 1 if - * futher processing is needed. 
- */ -static int _apply_new_usage(struct job_record *job_ptr, double decay_factor, - time_t start_period, time_t end_period) -{ - slurmdb_qos_rec_t *qos; - slurmdb_association_rec_t *assoc; - double run_delta = 0.0, run_decay = 0.0, real_decay = 0.0; - uint64_t cpu_run_delta = 0; - uint64_t job_time_limit_ends = 0; - assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, - WRITE_LOCK, NO_LOCK, NO_LOCK }; - assoc_mgr_lock_t qos_read_lock = { NO_LOCK, NO_LOCK, - READ_LOCK, NO_LOCK, NO_LOCK }; - - /* If usage_factor is 0 just skip this since we don't add the usage. */ - assoc_mgr_lock(&qos_read_lock); - qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr; - if (qos && !qos->usage_factor) { - assoc_mgr_unlock(&qos_read_lock); - return 0; - } - assoc_mgr_unlock(&qos_read_lock); - - if (job_ptr->start_time > start_period) - start_period = job_ptr->start_time; - - if (job_ptr->end_time - && (end_period > job_ptr->end_time)) - end_period = job_ptr->end_time; - - run_delta = difftime(end_period, start_period); - - /* job already has been accounted for, go to next */ - if (run_delta < 1) - return 0; - - /* cpu_run_delta will is used to - * decrease qos and assocs - * grp_used_cpu_run_secs values. When - * a job is started only seconds until - * start_time+time_limit is added, so - * for jobs running over their - * timelimit we should only subtract - * the used time until the time limit. 
*/ - job_time_limit_ends = - (uint64_t)job_ptr->start_time + - (uint64_t)job_ptr->time_limit * 60; - - if ((uint64_t)start_period >= job_time_limit_ends) - cpu_run_delta = 0; - else if (end_period > job_time_limit_ends) - cpu_run_delta = job_ptr->total_cpus * - (job_time_limit_ends - (uint64_t)start_period); - else - cpu_run_delta = job_ptr->total_cpus * run_delta; - - if (priority_debug) { - info("job %u ran for %g seconds on %u cpus", - job_ptr->job_id, run_delta, job_ptr->total_cpus); - } - - /* get the time in decayed fashion */ - run_decay = run_delta * pow(decay_factor, run_delta); - - real_decay = run_decay * (double)job_ptr->total_cpus; - - assoc_mgr_lock(&locks); - /* Just to make sure we don't make a window where the qos_ptr could of - * changed make sure we get it again here. */ - qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr; - assoc = (slurmdb_association_rec_t *)job_ptr->assoc_ptr; - - /* now apply the usage factor for this qos */ - if (qos) { - if (qos->usage_factor >= 0) { - real_decay *= qos->usage_factor; - run_decay *= qos->usage_factor; - } - qos->usage->grp_used_wall += run_decay; - qos->usage->usage_raw += (long double)real_decay; - if (qos->usage->grp_used_cpu_run_secs >= cpu_run_delta) { - if (priority_debug) - info("grp_used_cpu_run_secs is %"PRIu64", " - "will subtract %"PRIu64"", - qos->usage->grp_used_cpu_run_secs, - cpu_run_delta); - qos->usage->grp_used_cpu_run_secs -= cpu_run_delta; - } else { - if (priority_debug) - info("jobid %u, qos %s: setting " - "grp_used_cpu_run_secs " - "to 0 because %"PRIu64" < %"PRIu64"", - job_ptr->job_id, qos->name, - qos->usage->grp_used_cpu_run_secs, - cpu_run_delta); - qos->usage->grp_used_cpu_run_secs = 0; - } - } - - /* We want to do this all the way up - * to and including root. This way we - * can keep track of how much usage - * has occured on the entire system - * and use that to normalize against. 
*/ - while (assoc) { - if (assoc->usage->grp_used_cpu_run_secs >= cpu_run_delta) { - if (priority_debug) - info("grp_used_cpu_run_secs is %"PRIu64", " - "will subtract %"PRIu64"", - assoc->usage->grp_used_cpu_run_secs, - cpu_run_delta); - assoc->usage->grp_used_cpu_run_secs -= cpu_run_delta; - } else { - if (priority_debug) - info("jobid %u, assoc %u: setting " - "grp_used_cpu_run_secs " - "to 0 because %"PRIu64" < %"PRIu64"", - job_ptr->job_id, assoc->id, - assoc->usage->grp_used_cpu_run_secs, - cpu_run_delta); - assoc->usage->grp_used_cpu_run_secs = 0; - } - - assoc->usage->grp_used_wall += run_decay; - assoc->usage->usage_raw += (long double)real_decay; - if (priority_debug) - info("adding %f new usage to assoc %u (user='%s' " - "acct='%s') raw usage is now %Lf. Group wall " - "added %f making it %f. GrpCPURunMins is " - "%"PRIu64"", - real_decay, assoc->id, - assoc->user, assoc->acct, - assoc->usage->usage_raw, - run_decay, - assoc->usage->grp_used_wall, - assoc->usage->grp_used_cpu_run_secs/60); - assoc = assoc->usage->parent_assoc_ptr; - } - assoc_mgr_unlock(&locks); - return 1; -} - -static void *_decay_thread(void *no_data) -{ - struct job_record *job_ptr = NULL; - ListIterator itr; - time_t start_time = time(NULL); - time_t last_ran = 0; - time_t last_reset = 0, next_reset = 0; - uint32_t calc_period = slurm_get_priority_calc_period(); - double decay_hl = (double)slurm_get_priority_decay_hl(); - double decay_factor = 1; - uint16_t reset_period = slurm_get_priority_reset_period(); - - /* Write lock on jobs, read lock on nodes and partitions */ - slurmctld_lock_t job_write_lock = - { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; - slurmctld_lock_t job_read_lock = - { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, - NO_LOCK, NO_LOCK, NO_LOCK }; - - - if (decay_hl > 0) - decay_factor = 1 - (0.693 / decay_hl); - - (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - (void) 
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - _read_last_decay_ran(&last_ran, &last_reset); - if (last_reset == 0) - last_reset = start_time; - - _init_grp_used_cpu_run_secs(last_ran); - - while (1) { - time_t now = start_time; - double run_delta = 0.0, real_decay = 0.0; - - slurm_mutex_lock(&decay_lock); - running_decay = 1; - - /* If reconfig is called handle all that happens - * outside of the loop here */ - if (reconfig) { - /* if decay_hl is 0 or less that means no - * decay is to be had. This also means we - * flush the used time at a certain time set - * by PriorityUsageResetPeriod in the slurm.conf */ - calc_period = slurm_get_priority_calc_period(); - reset_period = slurm_get_priority_reset_period(); - next_reset = 0; - decay_hl = (double)slurm_get_priority_decay_hl(); - if (decay_hl > 0) - decay_factor = 1 - (0.693 / decay_hl); - else - decay_factor = 1; - - reconfig = 0; - } - - /* this needs to be done right away so as to - * incorporate it into the decay loop. 
- */ - switch(reset_period) { - case PRIORITY_RESET_NONE: - break; - case PRIORITY_RESET_NOW: /* do once */ - _reset_usage(); - reset_period = PRIORITY_RESET_NONE; - last_reset = now; - break; - case PRIORITY_RESET_DAILY: - case PRIORITY_RESET_WEEKLY: - case PRIORITY_RESET_MONTHLY: - case PRIORITY_RESET_QUARTERLY: - case PRIORITY_RESET_YEARLY: - if (next_reset == 0) { - next_reset = _next_reset(reset_period, - last_reset); - } - if (now >= next_reset) { - _reset_usage(); - last_reset = next_reset; - next_reset = _next_reset(reset_period, - last_reset); - } - } - - /* now calculate all the normalized usage here */ - assoc_mgr_lock(&locks); - _set_children_usage_efctv( - assoc_mgr_root_assoc->usage->childern_list); - assoc_mgr_unlock(&locks); - - if (!last_ran) - goto calc_tickets; - else - run_delta = difftime(start_time, last_ran); - - if (run_delta <= 0) - goto calc_tickets; - - real_decay = pow(decay_factor, run_delta); - - if (priority_debug) - info("Decay factor over %g seconds goes " - "from %.15f -> %.15f", - run_delta, decay_factor, real_decay); - - /* first apply decay to used time */ - if (_apply_decay(real_decay) != SLURM_SUCCESS) { - error("problem applying decay"); - running_decay = 0; - slurm_mutex_unlock(&decay_lock); - break; - } - - - /* Multifactor2 core algo 1/3. Iterate through all - * jobs, mark parent associations with the current - * sequence id, so that we know which - * associations/users are active. At the same time as - * we're looping through all the jobs anyway, apply - * the new usage of running jobs too. - */ - - calc_tickets: - lock_slurmctld(job_read_lock); - assoc_mgr_lock(&locks); - /* seqno 0 is a special invalid value. 
*/ - assoc_mgr_root_assoc->usage->active_seqno++; - if (!assoc_mgr_root_assoc->usage->active_seqno) - assoc_mgr_root_assoc->usage->active_seqno++; - assoc_mgr_unlock(&locks); - itr = list_iterator_create(job_list); - while ((job_ptr = list_next(itr))) { - /* apply new usage */ - if (!IS_JOB_PENDING(job_ptr) && - job_ptr->start_time && job_ptr->assoc_ptr - && last_ran) - _apply_new_usage(job_ptr, decay_factor, - last_ran, start_time); - - if (IS_JOB_PENDING(job_ptr) && job_ptr->assoc_ptr) { - assoc_mgr_lock(&locks); - _mark_assoc_active(job_ptr); - assoc_mgr_unlock(&locks); - } - } - list_iterator_destroy(itr); - unlock_slurmctld(job_read_lock); - - /* Multifactor2 core algo 2/3. Start from the root, - * distribute tickets to active child associations - * proportional to the fair share (s*F). We start with - * UINT32_MAX tickets at the root. - */ - assoc_mgr_lock(&locks); - max_tickets = 0; - assoc_mgr_root_assoc->usage->tickets = (uint32_t) -1; - _distribute_tickets (assoc_mgr_root_assoc->usage->childern_list, - (uint32_t) -1); - assoc_mgr_unlock(&locks); - - /* Multifactor2 core algo 3/3. Iterate through the job - * list again, give priorities proportional to the - * maximum number of tickets given to any user. - */ - lock_slurmctld(job_write_lock); - itr = list_iterator_create(job_list); - while ((job_ptr = list_next(itr))) { - /* - * Priority 0 is reserved for held jobs. Also skip - * priority calculation for non-pending jobs. - */ - if ((job_ptr->priority == 0) - || !IS_JOB_PENDING(job_ptr)) - continue; - - job_ptr->priority = - _get_priority_internal(start_time, job_ptr); - last_job_update = time(NULL); - debug2("priority for job %u is now %u", - job_ptr->job_id, job_ptr->priority); - } - list_iterator_destroy(itr); - unlock_slurmctld(job_write_lock); - - last_ran = start_time; - - _write_last_decay_ran(last_ran, last_reset); - - running_decay = 0; - slurm_mutex_unlock(&decay_lock); - - /* Sleep until the next time. 
*/ - now = time(NULL); - double elapsed = difftime(now, start_time); - if (elapsed < calc_period) { - sleep(calc_period - elapsed); - start_time = time(NULL); - } else - start_time = now; - /* repeat ;) */ - } - return NULL; -} - -/* Selects the specific jobs that the user wanted to see - * Requests that include job id(s) and user id(s) must match both to be passed. - * Returns 1 if job should be omitted */ -static int _filter_job(struct job_record *job_ptr, List req_job_list, - List req_user_list) -{ - int filter = 0; - ListIterator iterator; - uint32_t *job_id; - uint32_t *user_id; - - if (req_job_list) { - filter = 1; - iterator = list_iterator_create(req_job_list); - while ((job_id = list_next(iterator))) { - if (*job_id == job_ptr->job_id) { - filter = 0; - break; - } - } - list_iterator_destroy(iterator); - if (filter == 1) { - return 1; - } - } - - if (req_user_list) { - filter = 1; - iterator = list_iterator_create(req_user_list); - while ((user_id = list_next(iterator))) { - if (*user_id == job_ptr->user_id) { - filter = 0; - break; - } - } - list_iterator_destroy(iterator); - if (filter == 1) - return 1; - } - - return filter; -} - - -static void _internal_setup(void) -{ - if (slurm_get_debug_flags() & DEBUG_FLAG_PRIO) - priority_debug = 1; - else - priority_debug = 0; - - favor_small = slurm_get_priority_favor_small(); - - max_age = slurm_get_priority_max_age(); - weight_age = slurm_get_priority_weight_age(); - weight_fs = slurm_get_priority_weight_fairshare(); - weight_js = slurm_get_priority_weight_job_size(); - weight_part = slurm_get_priority_weight_partition(); - weight_qos = slurm_get_priority_weight_qos(); - flags = slurmctld_conf.priority_flags; - - if (priority_debug) { - info("priority: Max Age is %u", max_age); - info("priority: Weight Age is %u", weight_age); - info("priority: Weight Fairshare is %u", weight_fs); - info("priority: Weight JobSize is %u", weight_js); - info("priority: Weight Part is %u", weight_part); - info("priority: Weight 
QOS is %u", weight_qos); - info("priority: Flags is %u", flags); - } -} - -/* - * init() is called when the plugin is loaded, before any other functions - * are called. Put global initialization here. - */ -int init ( void ) -{ - pthread_attr_t thread_attr; - char *temp = NULL; - - /* This means we aren't running from the controller so skip setup. */ - if (cluster_cpus == NO_VAL) - return SLURM_SUCCESS; - - _internal_setup(); - - /* Check to see if we are running a supported accounting plugin */ - temp = slurm_get_accounting_storage_type(); - if (strcasecmp(temp, "accounting_storage/slurmdbd") - && strcasecmp(temp, "accounting_storage/mysql")) { - error("You are not running a supported " - "accounting_storage plugin\n(%s).\n" - "Fairshare can only be calculated with either " - "'accounting_storage/slurmdbd' " - "or 'accounting_storage/mysql' enabled. " - "If you want multifactor priority without fairshare " - "ignore this message.", - temp); - calc_fairshare = 0; - weight_fs = 0; - } else if (assoc_mgr_root_assoc) { - if (!cluster_cpus) - fatal("We need to have a cluster cpu count " - "before we can init the priority/multifactor " - "plugin"); - assoc_mgr_root_assoc->usage->usage_efctv = 1.0; - slurm_attr_init(&thread_attr); - if (pthread_create(&decay_handler_thread, &thread_attr, - _decay_thread, NULL)) - fatal("pthread_create error %m"); - slurm_attr_destroy(&thread_attr); - } else { - if (weight_fs) - fatal("It appears you don't have any association " - "data from your database. " - "The priority/multifactor plugin requires " - "this information to run correctly. 
Please " - "check your database connection and try again."); - - calc_fairshare = 0; - } - - xfree(temp); - - debug("%s loaded", plugin_name); - return SLURM_SUCCESS; -} - -int fini ( void ) -{ - /* Daemon termination handled here */ - if (running_decay) - debug("Waiting for decay thread to finish."); - - slurm_mutex_lock(&decay_lock); - - /* cancel the decay thread and then join it */ - if (decay_handler_thread) { - pthread_cancel(decay_handler_thread); - pthread_join(decay_handler_thread, NULL); - } - - slurm_mutex_unlock(&decay_lock); - - return SLURM_SUCCESS; -} - -extern uint32_t priority_p_set(uint32_t last_prio, struct job_record *job_ptr) -{ - uint32_t priority = _get_priority_internal(time(NULL), job_ptr); - - debug2("initial priority for job %u is %u", job_ptr->job_id, priority); - - return priority; -} - -extern void priority_p_reconfig(void) -{ - reconfig = 1; - _internal_setup(); - debug2("%s reconfigured", plugin_name); - - return; -} - -extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc) -{ - char *child; - char *child_str; - - xassert(assoc_mgr_root_assoc); - xassert(assoc); - xassert(assoc->usage); - xassert(assoc->usage->parent_assoc_ptr); - - if (assoc->user) { - child = "user"; - child_str = assoc->user; - } else { - child = "account"; - child_str = assoc->acct; - } - - if (assoc_mgr_root_assoc->usage->usage_raw) - assoc->usage->usage_norm = assoc->usage->usage_raw - / assoc_mgr_root_assoc->usage->usage_raw; - else - /* This should only happen when no usage has occured - at all so no big deal, the other usage should be 0 - as well here. 
- */ - assoc->usage->usage_norm = 0; - - if (priority_debug) - info("Normalized usage for %s %s off %s %Lf / %Lf = %Lf", - child, child_str, assoc->usage->parent_assoc_ptr->acct, - assoc->usage->usage_raw, - assoc_mgr_root_assoc->usage->usage_raw, - assoc->usage->usage_norm); - /* This is needed in case someone changes the half-life on the - * fly and now we have used more time than is available under - * the new config */ - if (assoc->usage->usage_norm > 1.0) - assoc->usage->usage_norm = 1.0; - - if (assoc->usage->parent_assoc_ptr == assoc_mgr_root_assoc) { - assoc->usage->usage_efctv = assoc->usage->usage_norm; - if (priority_debug) - info("Effective usage for %s %s off %s %Lf %Lf", - child, child_str, - assoc->usage->parent_assoc_ptr->acct, - assoc->usage->usage_efctv, - assoc->usage->usage_norm); - } else { - _set_usage_efctv(assoc); - if (priority_debug) { - info("Effective usage for %s %s off %s = %Lf", - child, child_str, - assoc->usage->parent_assoc_ptr->acct, - assoc->usage->usage_efctv); - } - } -} - -extern double priority_p_calc_fs_factor(long double usage_efctv, - long double shares_norm) -{ - double priority_fs; - - if (fuzzy_equal(usage_efctv, NO_VAL)) - return 0.0; - - if (shares_norm > 0.0) { - if (usage_efctv < MIN_USAGE_FACTOR * shares_norm) - usage_efctv = MIN_USAGE_FACTOR * shares_norm; - priority_fs = shares_norm / usage_efctv; - } - else - priority_fs = 0.0; - - return priority_fs; -} - -extern List priority_p_get_priority_factors_list( - priority_factors_request_msg_t *req_msg, uid_t uid) -{ - List req_job_list; - List req_user_list; - List ret_list = NULL; - ListIterator itr; - priority_factors_object_t *obj = NULL; - struct job_record *job_ptr = NULL; - time_t start_time = time(NULL); - - xassert(req_msg); - req_job_list = req_msg->job_id_list; - req_user_list = req_msg->uid_list; - - /* Read lock on jobs, nodes, and partitions */ - slurmctld_lock_t job_read_lock = - { NO_LOCK, READ_LOCK, READ_LOCK, READ_LOCK }; - - if (job_list && 
list_count(job_list)) { - ret_list = list_create(slurm_destroy_priority_factors_object); - lock_slurmctld(job_read_lock); - itr = list_iterator_create(job_list); - if (itr == NULL) - fatal("list_iterator_create: malloc failure"); - while ((job_ptr = list_next(itr))) { - /* - * We are only looking for pending jobs - */ - if (!IS_JOB_PENDING(job_ptr)) - continue; - - /* - * This means the job is not eligible yet - */ - if (!job_ptr->details->begin_time - || (job_ptr->details->begin_time > start_time)) - continue; - - /* - * 0 means the job is held - */ - if (job_ptr->priority == 0) - continue; - - /* - * Priority has been set elsewhere (e.g. by SlurmUser) - */ - if (job_ptr->direct_set_prio) - continue; - - if (_filter_job(job_ptr, req_job_list, req_user_list)) - continue; - - if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) - && (job_ptr->user_id != uid) - && !validate_operator(uid) - && !assoc_mgr_is_user_acct_coord( - acct_db_conn, uid, - job_ptr->account)) - continue; - - obj = xmalloc(sizeof(priority_factors_object_t)); - memcpy(obj, job_ptr->prio_factors, - sizeof(priority_factors_object_t)); - obj->job_id = job_ptr->job_id; - obj->user_id = job_ptr->user_id; - list_append(ret_list, obj); - } - list_iterator_destroy(itr); - unlock_slurmctld(job_read_lock); - if (!list_count(ret_list)) { - list_destroy(ret_list); - ret_list = NULL; - } - } - - return ret_list; -} diff --git a/src/plugins/proctrack/Makefile.in b/src/plugins/proctrack/Makefile.in index 9787f8d50add2061dd7b2a9eb9745132e7e77a34..dc52c9d8c7ed82c7f3cacda0451a0b3822c5f74d 100644 --- a/src/plugins/proctrack/Makefile.in +++ b/src/plugins/proctrack/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/proctrack DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 
\ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ 
+HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/aix/Makefile.in b/src/plugins/proctrack/aix/Makefile.in index 6d56256a50c224152b968462be4b86069166bc97..a94739dc19ffc60312139fadf4e6b02a6c7f0216 100644 --- a/src/plugins/proctrack/aix/Makefile.in +++ b/src/plugins/proctrack/aix/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/proctrack/aix DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 
\ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = 
@REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/aix/proctrack_aix.c b/src/plugins/proctrack/aix/proctrack_aix.c index b186dc5a255c491e7b295dd0a5b1a50b583b6588..314fe38a65d924ee6dfdf4446c02471e001ec76c 100644 --- a/src/plugins/proctrack/aix/proctrack_aix.c +++ b/src/plugins/proctrack/aix/proctrack_aix.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/proctrack/cgroup/Makefile.in b/src/plugins/proctrack/cgroup/Makefile.in index ac8fd2b307fa4f9dcb6d45fec3f12258d18373e4..9daac9a8b1a6bc49de751600ed518290e93b2f29 100644 --- a/src/plugins/proctrack/cgroup/Makefile.in +++ b/src/plugins/proctrack/cgroup/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/proctrack/cgroup DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 
+300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/cgroup/proctrack_cgroup.c b/src/plugins/proctrack/cgroup/proctrack_cgroup.c index fc58a95aee5a9ca403a7643cdadcfc171e200db4..d2c3924eb10b76c8fb02a50c169c66d97f293c2c 100644 --- a/src/plugins/proctrack/cgroup/proctrack_cgroup.c +++ b/src/plugins/proctrack/cgroup/proctrack_cgroup.c @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -109,6 +109,14 @@ const uint32_t plugin_version = 91; #define PATH_MAX 256 #endif +/* One slurmstepd could be in the process of creating cgroups while another + * slurmstepd is simultaneoulsy deleting cgroups for another step for that + * same user. MAX_CGROUP_RETRY defines how many times that we retry creating + * the user and job cgroup on an error of ENOENT. 
*/ +#ifndef MAX_CGROUP_RETRY +#define MAX_CGROUP_RETRY 8 +#endif + static slurm_cgroup_conf_t slurm_cgroup_conf; static char user_cgroup_path[PATH_MAX]; @@ -143,6 +151,7 @@ int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) { /* we do it here as we do not have access to the conf structure */ /* in libslurm (src/common/xcgroup.c) */ + int retry_count = 0; /* See MAX_CGROUP_RETRY description above */ xcgroup_t slurm_cg; char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend); #ifdef MULTIPLE_SLURMD @@ -216,11 +225,6 @@ int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) getuid(), getgid()) != XCGROUP_SUCCESS) { return SLURM_ERROR; } - if (xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) { - xcgroup_destroy(&user_freezer_cg); - - return SLURM_ERROR; - } /* create job cgroup in the freezer ns (it could already exist) */ if (xcgroup_create(&freezer_ns, &job_freezer_cg, @@ -229,11 +233,6 @@ int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) xcgroup_destroy(&user_freezer_cg); return SLURM_ERROR; } - if (xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) { - xcgroup_destroy(&user_freezer_cg); - xcgroup_destroy(&job_freezer_cg); - return SLURM_ERROR; - } /* create step cgroup in the freezer ns (it should not exists) */ if (xcgroup_create(&freezer_ns, &step_freezer_cg, @@ -243,7 +242,12 @@ int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) xcgroup_destroy(&job_freezer_cg); return SLURM_ERROR; } - if (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS) { + +retry: if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) || + (xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) || + (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) { + if ((errno == ENOENT) && (++retry_count <= MAX_CGROUP_RETRY)) + goto retry; xcgroup_destroy(&user_freezer_cg); xcgroup_destroy(&job_freezer_cg); xcgroup_destroy(&step_freezer_cg); diff 
--git a/src/plugins/proctrack/linuxproc/Makefile.in b/src/plugins/proctrack/linuxproc/Makefile.in index 3f4152dec9c183da7e9ef9721c098d321d09ff3a..d751351c9807dadc475e733fcf15cde6ae167e2c 100644 --- a/src/plugins/proctrack/linuxproc/Makefile.in +++ b/src/plugins/proctrack/linuxproc/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/proctrack/linuxproc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ 
+FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/linuxproc/kill_tree.c b/src/plugins/proctrack/linuxproc/kill_tree.c index ce7e67ba1fede7e942a61f392c5071dd3be69393..5b26e4cadd957ddd6fa43a0f5b1989ae02ba2ce7 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.c +++ b/src/plugins/proctrack/linuxproc/kill_tree.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/proctrack/linuxproc/kill_tree.h b/src/plugins/proctrack/linuxproc/kill_tree.h index cf8103a7b0f87b0bdca3f037ff80db80a2234183..20737a42ec12e0de57ad0e598ff89001eae3af6a 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.h +++ b/src/plugins/proctrack/linuxproc/kill_tree.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c index a98f5e1d5a8e5d31b9bad7c3598fc7d2db14d4f7..d7824e6918a979ec184c316a2f4934ce04c2bd0a 100644 --- a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c +++ b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/proctrack/lua/Makefile.in b/src/plugins/proctrack/lua/Makefile.in index 5e615c4e271af9a36213e64a387a0843ca019d09..ea5a4ef2e78c618de282a937ff9bbab76a9909f4 100644 --- a/src/plugins/proctrack/lua/Makefile.in +++ b/src/plugins/proctrack/lua/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/proctrack/lua DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/lua/proctrack_lua.c b/src/plugins/proctrack/lua/proctrack_lua.c index c74710d4c7cba0e5b960014df5f5370bf3b81b13..46f6f133ab49d8138e018aee97372b9253c0e357 100644 --- a/src/plugins/proctrack/lua/proctrack_lua.c +++ b/src/plugins/proctrack/lua/proctrack_lua.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -262,7 +262,7 @@ int init (void) */ rc = (int) lua_tonumber (L, -1); lua_pop (L, 1); - if(rc != SLURM_SUCCESS) + if (rc != SLURM_SUCCESS) return rc; /* diff --git a/src/plugins/proctrack/pgid/Makefile.in b/src/plugins/proctrack/pgid/Makefile.in index afc6734a7df9f5cae832380b30b6265e30cb20e0..9fc32ca6c4bb818cffc98712c3931c016000b20a 100644 --- a/src/plugins/proctrack/pgid/Makefile.in +++ b/src/plugins/proctrack/pgid/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/proctrack/pgid DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = 
@CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/pgid/proctrack_pgid.c b/src/plugins/proctrack/pgid/proctrack_pgid.c index 5b1d8ea7e9d395ffe9e558810ed5de5de8cbb676..013f4c840e3aa42e778e4c544e77d6ea42d4a1c3 100644 --- 
a/src/plugins/proctrack/pgid/proctrack_pgid.c +++ b/src/plugins/proctrack/pgid/proctrack_pgid.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/proctrack/sgi_job/Makefile.in b/src/plugins/proctrack/sgi_job/Makefile.in index 9d90ee8221dd461d49811cd84923243824c0952e..7fcc9e7c14d3fb74ac016503e15cab0c2b209c19 100644 --- a/src/plugins/proctrack/sgi_job/Makefile.in +++ b/src/plugins/proctrack/sgi_job/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/proctrack/sgi_job DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ 
$(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c 
b/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c index 6106f7b5f482a5774f3c8d0671b3533124180258..6000fc2eb7cdaa560089956816ca37aee943729d 100644 --- a/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c +++ b/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -209,6 +209,11 @@ int slurm_container_plugin_create (slurmd_job_t *job) return SLURM_SUCCESS; } +/* NOTE: This function is called after slurmstepd spawns all user tasks. + * Since the slurmstepd was placed in the job container when the container + * was created and all of its spawned tasks are placed into the container + * when forked, all we need to do is remove the slurmstepd from the container + * (once) at this time. 
*/ int slurm_container_plugin_add (slurmd_job_t *job, pid_t pid) { if (job->cont_id == (uint64_t) -1) { diff --git a/src/plugins/sched/Makefile.in b/src/plugins/sched/Makefile.in index b6920c3b7cabf06f5039c5e966a8fd6d8eb519c8..66bc068b7e271786d789487e2918a16f3ce18714 100644 --- a/src/plugins/sched/Makefile.in +++ b/src/plugins/sched/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/sched DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/sched/backfill/Makefile.in b/src/plugins/sched/backfill/Makefile.in index 46ffe8ea9485aad763987d65bfc7a3a2370a47ae..e5375a3d3e312b221aa99d4436898660e5d95dd0 100644 --- a/src/plugins/sched/backfill/Makefile.in +++ b/src/plugins/sched/backfill/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/sched/backfill DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + 
$(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ 
+HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 63b7cd2dfffe99ad43f6a0ded9644c9cd5d9c113..84a4e97c83d30fab14a7b1e88eaa7fb956ab4461 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -21,7 +21,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -64,6 +64,7 @@ #include "src/common/macros.h" #include "src/common/node_select.h" #include "src/common/parse_time.h" +#include "src/common/read_config.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" @@ -125,7 +126,9 @@ static int backfill_interval = BACKFILL_INTERVAL; static int backfill_resolution = BACKFILL_RESOLUTION; static int backfill_window = BACKFILL_WINDOW; static int max_backfill_job_cnt = 50; +static int max_backfill_job_per_part = 0; static int max_backfill_job_per_user = 0; +static bool backfill_continue = false; /*********************** local functions *********************/ static void _add_reservation(uint32_t start_time, uint32_t end_reserve, @@ -149,6 +152,35 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, bitstr_t *exc_core_bitmap); +/* Log resources to be allocated to a pending job */ +static void _dump_job_sched(struct job_record *job_ptr, time_t end_time, + bitstr_t *avail_bitmap) +{ + char begin_buf[32], end_buf[32], *node_list; + + slurm_make_time_str(&job_ptr->start_time, begin_buf, sizeof(begin_buf)); + slurm_make_time_str(&end_time, end_buf, sizeof(end_buf)); + node_list = bitmap2node_name(avail_bitmap); + info("Job %u to start at %s, end at %s on %s", + job_ptr->job_id, begin_buf, end_buf, node_list); + xfree(node_list); +} + +static void _dump_job_test(struct job_record *job_ptr, bitstr_t *avail_bitmap) +{ + char begin_buf[32], *node_list; + + if (job_ptr->start_time == 0) { + strcpy(begin_buf, "NOW"); + } else { + slurm_make_time_str(&job_ptr->start_time, begin_buf, + sizeof(begin_buf)); + } + node_list = bitmap2node_name(avail_bitmap); + info("Test job %u at %s on %s", job_ptr->job_id, begin_buf, node_list); + xfree(node_list); +} + /* Log resource allocate table */ static void 
_dump_node_space_table(node_space_map_t *node_space_ptr) { @@ -227,8 +259,6 @@ static int _num_feature_count(struct job_record *job_ptr) return rc; feat_iter = list_iterator_create(detail_ptr->feature_list); - if (feat_iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) { if (feat_ptr->count) rc++; @@ -272,8 +302,6 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, list_size = list_count(detail_ptr->feature_list); feat_cnt_orig = xmalloc(sizeof(uint16_t) * list_size); feat_iter = list_iterator_create(detail_ptr->feature_list); - if (feat_iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) { high_cnt = MAX(high_cnt, feat_ptr->count); @@ -391,10 +419,10 @@ static void _load_config(void) sched_params = slurm_get_sched_params(); debug_flags = slurm_get_debug_flags(); - if (sched_params && (tmp_ptr=strstr(sched_params, "interval="))) - backfill_interval = atoi(tmp_ptr + 9); + if (sched_params && (tmp_ptr=strstr(sched_params, "bf_interval="))) + backfill_interval = atoi(tmp_ptr + 12); if (backfill_interval < 1) { - fatal("Invalid backfill scheduler interval: %d", + fatal("Invalid backfill scheduler bf_interval: %d", backfill_interval); } @@ -420,6 +448,14 @@ static void _load_config(void) fatal("Invalid backfill scheduler resolution: %d", backfill_resolution); } + + if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_part="))) + max_backfill_job_per_part = atoi(tmp_ptr + 16); + if (max_backfill_job_per_part < 0) { + fatal("Invalid backfill scheduler bf_max_job_part: %d", + max_backfill_job_per_part); + } + if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_user="))) max_backfill_job_per_user = atoi(tmp_ptr + 16); if (max_backfill_job_per_user < 0) { @@ -427,6 +463,12 @@ static void _load_config(void) max_backfill_job_per_user); } + /* bf_continue makes backfill 
continue where it was if interrupted + */ + if (sched_params && (strstr(sched_params, "bf_continue"))) { + backfill_continue = true; + } + xfree(sched_params); } @@ -488,7 +530,7 @@ extern void *backfill_agent(void *args) wait_time = difftime(now, last_backfill_time); if ((wait_time < backfill_interval) || _job_is_completing() || _many_pending_rpcs() || - !avail_front_end() || !_more_work(last_backfill_time)) + !avail_front_end(NULL) || !_more_work(last_backfill_time)) continue; lock_slurmctld(all_locks); @@ -534,9 +576,9 @@ static int _attempt_backfill(void) slurmdb_qos_rec_t *qos_ptr = NULL; int i, j, node_space_recs; struct job_record *job_ptr; - struct part_record *part_ptr; + struct part_record *part_ptr, **bf_part_ptr = NULL; uint32_t end_time, end_reserve; - uint32_t time_limit, comp_time_limit, orig_time_limit; + uint32_t time_limit, comp_time_limit, orig_time_limit, part_time_limit; uint32_t min_nodes, max_nodes, req_nodes; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL; bitstr_t *exc_core_bitmap = NULL; @@ -546,9 +588,12 @@ static int _attempt_backfill(void) int sched_timeout = 2, yield_sleep = 1; int rc = 0; int job_test_count = 0; - uint32_t *uid = NULL, nuser = 0; + uint32_t *uid = NULL, nuser = 0, bf_parts = 0, *bf_part_jobs = NULL; uint16_t *njobs = NULL; bool already_counted; + uint32_t reject_array_job_id = 0; + time_t config_update = slurmctld_conf.last_update; + time_t part_update = last_part_update; bf_last_yields = 0; #ifdef HAVE_CRAY @@ -574,12 +619,14 @@ static int _attempt_backfill(void) START_TIMER; if (debug_flags & DEBUG_FLAG_BACKFILL) info("backfill: beginning"); + else + debug("backfill: beginning"); sched_start = now = time(NULL); if (slurm_get_root_filter()) filter_root = true; - job_queue = build_job_queue(true); + job_queue = build_job_queue(true, true); if (list_count(job_queue) == 0) { debug("backfill: no jobs to backfill"); list_destroy(job_queue); @@ -606,25 +653,36 @@ static int _attempt_backfill(void) if (debug_flags & 
DEBUG_FLAG_BACKFILL) _dump_node_space_table(node_space); + if (max_backfill_job_per_part) { + ListIterator part_iterator; + struct part_record *part_ptr; + bf_parts = list_count(part_list); + bf_part_ptr = xmalloc(sizeof(struct part_record *) * bf_parts); + bf_part_jobs = xmalloc(sizeof(int) * bf_parts); + part_iterator = list_iterator_create(part_list); + i = 0; + while ((part_ptr = (struct part_record *) + list_next(part_iterator))) { + bf_part_ptr[i++] = part_ptr; + } + list_iterator_destroy(part_iterator); + } if (max_backfill_job_per_user) { uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t)); njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t)); } while ((job_queue_rec = (job_queue_rec_t *) list_pop_bottom(job_queue, sort_job_queue2))) { - job_ptr = job_queue_rec->job_ptr; - orig_time_limit = job_ptr->time_limit; - if ((time(NULL) - sched_start) >= sched_timeout) { - uint32_t save_time_limit = job_ptr->time_limit; - job_ptr->time_limit = orig_time_limit; if (debug_flags & DEBUG_FLAG_BACKFILL) { END_TIMER; info("backfill: completed yielding locks " "after testing %d jobs, %s", job_test_count, TIME_STR); } - if (_yield_locks(yield_sleep)) { + if ((_yield_locks(yield_sleep) && !backfill_continue) || + (slurmctld_conf.last_update != config_update) || + (last_part_update != part_update)) { if (debug_flags & DEBUG_FLAG_BACKFILL) { info("backfill: system state changed, " "breaking out after testing %d " @@ -633,19 +691,33 @@ static int _attempt_backfill(void) rc = 1; break; } - job_ptr->time_limit = save_time_limit; /* Reset backfill scheduling timers, resume testing */ sched_start = time(NULL); job_test_count = 0; START_TIMER; } + job_ptr = job_queue_rec->job_ptr; + /* With bf_continue configured, the original job could have + * been cancelled and purged. Validate pointer here. 
*/ + if ((job_ptr->magic != JOB_MAGIC) || + (job_ptr->job_id != job_queue_rec->job_id)) + continue; + orig_time_limit = job_ptr->time_limit; part_ptr = job_queue_rec->part_ptr; job_test_count++; xfree(job_queue_rec); if (!IS_JOB_PENDING(job_ptr)) continue; /* started in other partition */ + if (!avail_front_end(job_ptr)) + continue; /* No available frontend for this job */ + if (job_ptr->array_task_id != (uint16_t) NO_VAL) { + if (reject_array_job_id == job_ptr->array_job_id) + continue; /* already rejected array element */ + /* assume reject whole array for now, clear if OK */ + reject_array_job_id = job_ptr->array_job_id; + } job_ptr->part_ptr = part_ptr; if (debug_flags & DEBUG_FLAG_BACKFILL) @@ -654,6 +726,28 @@ static int _attempt_backfill(void) slurmctld_diag_stats.bf_last_depth++; already_counted = false; + if (max_backfill_job_per_part) { + bool skip_job = false; + for (j = 0; j < bf_parts; j++) { + if (bf_part_ptr[j] != job_ptr->part_ptr) + continue; + if (bf_part_jobs[j]++ >= + max_backfill_job_per_part) + skip_job = true; + break; + } + if (skip_job) { + if (debug_flags & DEBUG_FLAG_BACKFILL) + debug("backfill: have already " + "checked %u jobs for " + "partition %s; skipping " + "job %u", + max_backfill_job_per_part, + job_ptr->part_ptr->name, + job_ptr->job_id); + continue; + } + } if (max_backfill_job_per_user) { for (j = 0; j < nuser; j++) { if (job_ptr->user_id == uid[j]) { @@ -666,11 +760,13 @@ static int _attempt_backfill(void) } } if (j == nuser) { /* user not found */ + static bool bf_max_user_msg = true; if (nuser < BF_MAX_USERS) { uid[j] = job_ptr->user_id; njobs[j] = 1; nuser++; - } else { + } else if (bf_max_user_msg) { + bf_max_user_msg = false; error("backfill: too many users in " "queue. 
Consider increasing " "BF_MAX_USERS"); @@ -724,17 +820,18 @@ static int _attempt_backfill(void) } /* Determine job's expected completion time */ + if (part_ptr->max_time == INFINITE) + part_time_limit = 365 * 24 * 60; /* one year */ + else + part_time_limit = part_ptr->max_time; if (job_ptr->time_limit == NO_VAL) { - if (part_ptr->max_time == INFINITE) - time_limit = 365 * 24 * 60; /* one year */ - else - time_limit = part_ptr->max_time; + time_limit = part_time_limit; } else { if (part_ptr->max_time == INFINITE) time_limit = job_ptr->time_limit; else time_limit = MIN(job_ptr->time_limit, - part_ptr->max_time); + part_time_limit); } comp_time_limit = time_limit; qos_ptr = job_ptr->qos_ptr; @@ -748,6 +845,7 @@ static int _attempt_backfill(void) later_start = now; TRY_LATER: if ((time(NULL) - sched_start) >= sched_timeout) { + uint32_t save_job_id = job_ptr->job_id; uint32_t save_time_limit = job_ptr->time_limit; job_ptr->time_limit = orig_time_limit; if (debug_flags & DEBUG_FLAG_BACKFILL) { @@ -756,7 +854,9 @@ static int _attempt_backfill(void) "after testing %d jobs, %s", job_test_count, TIME_STR); } - if (_yield_locks(yield_sleep)) { + if ((_yield_locks(yield_sleep) && !backfill_continue) || + (slurmctld_conf.last_update != config_update) || + (last_part_update != part_update)) { if (debug_flags & DEBUG_FLAG_BACKFILL) { info("backfill: system state changed, " "breaking out after testing %d " @@ -765,6 +865,18 @@ static int _attempt_backfill(void) rc = 1; break; } + + /* With bf_continue configured, the original job could + * have been scheduled or cancelled and purged. + * Revalidate the job record here. 
*/ + if ((job_ptr->magic != JOB_MAGIC) || + (job_ptr->job_id != save_job_id)) + continue; + if (!IS_JOB_PENDING(job_ptr)) + continue; + if (!avail_front_end(job_ptr)) + continue; /* No available frontend */ + job_ptr->time_limit = save_time_limit; /* Reset backfill scheduling timers, resume testing */ sched_start = time(NULL); @@ -848,6 +960,8 @@ static int _attempt_backfill(void) already_counted = true; } + if (debug_flags & DEBUG_FLAG_BACKFILL) + _dump_job_test(job_ptr, avail_bitmap); j = _try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes, req_nodes, exc_core_bitmap); @@ -893,8 +1007,11 @@ static int _attempt_backfill(void) job_ptr->start_time = 0; break; } else { - /* Started this job. Update the database if job - * time limit changed and move to next job */ + /* Started this job, move to next one */ + reject_array_job_id = 0; + + /* Update the database if job time limit + * changed and move to next job */ if (save_time_limit != job_ptr->time_limit) jobacct_storage_g_job_start(acct_db_conn, job_ptr); @@ -936,12 +1053,17 @@ static int _attempt_backfill(void) */ if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) continue; + reject_array_job_id = 0; + if (debug_flags & DEBUG_FLAG_BACKFILL) + _dump_job_sched(job_ptr, end_reserve, avail_bitmap); bit_not(avail_bitmap); _add_reservation(job_ptr->start_time, end_reserve, avail_bitmap, node_space, &node_space_recs); if (debug_flags & DEBUG_FLAG_BACKFILL) _dump_node_space_table(node_space); } + xfree(bf_part_jobs); + xfree(bf_part_ptr); xfree(uid); xfree(njobs); FREE_NULL_BITMAP(avail_bitmap); @@ -991,7 +1113,8 @@ static int _start_job(struct job_record *job_ptr, bitstr_t *resv_bitmap) job_ptr->job_id, job_ptr->nodes); if (job_ptr->batch_flag == 0) srun_allocate(job_ptr->job_id); - else if (job_ptr->details->prolog_running == 0) + else if ((job_ptr->details == NULL) || + (job_ptr->details->prolog_running == 0)) launch_job(job_ptr); slurmctld_diag_stats.backfilled_jobs++; 
slurmctld_diag_stats.last_backfilled_jobs++; diff --git a/src/plugins/sched/backfill/backfill.h b/src/plugins/sched/backfill/backfill.h index 2060d29dcc505990b3a60985b21587f01d2bd8af..6b7b2a338757a2a5194763e85c86221c0ed11515 100644 --- a/src/plugins/sched/backfill/backfill.h +++ b/src/plugins/sched/backfill/backfill.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/backfill/backfill_wrapper.c b/src/plugins/sched/backfill/backfill_wrapper.c index cc7812161269e34e41ea0644b72e6a0e53f2a44b..c6f6d293a36ae8a151d2605504f2d27ae91c25cd 100644 --- a/src/plugins/sched/backfill/backfill_wrapper.c +++ b/src/plugins/sched/backfill/backfill_wrapper.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/builtin/Makefile.in b/src/plugins/sched/builtin/Makefile.in index b814929aeda8055f3fb709b0589d555704d6e550..11427896f878beee8552c819f4839d00453915fa 100644 --- a/src/plugins/sched/builtin/Makefile.in +++ b/src/plugins/sched/builtin/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/sched/builtin DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/sched/builtin/builtin.c b/src/plugins/sched/builtin/builtin.c index 047b602e9d3fca0e63229a87dcdda970269e22d6..fdc69acba93724df8b2518290da894d6c1b49e5e 100644 --- a/src/plugins/sched/builtin/builtin.c +++ b/src/plugins/sched/builtin/builtin.c @@ -12,7 +12,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -154,10 +154,8 @@ static void _compute_start_times(void) sched_start = now; last_job_alloc = now - 1; alloc_bitmap = bit_alloc(node_record_count); - if (alloc_bitmap == NULL) - fatal("bit_alloc: malloc failure"); - job_queue = build_job_queue(true); - while ((job_queue_rec = (job_queue_rec_t *) + job_queue = build_job_queue(true, false); + while ((job_queue_rec = (job_queue_rec_t *) list_pop_bottom(job_queue, sort_job_queue2))) { job_ptr = job_queue_rec->job_ptr; part_ptr = job_queue_rec->part_ptr; diff --git a/src/plugins/sched/builtin/builtin.h b/src/plugins/sched/builtin/builtin.h index d0874039961f6c49706eee2eb2bc63f0c606dd0d..3a81b4e2da31bc82cdde791ae4db6b0d3a916607 100644 --- a/src/plugins/sched/builtin/builtin.h +++ b/src/plugins/sched/builtin/builtin.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/builtin/builtin_wrapper.c b/src/plugins/sched/builtin/builtin_wrapper.c index e9868e4353c81c58703d55d8c15ffddb93d0a34b..420de2f08a4f92e704a3544f4f8fa8935aa061d4 100644 --- a/src/plugins/sched/builtin/builtin_wrapper.c +++ b/src/plugins/sched/builtin/builtin_wrapper.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/hold/Makefile.in b/src/plugins/sched/hold/Makefile.in index bfaa3ac52b6f530a177d25e322997bbbe5129001..7fd982d5446b51151d88d98e02048528afbc0188 100644 --- a/src/plugins/sched/hold/Makefile.in +++ b/src/plugins/sched/hold/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/sched/hold DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS 
= @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/sched/hold/hold_wrapper.c b/src/plugins/sched/hold/hold_wrapper.c index 4e83f20bfaed6a5b47ff929e72092cc937132123..3b4c6c738ee6b36537279cc502050aca73242191 100644 --- a/src/plugins/sched/hold/hold_wrapper.c +++ b/src/plugins/sched/hold/hold_wrapper.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/Makefile.in b/src/plugins/sched/wiki/Makefile.in index 909c075d30d7ca9481fc25601c42353b4ce70a0c..88809e2c26c90e71a74c39115d2d72270e3a708f 100644 --- a/src/plugins/sched/wiki/Makefile.in +++ b/src/plugins/sched/wiki/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/sched/wiki DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ 
+FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/sched/wiki/cancel_job.c b/src/plugins/sched/wiki/cancel_job.c index 98c0a510eb05c654a06966efd448ef7ea0680ce9..7e5dc534eac555cfef324f5daf8f875fc282bd1b 100644 --- a/src/plugins/sched/wiki/cancel_job.c +++ b/src/plugins/sched/wiki/cancel_job.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/crypto.c b/src/plugins/sched/wiki/crypto.c index 34fd4149c20c9db3eb66e328f82bf9a816ecc863..bbfba1520a555b49e3172b374abfe51771d4f14f 100644 --- a/src/plugins/sched/wiki/crypto.c +++ b/src/plugins/sched/wiki/crypto.c @@ -48,9 +48,6 @@ static void des( uint32_t *lword, uint32_t *irword ) 0x15a2ca46 }; - itmph = 0; - itmpl = 0; - for ( idx = 0; idx < MAX_ITERATION; ++idx ) { iswap = *irword; ia = iswap ^ c1[ idx ]; diff --git a/src/plugins/sched/wiki/get_jobs.c b/src/plugins/sched/wiki/get_jobs.c index f460f096d70ebcdf777e4ee0744565c1d7918f84..d0fae6c79178f7abecb50285d789751d94be7b8c 100644 --- a/src/plugins/sched/wiki/get_jobs.c +++ b/src/plugins/sched/wiki/get_jobs.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/get_nodes.c b/src/plugins/sched/wiki/get_nodes.c index 12c9c20d7c2b4623f951680b589bec069e1bb548..7e6dec6671b0b3509bef09a753d166b8ed6a67f3 100644 --- a/src/plugins/sched/wiki/get_nodes.c +++ b/src/plugins/sched/wiki/get_nodes.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -215,7 +215,7 @@ static char * _dump_node(struct node_record *node_ptr, time_t update_time) if (update_time > 0) return buf; - if (slurmctld_conf.fast_schedule) { + if (slurmctld_conf.fast_schedule && node_ptr->config_ptr) { /* config from slurm.conf */ snprintf(tmp, sizeof(tmp), "CMEMORY=%u;CDISK=%u;CPROC=%u;", diff --git a/src/plugins/sched/wiki/hostlist.c b/src/plugins/sched/wiki/hostlist.c index a713dfbf9c9912469f66791d37fcf7e63d98d3ce..dadca00e3213c992b259fb9d77d23c11cdf97cff 100644 --- a/src/plugins/sched/wiki/hostlist.c +++ b/src/plugins/sched/wiki/hostlist.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/job_modify.c b/src/plugins/sched/wiki/job_modify.c index abf6914d79ac04fa0ce5fcda7bea45daf531403d..5dbd2552c03e040d6b4c4c7a4a6c1227e1420a04 100644 --- a/src/plugins/sched/wiki/job_modify.c +++ b/src/plugins/sched/wiki/job_modify.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -100,7 +100,7 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, } if (bank_ptr) { - if(update_job_account("wiki", job_ptr, bank_ptr) + if (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS) return EINVAL; else @@ -202,7 +202,7 @@ host_fini: if (rc) { } } - if(update_accounting) { + if (update_accounting) { if (job_ptr->details && job_ptr->details->begin_time) { /* Update job record in accounting to reflect changes */ jobacct_storage_g_job_start(acct_db_conn, job_ptr); diff --git a/src/plugins/sched/wiki/msg.c b/src/plugins/sched/wiki/msg.c index 836f21cd40c81c15ac4c66301e6ca4aecea4459e..8a894d777a94f16cdcf7c8ae976e0a1724dd739e 100644 --- a/src/plugins/sched/wiki/msg.c +++ b/src/plugins/sched/wiki/msg.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -67,7 +67,6 @@ int init_prio_mode = PRIO_HOLD; uint16_t kill_wait; uint16_t use_host_exp = 0; -static char * _get_wiki_conf_path(void); static void * _msg_thread(void *no_data); static int _parse_msg(char *msg, char **req); static void _proc_msg(slurm_fd_t new_fd, char *msg); @@ -204,33 +203,6 @@ static void *_msg_thread(void *no_data) return NULL; } -/*****************************************************************************\ - * _get_wiki_conf_path - return the pathname of the wiki.conf file - * return value must be xfreed -\*****************************************************************************/ -static char * _get_wiki_conf_path(void) -{ - char *val = getenv("SLURM_CONF"); - char *path = NULL; - int i; - - if (!val) - val = default_slurm_config_file; - - /* Replace file name on end of path */ - i = strlen(val) + 10; - path = xmalloc(i); - strcpy(path, val); - val = strrchr(path, (int)'/'); - if (val) /* absolute path */ - val++; - else /* not absolute path */ - val = path; - strcpy(val, "wiki.conf"); - - return path; -} - /*****************************************************************************\ * parse_wiki_config - Results go into global variables * RET SLURM_SUCESS or error code @@ -276,7 +248,7 @@ extern int parse_wiki_config(void) kill_wait = conf->kill_wait; slurm_conf_unlock(); - wiki_conf = _get_wiki_conf_path(); + wiki_conf = get_extra_conf_path("wiki.conf"); if ((wiki_conf == NULL) || (stat(wiki_conf, &buf) == -1)) { debug("No wiki.conf file (%s)", wiki_conf); xfree(wiki_conf); diff --git a/src/plugins/sched/wiki/msg.h b/src/plugins/sched/wiki/msg.h index b9f329c602e83058f805755b48e8e0b40d082236..40707279b3e8d0cad1781afe54fbdccdb08219a6 100644 --- a/src/plugins/sched/wiki/msg.h +++ b/src/plugins/sched/wiki/msg.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/resume_job.c b/src/plugins/sched/wiki/resume_job.c index 9eff6fff30a6081ce98a4aa343d51ec252431591..03df960560de491bd273b7b84ed31150eaabfed7 100644 --- a/src/plugins/sched/wiki/resume_job.c +++ b/src/plugins/sched/wiki/resume_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/sched_wiki.c b/src/plugins/sched/wiki/sched_wiki.c index 7248487319271c15a6e0bb9e822251df956ae75f..eea71567331d37d0b91f341725b1d71f3f6f8313 100644 --- a/src/plugins/sched/wiki/sched_wiki.c +++ b/src/plugins/sched/wiki/sched_wiki.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki/start_job.c b/src/plugins/sched/wiki/start_job.c index b3f9ab16587e46431bbd7d3b9d82bcf8cbba6bff..eb232e4f09dd060891b9161dfdac676c369e36bd 100644 --- a/src/plugins/sched/wiki/start_job.c +++ b/src/plugins/sched/wiki/start_job.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -251,6 +251,7 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, old_task_cnt = job_ptr->details->min_cpus; job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt); job_ptr->priority = 100000000; + job_ptr->state_reason = WAIT_NO_REASON; fini: unlock_slurmctld(job_write_lock); if (rc) diff --git a/src/plugins/sched/wiki/suspend_job.c b/src/plugins/sched/wiki/suspend_job.c index e1ef9f64ca964b0b0ccdd8d32e023dd17ecb6995..4004d7376e7793abe4bddc02adc60f1a876df50a 100644 --- a/src/plugins/sched/wiki/suspend_job.c +++ b/src/plugins/sched/wiki/suspend_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/Makefile.in b/src/plugins/sched/wiki2/Makefile.in index 31c2d84edf1e78723dd5754e4cdbed79fe80af4c..0107911154ffa64df29fd046f4e728d10e8b85e4 100644 --- a/src/plugins/sched/wiki2/Makefile.in +++ b/src/plugins/sched/wiki2/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/sched/wiki2 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ 
$(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -185,6 +189,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -205,6 +211,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -214,6 +223,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -221,6 +232,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -255,6 +275,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ 
+OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -282,6 +305,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/sched/wiki2/cancel_job.c b/src/plugins/sched/wiki2/cancel_job.c index b3ef339b38aa5b090094ac577ba9af5be7f1e420..57785e1017f8204de54299376b2895114a454611 100644 --- a/src/plugins/sched/wiki2/cancel_job.c +++ b/src/plugins/sched/wiki2/cancel_job.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/crypto.c b/src/plugins/sched/wiki2/crypto.c index 34fd4149c20c9db3eb66e328f82bf9a816ecc863..bbfba1520a555b49e3172b374abfe51771d4f14f 100644 --- a/src/plugins/sched/wiki2/crypto.c +++ b/src/plugins/sched/wiki2/crypto.c @@ -48,9 +48,6 @@ static void des( uint32_t *lword, uint32_t *irword ) 0x15a2ca46 }; - itmph = 0; - itmpl = 0; - for ( idx = 0; idx < MAX_ITERATION; ++idx ) { iswap = *irword; ia = iswap ^ c1[ idx ]; diff --git a/src/plugins/sched/wiki2/event.c b/src/plugins/sched/wiki2/event.c index a40e725c9fb4bc26e2c56147b739cc28e546471c..9079bd71101e1c22f4e191bbdb13344195c7ab2d 100644 --- a/src/plugins/sched/wiki2/event.c +++ b/src/plugins/sched/wiki2/event.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/get_jobs.c b/src/plugins/sched/wiki2/get_jobs.c index d3717356a7bb62134ee09685ac106ad82aaaffa7..ff6fa97668f0b64f39395bf651848321094c0f21 100644 --- a/src/plugins/sched/wiki2/get_jobs.c +++ b/src/plugins/sched/wiki2/get_jobs.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/get_nodes.c b/src/plugins/sched/wiki2/get_nodes.c index 87ab4d679920296bd5a14b880ed456f4a79a47b3..fe980b6f9c837a30421dc21913e62205f07fe813 100644 --- a/src/plugins/sched/wiki2/get_nodes.c +++ b/src/plugins/sched/wiki2/get_nodes.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -198,8 +198,10 @@ static char * _dump_all_nodes(int *node_cnt, time_t update_time) hostlist_push(hl, node_ptr->name); } else { hl = hostlist_create(node_ptr->name); - if (hl == NULL) - fatal("malloc failure"); + if (!hl) { + fatal("Invalid node_name: %s", + node_ptr->name); + } } continue; } else { @@ -207,8 +209,10 @@ static char * _dump_all_nodes(int *node_cnt, time_t update_time) update_time); hostlist_destroy(hl); hl = hostlist_create(node_ptr->name); - if (hl == NULL) - fatal("malloc failure"); + if (!hl) { + fatal("Invalid node_name: %s", + node_ptr->name); + } uniq_node_ptr = node_ptr; } } else { @@ -326,6 +330,13 @@ static char * _dump_node(struct node_record *node_ptr, hostlist_t hl, snprintf(tmp, sizeof(tmp), ":STATE=%s;", _get_node_state(node_ptr)); xstrcat(buf, tmp); + + if (node_ptr->cpu_load != NO_VAL) { + snprintf(tmp, sizeof(tmp), "CPULOAD=%f;", + (node_ptr->cpu_load / 100.0)); + xstrcat(buf, tmp); + } + if (node_ptr->reason) { /* Strip out any quotes, they confuse Moab */ char *reason, *bad_char; @@ -360,12 +371,6 @@ static char * _dump_node(struct node_record *node_ptr, hostlist_t hl, if (i > 0) xstrcat(buf, ";"); - if (node_ptr->cpu_load != NO_VAL) { - snprintf(tmp, sizeof(tmp), "CPULOAD=%f;", - (node_ptr->cpu_load / 100.0)); - xstrcat(buf, tmp); - } - if (node_ptr->arch) { snprintf(tmp, sizeof(tmp), "ARCH=%s;", node_ptr->arch); xstrcat(buf, tmp); @@ -396,7 +401,7 @@ static char * _dump_node(struct node_record *node_ptr, hostlist_t hl, if (update_time > 0) return buf; - if (slurmctld_conf.fast_schedule) { + if (slurmctld_conf.fast_schedule && node_ptr->config_ptr) { /* config from slurm.conf */ snprintf(tmp, sizeof(tmp), "CMEMORY=%u;CDISK=%u;CPROC=%u;", diff --git a/src/plugins/sched/wiki2/hostlist.c b/src/plugins/sched/wiki2/hostlist.c index 273cc591f4028c8ca2fab7a96510af89249c7d32..d6e99c14789abfd0f0b363171e4336e54cdbe6d7 100644 --- 
a/src/plugins/sched/wiki2/hostlist.c +++ b/src/plugins/sched/wiki2/hostlist.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -169,7 +169,7 @@ static char * _task_list(struct job_record *job_ptr) xassert(job_resrcs_ptr); #ifdef HAVE_BG - if(job_ptr->node_cnt) { + if (job_ptr->node_cnt) { task_cnt = job_resrcs_ptr->cpu_array_value[0]; } else task_cnt = 1; @@ -271,7 +271,7 @@ static char * _task_list_exp(struct job_record *job_ptr) xassert(job_resrcs_ptr); #ifdef HAVE_BG - if(job_ptr->node_cnt) { + if (job_ptr->node_cnt) { task_cnt = job_resrcs_ptr->cpu_array_value[0]; } else task_cnt = 1; diff --git a/src/plugins/sched/wiki2/initialize.c b/src/plugins/sched/wiki2/initialize.c index 49e01c1751020000489b9825509c6a934d6f0f8b..af0f3b6d1ad3fa6c6402f440f1a4735cfb7f8ce5 100644 --- a/src/plugins/sched/wiki2/initialize.c +++ b/src/plugins/sched/wiki2/initialize.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/job_add_task.c b/src/plugins/sched/wiki2/job_add_task.c index 85710fbf7ad686cfcc7875e580afe0f989aa04bf..8b9c72614ecf85c0fe927e34b1e65755a8519905 100644 --- a/src/plugins/sched/wiki2/job_add_task.c +++ b/src/plugins/sched/wiki2/job_add_task.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/job_modify.c b/src/plugins/sched/wiki2/job_modify.c index 2924189cb3423f76dd04e80b4cf80edfa85c1778..882b218f47c65967c99afb8cad9b5e74346e66a2 100644 --- a/src/plugins/sched/wiki2/job_modify.c +++ b/src/plugins/sched/wiki2/job_modify.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -332,7 +332,7 @@ host_fini: if (rc) { &cpus_per_node); } #endif - if(!IS_JOB_PENDING(job_ptr) || !job_ptr->details) { + if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) { error("wiki: MODIFYJOB node count of non-pending " "job %u", jobid); return ESLURM_DISABLED; diff --git a/src/plugins/sched/wiki2/job_notify.c b/src/plugins/sched/wiki2/job_notify.c index 6cd73ecde2ef6ed9ef5c13c260606699dafad7cf..5cab0db1a6e6373eca6530e1c26408e37e41d28c 100644 --- a/src/plugins/sched/wiki2/job_notify.c +++ b/src/plugins/sched/wiki2/job_notify.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/job_release_task.c b/src/plugins/sched/wiki2/job_release_task.c index 288c414ced9ecf506da99c22e64e92ddf0eb880d..64038c0a4500cf89194e99fbeb6f968b18278b0e 100644 --- a/src/plugins/sched/wiki2/job_release_task.c +++ b/src/plugins/sched/wiki2/job_release_task.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/job_requeue.c b/src/plugins/sched/wiki2/job_requeue.c index b601b0cdbf4fd089a1732235efe04d05586f5eec..767ccf5c4e0c24a3e3acfaaaed99ced5fb0cf671 100644 --- a/src/plugins/sched/wiki2/job_requeue.c +++ b/src/plugins/sched/wiki2/job_requeue.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/job_signal.c b/src/plugins/sched/wiki2/job_signal.c index db294f7c9326a4e328e58887c4e57802c8710c8f..befc22d4ee7a4fb1b0ad5f85aafe5eb0c96213a2 100644 --- a/src/plugins/sched/wiki2/job_signal.c +++ b/src/plugins/sched/wiki2/job_signal.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/job_will_run.c b/src/plugins/sched/wiki2/job_will_run.c index e2010a47461d1a64f76dbb410c326c6ddd801cbd..0fa067fafedacc47434389df2b2bffe6da453de7 100644 --- a/src/plugins/sched/wiki2/job_will_run.c +++ b/src/plugins/sched/wiki2/job_will_run.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c index 3b34af0d7fee07041db622a7939ff32d8dd52cdb..cceec66d549d15bcaac7ba46830ed6ea92166a1a 100644 --- a/src/plugins/sched/wiki2/msg.c +++ b/src/plugins/sched/wiki2/msg.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -73,7 +73,6 @@ int init_prio_mode = PRIO_HOLD; uint16_t kill_wait; uint16_t use_host_exp = 0; -static char * _get_wiki_conf_path(void); static void * _msg_thread(void *no_data); static int _parse_msg(char *msg, char **req); static void _proc_msg(slurm_fd_t new_fd, char *msg); @@ -212,33 +211,6 @@ static void *_msg_thread(void *no_data) return NULL; } -/*****************************************************************************\ - * _get_wiki_conf_path - return the pathname of the wiki.conf file - * return value must be xfreed -\*****************************************************************************/ -static char * _get_wiki_conf_path(void) -{ - char *val = getenv("SLURM_CONF"); - char *path = NULL; - int i; - - if (!val) - val = default_slurm_config_file; - - /* Replace file name on end of path */ - i = strlen(val) + 10; - path = xmalloc(i); - strcpy(path, val); - val = strrchr(path, (int)'/'); - if (val) /* absolute path */ - val++; - else /* not absolute path */ - val = path; - strcpy(val, "wiki.conf"); - - return path; -} - /*****************************************************************************\ * parse_wiki_config - Results go into global variables * RET SLURM_SUCCESS or error code @@ -283,7 +255,7 @@ extern int parse_wiki_config(void) first_job_id = conf->first_job_id; slurm_conf_unlock(); - wiki_conf = _get_wiki_conf_path(); + wiki_conf = get_extra_conf_path("wiki.conf"); if ((wiki_conf == NULL) || (stat(wiki_conf, &buf) == -1)) { fatal("No wiki.conf file (%s)", wiki_conf); xfree(wiki_conf); diff --git a/src/plugins/sched/wiki2/msg.h b/src/plugins/sched/wiki2/msg.h index 75fa6dbae46d025b033ecae79abd35f2ff458675..0479064f206b40997a7fc72266746881d8036b3c 100644 --- a/src/plugins/sched/wiki2/msg.h +++ b/src/plugins/sched/wiki2/msg.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/resume_job.c b/src/plugins/sched/wiki2/resume_job.c index 9eff6fff30a6081ce98a4aa343d51ec252431591..03df960560de491bd273b7b84ed31150eaabfed7 100644 --- a/src/plugins/sched/wiki2/resume_job.c +++ b/src/plugins/sched/wiki2/resume_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/sched_wiki.c b/src/plugins/sched/wiki2/sched_wiki.c index b1cf9348e607e46b0c93f6cffde439ce7c27c26e..1c9cb58ffd41bbc3794bc892a293a6b7a26ecd18 100644 --- a/src/plugins/sched/wiki2/sched_wiki.c +++ b/src/plugins/sched/wiki2/sched_wiki.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/sched/wiki2/start_job.c b/src/plugins/sched/wiki2/start_job.c index 9d6393a64985c3a457bbb53f7b9a42768efb1c0a..3693c8f2578a1f8dcc9005689a9b0e2b8dc20111 100644 --- a/src/plugins/sched/wiki2/start_job.c +++ b/src/plugins/sched/wiki2/start_job.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -306,6 +306,7 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, old_task_cnt = job_ptr->details->min_cpus; job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt); job_ptr->priority = 100000000; + job_ptr->state_reason = WAIT_NO_REASON; fini: unlock_slurmctld(job_write_lock); if (rc) diff --git a/src/plugins/sched/wiki2/suspend_job.c b/src/plugins/sched/wiki2/suspend_job.c index e1ef9f64ca964b0b0ccdd8d32e023dd17ecb6995..4004d7376e7793abe4bddc02adc60f1a876df50a 100644 --- a/src/plugins/sched/wiki2/suspend_job.c +++ b/src/plugins/sched/wiki2/suspend_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/Makefile.in b/src/plugins/select/Makefile.in index 0e7cff8de25dcf4a0c9c5c3dfce8500b3a56b221..5ef180627e94779c85457b991bb84bfe9858f7ff 100644 --- a/src/plugins/select/Makefile.in +++ b/src/plugins/select/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/select DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 
\ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ 
+OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/bluegene/Makefile.am b/src/plugins/select/bluegene/Makefile.am index 6266282ce85d25cde74f2521a8d78830947d01ab..0a88eeb5b0cc60e52fed923af2e1426040aee604 100644 --- a/src/plugins/select/bluegene/Makefile.am +++ b/src/plugins/select/bluegene/Makefile.am @@ -3,8 +3,6 @@ AUTOMAKE_OPTIONS = foreign CLEANFILES = core.* -AM_CPPFLAGS = -DBLUEGENE_CONFIG_FILE=\"$(sysconfdir)/bluegene.conf\" - PLUGIN_FLAGS = -module -avoid-version --export-dynamic -lm INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(BG_INCLUDES) diff --git a/src/plugins/select/bluegene/Makefile.in b/src/plugins/select/bluegene/Makefile.in index 4b60e208e6f351ac761611fa7d71f8dc724d30cf..e4f037952ced2414a6c38c7a0723721253ad2dec 100644 --- a/src/plugins/select/bluegene/Makefile.in +++ b/src/plugins/select/bluegene/Makefile.in @@ -90,6 +90,7 @@ subdir = src/plugins/select/bluegene DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -107,6 +108,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -115,11 +117,13 
@@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -344,6 +348,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -364,6 +370,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -373,6 +382,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -380,6 +391,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -414,6 +434,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = 
@OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -441,6 +464,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -531,7 +557,6 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign CLEANFILES = core.* -AM_CPPFLAGS = -DBLUEGENE_CONFIG_FILE=\"$(sysconfdir)/bluegene.conf\" PLUGIN_FLAGS = -module -avoid-version --export-dynamic -lm INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(BG_INCLUDES) pkglib_LTLIBRARIES = select_bluegene.la $(am__append_2) \ diff --git a/src/plugins/select/bluegene/ba/Makefile.in b/src/plugins/select/bluegene/ba/Makefile.in index e1e3bc86f8e29ad2e577c454a41f25e577aa72ff..aa91c6b5cf9028e0e782560c046d074dda9ed188 100644 --- a/src/plugins/select/bluegene/ba/Makefile.in +++ b/src/plugins/select/bluegene/ba/Makefile.in @@ -60,6 +60,7 @@ subdir = src/plugins/select/bluegene/ba DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 
\ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -164,6 +168,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -184,6 +190,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -193,6 +202,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -200,6 +211,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -234,6 +254,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -261,6 +284,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = 
@REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/bluegene/ba/block_allocator.c b/src/plugins/select/bluegene/ba/block_allocator.c index 41e2e19626fb6922eb7b15c5d3cf703f57534f8c..1494323dc2bf052ec2e1dbaa10a9249b549ae897 100644 --- a/src/plugins/select/bluegene/ba/block_allocator.c +++ b/src/plugins/select/bluegene/ba/block_allocator.c @@ -9,7 +9,7 @@ * Written by Dan Phung <phung4@llnl.gov>, Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/ba/block_allocator.h b/src/plugins/select/bluegene/ba/block_allocator.h index d9a9f2ed05c751bf37b03b4b31b0416470ee2943..ab7e65c2e2032f80b4c260e77d82ac2c430fb48e 100644 --- a/src/plugins/select/bluegene/ba/block_allocator.h +++ b/src/plugins/select/bluegene/ba/block_allocator.h @@ -7,7 +7,7 @@ * Written by Dan Phung <phung4@llnl.gov>, Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/ba/wire_test.c b/src/plugins/select/bluegene/ba/wire_test.c index b477388fd58e5a70df8811cecc8c7c9fbea71a34..d9f7ba7c5a0f833f6de6d23969bc51a2818580e7 100644 --- a/src/plugins/select/bluegene/ba/wire_test.c +++ b/src/plugins/select/bluegene/ba/wire_test.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/ba_bgq/Makefile.in b/src/plugins/select/bluegene/ba_bgq/Makefile.in index 87151486be31b93f99712bd2779e6cde678f0c7a..63e83fd5ae23a6bb93dd937ea3e3a98ab17ca9ad 100644 --- a/src/plugins/select/bluegene/ba_bgq/Makefile.in +++ b/src/plugins/select/bluegene/ba_bgq/Makefile.in @@ -60,6 +60,7 @@ subdir = src/plugins/select/bluegene/ba_bgq DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ 
$(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -174,6 +178,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -194,6 +200,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -203,6 +212,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -210,6 +221,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -244,6 +264,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -271,6 +294,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = 
@REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.c b/src/plugins/select/bluegene/ba_bgq/block_allocator.c index 7201961c0333be31a6d1645126eddaf9310e6eea..f53108ffff775261a5dddf566d25bf6edc6c3b52 100644 --- a/src/plugins/select/bluegene/ba_bgq/block_allocator.c +++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.c @@ -10,7 +10,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.h b/src/plugins/select/bluegene/ba_bgq/block_allocator.h index 337dc3b37c519c13a67c7fb7c1004edf8842bcaf..1061fd82164c531bf548c15c65932b0ca89aa6dc 100644 --- a/src/plugins/select/bluegene/ba_bgq/block_allocator.h +++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.h @@ -8,7 +8,7 @@ * Written by Dan Phung <phung4@llnl.gov>, Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/ba_bgq/wire_test.c b/src/plugins/select/bluegene/ba_bgq/wire_test.c index d33238a07812ef54148eb7b3ffcf5467361b569b..3f8167a7c3d1d6ad5d4bc9f341916353a56d4635 100644 --- a/src/plugins/select/bluegene/ba_bgq/wire_test.c +++ b/src/plugins/select/bluegene/ba_bgq/wire_test.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/ba_common.c b/src/plugins/select/bluegene/ba_common.c index d868ae17e08009b0c1e44862fe0c7a60f6b36e3c..a51795ac3c3dc33c3afd4c83375e07136042c56e 100644 --- a/src/plugins/select/bluegene/ba_common.c +++ b/src/plugins/select/bluegene/ba_common.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -109,11 +109,11 @@ static int _unpack_ba_switch(ba_switch_t *ba_switch, if ((cluster_flags & CLUSTER_FLAG_BGL) || (cluster_flags & CLUSTER_FLAG_BGP)) { for (i=0; i< NUM_PORTS_PER_NODE; i++) { - if(_unpack_ba_connection(&ba_switch->int_wire[i], + if (_unpack_ba_connection(&ba_switch->int_wire[i], buffer, protocol_version) != SLURM_SUCCESS) goto unpack_error; - if(_unpack_ba_connection(&ba_switch->ext_wire[i], + if (_unpack_ba_connection(&ba_switch->ext_wire[i], buffer, protocol_version) != SLURM_SUCCESS) goto unpack_error; @@ -212,8 +212,6 @@ static ba_geo_combos_t *_build_geo_bitmap_arrays(int size) for (i = 1; i <= combos->elem_count; i++) { bool some_bit_set = false, some_gap_set = false; combos->set_bits_array[i-1] = bit_alloc(size); - if (combos->set_bits_array[i-1] == NULL) - fatal("bit_alloc: malloc failure"); gap_count = 0; gap_start = -1; @@ -1254,8 +1252,6 @@ extern void ba_free_geo_table(ba_geo_system_t *my_geo_system) extern bitstr_t *ba_node_map_alloc(ba_geo_system_t *my_geo_system) { bitstr_t *cnode_map = bit_alloc(my_geo_system->total_size); - if (cnode_map == NULL) - fatal("bit_alloc: malloc failure"); return cnode_map; } diff --git a/src/plugins/select/bluegene/ba_common.h b/src/plugins/select/bluegene/ba_common.h index b9300d968539307920fa19d257dba11fe870ee53..62b6d78b5b3158252d8b6996a742d7b949638a3f 100644 --- a/src/plugins/select/bluegene/ba_common.h +++ b/src/plugins/select/bluegene/ba_common.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_core.c b/src/plugins/select/bluegene/bg_core.c index 092b4d754ab218fc5b9ae28605cc0ac12f70fbcb..8acac055f1bf59240c4fe4610d0709ba29b5b419 100644 --- a/src/plugins/select/bluegene/bg_core.c +++ b/src/plugins/select/bluegene/bg_core.c @@ -9,7 +9,7 @@ * Written by Danny Auble <auble1@llnl.gov> et. al. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -655,8 +655,7 @@ extern void free_block_list(uint32_t job_id, List track_list, bg_free_list)) { error("pthread_create error %m"); if (++retries > MAX_PTHREAD_RETRIES) - fatal("Can't create " - "pthread"); + fatal("Can't create pthread"); /* sleep and retry */ usleep(1000); } diff --git a/src/plugins/select/bluegene/bg_core.h b/src/plugins/select/bluegene/bg_core.h index 82fd7c47bdb272a3096aa2bc60e2fa9d9235024b..fc123703c3bcb5aac03a2850ecda416faad96b96 100644 --- a/src/plugins/select/bluegene/bg_core.h +++ b/src/plugins/select/bluegene/bg_core.h @@ -8,7 +8,7 @@ * Written by Dan Phung <phung4@llnl.gov> and Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_defined_block.c b/src/plugins/select/bluegene/bg_defined_block.c index 6770c22a3700d8558e701235a3ad49d2abc2dc9f..88425105a412ffacaf201201b5dbd80950ad0794 100644 --- a/src/plugins/select/bluegene/bg_defined_block.c +++ b/src/plugins/select/bluegene/bg_defined_block.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -221,6 +221,7 @@ extern int create_defined_blocks(bg_layout_t overlapped, if ((rc = bridge_block_create(bg_record)) != SLURM_SUCCESS) break; + setup_subblock_structs(bg_record); print_bg_record(bg_record); } } @@ -457,6 +458,7 @@ extern int create_full_system_block(List bg_found_block_list) goto no_total; } + setup_subblock_structs(bg_record); print_bg_record(bg_record); list_append(bg_lists->main, bg_record); diff --git a/src/plugins/select/bluegene/bg_defined_block.h b/src/plugins/select/bluegene/bg_defined_block.h index eb0a8a65849d5a99a2cc6b4128c72a5091e7050c..39bce1c81977fc6c6ff31e510567f7f6515a571e 100644 --- a/src/plugins/select/bluegene/bg_defined_block.h +++ b/src/plugins/select/bluegene/bg_defined_block.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_dynamic_block.c b/src/plugins/select/bluegene/bg_dynamic_block.c index 31e9efb9052784f6df3af6eefc889a1e7992553f..a66c996e116e0b209b75125590f4f06eccd93e89 100644 --- a/src/plugins/select/bluegene/bg_dynamic_block.c +++ b/src/plugins/select/bluegene/bg_dynamic_block.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_dynamic_block.h b/src/plugins/select/bluegene/bg_dynamic_block.h index 82469889da3c1eca0551650464fc5123315acc99..68d9cb171667bd8e5e4c2d0b52b6bd73997e3826 100644 --- a/src/plugins/select/bluegene/bg_dynamic_block.h +++ b/src/plugins/select/bluegene/bg_dynamic_block.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_enums.h b/src/plugins/select/bluegene/bg_enums.h index 4f4fc292fcfc4b17c22e935cd6751a73d5c9870b..ba5b0dad517ca6051532d36d9a23d5f0260d806c 100644 --- a/src/plugins/select/bluegene/bg_enums.h +++ b/src/plugins/select/bluegene/bg_enums.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_job_info.c b/src/plugins/select/bluegene/bg_job_info.c index 670ace1f2d99780b95795d950f2b60facd28de1e..1e334f6c7bdf97ecc94bc21cd2b3346092a07b9c 100644 --- a/src/plugins/select/bluegene/bg_job_info.c +++ b/src/plugins/select/bluegene/bg_job_info.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -143,12 +143,17 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, case SELECT_JOBDATA_GEOMETRY: new_size = 1; for (i=0; i<jobinfo->dim_cnt; i++) { + /* If geo[i] is NO_VAL then we know this + doesn't need to be reset. + */ + if (jobinfo->geometry[i] != (uint16_t) NO_VAL) { + /* Make sure the conn type is correct with the + * new count (if Geometry is requested it + * can't be small) */ + jobinfo->conn_type[i] = SELECT_NAV; + } jobinfo->geometry[i] = uint16[i]; new_size *= uint16[i]; - /* Make sure the conn type is correct with the - * new count (if Geometry is requested it - * can't be small) */ - jobinfo->conn_type[i] = SELECT_NAV; } break; diff --git a/src/plugins/select/bluegene/bg_job_info.h b/src/plugins/select/bluegene/bg_job_info.h index b22aadf51d3c1b51f43f446ddd3ac87d7356f412..1f9722a2372e18a965bd7d151a4052f4155f7a97 100644 --- a/src/plugins/select/bluegene/bg_job_info.h +++ b/src/plugins/select/bluegene/bg_job_info.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_job_place.c b/src/plugins/select/bluegene/bg_job_place.c index a56244d450c55971904214b1d71dc894ced1bc11..64776b213e37c510a4500a52cbf8a80e82444e93 100644 --- a/src/plugins/select/bluegene/bg_job_place.c +++ b/src/plugins/select/bluegene/bg_job_place.c @@ -9,7 +9,7 @@ * and Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -602,6 +602,17 @@ static bg_record_t *_find_matching_block(List block_list, */ goto good_conn_type; } +#ifndef HAVE_BG_L_P + else if ((bg_record->geo[dim] == 1) + && (request->conn_type[dim] + == SELECT_MESH)) { + /* On a BGQ system a dim only + 1 long must be a TORUS, so + ignore a requested MESH. + */ + goto good_conn_type; + } +#endif if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) { @@ -1619,10 +1630,7 @@ static void _build_job_resources_struct( job_resrcs_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); /* job_resrcs_ptr->nhosts = node_cnt; */ job_resrcs_ptr->nhosts = bit_set_count(bitmap); - - if (!(job_resrcs_ptr->node_bitmap = bit_copy(bitmap))) - fatal("bit_copy malloc failure"); - + job_resrcs_ptr->node_bitmap = bit_copy(bitmap); job_resrcs_ptr->nodes = xstrdup(bg_record->mp_str); job_resrcs_ptr->cpu_array_cnt = 1; diff --git a/src/plugins/select/bluegene/bg_job_place.h b/src/plugins/select/bluegene/bg_job_place.h index 8fb87df207e6f7d53d153cc01e54bc85f69d7dc9..d40fe506d0e9a54e340f9f8da5de86c8e3cc02df 100644 --- a/src/plugins/select/bluegene/bg_job_place.h +++ b/src/plugins/select/bluegene/bg_job_place.h @@ -7,7 +7,7 @@ * Written by Dan Phung <phung4@llnl.gov> et. al. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_job_run.c b/src/plugins/select/bluegene/bg_job_run.c index c94975303ad6081b5369078f689ffd832966c9fb..762289d6885129676c462a7ec38c5f3a99a8dc58 100644 --- a/src/plugins/select/bluegene/bg_job_run.c +++ b/src/plugins/select/bluegene/bg_job_run.c @@ -10,7 +10,7 @@ * Written by Morris Jette <jette1@llnl.gov>, Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_job_run.h b/src/plugins/select/bluegene/bg_job_run.h index ff9411248154cd3cb054e7b1424751043c377f48..3ac657d75db44942b8b0a22261b86fc8fe62188c 100644 --- a/src/plugins/select/bluegene/bg_job_run.h +++ b/src/plugins/select/bluegene/bg_job_run.h @@ -7,7 +7,7 @@ * Written by Morris Jette <jette1@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_list_functions.c b/src/plugins/select/bluegene/bg_list_functions.c index e3c92df518c4ecb0d8d59fbfbe1e4bdef9a9e7dd..535d1cd1f6de7b45ff0cca6f4b8d1b92487f67c8 100644 --- a/src/plugins/select/bluegene/bg_list_functions.c +++ b/src/plugins/select/bluegene/bg_list_functions.c @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_list_functions.h b/src/plugins/select/bluegene/bg_list_functions.h index cbe9822b5298e9dc7a84bd47a19fcf2a8ddaba17..4c40d6c2309ac80def9564001e8e6dbeac327d86 100644 --- a/src/plugins/select/bluegene/bg_list_functions.h +++ b/src/plugins/select/bluegene/bg_list_functions.h @@ -9,7 +9,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_node_info.c b/src/plugins/select/bluegene/bg_node_info.c index e4cda19ca72ecb81d9246aee807ebc4cfa41efde..fe799568ac04c54eb645bdec12b6b1970215e90a 100644 --- a/src/plugins/select/bluegene/bg_node_info.c +++ b/src/plugins/select/bluegene/bg_node_info.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -461,6 +461,7 @@ extern int select_nodeinfo_get(select_nodeinfo_t *nodeinfo, { int rc = SLURM_SUCCESS; uint16_t *uint16 = (uint16_t *) data; + uint32_t *uint32 = (uint32_t *) data; bitstr_t **bitmap = (bitstr_t **) data; char **tmp_char = (char **) data; ListIterator itr = NULL; @@ -538,6 +539,9 @@ extern int select_nodeinfo_get(select_nodeinfo_t *nodeinfo, xstrfmtcat(*tmp_char, "Failed cnodes=%s", nodeinfo->failed_cnodes); break; + case SELECT_NODEDATA_MEM_ALLOC: + *uint32 = 0; + break; default: error("Unsupported option %d for get_nodeinfo.", dinfo); rc = SLURM_ERROR; diff --git a/src/plugins/select/bluegene/bg_node_info.h b/src/plugins/select/bluegene/bg_node_info.h index 7bc89d8223d006e5265e011b088b3cdab5365b7d..5c86113d6f1db6238dffc8e86076dee9c7de2cf1 100644 --- a/src/plugins/select/bluegene/bg_node_info.h +++ b/src/plugins/select/bluegene/bg_node_info.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_read_config.c b/src/plugins/select/bluegene/bg_read_config.c index ec3580715f6e1e0199e44bbaaf5915a751033e1a..4583d180ccca3c6746fb86c9edb850a73bf652a4 100644 --- a/src/plugins/select/bluegene/bg_read_config.c +++ b/src/plugins/select/bluegene/bg_read_config.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -101,28 +101,6 @@ static int _reopen_bridge_log(void) return rc; } -static char *_get_bg_conf(void) -{ - char *val = getenv("SLURM_CONF"); - char *rc = NULL; - int i; - - if (!val) - return xstrdup(BLUEGENE_CONFIG_FILE); - - /* Replace file name on end of path */ - i = strlen(val) - strlen("slurm.conf") + strlen("bluegene.conf") + 1; - rc = xmalloc(i); - strcpy(rc, val); - val = strrchr(rc, (int)'/'); - if (val) /* absolute path */ - val++; - else /* not absolute path */ - val = rc; - strcpy(val, "bluegene.conf"); - return rc; -} - static void _destroy_bitmap(void *object) { bitstr_t *bitstr = (bitstr_t *)object; @@ -362,7 +340,7 @@ extern int read_bg_conf(void) info("Reading the bluegene.conf file"); /* check if config file has changed */ - bg_conf_file = _get_bg_conf(); + bg_conf_file = get_extra_conf_path("bluegene.conf"); if (stat(bg_conf_file, &config_stat) < 0) fatal("can't stat bluegene.conf file %s: %m", bg_conf_file); diff --git a/src/plugins/select/bluegene/bg_read_config.h b/src/plugins/select/bluegene/bg_read_config.h index 041d93d11b8a7fce7b9c4637fc3836ac5aff72db..054d9ad7433ea17033f3ebb0d30eff6e3e6c78ba 100644 --- a/src/plugins/select/bluegene/bg_read_config.h +++ b/src/plugins/select/bluegene/bg_read_config.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_record_functions.c b/src/plugins/select/bluegene/bg_record_functions.c index 0041dc89bd4c563bd415512ffbdff2f273d84c8a..636bf960e3999f6fe002663ed5ed8677ce269603 100644 --- a/src/plugins/select/bluegene/bg_record_functions.c +++ b/src/plugins/select/bluegene/bg_record_functions.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -549,6 +549,42 @@ extern int bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b) return bg_record_cmpf_inc(rec_a, rec_b); } +/* set up structures needed for sub block jobs. */ +extern void setup_subblock_structs(bg_record_t *bg_record) +{ + ba_mp_t *ba_mp; + + xassert(bg_record); + + if (!bg_conf->sub_blocks || bg_record->mp_count != 1) + return; + + xassert(bg_record->ba_mp_list); + + ba_mp = list_peek(bg_record->ba_mp_list); + xassert(ba_mp); + + /* This will be a list containing jobs running on this + block */ + if (!bg_record->job_list) + bg_record->job_list = list_create(NULL); + + /* Create these now so we can deal with error + cnodes if/when they happen. Since this is + the easiest place to figure it out for + blocks that don't use the entire block */ + FREE_NULL_BITMAP(ba_mp->cnode_bitmap); + if ((ba_mp->cnode_bitmap = + ba_create_ba_mp_cnode_bitmap(bg_record))) { + FREE_NULL_BITMAP(ba_mp->cnode_err_bitmap); + FREE_NULL_BITMAP(ba_mp->cnode_usable_bitmap); + ba_mp->cnode_err_bitmap = + bit_alloc(bg_conf->mp_cnode_cnt); + ba_mp->cnode_usable_bitmap = + bit_copy(ba_mp->cnode_bitmap); + } +} + /* Try to requeue job running on block and put block in an error state. 
* block_state_mutex and slurmctld must be unlocked before calling this. */ @@ -737,25 +773,8 @@ extern int add_bg_record(List records, List *used_nodes, "destroying this mp list"); list_destroy(bg_record->ba_mp_list); bg_record->ba_mp_list = NULL; - } else if (bg_conf->sub_blocks && bg_record->mp_count == 1) { - ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list); - xassert(ba_mp); - /* This will be a list containing jobs running on this - block */ - bg_record->job_list = list_create(NULL); - - /* Create these now so we can deal with error - cnodes if/when they happen. Since this is - the easiest place to figure it out for - blocks that don't use the entire block */ - if ((ba_mp->cnode_bitmap = - ba_create_ba_mp_cnode_bitmap(bg_record))) { - ba_mp->cnode_err_bitmap = - bit_alloc(bg_conf->mp_cnode_cnt); - ba_mp->cnode_usable_bitmap = - bit_copy(ba_mp->cnode_bitmap); - } - } + } else + setup_subblock_structs(bg_record); } else { List ba_mp_list = NULL; diff --git a/src/plugins/select/bluegene/bg_record_functions.h b/src/plugins/select/bluegene/bg_record_functions.h index 335da0732b8859f1601fe3a0521697d85412c716..3ef4c76effc3a4921eae122a340c178fd6fa3a82 100644 --- a/src/plugins/select/bluegene/bg_record_functions.h +++ b/src/plugins/select/bluegene/bg_record_functions.h @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -66,6 +66,7 @@ extern List copy_bg_list(List in_list); extern void copy_bg_record(bg_record_t *fir_record, bg_record_t *sec_record); extern int bg_record_cmpf_inc(bg_record_t *rec_a, bg_record_t *rec_b); extern int bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b); +extern void setup_subblock_structs(bg_record_t *bg_record); /* change username of a block bg_record_t target_name needs to be updated before call of function. diff --git a/src/plugins/select/bluegene/bg_status.c b/src/plugins/select/bluegene/bg_status.c index 84ec866d0a9e097044f2e05f6def9a2c3c2297e3..af55356d95c92a10d7a9ada71715782bd1591bbc 100644 --- a/src/plugins/select/bluegene/bg_status.c +++ b/src/plugins/select/bluegene/bg_status.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_status.h b/src/plugins/select/bluegene/bg_status.h index 8a988bbe0783d13a9a6975df963b46f709c74991..018cd7762a37a80bb4dc1e89472021c82cf5f468 100644 --- a/src/plugins/select/bluegene/bg_status.h +++ b/src/plugins/select/bluegene/bg_status.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bg_structs.h b/src/plugins/select/bluegene/bg_structs.h index fe265963c6dc503c4244a930500a3406b034e4fc..71dc09965234156d57d578faa3b18e416ff408ac 100644 --- a/src/plugins/select/bluegene/bg_structs.h +++ b/src/plugins/select/bluegene/bg_structs.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl/Makefile.in b/src/plugins/select/bluegene/bl/Makefile.in index 90c6963abfbaf108d32f00e600a50309c70dca43..a84e49a7ad2eb446850280619a97abb61288caa0 100644 --- a/src/plugins/select/bluegene/bl/Makefile.in +++ b/src/plugins/select/bluegene/bl/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/bluegene/bl DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ 
$(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -153,6 +157,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -173,6 +179,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -182,6 +191,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -189,6 +200,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -223,6 +243,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -250,6 +273,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ 
+RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/bluegene/bl/bridge_linker.c b/src/plugins/select/bluegene/bl/bridge_linker.c index abfd8bddb745d6941f88cfc8a987f074f894ec0a..9818a8c65a479cdfea11287258b210630774d0ac 100644 --- a/src/plugins/select/bluegene/bl/bridge_linker.c +++ b/src/plugins/select/bluegene/bl/bridge_linker.c @@ -7,7 +7,7 @@ * Written by Dan Phung <phung4@llnl.gov>, Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl/bridge_status.c b/src/plugins/select/bluegene/bl/bridge_status.c index 656d73f14fe707cce9cdd035f5446abdf9d7710c..8a527e35a77dba4db455dded01d09851c0f81614 100644 --- a/src/plugins/select/bluegene/bl/bridge_status.c +++ b/src/plugins/select/bluegene/bl/bridge_status.c @@ -8,7 +8,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl/bridge_status.h b/src/plugins/select/bluegene/bl/bridge_status.h index 5cecc5d01bf894611411bb06423e81a82e30a1cc..87573963be780eaaf06013b14d8fca47b7ba67a5 100644 --- a/src/plugins/select/bluegene/bl/bridge_status.h +++ b/src/plugins/select/bluegene/bl/bridge_status.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl/bridge_switch_connections.c b/src/plugins/select/bluegene/bl/bridge_switch_connections.c index 549eeb6faffedbe9f7edfa810a638e54e1456fa1..ad5e06e0cd3331e5167d6bb249714ebed6404908 100644 --- a/src/plugins/select/bluegene/bl/bridge_switch_connections.c +++ b/src/plugins/select/bluegene/bl/bridge_switch_connections.c @@ -9,7 +9,7 @@ * Written by Dan Phung <phung4@llnl.gov> and Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl/bridge_switch_connections.h b/src/plugins/select/bluegene/bl/bridge_switch_connections.h index c58344ae12a1193beab683ea1afc1a369464757e..d613da24d20165f651cf2b9bc66ded082827bd7d 100644 --- a/src/plugins/select/bluegene/bl/bridge_switch_connections.h +++ b/src/plugins/select/bluegene/bl/bridge_switch_connections.h @@ -9,7 +9,7 @@ * Written by Dan Phung <phung4@llnl.gov> and Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl_bgq/Makefile.in b/src/plugins/select/bluegene/bl_bgq/Makefile.in index 59c3f97eaabca2a878257b6fb14567bca93940fe..560108ad40e77ce207c57d2a37efb89c6d27dff9 100644 --- a/src/plugins/select/bluegene/bl_bgq/Makefile.in +++ b/src/plugins/select/bluegene/bl_bgq/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/bluegene/bl_bgq DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -162,6 +166,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -182,6 +188,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = 
@FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -191,6 +200,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -198,6 +209,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -232,6 +252,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -259,6 +282,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc index 7bae1d74d88accad50d0e2d465470a9ed46a8b59..34f933aab29589c5449191fe76cba7906be217fa 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc +++ b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_helper.h b/src/plugins/select/bluegene/bl_bgq/bridge_helper.h index d01b05775e8464b2d3613a973c616634ad55d271..87bc148522933d528c21db9e8ac66f672ef99c76 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_helper.h +++ b/src/plugins/select/bluegene/bl_bgq/bridge_helper.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc b/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc index cc0d9e3b2a960b8b6769e5a04ab46212c1ecca91..80dd72b04ef7c0b8834623e9fcc2c8e501ec5e15 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc +++ b/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc index c45fd60adadca08c735e8d766cfa9fe260eb7f4d..bf660123b8b007d0ca5254252c294d16a9350eb3 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc +++ b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_status.h b/src/plugins/select/bluegene/bl_bgq/bridge_status.h index 326d3c27a5005f2887a7d61c66483b596f77776d..3cd97367394226da687fb3886378fb1e0c59fdb8 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_status.h +++ b/src/plugins/select/bluegene/bl_bgq/bridge_status.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/bridge_linker.h b/src/plugins/select/bluegene/bridge_linker.h index b8fa501664d4fc839ac52469252430dcbb5f54b7..8e47d8c62ef00373481d62cb7e7cb822c9a70bee 100644 --- a/src/plugins/select/bluegene/bridge_linker.h +++ b/src/plugins/select/bluegene/bridge_linker.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@llnl.gov> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/configure_api.c b/src/plugins/select/bluegene/configure_api.c index 22a94b78d8ce85e12cfb19bfe5a49b87e1c0dbfb..ad500cd07f6abf6f1a90285433cc08551a3307dc 100644 --- a/src/plugins/select/bluegene/configure_api.c +++ b/src/plugins/select/bluegene/configure_api.c @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/configure_api.h b/src/plugins/select/bluegene/configure_api.h index 86e240ccff46b2554b369bac8fc6836ad4605fb7..0f007e18b73e06afc0d4a9b0062e8d3b07838c55 100644 --- a/src/plugins/select/bluegene/configure_api.h +++ b/src/plugins/select/bluegene/configure_api.h @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/libsched_if64.c b/src/plugins/select/bluegene/libsched_if64.c index 82a0121ab07679c4f4d6ec915bcb5dbd5d9ad2d6..6020ed394727d53eeef2ea0f9e2b59e4f2a38710 100644 --- a/src/plugins/select/bluegene/libsched_if64.c +++ b/src/plugins/select/bluegene/libsched_if64.c @@ -10,7 +10,7 @@ * Written by Danny Auble <auble1@llnl.gov> et. al. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/runjob_plugin.cc b/src/plugins/select/bluegene/runjob_plugin.cc index 71c4337ee023e6ee06182d922070c7abb4744f6f..78c7568fa7c97780dd177dfb390004d970060d3f 100644 --- a/src/plugins/select/bluegene/runjob_plugin.cc +++ b/src/plugins/select/bluegene/runjob_plugin.cc @@ -12,7 +12,7 @@ * Written by Danny Auble <da@schedmd.com> et. al. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -63,8 +63,25 @@ extern "C" { #include <boost/foreach.hpp> #include <boost/lexical_cast.hpp> +#include <log4cxx/logger.h> + #include <iosfwd> +static log4cxx::LoggerPtr slurm_ibm_logger( + log4cxx::Logger::getLogger("ibm.runjob.mux.slurm")); + +#define LOG_DEBUG_MSG(message_expr) \ + LOG4CXX_DEBUG(slurm_ibm_logger, message_expr) + +#define LOG_INFO_MSG(message_expr) \ + LOG4CXX_INFO(slurm_ibm_logger, message_expr) + +#define LOG_WARN_MSG(message_expr) \ + LOG4CXX_WARN(slurm_ibm_logger, message_expr) + +#define LOG_ERROR_MSG(message_expr) \ + LOG4CXX_ERROR(slurm_ibm_logger, message_expr) + using namespace bgsched; class Plugin : public bgsched::runjob::Plugin @@ -126,8 +143,8 @@ static void _send_failed_cnodes(uint32_t job_id, uint32_t step_id, uint16_t sig) if ((count > max_tries) || rc == ESLURM_ALREADY_DONE || rc == ESLURM_INVALID_JOB_ID) break; - std::cerr << "Trying to fail cnodes, message from slurmctld: " - << slurm_strerror(rc) << std::endl; + LOG_WARN_MSG("Trying to fail cnodes, message from slurmctld: " + << slurm_strerror(rc)); sleep (5); count++; } @@ -142,13 +159,13 @@ Plugin::Plugin() : runjob_list = list_create(_destroy_runjob_job); - std::cerr << "Slurm runjob plugin 
loaded version " - << SLURM_VERSION_STRING << std::endl; + LOG_INFO_MSG("Slurm runjob plugin loaded version " + << SLURM_VERSION_STRING); } Plugin::~Plugin() { - std::cerr << "Slurm runjob plugin finished" << std::endl; + LOG_INFO_MSG("Slurm runjob plugin finished"); slurm_mutex_lock(&runjob_list_lock); list_destroy(runjob_list); runjob_list = NULL; @@ -157,6 +174,7 @@ Plugin::~Plugin() void Plugin::execute(bgsched::runjob::Verify& verify) { + LOG_DEBUG_MSG("Verify - Start"); boost::lock_guard<boost::mutex> lock( _mutex ); unsigned geo[Dimension::NodeDims]; unsigned start_coords[Dimension::NodeDims]; @@ -368,6 +386,7 @@ void Plugin::execute(bgsched::runjob::Verify& verify) slurm_mutex_unlock(&runjob_list_lock); slurm_free_job_step_info_response_msg(step_resp); + LOG_DEBUG_MSG("Verify - Done"); return; deny_job: @@ -379,6 +398,7 @@ deny_job: void Plugin::execute(const bgsched::runjob::Started& data) { + LOG_DEBUG_MSG("Started start"); boost::lock_guard<boost::mutex> lock( _mutex ); // ListIterator itr = NULL; // runjob_job_t *runjob_job = NULL; @@ -398,10 +418,12 @@ void Plugin::execute(const bgsched::runjob::Started& data) // list_iterator_destroy(itr); // } // slurm_mutex_unlock(&runjob_list_lock); + LOG_DEBUG_MSG("Started - Done"); } void Plugin::execute(const bgsched::runjob::Terminated& data) { + LOG_DEBUG_MSG("Terminated - Start"); ListIterator itr = NULL; runjob_job_t *runjob_job = NULL; uint16_t sig = 0; @@ -428,21 +450,21 @@ void Plugin::execute(const bgsched::runjob::Terminated& data) if (!runjob_job) { if (runjob_list) - std::cerr << "Couldn't find job running with pid, " - << data.pid() << " ID " << data.job() - << std::endl; + LOG_ERROR_MSG("Couldn't find job running with pid, " + << data.pid() << " ID " << data.job()); + } else if (data.kill_timeout()) { - std::cerr << runjob_job->job_id << "." << runjob_job->step_id - << " had a kill_timeout()" << std::endl; + LOG_ERROR_MSG(runjob_job->job_id << "." 
<< runjob_job->step_id + << " had a kill_timeout()"); /* In an older driver this wasn't always caught, so send it. */ sig = SIG_NODE_FAIL; } else if (!data.message().empty()) { - std::cerr << runjob_job->job_id << "." << runjob_job->step_id - << " had a message of '" << data.message() - << "'. (" - << runjob_job->total_cnodes << ")" << std::endl; + LOG_ERROR_MSG(runjob_job->job_id << "." << runjob_job->step_id + << " had a message of '" << data.message() + << "'. (" + << runjob_job->total_cnodes << ")"); } // else if (data.status() == 9) // sig = SIGKILL; @@ -451,6 +473,7 @@ void Plugin::execute(const bgsched::runjob::Terminated& data) runjob_job->job_id, runjob_job->step_id, sig); _destroy_runjob_job(runjob_job); + LOG_DEBUG_MSG("Terminated - Done"); } extern "C" bgsched::runjob::Plugin* create() diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 80d3bb5d9c52b0fd349f095747951f8791714908..3b528d5dbed0427769caa8f4a0fd0cfb1dbb74af 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -1106,9 +1106,6 @@ static List _get_config(void) config_key_pair_t *key_pair; List my_list = list_create(destroy_config_key_pair); - if (!my_list) - fatal("malloc failure on list_create"); - key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("DefaultConnType"); key_pair->value = conn_type_string_full(bg_conf->default_conn_type); @@ -1932,8 +1929,7 @@ extern bitstr_t *select_p_step_pick_nodes(struct job_record *job_ptr, bg_record->bg_block_id, job_ptr->job_id); goto end_it; } - if (!(picked_mps = bit_copy(job_ptr->node_bitmap))) - fatal("bit_copy malloc failure"); + picked_mps = bit_copy(job_ptr->node_bitmap); if (cluster_flags & CLUSTER_FLAG_BGQ) { bitstr_t *used_bitmap; @@ -1996,8 +1992,7 @@ extern bitstr_t *select_p_step_pick_nodes(struct job_record *job_ptr, FREE_NULL_BITMAP(total_bitmap); node_count = step_jobinfo->cnode_cnt; - if (!(picked_mps = bit_copy(job_ptr->node_bitmap))) - fatal("bit_copy malloc failure"); + picked_mps = bit_copy(job_ptr->node_bitmap); bit_or(jobinfo->units_used, step_jobinfo->units_used); for (dim = 0; dim < step_jobinfo->dim_cnt; dim++) { /* The IBM software works off a relative @@ -2021,8 +2016,7 @@ extern bitstr_t *select_p_step_pick_nodes(struct job_record *job_ptr, } } else if ((ba_mp = ba_sub_block_in_record( bg_record, &node_count, step_jobinfo))) { - if (!(picked_mps = bit_alloc(bit_size(job_ptr->node_bitmap)))) - fatal("bit_copy malloc failure"); + picked_mps = bit_alloc(bit_size(job_ptr->node_bitmap)); bit_set(picked_mps, ba_mp->index); for (dim = 0; dim < step_jobinfo->dim_cnt; dim++) { /* The IBM software works off a relative @@ -3381,8 +3375,6 @@ extern bitstr_t *select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, tmp_bitmap = bit_copy(avail_bitmap); preemptee_candidates = list_create(NULL); - if (preemptee_candidates == NULL) - fatal("list_create: malloc failure"); rc = submit_job(&job_rec, tmp_bitmap, node_cnt, 
node_cnt, node_cnt, SELECT_MODE_WILL_RUN, preemptee_candidates, diff --git a/src/plugins/select/bluegene/sfree/Makefile.in b/src/plugins/select/bluegene/sfree/Makefile.in index 02a6eae072e7145b7217585b2b6574f0709a2bca..d877bc328ae33350ac2ef7a941aa5842959e6824 100644 --- a/src/plugins/select/bluegene/sfree/Makefile.in +++ b/src/plugins/select/bluegene/sfree/Makefile.in @@ -59,6 +59,7 @@ subdir = src/plugins/select/bluegene/sfree DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -156,6 +160,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -176,6 +182,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ 
+FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -185,6 +194,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -192,6 +203,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -226,6 +246,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -253,6 +276,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/bluegene/sfree/opts.c b/src/plugins/select/bluegene/sfree/opts.c index 290d375642c830d37f5c36318daac15f4f88abd8..9184f653169988983d11ef09a71fa2197937f5f8 100644 --- a/src/plugins/select/bluegene/sfree/opts.c +++ b/src/plugins/select/bluegene/sfree/opts.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/sfree/sfree.c b/src/plugins/select/bluegene/sfree/sfree.c index 528eeb6cd917f889f738a888945d6582d62f26f0..14aa8dc646137b519f837f6ee1178a46dcfc3d7c 100644 --- a/src/plugins/select/bluegene/sfree/sfree.c +++ b/src/plugins/select/bluegene/sfree/sfree.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/sfree/sfree.h b/src/plugins/select/bluegene/sfree/sfree.h index ed3f0bf079bfc33126fc8d6b336a0ae7cab9f595..83028ce31da7abc6fcf8f2add79e275e1d5a9877 100644 --- a/src/plugins/select/bluegene/sfree/sfree.h +++ b/src/plugins/select/bluegene/sfree/sfree.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/slurm_epilog.c b/src/plugins/select/bluegene/slurm_epilog.c index 15b41a938118c94c84b3d32e35b4bb4da4ecb3ad..8fc01b7821619a049af62e04d0b6c9c3dab28434 100644 --- a/src/plugins/select/bluegene/slurm_epilog.c +++ b/src/plugins/select/bluegene/slurm_epilog.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/bluegene/slurm_prolog.c b/src/plugins/select/bluegene/slurm_prolog.c index 86e171d1404b7cd0ce069b4bbd7e9639d86f4a46..597ba3dec9a8b5664414ad0173eba7f6c7a5b284 100644 --- a/src/plugins/select/bluegene/slurm_prolog.c +++ b/src/plugins/select/bluegene/slurm_prolog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/cons_res/Makefile.in b/src/plugins/select/cons_res/Makefile.in index 377c2af730d9c731eeb31ba6fd3f2956550213a1..ae5eefe892dfb13d98c04b614ccd9fffc5fb3bc2 100644 --- a/src/plugins/select/cons_res/Makefile.in +++ b/src/plugins/select/cons_res/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/cons_res DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ 
$(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = 
@REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index 3d2732c511e74d9594fae44f698368fc7e48ef46..2c10e1f842ae37099b3b47bc3510157e0040cfe0 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -9,7 +9,7 @@ * Written by Martin Perry <martin.perry@bull.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -41,7 +41,7 @@ #include "select_cons_res.h" #include "dist_tasks.h" -#if(0) +#if (0) /* Using CR_SOCKET or CR_SOCKET_MEMORY will not allocate a socket to more * than one job at a time, but it also will not grant a job access to more * CPUs on the socket than requested. 
If ALLOCATE_FULL_SOCKET is defined, @@ -134,8 +134,11 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) uint32_t n, i, tid, maxtasks, l; uint16_t *avail_cpus; job_resources_t *job_res = job_ptr->job_resrcs; - if (!job_res || !job_res->cpus) { - error("cons_res: _compute_c_b_task_dist given NULL job_ptr"); + bool log_over_subscribe = true; + + if (!job_res || !job_res->cpus || !job_res->nhosts) { + error("cons_res: _compute_c_b_task_dist invalid allocation " + "for job %u", job_ptr->job_id); return SLURM_ERROR; } @@ -146,10 +149,12 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) /* ncpus is already set the number of tasks if overcommit is used */ if (!job_ptr->details->overcommit && (job_ptr->details->cpus_per_task > 1)) { - if (job_ptr->details->ntasks_per_node == 0) + if (job_ptr->details->ntasks_per_node == 0) { maxtasks = maxtasks / job_ptr->details->cpus_per_task; - else - maxtasks = job_ptr->details->ntasks_per_node * job_res->nhosts; + } else { + maxtasks = job_ptr->details->ntasks_per_node * + job_res->nhosts; + } } /* Safe guard if the user didn't specified a lower number of @@ -161,16 +166,20 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) } if (job_ptr->details->cpus_per_task == 0) job_ptr->details->cpus_per_task = 1; + if (job_ptr->details->overcommit) + log_over_subscribe = false; for (tid = 0, i = job_ptr->details->cpus_per_task ; (tid < maxtasks); i += job_ptr->details->cpus_per_task ) { /* cycle counter */ bool space_remaining = false; - if (over_subscribe) { + if (over_subscribe && log_over_subscribe) { /* 'over_subscribe' is a relief valve that guards * against an infinite loop, and it *should* never * come into play because maxtasks should never be * greater than the total number of available cpus */ - error("cons_res: _compute_c_b_task_dist oversubscribe"); + error("cons_res: _compute_c_b_task_dist " + "oversubscribe for job %u", job_ptr->job_id); + log_over_subscribe = false /* Log once per job 
*/; } for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) { if ((i <= avail_cpus[n]) || over_subscribe) { @@ -200,8 +209,11 @@ static int _compute_plane_dist(struct job_record *job_ptr) uint32_t n, i, p, tid, maxtasks, l; uint16_t *avail_cpus, plane_size = 1; job_resources_t *job_res = job_ptr->job_resrcs; - if (!job_res || !job_res->cpus) { - error("cons_res: _compute_plane_dist given NULL job_res"); + bool log_over_subscribe = true; + + if (!job_res || !job_res->cpus || !job_res->nhosts) { + error("cons_res: _compute_c_b_task_dist invalid allocation " + "for job %u", job_ptr->job_id); return SLURM_ERROR; } @@ -220,16 +232,19 @@ static int _compute_plane_dist(struct job_record *job_ptr) return SLURM_ERROR; } job_res->cpus = xmalloc(job_res->nhosts * sizeof(uint16_t)); - + if (job_ptr->details->overcommit) + log_over_subscribe = false; for (tid = 0, i = 0; (tid < maxtasks); i++) { /* cycle counter */ bool space_remaining = false; - if (over_subscribe) { + if (over_subscribe && log_over_subscribe) { /* 'over_subscribe' is a relief valve that guards * against an infinite loop, and it *should* never * come into play because maxtasks should never be * greater than the total number of available cpus */ - error("cons_res: _compute_plane_dist oversubscribe"); + error("cons_res: _compute_plane_dist oversubscribe " + "for job %u", job_ptr->job_id); + log_over_subscribe = false /* Log once per job */; } for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) { for (p = 0; p < plane_size && (tid < maxtasks); p++) { @@ -326,13 +341,13 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, if (cr_type & CR_CORE) alloc_cores = true; -#ifdef ALLOCATE_FULL_SOCKET - if (cr_type & CR_SOCKET) - alloc_sockets = true; -#else - if (cr_type & CR_SOCKET) - alloc_cores = true; -#endif + if (slurmctld_conf.select_type_param & CR_ALLOCATE_FULL_SOCKET) { + if (cr_type & CR_SOCKET) + alloc_sockets = true; + } else { + if (cr_type & CR_SOCKET) + alloc_cores = true; + 
} if (job_ptr->details && job_ptr->details->mc_ptr) { multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr; @@ -552,10 +567,12 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, if ( best_fit_cpus == 0 ) break; + j = best_fit_location; + if (sock_per_brd) + j /= sock_per_brd; debug3("cons_res: best_fit: using node[%u]: " "board[%u]: socket[%u]: %u cores available", - n, best_fit_location/sock_per_brd, - best_fit_location, + n, j, best_fit_location, sockets_cpu_cnt[best_fit_location]); /* select socket cores from last to first */ @@ -658,21 +675,20 @@ static int _cyclic_sync_core_bitmap(struct job_record *job_ptr, bool *sock_used, *sock_avoid; bool alloc_cores = false, alloc_sockets = false; uint16_t ntasks_per_core = 0xffff, ntasks_per_socket = 0xffff; - int error_code = SLURM_SUCCESS, socket_best_fit; - uint32_t total_cpus, *cpus_cnt; + int error_code = SLURM_SUCCESS; if ((job_res == NULL) || (job_res->core_bitmap == NULL)) return error_code; if (cr_type & CR_CORE) alloc_cores = true; -#ifdef ALLOCATE_FULL_SOCKET - if (cr_type & CR_SOCKET) - alloc_sockets = true; -#else - if (cr_type & CR_SOCKET) - alloc_cores = true; -#endif + if (slurmctld_conf.select_type_param & CR_ALLOCATE_FULL_SOCKET) { + if (cr_type & CR_SOCKET) + alloc_sockets = true; + } else { + if (cr_type & CR_SOCKET) + alloc_cores = true; + } core_map = job_res->core_bitmap; if (job_ptr->details && job_ptr->details->mc_ptr) { multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr; @@ -732,48 +748,33 @@ static int _cyclic_sync_core_bitmap(struct job_record *job_ptr, core_cnt = 0; cpus = job_res->cpus[i]; - /* Pack job onto socket(s) with best fit */ - socket_best_fit = -1; - total_cpus = 0; - cpus_cnt = xmalloc(sizeof(uint32_t)* sockets); - for (s = 0; s < sockets; s++) { - for (j = sock_start[s]; j < sock_end[s]; j++) { - if (bit_test(core_map, j)) - cpus_cnt[s] += vpus; - } - total_cpus += cpus_cnt[s]; - } - for (s = 0; s < sockets && total_cpus > cpus; s++) { - if ((ntasks_per_socket 
!= 0xffff) && - (cpus_cnt[s] > ntasks_per_socket)) { - int x_cpus = cpus_cnt[s] - ntasks_per_socket; - x_cpus = MIN(x_cpus, (total_cpus - cpus)); - cpus_cnt[s] -= x_cpus; - total_cpus -= x_cpus; - } - if ((cpus_cnt[s] >= cpus) && - ((socket_best_fit == -1) || - (cpus_cnt[s] < cpus_cnt[socket_best_fit]))) - socket_best_fit = s; - } - if (socket_best_fit != -1) { - /* Use one socket with best fit, avoid all others */ + if (ntasks_per_socket != 0xffff) { + int x_cpus; + uint32_t total_cpus = 0; + uint32_t *cpus_cnt = xmalloc(sizeof(uint32_t)* sockets); for (s = 0; s < sockets; s++) { - if (s != socket_best_fit) - sock_avoid[s] = true; + for (j = sock_start[s]; j < sock_end[s]; j++) { + if (bit_test(core_map, j)) + cpus_cnt[s] += vpus; + } + total_cpus += cpus_cnt[s]; + } + for (s = 0; s < sockets && total_cpus > cpus; s++) { + if (cpus_cnt[s] > ntasks_per_socket) { + x_cpus = cpus_cnt[s] -ntasks_per_socket; + cpus_cnt[s] = ntasks_per_socket; + total_cpus -= x_cpus; + } } - total_cpus = cpus; - } else if (ntasks_per_socket != 0xffff) { - /* Avoid sockets that can't start ntasks */ for (s = 0; s < sockets && total_cpus > cpus; s++) { if ((cpus_cnt[s] <= ntasks_per_socket) && - ((total_cpus - cpus_cnt[s]) >= cpus)) { + (total_cpus - cpus_cnt[s] >= cpus)) { sock_avoid[s] = true; total_cpus -= cpus_cnt[s]; } } + xfree(cpus_cnt); } - xfree(cpus_cnt); while (cpus > 0) { uint16_t prev_cpus = cpus; diff --git a/src/plugins/select/cons_res/dist_tasks.h b/src/plugins/select/cons_res/dist_tasks.h index df177ddb5c297c956fed6a6fa2a206f6ee7ee9a4..30b2bf2b44896174d334ed26453c4958b385e1e1 100644 --- a/src/plugins/select/cons_res/dist_tasks.h +++ b/src/plugins/select/cons_res/dist_tasks.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 42bbbfcb1b0176bf0a8cf38c79c7059055b33a0f..6619139248cc09989df5f057832d88107e93d5fe 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -62,7 +62,7 @@ * from select/linear * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -121,12 +121,13 @@ static int _eval_nodes_topo(struct job_record *job_ptr, bitstr_t *node_map, * this node AND a core-level bitmap of the selected * sockets. * - * IN job_ptr - pointer to job requirements - * IN/OUT core_map - core_bitmap of available cores - * IN node_i - index of node to be evaluated + * IN job_ptr - pointer to job requirements + * IN/OUT core_map - core_bitmap of available cores + * IN part_core_map - bitmap of cores already allocated from this partition + * IN node_i - index of node to be evaluated */ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, - const uint32_t node_i) + bitstr_t *part_core_map, const uint32_t node_i) { uint16_t cpu_count = 0, cpu_cnt = 0; uint16_t si, cps, avail_cpus = 0, num_tasks = 0; @@ -140,13 +141,14 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, uint16_t threads_per_core = select_node_record[node_i].vpus; uint16_t min_cores = 1, min_sockets = 1, ntasks_per_socket = 0; uint16_t ntasks_per_core = 0xffff; + uint32_t free_cpu_count = 0, used_cpu_count = 0, *used_cpu_array = NULL; if (job_ptr->details && job_ptr->details->mc_ptr) { multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr; if (mc_ptr->cores_per_socket != (uint16_t) NO_VAL) { min_cores = mc_ptr->cores_per_socket; } - if 
(mc_ptr->sockets_per_node != (uint16_t) NO_VAL){ + if (mc_ptr->sockets_per_node != (uint16_t) NO_VAL) { min_sockets = mc_ptr->sockets_per_node; } if (mc_ptr->ntasks_per_core) { @@ -216,6 +218,7 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, * arrays and total core counts */ free_cores = xmalloc(sockets * sizeof(uint16_t)); used_cores = xmalloc(sockets * sizeof(uint16_t)); + used_cpu_array = xmalloc(sockets * sizeof(uint32_t)); for (c = core_begin; c < core_end; c++) { i = (uint16_t) (c - core_begin) / cores_per_socket; @@ -225,18 +228,41 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, } else { used_cores[i]++; } + if (part_core_map && bit_test(part_core_map, c)) + used_cpu_array[i]++; } - /* if a socket is already in use, it cannot be used - * by this job */ + /* if a socket is already in use, it cannot be used by this job */ for (i = 0; i < sockets; i++) { if (used_cores[i]) { free_core_count -= free_cores[i]; used_cores[i] += free_cores[i]; free_cores[i] = 0; } + free_cpu_count += free_cores[i] * threads_per_core; + if (used_cpu_array[i]) + used_cpu_count = used_cores[i] * threads_per_core; } xfree(used_cores); - used_cores = NULL; + xfree(used_cpu_array); + + /* Ignore resources that would push a job allocation over the + * partition CPU limit (if any) */ + if ((job_ptr->part_ptr->max_cpus_per_node != INFINITE) && + (free_cpu_count + used_cpu_count > + job_ptr->part_ptr->max_cpus_per_node)) { + int excess = free_cpu_count + used_cpu_count - + job_ptr->part_ptr->max_cpus_per_node; + for (c = core_begin; c < core_end; c++) { + i = (uint16_t) (c - core_begin) / cores_per_socket; + if (free_cores[i] > 0) { + free_core_count--; + free_cores[i]--; + excess -= threads_per_core; + if (excess <= 0) + break; + } + } + } /* Step 2: check min_cores per socket and min_sockets per node */ j = 0; @@ -290,7 +316,6 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, if (cpus_per_task < 2) { 
avail_cpus = num_tasks; - cps = num_tasks; } else { j = avail_cpus / cpus_per_task; num_tasks = MIN(num_tasks, j); @@ -310,7 +335,7 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, * allocating cores */ cps = num_tasks; - if (ntasks_per_socket > 1) { + if (ntasks_per_socket >= 1) { cps = ntasks_per_socket; if (cpus_per_task > 1) cps = ntasks_per_socket * cpus_per_task; @@ -375,15 +400,17 @@ fini: * job. Returns the number of cpus that can be used by * this node AND a bitmap of the selected cores. * - * IN job_ptr - pointer to job requirements - * IN/OUT core_map - bitmap of cores available for use/selected for use - * IN node_i - index of node to be evaluated - * IN cpu_type - if true, allocate CPUs rather than cores + * IN job_ptr - pointer to job requirements + * IN/OUT core_map - bitmap of cores available for use/selected for use + * IN part_core_map - bitmap of cores already allocated from this partition + * IN node_i - index of node to be evaluated + * IN cpu_type - if true, allocate CPUs rather than cores */ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, - const uint32_t node_i, bool cpu_type) + bitstr_t *part_core_map, const uint32_t node_i, + bool cpu_type) { - uint16_t avail_cpus = 0, num_tasks = 0; + uint16_t avail_cpus = 0, num_tasks = 0, total_cpus = 0; uint32_t core_begin = cr_get_coremap_offset(node_i); uint32_t core_end = cr_get_coremap_offset(node_i+1); uint32_t c; @@ -394,6 +421,7 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, uint16_t threads_per_core = select_node_record[node_i].vpus; uint16_t min_cores = 1, min_sockets = 1; uint16_t ntasks_per_core = 0xffff; + uint32_t free_cpu_count = 0, used_cpu_count = 0; if (job_ptr->details && job_ptr->details->mc_ptr) { multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr; @@ -471,6 +499,26 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, if (bit_test(core_map, c)) { free_cores[i]++; 
free_core_count++; + free_cpu_count += threads_per_core; + } + if (part_core_map && bit_test(part_core_map, c)) + used_cpu_count += threads_per_core; + + } + if ((job_ptr->part_ptr->max_cpus_per_node != INFINITE) && + (free_cpu_count + used_cpu_count > + job_ptr->part_ptr->max_cpus_per_node)) { + int excess = free_cpu_count + used_cpu_count - + job_ptr->part_ptr->max_cpus_per_node; + for (c = core_begin; c < core_end; c++) { + i = (uint16_t) (c - core_begin) / cores_per_socket; + if (free_cores[i] > 0) { + free_cores[i]--; + free_core_count--; + excess -= threads_per_core; + if (excess <= 0) + break; + } } } @@ -510,6 +558,7 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, /* convert from PER_CORE to TOTAL_FOR_NODE */ avail_cpus *= free_core_count; + total_cpus = avail_cpus; num_tasks *= free_core_count; /* If job requested exclusive rights to the node don't do the min here @@ -555,10 +604,11 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, bit_nclear(core_map, c, core_end-1); fini: - if (!num_tasks) { + if (!num_tasks) bit_nclear(core_map, core_begin, core_end-1); - } xfree(free_cores); + if ((job_ptr->details->shared == 0) && num_tasks) + return total_cpus; return num_tasks * cpus_per_task; } @@ -585,7 +635,7 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, const uint32_t node_i, struct node_use_record *node_usage, uint16_t cr_type, - bool test_only) + bool test_only, bitstr_t *part_core_map) { uint16_t cpus; uint32_t avail_mem, req_mem, gres_cores, gres_cpus, cpus_per_core; @@ -599,24 +649,36 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, return cpus; } + core_start_bit = cr_get_coremap_offset(node_i); + core_end_bit = cr_get_coremap_offset(node_i+1) - 1; + cpus_per_core = select_node_record[node_i].cpus / + (core_end_bit - core_start_bit + 1); + node_ptr = select_node_record[node_i].node_ptr; + if (node_usage[node_i].gres_list) + gres_list = 
node_usage[node_i].gres_list; + else + gres_list = node_ptr->gres_list; + + gres_plugin_job_core_filter(job_ptr->gres_list, gres_list, test_only, + core_map, core_start_bit, core_end_bit, + node_ptr->name); + if (cr_type & CR_CORE) { - cpus = _allocate_cores(job_ptr, core_map, node_i, false); + cpus = _allocate_cores(job_ptr, core_map, part_core_map, + node_i, false); /* cpu_alloc_size = CPUs per core */ cpu_alloc_size = select_node_record[node_i].vpus; } else if (cr_type & CR_SOCKET) { - cpus = _allocate_sockets(job_ptr, core_map, node_i); + cpus = _allocate_sockets(job_ptr, core_map, part_core_map, + node_i); /* cpu_alloc_size = CPUs per socket */ cpu_alloc_size = select_node_record[node_i].cores * select_node_record[node_i].vpus; } else { - cpus = _allocate_cores(job_ptr, core_map, node_i, true); + cpus = _allocate_cores(job_ptr, core_map, part_core_map, + node_i, true); cpu_alloc_size = 1; } - core_start_bit = cr_get_coremap_offset(node_i); - core_end_bit = cr_get_coremap_offset(node_i+1) - 1; - cpus_per_core = select_node_record[node_i].cpus / - (core_end_bit - core_start_bit + 1); - node_ptr = select_node_record[node_i].node_ptr; if (cr_type & CR_MEMORY) { /* Memory Check: check pn_min_memory to see if: @@ -643,10 +705,6 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, } } - if (node_usage[node_i].gres_list) - gres_list = node_usage[node_i].gres_list; - else - gres_list = node_ptr->gres_list; gres_cores = gres_plugin_job_test(job_ptr->gres_list, gres_list, test_only, core_map, core_start_bit, @@ -659,8 +717,17 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, ((job_ptr->details->cpus_per_task > 1) && (gres_cpus < job_ptr->details->cpus_per_task))) gres_cpus = 0; - while (gres_cpus < cpus) - cpus -= cpu_alloc_size; + + while (gres_cpus < cpus) { + if ((int) cpus < cpu_alloc_size) { + debug3("cons_res: cpu_alloc_size > cpus, cannot " + "continue (node: %s)", node_ptr->name); + cpus = 0; + break; + } 
else { + cpus -= cpu_alloc_size; + } + } if (cpus == 0) bit_nclear(core_map, core_start_bit, core_end_bit); @@ -712,12 +779,12 @@ static int _is_node_busy(struct part_res_record *p_ptr, uint32_t node_i, /* * Determine which of these nodes are usable by this job * - * Remove nodes from the bitmap that don't have enough memory or gres to + * Remove nodes from node_bitmap that don't have enough memory or gres to * support the job. * * Return SLURM_ERROR if a required node can't be used. * - * if node_state = NODE_CR_RESERVED, clear bitmap (if node is required + * if node_state = NODE_CR_RESERVED, clear node_bitmap (if node is required * then should we return NODE_BUSY!?!) * * if node_state = NODE_CR_ONE_ROW, then this node can only be used by @@ -728,15 +795,17 @@ static int _is_node_busy(struct part_res_record *p_ptr, uint32_t node_i, * - job_node_req = NODE_CR_ONE_ROW, then we need idle or non-sharing nodes */ static int _verify_node_state(struct part_res_record *cr_part_ptr, - struct job_record *job_ptr, bitstr_t * bitmap, + struct job_record *job_ptr, + bitstr_t *node_bitmap, uint16_t cr_type, struct node_use_record *node_usage, enum node_cr_state job_node_req) { struct node_record *node_ptr; - uint32_t i, free_mem, gres_cpus, gres_cores, min_mem, size; + uint32_t i, free_mem, gres_cpus, gres_cores, min_mem; int core_start_bit, core_end_bit, cpus_per_core; List gres_list; + int i_first, i_last; if (job_ptr->details->pn_min_memory & MEM_PER_CPU) { uint16_t min_cpus; @@ -749,9 +818,13 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, } else { min_mem = job_ptr->details->pn_min_memory; } - size = bit_size(bitmap); - for (i = 0; i < size; i++) { - if (!bit_test(bitmap, i)) + i_first = bit_ffs(node_bitmap); + if (i_first == -1) + i_last = -2; + else + i_last = bit_fls(node_bitmap); + for (i = i_first; i <= i_last; i++) { + if (!bit_test(node_bitmap, i)) continue; node_ptr = select_node_record[i].node_ptr; core_start_bit = cr_get_coremap_offset(i); 
@@ -839,7 +912,7 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, continue; /* node is usable, test next node */ clear_bit: /* This node is not usable by this job */ - bit_clear(bitmap, i); + bit_clear(node_bitmap, i); if (job_ptr->details->req_node_bitmap && bit_test(job_ptr->details->req_node_bitmap, i)) return SLURM_ERROR; @@ -905,7 +978,7 @@ static int _get_cpu_cnt(struct job_record *job_ptr, const int node_index, * * IN: job_ptr - pointer to the job requesting resources * IN: node_map - bitmap of available nodes - * IN/OUT: core_map - bitmap of available cores + * IN/OUT: core_map - bitmap of available cores * IN: cr_node_cnt - total number of nodes in the cluster * IN: cr_type - resource type * OUT: cpu_cnt - number of cpus that can be used by this job @@ -915,7 +988,7 @@ static void _get_res_usage(struct job_record *job_ptr, bitstr_t *node_map, bitstr_t *core_map, uint32_t cr_node_cnt, struct node_use_record *node_usage, uint16_t cr_type, uint16_t **cpu_cnt_ptr, - bool test_only) + bool test_only, bitstr_t *part_core_map) { uint16_t *cpu_cnt; uint32_t n; @@ -926,7 +999,7 @@ static void _get_res_usage(struct job_record *job_ptr, bitstr_t *node_map, continue; cpu_cnt[n] = _can_job_run_on_node(job_ptr, core_map, n, node_usage, cr_type, - test_only); + test_only, part_core_map); } *cpu_cnt_ptr = cpu_cnt; } @@ -1828,8 +1901,6 @@ static int _choose_nodes(struct job_record *job_ptr, bitstr_t *node_map, max_nodes = job_ptr->details->min_cpus; origmap = bit_copy(node_map); - if (origmap == NULL) - fatal("bit_copy malloc failure"); ec = _eval_nodes(job_ptr, node_map, min_nodes, max_nodes, req_nodes, cr_node_cnt, cpu_cnt); @@ -1883,6 +1954,8 @@ static int _choose_nodes(struct job_record *job_ptr, bitstr_t *node_map, * IN/OUT: core_map - bitmap of available cores / bitmap of selected cores * IN: cr_type - resource type * IN: test_only - ignore allocated memory check + * IN: part_core_map - bitmap of cores allocated to jobs of this partition + * or 
NULL if don't care * RET - array with number of CPUs available per node or NULL if not runnable */ static uint16_t *_select_nodes(struct job_record *job_ptr, uint32_t min_nodes, @@ -1890,13 +1963,15 @@ static uint16_t *_select_nodes(struct job_record *job_ptr, uint32_t min_nodes, bitstr_t *node_map, uint32_t cr_node_cnt, bitstr_t *core_map, struct node_use_record *node_usage, - uint16_t cr_type, bool test_only) + uint16_t cr_type, bool test_only, + bitstr_t *part_core_map) { - int rc; + int i, rc; uint16_t *cpu_cnt, *cpus = NULL; uint32_t start, n, a; //char str[100]; - bitstr_t *req_map = job_ptr->details->req_node_bitmap; + struct job_details *details_ptr = job_ptr->details; + bitstr_t *req_map = details_ptr->req_node_bitmap; if (bit_set_count(node_map) < min_nodes) return NULL; @@ -1908,7 +1983,7 @@ static uint16_t *_select_nodes(struct job_record *job_ptr, uint32_t min_nodes, /* get resource usage for this job from each available node */ _get_res_usage(job_ptr, node_map, core_map, cr_node_cnt, - node_usage, cr_type, &cpu_cnt, test_only); + node_usage, cr_type, &cpu_cnt, test_only, part_core_map); /* clear all nodes that do not have any * usable resources for this job */ @@ -1930,9 +2005,15 @@ static uint16_t *_select_nodes(struct job_record *job_ptr, uint32_t min_nodes, //bit_fmt(str, (sizeof(str) - 1), node_map); //info("_select_nodes nodemap: %s", str); + //bit_fmt(str, (sizeof(str) - 1), core_map); + //info("_select_nodes coremap: %s", str); - //bit_fmt(str, (sizeof(str) - 1), node_map); - //info("_select_nodes nodemap: %s", str); + if (details_ptr->ntasks_per_node && details_ptr->num_tasks) { + i = details_ptr->num_tasks; + i += (details_ptr->ntasks_per_node - 1); + i /= details_ptr->ntasks_per_node; + min_nodes = MAX(min_nodes, i); + } /* choose the best nodes for the job */ rc = _choose_nodes(job_ptr, node_map, min_nodes, max_nodes, req_nodes, @@ -1969,17 +2050,17 @@ static uint16_t *_select_nodes(struct job_record *job_ptr, uint32_t min_nodes, * * 
PROCEDURE: * - * Step 1: compare nodes in "avail" bitmap with current node state data + * Step 1: compare nodes in "avail" node_bitmap with current node state data * to find available nodes that match the job request * - * Step 2: check resources in "avail" bitmap with allocated resources from + * Step 2: check resources in "avail" node_bitmap with allocated resources from * higher priority partitions (busy resources are UNavailable) * - * Step 3: select resource usage on remaining resources in "avail" bitmap + * Step 3: select resource usage on remaining resources in "avail" node_bitmap * for this job, with the placement influenced by existing * allocations */ -extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, +extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, int mode, uint16_t cr_type, enum node_cr_state job_node_req, @@ -1991,7 +2072,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, static int gang_mode = -1; int error_code = SLURM_SUCCESS, ll; /* ll = layout array index */ uint16_t *layout_ptr = NULL; - bitstr_t *orig_map, *avail_cores, *free_cores; + bitstr_t *orig_map, *avail_cores, *free_cores, *part_core_map = NULL; bitstr_t *tmpcore = NULL, *reqmap = NULL; bool test_only; uint32_t c, i, k, n, csize, total_cpus, save_mem = 0; @@ -2019,11 +2100,11 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, else /* SELECT_MODE_RUN_NOW || SELECT_MODE_WILL_RUN */ test_only = false; - /* check node_state and update the node bitmap as necessary */ + /* check node_state and update the node_bitmap as necessary */ if (!test_only) { error_code = _verify_node_state(cr_part_ptr, job_ptr, - bitmap, cr_type, node_usage, - job_node_req); + node_bitmap, cr_type, + node_usage, job_node_req); if (error_code != SLURM_SUCCESS) { return error_code; } @@ -2046,11 +2127,11 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t 
*bitmap, if (select_debug_flags & DEBUG_FLAG_CPU_BIND) { info("cons_res: cr_job_test: evaluating job %u on %u nodes", - job_ptr->job_id, bit_set_count(bitmap)); + job_ptr->job_id, bit_set_count(node_bitmap)); } - orig_map = bit_copy(bitmap); - avail_cores = _make_core_bitmap(bitmap); + orig_map = bit_copy(node_bitmap); + avail_cores = _make_core_bitmap(node_bitmap); /* test to make sure that this job can succeed with all avail_cores * if 'no' then return FAIL @@ -2059,8 +2140,9 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, */ free_cores = bit_copy(avail_cores); cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, req_nodes, - bitmap, cr_node_cnt, free_cores, - node_usage, cr_type, test_only); + node_bitmap, cr_node_cnt, free_cores, + node_usage, cr_type, test_only, + part_core_map); if (cpu_count == NULL) { /* job cannot fit */ FREE_NULL_BITMAP(orig_map); @@ -2131,7 +2213,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, /*** Step 1 ***/ - bit_copybits(bitmap, orig_map); + bit_copybits(node_bitmap, orig_map); bit_copybits(free_cores, avail_cores); if (exc_core_bitmap) { @@ -2171,11 +2253,21 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, bit_copybits(tmpcore, p_ptr->row[i].row_bitmap); bit_not(tmpcore); /* set bits now "free" resources */ bit_and(free_cores, tmpcore); + + if (p_ptr->part_ptr != job_ptr->part_ptr) + continue; + if (part_core_map) { + bit_or(part_core_map, p_ptr->row[i].row_bitmap); + } else { + part_core_map = bit_copy(p_ptr->row[i]. + row_bitmap); + } } } cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, req_nodes, - bitmap, cr_node_cnt, free_cores, - node_usage, cr_type, test_only); + node_bitmap, cr_node_cnt, free_cores, + node_usage, cr_type, test_only, + part_core_map); if ((cpu_count) && (job_ptr->best_switch)) { /* job fits! We're done. 
*/ @@ -2204,7 +2296,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, } /*** Step 2 ***/ - bit_copybits(bitmap, orig_map); + bit_copybits(node_bitmap, orig_map); bit_copybits(free_cores, avail_cores); if (exc_core_bitmap) { @@ -2240,8 +2332,9 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, /* make these changes permanent */ bit_copybits(avail_cores, free_cores); cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, req_nodes, - bitmap, cr_node_cnt, free_cores, - node_usage, cr_type, test_only); + node_bitmap, cr_node_cnt, free_cores, + node_usage, cr_type, test_only, + part_core_map); if (!cpu_count) { /* job needs resources that are currently in use by * higher-priority jobs, so fail for now */ @@ -2258,7 +2351,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, } /*** Step 3 ***/ - bit_copybits(bitmap, orig_map); + bit_copybits(node_bitmap, orig_map); bit_copybits(free_cores, avail_cores); /* remove existing allocations (jobs) from same-priority partitions @@ -2277,8 +2370,9 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, } } cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, req_nodes, - bitmap, cr_node_cnt, free_cores, - node_usage, cr_type, test_only); + node_bitmap, cr_node_cnt, free_cores, + node_usage, cr_type, test_only, + part_core_map); if (cpu_count) { /* jobs from low-priority partitions are the only thing left * in our way. for now we'll ignore them, but FIXME: we need @@ -2305,19 +2399,19 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, * avail_cores = static core_bitmap of all available cores */ - if (jp_ptr->row == NULL) { + if (!jp_ptr || !jp_ptr->row) { /* there's no existing jobs in this partition, so place * the job in avail_cores. 
FIXME: still need a good * placement algorithm here that optimizes "job overlap" * between this job (in these idle nodes) and existing * jobs in the other partitions with <= priority to * this partition */ - bit_copybits(bitmap, orig_map); + bit_copybits(node_bitmap, orig_map); bit_copybits(free_cores, avail_cores); cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, - req_nodes, bitmap, cr_node_cnt, + req_nodes, node_bitmap, cr_node_cnt, free_cores, node_usage, cr_type, - test_only); + test_only, part_core_map); if (select_debug_flags & DEBUG_FLAG_CPU_BIND) { info("cons_res: cr_job_test: test 4 pass - " "first row found"); @@ -2332,15 +2426,15 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, for (i = 0; i < c; i++) { if (!jp_ptr->row[i].row_bitmap) break; - bit_copybits(bitmap, orig_map); + bit_copybits(node_bitmap, orig_map); bit_copybits(free_cores, avail_cores); bit_copybits(tmpcore, jp_ptr->row[i].row_bitmap); bit_not(tmpcore); bit_and(free_cores, tmpcore); cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, - req_nodes, bitmap, cr_node_cnt, + req_nodes, node_bitmap, cr_node_cnt, free_cores, node_usage, cr_type, - test_only); + test_only, part_core_map); if (cpu_count) { if (select_debug_flags & DEBUG_FLAG_CPU_BIND) { info("cons_res: cr_job_test: test 4 pass - " @@ -2354,16 +2448,16 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, if ((i < c) && !jp_ptr->row[i].row_bitmap) { /* we've found an empty row, so use it */ - bit_copybits(bitmap, orig_map); + bit_copybits(node_bitmap, orig_map); bit_copybits(free_cores, avail_cores); if (select_debug_flags & DEBUG_FLAG_CPU_BIND) { info("cons_res: cr_job_test: " "test 4 trying empty row %i",i); } cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, - req_nodes, bitmap, cr_node_cnt, + req_nodes, node_bitmap, cr_node_cnt, free_cores, node_usage, cr_type, - test_only); + test_only, part_core_map); } if (!cpu_count) { @@ -2386,7 +2480,7 @@ extern int 
cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, alloc_job: /* at this point we've found a good set of * bits to allocate to this job: - * - bitmap is the set of nodes to allocate + * - node_bitmap is the set of nodes to allocate * - free_cores is the set of allocated cores * - cpu_count is the number of cpus per allocated node * @@ -2397,6 +2491,7 @@ alloc_job: FREE_NULL_BITMAP(orig_map); FREE_NULL_BITMAP(avail_cores); FREE_NULL_BITMAP(tmpcore); + FREE_NULL_BITMAP(part_core_map); if ((!cpu_count) || (!job_ptr->best_switch)) { /* we were sent here to cleanup and exit */ FREE_NULL_BITMAP(free_cores); @@ -2408,7 +2503,7 @@ alloc_job: } /* At this point we have: - * - a bitmap of selected nodes + * - a node_bitmap of selected nodes * - a free_cores bitmap of usable cores on each selected node * - a per-alloc-node cpu_count array */ @@ -2434,11 +2529,9 @@ alloc_job: /** create the struct_job_res **/ job_res = create_job_resources(); - job_res->node_bitmap = bit_copy(bitmap); - job_res->nodes = bitmap2node_name(bitmap); - if (job_res->node_bitmap == NULL) - fatal("bit_copy malloc failure"); - job_res->nhosts = bit_set_count(bitmap); + job_res->node_bitmap = bit_copy(node_bitmap); + job_res->nodes = bitmap2node_name(node_bitmap); + job_res->nhosts = bit_set_count(node_bitmap); job_res->ncpus = job_res->nhosts; if (job_ptr->details->ntasks_per_node) job_res->ncpus *= details_ptr->ntasks_per_node; @@ -2474,7 +2567,7 @@ alloc_job: uint32_t j; if (layout_ptr && reqmap && bit_test(reqmap,n)) ll++; - if (bit_test(bitmap, n) == 0) + if (bit_test(node_bitmap, n) == 0) continue; j = cr_get_coremap_offset(n); k = cr_get_coremap_offset(n + 1); diff --git a/src/plugins/select/cons_res/job_test.h b/src/plugins/select/cons_res/job_test.h index 4a2ad6b5e480538afa0b31d535d450403bf898e4..631b3cd137ba75bc8eba055d6888013a41ae34f0 100644 --- a/src/plugins/select/cons_res/job_test.h +++ b/src/plugins/select/cons_res/job_test.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -61,7 +61,7 @@ /* _job_test - does most of the real work for select_p_job_test(), which * pretty much just handles load-leveling and max_share logic */ -int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, +int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, int mode, uint16_t cr_type, enum node_cr_state job_node_req, uint32_t cr_node_cnt, diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index fad2209b7baa0e6fd9a4f6ccd8c853845ffc2a4f..a67530bd2ecefe63b05e85f00c5c2019a431d778 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -62,7 +62,7 @@ * from select/linear * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -192,6 +192,7 @@ static bool job_preemption_tested = false; struct select_nodeinfo { uint16_t magic; /* magic number */ uint16_t alloc_cpus; + uint32_t alloc_memory; }; extern select_nodeinfo_t *select_p_select_nodeinfo_alloc(void); @@ -423,9 +424,6 @@ static void _create_part_data(void) this_ptr = select_part_record; part_iterator = list_iterator_create(part_list); - if (part_iterator == NULL) - fatal ("memory allocation failure"); - while ((p_ptr = (struct part_record *) list_next(part_iterator))) { this_ptr->part_ptr = p_ptr; this_ptr->num_rows = p_ptr->max_share; @@ -583,8 +581,23 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr, size = bit_size(this_row->row_bitmap); bit_nclear(this_row->row_bitmap, 0, size-1); } - return; + } else { + if (job_ptr) { /* just remove the job */ + xassert(job_ptr->job_resrcs); + remove_job_from_cores(job_ptr->job_resrcs, + &(this_row->row_bitmap), + cr_node_num_cores); + } else { /* totally rebuild the bitmap */ + size = bit_size(this_row->row_bitmap); + bit_nclear(this_row->row_bitmap, 0, size-1); + for (j = 0; j < this_row->num_jobs; j++) { + add_job_to_cores(this_row->job_list[j], + &(this_row->row_bitmap), + cr_node_num_cores); + } + } } + return; } /* gather data */ @@ -957,12 +970,8 @@ static int _job_expand(struct job_record *from_job_ptr, } tmp_bitmap = bit_copy(to_job_resrcs_ptr->node_bitmap); - if (!tmp_bitmap) - fatal("bit_copy: malloc failure"); bit_or(tmp_bitmap, from_job_resrcs_ptr->node_bitmap); tmp_bitmap2 = bit_copy(to_job_ptr->node_bitmap); - if (!tmp_bitmap) - fatal("bit_copy: malloc failure"); bit_or(tmp_bitmap2, from_job_ptr->node_bitmap); bit_and(tmp_bitmap, tmp_bitmap2); bit_free(tmp_bitmap2); @@ -1377,8 +1386,8 @@ static int _rm_job_from_one_node(struct job_record *job_ptr, } - /* job was found and removed from core-bitmap, so refresh CR bitmaps */ - _build_row_bitmaps(p_ptr, job_ptr); + /* some node of job removed 
from core-bitmap, so refresh CR bitmaps */ + _build_row_bitmaps(p_ptr, NULL); /* Adjust the node_state of the node removed from this job. * If all cores are now available, set node_state = NODE_CR_AVAILABLE */ @@ -1458,9 +1467,22 @@ static int _test_only(struct job_record *job_ptr, bitstr_t *bitmap, uint32_t req_nodes, uint16_t job_node_req) { int rc; + uint16_t tmp_cr_type = cr_type; + + if (job_ptr->part_ptr->cr_type) { + if (((cr_type & CR_SOCKET) || (cr_type & CR_CORE)) && + (cr_type & CR_ALLOCATE_FULL_SOCKET)) { + tmp_cr_type &= ~(CR_SOCKET|CR_CORE); + tmp_cr_type |= job_ptr->part_ptr->cr_type; + } else { + info("cons_res: Can't use Partition SelectType unless " + "using CR_Socket or CR_Core and " + "CR_ALLOCATE_FULL_SOCKET"); + } + } rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, - SELECT_MODE_TEST_ONLY, cr_type, job_node_req, + SELECT_MODE_TEST_ONLY, tmp_cr_type, job_node_req, select_node_cnt, select_part_record, select_node_usage, NULL); return rc; @@ -1497,14 +1519,25 @@ static int _run_now(struct job_record *job_ptr, bitstr_t *bitmap, bool remove_some_jobs = false; uint16_t pass_count = 0; uint16_t mode; + uint16_t tmp_cr_type = cr_type; save_bitmap = bit_copy(bitmap); top: orig_map = bit_copy(save_bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); + + if (job_ptr->part_ptr->cr_type) { + if (((cr_type & CR_SOCKET) || (cr_type & CR_CORE)) && + (cr_type & CR_ALLOCATE_FULL_SOCKET)) { + tmp_cr_type &= ~(CR_SOCKET|CR_CORE); + tmp_cr_type |= job_ptr->part_ptr->cr_type; + } else { + info("cons_res: Can't use Partition SelectType unless " + "using CR_Socket or CR_Core and " + "CR_ALLOCATE_FULL_SOCKET"); + } + } rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, - SELECT_MODE_RUN_NOW, cr_type, job_node_req, + SELECT_MODE_RUN_NOW, tmp_cr_type, job_node_req, select_node_cnt, select_part_record, select_node_usage, exc_core_bitmap); @@ -1525,8 +1558,6 @@ top: orig_map = bit_copy(save_bitmap); } job_iterator = 
list_iterator_create(preemptee_candidates); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(tmp_job_ptr) && @@ -1544,7 +1575,7 @@ top: orig_map = bit_copy(save_bitmap); rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, SELECT_MODE_WILL_RUN, - cr_type, job_node_req, + tmp_cr_type, job_node_req, select_node_cnt, future_part, future_usage, exc_core_bitmap); @@ -1591,13 +1622,9 @@ top: orig_map = bit_copy(save_bitmap); * actually used */ if (*preemptee_job_list == NULL) { *preemptee_job_list = list_create(NULL); - if (*preemptee_job_list == NULL) - fatal("list_create malloc failure"); } preemptee_iterator = list_iterator_create( preemptee_candidates); - if (preemptee_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(preemptee_iterator))) { mode = slurm_job_preempt_mode(tmp_job_ptr); @@ -1644,14 +1671,25 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, bitstr_t *orig_map; int action, rc = SLURM_ERROR; time_t now = time(NULL); + uint16_t tmp_cr_type = cr_type; orig_map = bit_copy(bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); + + if (job_ptr->part_ptr->cr_type) { + if (((cr_type & CR_SOCKET) || (cr_type & CR_CORE)) && + (cr_type & CR_ALLOCATE_FULL_SOCKET)) { + tmp_cr_type &= ~(CR_SOCKET|CR_CORE); + tmp_cr_type |= job_ptr->part_ptr->cr_type; + } else { + info("cons_res: Can't use Partition SelectType unless " + "using CR_Socket or CR_Core and " + "CR_ALLOCATE_FULL_SOCKET"); + } + } /* Try to run with currently available nodes */ rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, - SELECT_MODE_WILL_RUN, cr_type, job_node_req, + SELECT_MODE_WILL_RUN, tmp_cr_type, job_node_req, select_node_cnt, select_part_record, select_node_usage, exc_core_bitmap); if (rc == SLURM_SUCCESS) { @@ -1679,8 +1717,6 @@ static int _will_run_test(struct 
job_record *job_ptr, bitstr_t *bitmap, if (!cr_job_list) fatal("list_create: memory allocation error"); job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(tmp_job_ptr) && !IS_JOB_SUSPENDED(tmp_job_ptr)) @@ -1709,7 +1745,7 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, if (preemptee_candidates) { bit_or(bitmap, orig_map); rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, - req_nodes, SELECT_MODE_WILL_RUN, cr_type, + req_nodes, SELECT_MODE_WILL_RUN, tmp_cr_type, job_node_req, select_node_cnt, future_part, future_usage, exc_core_bitmap); if (rc == SLURM_SUCCESS) { @@ -1725,8 +1761,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, if (rc != SLURM_SUCCESS) { list_sort(cr_job_list, _cr_job_list_sort); job_iterator = list_iterator_create(cr_job_list); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = list_next(job_iterator))) { int ovrlap; bit_or(bitmap, orig_map); @@ -1739,7 +1773,7 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, tmp_job_ptr, 0); rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, - SELECT_MODE_WILL_RUN, cr_type, + SELECT_MODE_WILL_RUN, tmp_cr_type, job_node_req, select_node_cnt, future_part, future_usage, exc_core_bitmap); @@ -1762,12 +1796,8 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, * in selected plugin, but by Moab or something else. 
*/ if (*preemptee_job_list == NULL) { *preemptee_job_list = list_create(NULL); - if (*preemptee_job_list == NULL) - fatal("list_create malloc failure"); } preemptee_iterator =list_iterator_create(preemptee_candidates); - if (preemptee_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(preemptee_iterator))) { if (bit_overlap(bitmap, @@ -1972,7 +2002,7 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, { int rc = EINVAL; uint16_t job_node_req; - bool debug_cpu_bind = false, debug_check = false; + static bool debug_cpu_bind = false, debug_check = false; xassert(bitmap); @@ -2018,8 +2048,8 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, if (job_ptr->job_resrcs) log_job_resources(job_ptr->job_id, job_ptr->job_resrcs); else { - info("no job_resources info for job %u", - job_ptr->job_id); + info("no job_resources info for job %u rc=%d", + job_ptr->job_id, rc); } } else if (debug_cpu_bind && job_ptr->job_resrcs) { log_job_resources(job_ptr->job_id, job_ptr->job_resrcs); @@ -2154,7 +2184,12 @@ extern int select_p_select_nodeinfo_pack(select_nodeinfo_t *nodeinfo, Buf buffer, uint16_t protocol_version) { - pack16(nodeinfo->alloc_cpus, buffer); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + pack16(nodeinfo->alloc_cpus, buffer); + pack32(nodeinfo->alloc_memory, buffer); + } else { + pack16(nodeinfo->alloc_cpus, buffer); + } return SLURM_SUCCESS; } @@ -2168,7 +2203,12 @@ extern int select_p_select_nodeinfo_unpack(select_nodeinfo_t **nodeinfo, nodeinfo_ptr = select_p_select_nodeinfo_alloc(); *nodeinfo = nodeinfo_ptr; - safe_unpack16(&nodeinfo_ptr->alloc_cpus, buffer); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack16(&nodeinfo_ptr->alloc_cpus, buffer); + safe_unpack32(&nodeinfo_ptr->alloc_memory, buffer); + } else { + safe_unpack16(&nodeinfo_ptr->alloc_cpus, buffer); + } return SLURM_SUCCESS; @@ -2211,7 +2251,6 @@ extern int 
select_p_select_nodeinfo_set_all(void) uint16_t tmp, tmp_16 = 0, tmp_part; static time_t last_set_all = 0; uint32_t node_threads, node_cpus; - select_nodeinfo_t *nodeinfo = NULL; /* only set this once when the last_node_update is newer than * the last time we set things up. */ @@ -2223,14 +2262,12 @@ extern int select_p_select_nodeinfo_set_all(void) } last_set_all = last_node_update; - for (n=0; n < select_node_cnt; n++) { - node_ptr = &(node_record_table_ptr[n]); - - /* We have to use the '_g_' here to make sure we get - the correct data to work on. i.e. cray calls this - plugin from within select/cray which has it's own - struct. - */ + for (n = 0, node_ptr = node_record_table_ptr; + n < select_node_cnt; n++, node_ptr++) { + select_nodeinfo_t *nodeinfo = NULL; + /* We have to use the '_g_' here to make sure we get the + * correct data to work on. i.e. cray calls this plugin + * from within select/cray which has it's own struct. */ select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, SELECT_NODEDATA_PTR, 0, (void *)&nodeinfo); @@ -2259,7 +2296,7 @@ extern int select_p_select_nodeinfo_set_all(void) continue; tmp = bit_set_count_range(p_ptr->row[i].row_bitmap, start, end); - /* get the row with the largest CPU count */ + /* Report row with largest CPU count */ tmp_part = MAX(tmp, tmp_part); } tmp_16 += tmp_part; /* Add CPU counts all parts */ @@ -2271,6 +2308,12 @@ extern int select_p_select_nodeinfo_set_all(void) tmp_16 *= node_threads; nodeinfo->alloc_cpus = tmp_16; + if (select_node_record) { + nodeinfo->alloc_memory = + select_node_usage[n].alloc_memory; + } else { + nodeinfo->alloc_memory = 0; + } } return SLURM_SUCCESS; @@ -2298,6 +2341,7 @@ extern int select_p_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, { int rc = SLURM_SUCCESS; uint16_t *uint16 = (uint16_t *) data; + uint32_t *uint32 = (uint32_t *) data; char **tmp_char = (char **) data; select_nodeinfo_t **select_nodeinfo = (select_nodeinfo_t **) data; @@ -2328,6 +2372,9 @@ extern int 
select_p_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, case SELECT_NODEDATA_EXTRA_INFO: *tmp_char = NULL; break; + case SELECT_NODEDATA_MEM_ALLOC: + *uint32 = nodeinfo->alloc_memory; + break; default: error("Unsupported option %d for get_nodeinfo.", dinfo); rc = SLURM_ERROR; @@ -2481,8 +2528,6 @@ extern int select_p_reconfigure(void) /* reload job data */ job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (IS_JOB_RUNNING(job_ptr)) { /* add the job */ @@ -2528,29 +2573,52 @@ bitstr_t *_make_core_bitmap_filtered(bitstr_t *node_map, int filter) return core_map; } -/* Once here, if core_cnt=0, avail_bitmap has nodes not used by any job or +/* Once here, if core_cnt is NULL, avail_bitmap has nodes not used by any job or * reservation */ bitstr_t *sequential_pick(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, bitstr_t **core_bitmap) + uint32_t *core_cnt, bitstr_t **core_bitmap) { bitstr_t *sp_avail_bitmap; char str[300]; - /* Just allowing symetric requests today */ - uint32_t cores_per_node = core_cnt / MAX(node_cnt, 1); + uint32_t cores_per_node = 0; bitstr_t *tmpcore; + int total_core_cnt = 0; + + /* We have these cases here: + * 1) Reservation requests using just number of nodes + * - core_cnt is null + * 2) Reservations request using number of nodes + number of cores + * 3) Reservations request using node list + * - node_cnt is 0 + * - core_cnt is null + * 4) Reservation request using node list + number of cores list + * - node_cnt is 0 + */ - debug2("reserving %u cores per node in %d nodes", - cores_per_node, node_cnt); - - sp_avail_bitmap = bit_alloc(bit_size(avail_bitmap)); - if (sp_avail_bitmap == NULL) { - fatal ("memory allocation failure"); + if ((node_cnt) && (core_cnt)) { + debug2("reserving %u cores per node in %d nodes", + cores_per_node, node_cnt); + total_core_cnt = core_cnt[0]; + cores_per_node = 
core_cnt[0] / MAX(node_cnt, 1); } + if ((!node_cnt) && (core_cnt)) { + int num_nodes = bit_set_count(avail_bitmap); + int i; + bit_fmt(str, (sizeof(str) - 1), avail_bitmap); + debug2("Reserving cores from nodes: %s", str); + for (i = 0; (i < num_nodes) && core_cnt[i]; i++) + total_core_cnt += core_cnt[i]; + } + + debug2("Reservations requires %d cores", total_core_cnt); + sp_avail_bitmap = bit_alloc(bit_size(avail_bitmap)); bit_fmt(str, (sizeof(str) - 1), avail_bitmap); bit_fmt(str, (sizeof(str) - 1), sp_avail_bitmap); if (core_cnt) { /* Reservation is using partial nodes */ + int node_list_inx = 0; + debug2("Reservation is using partial nodes"); /* if not NULL = Cores used by other core based reservations @@ -2566,24 +2634,24 @@ bitstr_t *sequential_pick(bitstr_t *avail_bitmap, uint32_t node_cnt, debug2("tmpcore contains just current free cores: %s", str); bit_and(*core_bitmap, tmpcore); /* clear core_bitmap */ - while (core_cnt) { + while (total_core_cnt) { int inx, coff, coff2; int i; int cores_in_node; int local_cores; - inx = bit_ffs(avail_bitmap); - if (inx < 0) { - info("reservation request can not be satisfied"); - FREE_NULL_BITMAP(sp_avail_bitmap); - FREE_NULL_BITMAP(tmpcore); - return NULL; + if (node_cnt == 0) { + cores_per_node = core_cnt[node_list_inx]; + if (cores_per_node == 0) + break; } + + inx = bit_ffs(avail_bitmap); + if (inx < 0) + break; debug2("Using node %d", inx); coff = cr_get_coremap_offset(inx); - /* TODO: is next right for the last possible node at - * avail_bitmap? 
*/ coff2 = cr_get_coremap_offset(inx + 1); local_cores = coff2 - coff; @@ -2603,14 +2671,17 @@ bitstr_t *sequential_pick(bitstr_t *avail_bitmap, uint32_t node_cnt, if (cores_in_node < cores_per_node) continue; + debug2("Using node %d (avail: %d, needed: %d)", + inx, cores_in_node, cores_per_node); + cores_in_node = 0; for (i = 0; i < local_cores; i++) { if (bit_test(tmpcore, coff + i)) { bit_set(*core_bitmap, coff + i); - core_cnt--; + total_core_cnt--; cores_in_node++; if ((cores_in_node == cores_per_node) || - (core_cnt == 0)) + (total_core_cnt == 0)) break; } } @@ -2623,19 +2694,19 @@ bitstr_t *sequential_pick(bitstr_t *avail_bitmap, uint32_t node_cnt, } else { debug2("Reservation NOT using node %d", inx); } - + node_list_inx++; } FREE_NULL_BITMAP(tmpcore); - bit_fmt(str, (sizeof(str) - 1), *core_bitmap); - info("sequential pick using coremap: %s", str); - - if (core_cnt) { + if (total_core_cnt) { info("reservation request can not be satisfied"); FREE_NULL_BITMAP(sp_avail_bitmap); return NULL; } + bit_fmt(str, (sizeof(str) - 1), *core_bitmap); + info("sequential pick using coremap: %s", str); + } else { /* Reservation is using full nodes */ while (node_cnt) { @@ -2700,7 +2771,7 @@ static int _get_avail_core_in_node(bitstr_t *core_bitmap, int node) * RET - nodes selected for use by the reservation */ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, bitstr_t **core_bitmap) + uint32_t *core_cnt, bitstr_t **core_bitmap) { bitstr_t **switches_bitmap; /* nodes on this switch */ bitstr_t **switches_core_bitmap; /* cores on this switch */ @@ -2710,7 +2781,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, bitstr_t *avail_nodes_bitmap = NULL; /* nodes on any switch */ bitstr_t *sp_avail_bitmap; - int rem_nodes, rem_cores; /* remaining resources desired */ + int rem_nodes, rem_cores = 0; /* remaining resources desired */ int i, j; int best_fit_inx, first, last; int best_fit_nodes; @@ 
-2720,7 +2791,8 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, xassert(avail_bitmap); - if (!switch_record_cnt || !switch_record_table) { + /* When reservation includes a nodelist we use sequential_pick code */ + if (!switch_record_cnt || !switch_record_table || !node_cnt) { return sequential_pick(avail_bitmap, node_cnt, core_cnt, core_bitmap); } @@ -2733,10 +2805,15 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, *core_bitmap = _make_core_bitmap_filtered(avail_bitmap, 0); rem_nodes = node_cnt; - rem_cores = core_cnt; - /* TODO: allowing asymmetric cluster */ - cores_per_node = core_cnt / MAX(node_cnt, 1); + /* Assuming symmetric cluster */ + if (core_cnt) { + rem_cores = core_cnt[0]; + cores_per_node = core_cnt[0] / MAX(node_cnt, 1); + } else if (cr_node_num_cores) + cores_per_node = cr_node_num_cores[0]; + else + cores_per_node = 1; /* Construct a set of switch array entries, * use the same indexes as switch_record_table in slurmctld */ @@ -2805,7 +2882,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, best_fit_inx = -1; for (j=0; j<switch_record_cnt; j++) { if ((switches_node_cnt[j] < rem_nodes) || - (core_cnt && (switches_cpu_cnt[j] < core_cnt))) + (core_cnt && (switches_cpu_cnt[j] < core_cnt[0]))) continue; if ((best_fit_inx == -1) || (switch_record_table[j].level < @@ -2839,8 +2916,12 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, for (j=0; j<switch_record_cnt; j++) { if (switches_node_cnt[j] == 0) continue; - sufficient = (switches_node_cnt[j] >= rem_nodes) && - (switches_cpu_cnt[j] >= core_cnt); + if (core_cnt) { + sufficient = + (switches_node_cnt[j] >= rem_nodes) && + (switches_cpu_cnt[j] >= core_cnt[0]); + } else + sufficient = switches_node_cnt[j] >= rem_nodes; /* If first possibility OR */ /* first set large enough for request OR */ /* tightest fit (less resource waste) OR */ @@ -2920,17 +3001,14 @@ fini: for 
(i=0; i<switch_record_cnt; i++) { bitstr_t *exc_core_bitmap = NULL; sp_avail_bitmap = bit_alloc(bit_size(avail_bitmap)); - if (sp_avail_bitmap == NULL) - fatal ("memory allocation failure"); - if (*core_bitmap) { exc_core_bitmap = *core_bitmap; *core_bitmap = bit_alloc(bit_size(exc_core_bitmap)); } - cores_per_node = core_cnt / MAX(node_cnt, 1); + cores_per_node = core_cnt[0] / MAX(node_cnt, 1); - while (core_cnt) { + while (core_cnt[0]) { uint32_t inx, coff; int i; int avail_cores_in_node; @@ -2939,8 +3017,8 @@ fini: for (i=0; i<switch_record_cnt; i++) { if ((inx < 0) || (inx > bit_size(avail_bitmap))) break; - debug2("Using node inx %d cores_per_node: %d " - "core_cnt: %d", inx, cores_per_node, core_cnt); + debug2("Using node inx %d cores_per_node %d " + "core_cnt %d", inx, cores_per_node, core_cnt[0]); coff = cr_get_coremap_offset(inx); /* Clear this node from the initial available bitmap */ @@ -2966,12 +3044,12 @@ fini: for (i=0; i<switch_record_cnt; i++) { for (i = 0; i < cr_node_num_cores[inx]; i++) { if (!bit_test(exc_core_bitmap, coff + i)) { bit_set(*core_bitmap, coff + i); - core_cnt--; + core_cnt[0]--; avail_cores_in_node++; } if ((avail_cores_in_node == cores_per_node) || - (core_cnt == 0)) + (core_cnt[0] == 0)) break; } @@ -2985,7 +3063,7 @@ fini: for (i=0; i<switch_record_cnt; i++) { //bit_fmt(str, (sizeof(str) - 1), *core_bitmap); //info("sequential pick using coremap: %s", str); - if (core_cnt) { + if (core_cnt[0]) { info("reservation request can not be satisfied"); FREE_NULL_BITMAP(sp_avail_bitmap); return NULL; diff --git a/src/plugins/select/cons_res/select_cons_res.h b/src/plugins/select/cons_res/select_cons_res.h index 8c3fce29c64d9ec71fd6f1819ad93df756134346..27ad9088eb98b2e35abb857102b78b06d88b33f8 100644 --- a/src/plugins/select/cons_res/select_cons_res.h +++ b/src/plugins/select/cons_res/select_cons_res.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/cray/Makefile.am b/src/plugins/select/cray/Makefile.am index 2ec30a33b27d1adc7725be213b8395afffcf7161..1cc1205a8c0b8f9166c067119f8b8c4cfdec20b5 100644 --- a/src/plugins/select/cray/Makefile.am +++ b/src/plugins/select/cray/Makefile.am @@ -2,8 +2,6 @@ AUTOMAKE_OPTIONS = foreign -AM_CPPFLAGS = -DCRAY_CONFIG_FILE=\"$(sysconfdir)/cray.conf\" - PLUGIN_FLAGS = -module -avoid-version --export-dynamic INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -I. diff --git a/src/plugins/select/cray/Makefile.in b/src/plugins/select/cray/Makefile.in index 754f5eac2b40dccb3627b7cdfc48170927df2b7a..212595b2d3c699b71a4b49749c75ab532a356be3 100644 --- a/src/plugins/select/cray/Makefile.in +++ b/src/plugins/select/cray/Makefile.in @@ -59,6 +59,7 @@ subdir = src/plugins/select/cray DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ 
$(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -227,6 +231,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -247,6 +253,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -256,6 +265,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -263,6 +274,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -297,6 +317,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -324,6 +347,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ 
+RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -413,7 +439,6 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign -AM_CPPFLAGS = -DCRAY_CONFIG_FILE=\"$(sysconfdir)/cray.conf\" PLUGIN_FLAGS = -module -avoid-version --export-dynamic INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -I. pkglib_LTLIBRARIES = select_cray.la diff --git a/src/plugins/select/cray/basil_alps.h b/src/plugins/select/cray/basil_alps.h index fa686801ad417f0f742c027dfadf2a6da105f132..d4e284b5b74d8598ee5a12583ee90adcad688028 100644 --- a/src/plugins/select/cray/basil_alps.h +++ b/src/plugins/select/cray/basil_alps.h @@ -40,7 +40,7 @@ /* * Limits */ -#define TAG_DEPTH_MAX 12 /* maximum XML nesting level */ +#define TAG_DEPTH_MAX 16 /* maximum XML nesting level */ #define BASIL_STRING_SHORT 16 #define BASIL_STRING_MEDIUM 32 #define BASIL_STRING_LONG 64 @@ -57,7 +57,7 @@ enum basil_version { BV_4_0, /* Basil 1.2 CLE 4.x unconfirmed simulator version */ BV_4_1, /* Basil 1.2 CLE 4.x unconfirmed simulator version */ BV_5_0, /* Basil 1.2 CLE 5.x unconfirmed simulator version */ - BV_5_1, /* Basil 1.2 CLE 5.x unconfirmed simulator version */ + BV_5_1, /* Basil 1.3 CLE 5.x unconfirmed simulator version */ BV_MAX }; @@ -129,6 +129,11 @@ enum basil_element { #define BT_4_0_MAX (BT_ACCELALLOC + 1) /* End of Basil 4.0 */ /* FIXME: the Basil 4.1 interface is not yet fully released */ #define BT_4_1_MAX BT_4_0_MAX /* End of Basil 4.1 */ + BT_SOCKARRAY, /* Basil 1.3/5.1 Inventory/SocketArray */ + BT_SOCKET, /* Basil 1.3/5.1 Inventory/Socket */ + BT_COMUARRAY, /* Basil 1.3/5.1 Inventory/ComputeUnitArray */ + BT_COMPUNIT, /* Basil 1.3/5.1 Inventory/ComputeUnit */ +#define BT_5_1_MAX (BT_COMPUNIT + 1) /* End of Basil 5.1 */ BT_MAX /* End of Basil tags */ }; @@ -410,7 +415,8 @@ struct basil_inventory { char mpp_host[BASIL_STRING_SHORT]; 
time_t timestamp; bool is_gemini; - uint64_t change_count; + uint64_t change_count, + sched_change_count; uint32_t batch_avail, batch_total, nodes_total; @@ -443,7 +449,8 @@ struct basil_rsvn_param { depth, /* depth > 0, -d */ nppn, /* nppn > 0, -N */ npps, /* PEs per segment, -S */ - nspn; /* segments per node, -sn */ + nspn, /* segments per node, -sn */ + nppcu; /* Processors Per Compute Unit. BASIL 1.3 */ char *nodes; /* NodeParamArray */ struct basil_label *labels; /* LabelParamArray */ @@ -612,7 +619,8 @@ extern void free_inv(struct basil_inventory *inv); extern long basil_reserve(const char *user, const char *batch_id, uint32_t width, uint32_t depth, uint32_t nppn, - uint32_t mem_mb, struct nodespec *ns_head, + uint32_t mem_mb, uint32_t nppcu, + struct nodespec *ns_head, struct basil_accel_param *accel_head); extern int basil_confirm(uint32_t rsvn_id, int job_id, uint64_t pagg_id); extern const struct basil_rsvn *basil_rsvn_by_id(const struct basil_inventory *inv, diff --git a/src/plugins/select/cray/basil_interface.c b/src/plugins/select/cray/basil_interface.c index e50acbbb04a0aee06250f447129c244b357488ea..511062a3f11dab833a8fb1e82382340c1fd1de06 100644 --- a/src/plugins/select/cray/basil_interface.c +++ b/src/plugins/select/cray/basil_interface.c @@ -361,11 +361,7 @@ extern int basil_inventory(void) struct job_record *job_ptr; uint32_t resv_id; - if (job_iter == NULL) - fatal("list_iterator_create: malloc failure"); - while ((job_ptr = (struct job_record *)list_next(job_iter))) { - if (_get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RESV_ID, &resv_id) == SLURM_SUCCESS @@ -715,12 +711,15 @@ extern int do_basil_reserve(struct job_record *job_ptr) /* mppmem must be at least 1 for gang scheduling to work so * if you are wondering why gang scheduling isn't working you * should check your slurm.conf for DefMemPerNode */ - uint32_t mppdepth, mppnppn, mppwidth = 0, mppmem = 0, node_min_mem = 0; - uint32_t resv_id; + uint32_t mppdepth, mppnppn = 
INFINITE, mppwidth = 0, + mppmem = 0, node_min_mem = 0; + uint32_t resv_id, largest_cpus = 0, min_memory = INFINITE; int i, first_bit, last_bit; long rc; char *user, batch_id[16]; struct basil_accel_param* bap; + uint16_t nppcu = 0; +// uint16_t hwthreads_per_core = 1; if (!job_ptr->job_resrcs || job_ptr->job_resrcs->nhosts == 0) return SLURM_SUCCESS; @@ -741,16 +740,8 @@ extern int do_basil_reserve(struct job_record *job_ptr) if (first_bit == -1 || last_bit == -1) return SLURM_SUCCESS; /* no nodes allocated */ - mppdepth = MAX(1, job_ptr->details->cpus_per_task); - if (job_ptr->details->ntasks_per_node) { - mppnppn = job_ptr->details->ntasks_per_node; - } else if (job_ptr->details->num_tasks) { - mppnppn = (job_ptr->details->num_tasks + - job_ptr->job_resrcs->nhosts - 1) / - job_ptr->job_resrcs->nhosts; - } else { - mppnppn = 1; - } + /* always be 1 */ + mppdepth = 1; /* mppmem */ if (job_ptr->details->pn_min_memory & MEM_PER_CPU) { @@ -761,9 +752,25 @@ extern int do_basil_reserve(struct job_record *job_ptr) node_min_mem = job_ptr->details->pn_min_memory; } + if (slurmctld_conf.select_type_param & CR_ONE_TASK_PER_CORE) { + if (job_ptr->details && job_ptr->details->mc_ptr && + (job_ptr->details->mc_ptr->ntasks_per_core == 0xffff)) { + nppcu = 1; + debug("No explicit ntasks-per-core has been set, " + "using nppcu=1."); + } + } + + if (job_ptr->details && job_ptr->details->mc_ptr && + (job_ptr->details->mc_ptr->ntasks_per_core != 0xffff)) { + nppcu = job_ptr->details->mc_ptr->ntasks_per_core; + } + for (i = first_bit; i <= last_bit; i++) { struct node_record *node_ptr = node_record_table_ptr + i; + uint32_t node_cpus, node_mem; uint32_t basil_node_id; + /* uint32_t node_tasks; */ if (!bit_test(job_ptr->job_resrcs->node_bitmap, i)) continue; @@ -782,56 +789,55 @@ extern int do_basil_reserve(struct job_record *job_ptr) return SLURM_ERROR; } - if (node_min_mem) { - uint32_t node_cpus, node_mem; - int32_t tmp_mppmem; - - if (slurmctld_conf.fast_schedule) { - node_cpus = 
node_ptr->config_ptr->cpus; - node_mem = node_ptr->config_ptr->real_memory; - } else { - node_cpus = node_ptr->cpus; - node_mem = node_ptr->real_memory; - } + if (slurmctld_conf.fast_schedule) { + node_cpus = node_ptr->config_ptr->cpus; + node_mem = node_ptr->config_ptr->real_memory; + } else { + node_cpus = node_ptr->cpus; + node_mem = node_ptr->real_memory; + } - /* If the job has requested memory use it (if - lesser) for calculations. - */ - tmp_mppmem = MIN(node_mem, node_min_mem); + /* On a reservation we can only run one job per node + on a cray so allocate all the cpuss on each node + reguardless of the request. + */ + mppwidth += node_cpus; - /* - * ALPS 'Processing Elements per Node' value (aprun -N), - * which in slurm is --ntasks-per-node and 'mppnppn' in - * PBS: if --ntasks is specified, default to the number - * of cores per node (also the default for 'aprun -N'). - * On a heterogeneous system the nodes aren't - * always the same so keep track of the lowest - * mppmem and use it as the level for all - * nodes (mppmem is 0 when coming in). - */ - tmp_mppmem /= mppnppn ? mppnppn : node_cpus; - - /* Minimum memory per processing element should be 1, - * since 0 means give all the memory to the job. */ - if (tmp_mppmem <= 0) - tmp_mppmem = 1; + /* We want mppnppn to be the smallest number of cpus + per node and allocate that on each of the nodes + reguardless of the request. + */ + mppnppn = MIN(mppnppn, node_cpus); - if (mppmem) - mppmem = MIN(mppmem, tmp_mppmem); - else - mppmem = tmp_mppmem; + if (node_min_mem) { + /* Keep track of the largest cpu count and Min + memory if we need to split up the memory + per cpu. 
+ */ + largest_cpus = MAX(largest_cpus, node_cpus); + min_memory = MIN(min_memory, node_mem); } } - /* mppwidth */ - for (i = 0; i < job_ptr->job_resrcs->nhosts; i++) { - uint32_t node_tasks = job_ptr->job_resrcs->cpus[i] / mppdepth; - - if (mppnppn && mppnppn < node_tasks) - node_tasks = mppnppn; - mppwidth += node_tasks; + if (node_min_mem) { + /* + * ALPS 'Processing Elements per Node' value (aprun -N), + * which in slurm is --ntasks-per-node and 'mppnppn' in + * PBS: if --ntasks is specified, default to the number + * of cores per node (also the default for 'aprun -N'). + * On a heterogeneous system the nodes aren't + * always the same so keep track of the lowest + * mppmem and use it as the level for all + * nodes (mppmem is 0 when coming in). + */ + mppmem = min_memory / largest_cpus; } + /* Minimum memory per processing element should be 1, + * since 0 means give all the memory to the job. */ + if (mppmem <= 0) + mppmem = 1; + snprintf(batch_id, sizeof(batch_id), "%u", job_ptr->job_id); user = uid_to_string(job_ptr->user_id); @@ -841,7 +847,7 @@ extern int do_basil_reserve(struct job_record *job_ptr) bap = NULL; rc = basil_reserve(user, batch_id, mppwidth, mppdepth, mppnppn, - mppmem, ns_head, bap); + mppmem, (uint32_t)nppcu, ns_head, bap); xfree(user); if (rc <= 0) { /* errno value will be resolved by select_g_job_begin() */ diff --git a/src/plugins/select/cray/cray_config.c b/src/plugins/select/cray/cray_config.c index 67c7caef00007905361a62d174cf039cf5b7e158..4cdbfd4a5605335d8dba3cbf5e8a04321a2054b1 100644 --- a/src/plugins/select/cray/cray_config.c +++ b/src/plugins/select/cray/cray_config.c @@ -7,7 +7,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -55,6 +55,7 @@ s_p_options_t cray_conf_file_options[] = { {"AlpsDir", S_P_STRING}, /* Vestigial option */ {"apbasil", S_P_STRING}, {"apkill", S_P_STRING}, + {"AlpsEngine", S_P_STRING}, {"SDBdb", S_P_STRING}, {"SDBhost", S_P_STRING}, {"SDBpass", S_P_STRING}, @@ -64,18 +65,6 @@ s_p_options_t cray_conf_file_options[] = { {NULL} }; -static char *_get_cray_conf(void) -{ - char *val = getenv("SLURM_CONF"); - char *rc = NULL; - - if (!val) - val = default_slurm_config_file; - rc = xstrdup(val); - xstrsubstitute(rc, "slurm.conf", "cray.conf"); - return rc; -} - extern int create_config(void) { int rc = SLURM_SUCCESS; @@ -89,7 +78,7 @@ extern int create_config(void) cray_conf = xmalloc(sizeof(cray_config_t)); - cray_conf_file = _get_cray_conf(); + cray_conf_file = get_extra_conf_path("cray.conf"); if (stat(cray_conf_file, &config_stat) < 0) { cray_conf->apbasil = xstrdup(DEFAULT_APBASIL); @@ -133,6 +122,8 @@ extern int create_config(void) if (!s_p_get_string(&cray_conf->apkill, "apkill", tbl)) cray_conf->apkill = xstrdup(DEFAULT_APKILL); + s_p_get_string(&cray_conf->alps_engine, "AlpsEngine", tbl); + if (!s_p_get_string(&cray_conf->sdb_db, "SDBdb", tbl)) cray_conf->sdb_db = xstrdup(DEFAULT_CRAY_SDB_DB); if (!s_p_get_string(&cray_conf->sdb_host, "SDBhost", tbl)) @@ -154,6 +145,7 @@ end_it: info("Cray conf is..."); info("\tapbasil=\t%s", cray_conf->apbasil); info("\tapkill=\t\t%s", cray_conf->apkill); + info("\tAlpsEngine=\t\t%s", cray_conf->alps_engine); info("\tSDBdb=\t\t%s", cray_conf->sdb_db); info("\tSDBhost=\t%s", cray_conf->sdb_host); info("\tSDBpass=\t%s", cray_conf->sdb_pass); @@ -171,6 +163,7 @@ extern int destroy_config(void) if (cray_conf) { xfree(cray_conf->apbasil); xfree(cray_conf->apkill); + xfree(cray_conf->alps_engine); xfree(cray_conf->sdb_db); xfree(cray_conf->sdb_host); xfree(cray_conf->sdb_pass); diff --git a/src/plugins/select/cray/cray_config.h 
b/src/plugins/select/cray/cray_config.h index bc474cc2d4117e7272d1edf53c6c11a676877877..c55d6f86f76b8fdbac5e7ed49223f01a2df3cae5 100644 --- a/src/plugins/select/cray/cray_config.h +++ b/src/plugins/select/cray/cray_config.h @@ -7,7 +7,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -65,6 +65,7 @@ /** * cray_config_t - Parsed representation of cray.conf + * @alps_engine: Basil engine version number * @apbasil: full path to ALPS 'apbasil' executable * @apkill: full path to ALPS 'apkill' executable * @sdb_host: DNS name of SDB host @@ -77,6 +78,7 @@ * jobs */ typedef struct { + char *alps_engine; char *apbasil; char *apkill; diff --git a/src/plugins/select/cray/libalps/Makefile.am b/src/plugins/select/cray/libalps/Makefile.am index 7092e4acfb4b857aa0777acbf8fdc3c0d6a14f9a..b63239895204118fca7e7e85a9b03e5caec77c81 100644 --- a/src/plugins/select/cray/libalps/Makefile.am +++ b/src/plugins/select/cray/libalps/Makefile.am @@ -15,6 +15,7 @@ libalps_la_SOURCES = \ parser_basil_1.1.c \ parser_basil_3.1.c \ parser_basil_4.0.c \ + parser_basil_5.1.c \ basil_request.c \ do_query.c \ do_reserve.c \ diff --git a/src/plugins/select/cray/libalps/Makefile.in b/src/plugins/select/cray/libalps/Makefile.in index 0ee398c134eceb56d539bfc8fd3d6c572f824e16..d2458343c69c5afe3e6e9502f6b73ea3e0b36b08 100644 --- a/src/plugins/select/cray/libalps/Makefile.in +++ b/src/plugins/select/cray/libalps/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/cray/libalps DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ 
$(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -106,11 +110,12 @@ libalps_la_DEPENDENCIES = $(am__DEPENDENCIES_1) am_libalps_la_OBJECTS = libalps_la-basil_mysql_routines.lo \ libalps_la-parser_common.lo libalps_la-parser_basil_1.0.lo \ libalps_la-parser_basil_1.1.lo libalps_la-parser_basil_3.1.lo \ - libalps_la-parser_basil_4.0.lo libalps_la-basil_request.lo \ - libalps_la-do_query.lo libalps_la-do_reserve.lo \ - libalps_la-do_release.lo libalps_la-do_confirm.lo \ - libalps_la-do_switch.lo libalps_la-memory_handling.lo \ - libalps_la-popen2.lo libalps_la-atoul.lo + libalps_la-parser_basil_4.0.lo libalps_la-parser_basil_5.1.lo \ + libalps_la-basil_request.lo libalps_la-do_query.lo \ + libalps_la-do_reserve.lo libalps_la-do_release.lo \ + libalps_la-do_confirm.lo libalps_la-do_switch.lo \ + libalps_la-memory_handling.lo libalps_la-popen2.lo \ + libalps_la-atoul.lo libalps_la_OBJECTS = $(am_libalps_la_OBJECTS) libalps_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libalps_la_CFLAGS) 
\ @@ -160,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -357,6 +384,7 @@ libalps_la_SOURCES = \ parser_basil_1.1.c \ parser_basil_3.1.c \ parser_basil_4.0.c \ + parser_basil_5.1.c \ basil_request.c \ 
do_query.c \ do_reserve.c \ @@ -436,6 +464,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_basil_1.1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_basil_3.1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_basil_4.0.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_basil_5.1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_common.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-popen2.Plo@am__quote@ @@ -502,6 +531,13 @@ libalps_la-parser_basil_4.0.lo: parser_basil_4.0.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -c -o libalps_la-parser_basil_4.0.lo `test -f 'parser_basil_4.0.c' || echo '$(srcdir)/'`parser_basil_4.0.c +libalps_la-parser_basil_5.1.lo: parser_basil_5.1.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -MT libalps_la-parser_basil_5.1.lo -MD -MP -MF $(DEPDIR)/libalps_la-parser_basil_5.1.Tpo -c -o libalps_la-parser_basil_5.1.lo `test -f 'parser_basil_5.1.c' || echo '$(srcdir)/'`parser_basil_5.1.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libalps_la-parser_basil_5.1.Tpo $(DEPDIR)/libalps_la-parser_basil_5.1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='parser_basil_5.1.c' object='libalps_la-parser_basil_5.1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -c -o libalps_la-parser_basil_5.1.lo `test -f 'parser_basil_5.1.c' || echo '$(srcdir)/'`parser_basil_5.1.c + libalps_la-basil_request.lo: basil_request.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -MT libalps_la-basil_request.lo -MD -MP -MF $(DEPDIR)/libalps_la-basil_request.Tpo -c -o libalps_la-basil_request.lo `test -f 'basil_request.c' || echo '$(srcdir)/'`basil_request.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libalps_la-basil_request.Tpo $(DEPDIR)/libalps_la-basil_request.Plo diff --git a/src/plugins/select/cray/libalps/basil_request.c b/src/plugins/select/cray/libalps/basil_request.c index 6e6d1ed2ade1a5e15042b66de11c1cea5bf926f8..0ca823d4a2cdd042f0afe0cd92377efefd51f695 100644 --- a/src/plugins/select/cray/libalps/basil_request.c +++ b/src/plugins/select/cray/libalps/basil_request.c @@ -80,7 +80,8 @@ static void _init_log_config(void) } } -static void _rsvn_write_reserve_xml(FILE *fp, struct basil_reservation *r) +static void _rsvn_write_reserve_xml(FILE *fp, struct basil_reservation *r, + enum basil_version version) { struct basil_rsvn_param *param; @@ -92,10 +93,17 @@ static void _rsvn_write_reserve_xml(FILE *fp, struct basil_reservation *r) _write_xml(fp, ">\n"); for (param = r->params; param; param = param->next) { - _write_xml(fp, " <ReserveParam architecture=\"%s\" " - "width=\"%ld\" depth=\"%ld\" nppn=\"%ld\"", - nam_arch[param->arch], - param->width, param->depth, param->nppn); + if (version >= BV_5_1) + _write_xml(fp, " <ReserveParam architecture=\"%s\" " + "width=\"%ld\" depth=\"%ld\" nppn=\"%ld\"" + " nppcu=\"%d\"", + nam_arch[param->arch], param->width, + param->depth, param->nppn, param->nppcu); + else + _write_xml(fp, " <ReserveParam architecture=\"%s\" " + "width=\"%ld\" depth=\"%ld\" nppn=\"%ld\"", + nam_arch[param->arch], + 
param->width, param->depth, param->nppn); if (param->memory || param->labels || param->nodes || param->accel) { @@ -124,7 +132,8 @@ static void _rsvn_write_reserve_xml(FILE *fp, struct basil_reservation *r) _write_xml(fp, " <LabelParamArray>\n"); for (label = param->labels; label; label = label->next) _write_xml(fp, " <LabelParam name=\"%s\"" - " type=\"%s\" disposition=\"%s\"/>\n", + " type=\"%s\" " + "disposition=\"%s\"/>\n", label->name, nam_labeltype[label->type], nam_ldisp[label->disp]); @@ -218,7 +227,7 @@ int basil_request(struct basil_parse_data *bp) break; case BM_reserve: _write_xml(apbasil, ">\n"); - _rsvn_write_reserve_xml(apbasil, bp->mdata.res); + _rsvn_write_reserve_xml(apbasil, bp->mdata.res, bp->version); break; case BM_confirm: if (bp->version == BV_1_0 && *bp->mdata.res->batch_id != '\0') diff --git a/src/plugins/select/cray/libalps/do_query.c b/src/plugins/select/cray/libalps/do_query.c index 309aaa9f53c6d3b5d978f068422ff97facb37d87..8e58635ef7a1820c6b7b014f0a83b96aaeacd6b1 100644 --- a/src/plugins/select/cray/libalps/do_query.c +++ b/src/plugins/select/cray/libalps/do_query.c @@ -15,6 +15,11 @@ static const char *_get_alps_engine(char *buf, size_t buflen) { struct basil_parse_data bp = {0}; + if (cray_conf->alps_engine) { + strncpy(buf, cray_conf->alps_engine, buflen); + return buf; + } + /* For this query use Basil 1.0 as lowest common denominator */ bp.version = BV_1_0; bp.method = BM_engine; @@ -75,7 +80,8 @@ extern enum basil_version get_basil_version(void) if (_get_alps_engine(engine_version, sizeof(engine_version)) == NULL) fatal("can not determine ALPS Engine version"); - else if (strncmp(engine_version, "5.1.0", 5) == 0) + else if ((strncmp(engine_version, "latest", 6) == 0) || + (strncmp(engine_version, "5.1", 3) == 0)) bv = BV_5_1; else if (strncmp(engine_version, "5.0", 3) == 0) bv = BV_5_0; diff --git a/src/plugins/select/cray/libalps/do_reserve.c b/src/plugins/select/cray/libalps/do_reserve.c index 
337f092a8267cd6901b86580aaee288154f26616..fc896ab38b008a5a1a1f4b597eb13a8388fd28ec 100644 --- a/src/plugins/select/cray/libalps/do_reserve.c +++ b/src/plugins/select/cray/libalps/do_reserve.c @@ -44,7 +44,7 @@ static int _rsvn_add_mem_param(struct basil_rsvn_param *rp, uint32_t mem_mb) */ static int _rsvn_add_params(struct basil_reservation *resv, uint32_t width, uint32_t depth, uint32_t nppn, - uint32_t mem_mb, char *mppnodes, + uint32_t mem_mb, char *mppnodes, uint32_t nppcu, struct basil_accel_param *accel) { struct basil_rsvn_param *rp = xmalloc(sizeof(*rp)); @@ -57,6 +57,7 @@ static int _rsvn_add_params(struct basil_reservation *resv, rp->depth = depth; rp->nppn = nppn; rp->nodes = mppnodes; + rp->nppcu = nppcu; rp->accel = accel; if (mem_mb && _rsvn_add_mem_param(rp, mem_mb) < 0) { @@ -89,7 +90,7 @@ static struct basil_reservation *_rsvn_new(const char *user, const char *batch_id, uint32_t width, uint32_t depth, uint32_t nppn, uint32_t mem_mb, - char *mppnodes, + uint32_t nppcu, char *mppnodes, struct basil_accel_param *accel) { struct basil_reservation *res; @@ -105,7 +106,7 @@ static struct basil_reservation *_rsvn_new(const char *user, strncpy(res->batch_id, batch_id, sizeof(res->batch_id)); if (_rsvn_add_params(res, width, depth, nppn, - mem_mb, mppnodes, accel) < 0) { + mem_mb, mppnodes, nppcu, accel) < 0) { free_rsvn(res); return NULL; } @@ -127,7 +128,7 @@ static struct basil_reservation *_rsvn_new(const char *user, */ long basil_reserve(const char *user, const char *batch_id, uint32_t width, uint32_t depth, uint32_t nppn, - uint32_t mem_mb, struct nodespec *ns_head, + uint32_t mem_mb, uint32_t nppcu, struct nodespec *ns_head, struct basil_accel_param *accel_head) { struct basil_reservation *rsvn; @@ -138,7 +139,7 @@ long basil_reserve(const char *user, const char *batch_id, free_nodespec(ns_head); rsvn = _rsvn_new(user, batch_id, width, depth, nppn, mem_mb, - mppnodes, accel_head); + nppcu, mppnodes, accel_head); if (rsvn == NULL) return -BE_INTERNAL; 
diff --git a/src/plugins/select/cray/libalps/do_switch.c b/src/plugins/select/cray/libalps/do_switch.c index 1d8ed89be9be2c7e4a13b9135a124279e7858b2c..fefc2dcfb1fab22c16f15cd360b79ec82bb12c91 100644 --- a/src/plugins/select/cray/libalps/do_switch.c +++ b/src/plugins/select/cray/libalps/do_switch.c @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/cray/libalps/memory_handling.h b/src/plugins/select/cray/libalps/memory_handling.h index 06ac88066202f3fa8b3684fa60ebba91aacd8477..cc4afa6679564531fcd0ced0daebe8bdfaede434 100644 --- a/src/plugins/select/cray/libalps/memory_handling.h +++ b/src/plugins/select/cray/libalps/memory_handling.h @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/cray/libalps/parser_basil_4.0.c b/src/plugins/select/cray/libalps/parser_basil_4.0.c index 4ef61fd9422fa41f795f09dd172200a6080a9ba4..c0e2cd489cdcbfaedd98dab0e4f97c51b5cf0560 100644 --- a/src/plugins/select/cray/libalps/parser_basil_4.0.c +++ b/src/plugins/select/cray/libalps/parser_basil_4.0.c @@ -7,7 +7,7 @@ #include "parser_internal.h" /** Basil 4.0 'Released' element */ -static void eh_released_4_0(struct ud *ud, const XML_Char **attrs) +void eh_released_4_0(struct ud *ud, const XML_Char **attrs) { char *attribs[] = { "claims" }; /* @@ -24,7 +24,7 @@ static void eh_released_4_0(struct ud *ud, const XML_Char **attrs) } /** Basil 4.0 'NodeArray' element */ -static void eh_node_array_4_0(struct ud *ud, const XML_Char **attrs) +void eh_node_array_4_0(struct ud *ud, const XML_Char **attrs) { char *attribs[] = { "changecount" }; /* diff --git a/src/plugins/select/cray/libalps/parser_basil_5.1.c b/src/plugins/select/cray/libalps/parser_basil_5.1.c new file mode 100644 index 0000000000000000000000000000000000000000..376bed49ec447c04250ff38fb05608746291ffc0 --- /dev/null +++ b/src/plugins/select/cray/libalps/parser_basil_5.1.c @@ -0,0 +1,301 @@ +/* + * XML tag handlers specific to Basil 5.1 (development release) + * + * Copyright (c) 2009-2011 Centro Svizzero di Calcolo Scientifico (CSCS) + * Licensed under the GPLv2. + */ +#include "parser_internal.h" + +/** Basil 5.1 'NodeArray' element */ +void eh_node_array_5_1(struct ud *ud, const XML_Char **attrs) +{ + char *attribs[] = { "schedchangecount" }; + /* + * The 'schedchangecount' attribute is new in Basil + * 1.3/5.1. Quoting Basil 1.3 documentation: + * To properly support the usage model suggested in item 10, + * below, it is necessary to add the schedchangecount + * attribute to the response to a QUERY(INVENTORY) request as well. 
+ */ + eh_node_array_4_0(ud, attrs); + extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + + if (atou64(attribs[0], &ud->bp->mdata.inv->sched_change_count) < 0) + fatal("illegal sched_change_count = %s", attribs[0]); + + if ( ud->bp->mdata.inv->sched_change_count + > ud->bp->mdata.inv->change_count) + fatal("illegal sched_change_count = %"PRIu64", must be " + "< change_count (%"PRIu64")", + ud->bp->mdata.inv->sched_change_count, + ud->bp->mdata.inv->change_count); +} + +/** Basil 5.1 'Socket' element */ +void eh_socket_5_1(struct ud *ud, const XML_Char **attrs) +{ + char *attribs[] = { "ordinal", "architecture", "clock_mhz" }; + + extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + +#if 0 + if (atou64(attribs[0], &ud->bp->mdata.inv->sched_change_count) < 0) + fatal("illegal sched_change_count = %s", attribs[0]); +#endif + ud->counter[BT_SEGMARRAY] = 0; +} +/** Basil 5.1 'Compute' element */ +void eh_compute_5_1(struct ud *ud, const XML_Char **attrs) +{ + char *attribs[] = { "ordinal" }; + + extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + +#if 0 + if (atou64(attribs[0], &ud->bp->mdata.inv->sched_change_count) < 0) + fatal("illegal sched_change_count = %s", attribs[0]); +#endif + /* As a ProcessArray element is now a child of a ComputeUnit + element, the ComputeUnit handler must clear its counter. 
*/ + ud->counter[BT_PROCARRAY] = 0; +} + +const struct element_handler basil_5_1_elements[] = { + [BT_MESSAGE] = { + .tag = "Message", + .depth = 0xff, /* unused, can appear at any depth */ + .uniq = false, + .hnd = eh_message + }, + [BT_RESPONSE] = { + .tag = "BasilResponse", + .depth = 0, + .uniq = true, + .hnd = eh_response + }, + [BT_RESPDATA] = { + .tag = "ResponseData", + .depth = 1, + .uniq = true, + .hnd = eh_resp_data + }, + [BT_RESERVED] = { + .tag = "Reserved", + .depth = 2, + .uniq = true, + .hnd = eh_reserved + }, + [BT_RESVDNODEARRAY] = { + .tag = "ReservedNodeArray", + .depth = 3, + .uniq = true, + .hnd = NULL + }, + [BT_RESVDNODE] = { + .tag = "ReservedNode", + .depth = 4, + .uniq = false, + .hnd = eh_resvd_node + }, + [BT_CONFIRMED] = { + .tag = "Confirmed", + .depth = 2, + .uniq = true, + .hnd = eh_confirmed + }, + [BT_RELEASED] = { + .tag = "Released", + .depth = 2, + .uniq = true, + .hnd = eh_released_4_0 + }, + [BT_ENGINE] = { + .tag = "Engine", + .depth = 2, + .uniq = true, + .hnd = eh_engine_3_1 + }, + [BT_INVENTORY] = { + .tag = "Inventory", + .depth = 2, + .uniq = true, + .hnd = eh_inventory_3_1 + }, + [BT_NODEARRAY] = { + .tag = "NodeArray", + .depth = 3, + .uniq = true, + .hnd = eh_node_array_5_1 + }, + [BT_NODE] = { + .tag = "Node", + .depth = 4, + .uniq = false, + .hnd = eh_node + }, + [BT_SOCKARRAY] = { + .tag = "SocketArray", + .depth = 5, + .uniq = true, + .hnd = NULL + }, + [BT_SOCKET] = { + .tag = "Socket", + .depth = 6, + .uniq = false, + .hnd = eh_socket_5_1 + }, + [BT_SEGMARRAY] = { + .tag = "SegmentArray", + .depth = 7, + .uniq = true, + .hnd = NULL + }, + [BT_SEGMENT] = { + .tag = "Segment", + .depth = 8, + .uniq = false, + .hnd = eh_segment + }, + [BT_COMUARRAY] = { + .tag = "ComputeUnitArray", + .depth = 9, + .uniq = true, + .hnd = NULL + }, + [BT_COMPUNIT] = { + .tag = "ComputeUnit", + .depth = 10, + .uniq = false, + .hnd = eh_compute_5_1 + }, + [BT_PROCARRAY] = { + .tag = "ProcessorArray", + .depth = 11, + .uniq = true, + 
.hnd = NULL + }, + [BT_PROCESSOR] = { + .tag = "Processor", + .depth = 12, + .uniq = false, + .hnd = eh_proc + }, + [BT_PROCALLOC] = { + .tag = "ProcessorAllocation", + .depth = 13, + .uniq = false, + .hnd = eh_proc_alloc + }, + [BT_MEMARRAY] = { + .tag = "MemoryArray", + .depth = 9, + .uniq = true, + .hnd = NULL + }, + [BT_MEMORY] = { + .tag = "Memory", + .depth = 10, + .uniq = false, + .hnd = eh_mem + }, + [BT_MEMALLOC] = { + .tag = "MemoryAllocation", + .depth = 11, + .uniq = false, + .hnd = eh_mem_alloc + }, + [BT_LABELARRAY] = { + .tag = "LabelArray", + .depth = 9, + .uniq = true, + .hnd = NULL + }, + [BT_LABEL] = { + .tag = "Label", + .depth = 10, + .uniq = false, + .hnd = eh_label + }, + [BT_ACCELARRAY] = { + .tag = "AcceleratorArray", + .depth = 5, + .uniq = true, + .hnd = NULL + }, + [BT_ACCEL] = { + .tag = "Accelerator", + .depth = 6, + .uniq = false, + .hnd = eh_accel + }, + [BT_ACCELALLOC] = { + .tag = "AcceleratorAllocation", + .depth = 7, + .uniq = false, + .hnd = eh_accel_alloc + }, + [BT_RESARRAY] = { + .tag = "ReservationArray", + .depth = 3, + .uniq = true, + .hnd = NULL + }, + [BT_RESVN] = { + .tag = "Reservation", + .depth = 4, + .uniq = false, + .hnd = eh_resv_3_1 + }, + [BT_APPARRAY] = { + .tag = "ApplicationArray", + .depth = 5, + .uniq = true, + .hnd = NULL + }, + [BT_APPLICATION] = { + .tag = "Application", + .depth = 6, + .uniq = false, + .hnd = eh_application + }, + [BT_CMDARRAY] = { + .tag = "CommandArray", + .depth = 7, + .uniq = true, + .hnd = NULL + }, + [BT_COMMAND] = { + .tag = "Command", + .depth = 8, + .uniq = false, + .hnd = eh_command + }, + [BT_SWITCHRES] = { + .tag = "Reservation", + .depth = 3, + .uniq = false, + .hnd = eh_switch_resv + }, + [BT_SWITCHAPP] = { + .tag = "Application", + .depth = 3, + .uniq = false, + .hnd = eh_switch_app + }, + [BT_SWITCHRESARRAY] = { + .tag = "ReservationArray", + .depth = 2, + .uniq = true, + .hnd = NULL + }, + [BT_SWITCHAPPARRAY] = { + .tag = "ApplicationArray", + .depth = 2, + .uniq = 
true, + .hnd = NULL + }, + [BT_5_1_MAX] = { + NULL, 0, 0, NULL + } +}; diff --git a/src/plugins/select/cray/libalps/parser_common.c b/src/plugins/select/cray/libalps/parser_common.c index 318310cb5660745c6b5515aa7c953fd5ee851039..5028d4693bf098d9ab0b9c4ff7d6240786152855 100644 --- a/src/plugins/select/cray/libalps/parser_common.c +++ b/src/plugins/select/cray/libalps/parser_common.c @@ -201,7 +201,11 @@ void eh_node(struct ud *ud, const XML_Char **attrs) ud->ud_inventory->node_head = new; } - ud->counter[BT_SEGMARRAY] = 0; + if ( ud->bp->version < BV_5_1 ) + ud->counter[BT_SEGMARRAY] = 0; + else + ud->counter[BT_SOCKARRAY] = 0; + ud->counter[BT_ACCELARRAY] = 0; /* Cover up Basil version differences by faking a segment. */ @@ -232,7 +236,11 @@ void eh_segment(struct ud *ud, const XML_Char **attrs) ud->ud_inventory->node_head->seg_head = new; } - ud->counter[BT_PROCARRAY] = 0; + if ( ud->bp->version < BV_5_1 ) + ud->counter[BT_PROCARRAY] = 0; + else + ud->counter[BT_COMUARRAY] = 0; + ud->counter[BT_MEMARRAY] = 0; ud->counter[BT_LABELARRAY] = 0; } @@ -243,17 +251,22 @@ void eh_proc(struct ud *ud, const XML_Char **attrs) struct basil_node_processor proc = {0}; char *attribs[] = { "ordinal", "architecture", "clock_mhz" }; - extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + if ( ud->bp->version < BV_5_1 ) + extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + else + extract_attributes(attrs, attribs, 1); if (atou32(attribs[0], &proc.ordinal) < 0) fatal("illegal ordinal = %s", attribs[0]); - for (proc.arch = BPT_X86_64; proc.arch < BPT_MAX; proc.arch++) - if (strcmp(attribs[1], nam_proc[proc.arch]) == 0) - break; + if ( ud->bp->version < BV_5_1 ) { + for (proc.arch = BPT_X86_64; proc.arch < BPT_MAX; proc.arch++) + if (strcmp(attribs[1], nam_proc[proc.arch]) == 0) + break; - if (atou32(attribs[2], &proc.clock_mhz) < 0) - fatal("illegal clock_mhz = %s", attribs[2]); + if (atou32(attribs[2], &proc.clock_mhz) < 0) + fatal("illegal clock_mhz = %s", attribs[2]); 
+ } if (ud->ud_inventory) { struct basil_node_processor *new = xmalloc(sizeof(*new)); @@ -499,7 +512,7 @@ static const struct element_handler *basil_tables[BV_MAX] = { [BV_4_0] = basil_4_0_elements, [BV_4_1] = basil_4_0_elements, [BV_5_0] = basil_4_0_elements, - [BV_5_1] = basil_4_0_elements + [BV_5_1] = basil_5_1_elements }; /** @@ -526,6 +539,12 @@ static enum basil_method _tag_to_method(const enum basil_element tag) return BM_inventory; case BT_SWITCH ... BT_SWITCHAPPARRAY: return BM_switch; + case BT_SOCKARRAY: /* INVENTORY, Basil >= 5.1.0/1.3 */ + case BT_COMUARRAY: /* INVENTORY, Basil >= 5.1.0/1.3 */ + return BM_none; + case BT_SOCKET: /* INVENTORY, Basil >= 5.1.0/1.3 */ + case BT_COMPUNIT: /* INVENTORY, Basil >= 5.1.0/1.3 */ + return BM_inventory; default: return BM_UNKNOWN; } @@ -539,21 +558,22 @@ static void _start_handler(void *user_data, enum basil_method method; enum basil_element tag; - for (tag = BT_MESSAGE; table[tag].tag; tag++) { + for (tag = BT_MESSAGE; tag < BT_MAX; tag++) { + if ( table[tag].tag ) if (strcmp(table[tag].tag, el) == 0) { /* since BM_inventory is returned for Arrays if the method is switch we need to "switch" it up here. 
*/ if (ud->bp->method == BM_switch) { - if(!strcmp(table[tag].tag, "ReservationArray")) + if (!strcmp(table[tag].tag, "ReservationArray")) tag = BT_SWITCHRESARRAY; - else if(!strcmp(table[tag].tag, "Reservation")) + else if (!strcmp(table[tag].tag, "Reservation")) tag = BT_SWITCHRES; - else if(!strcmp(table[tag].tag, + else if (!strcmp(table[tag].tag, "ApplicationArray")) tag = BT_SWITCHAPPARRAY; - else if(!strcmp(table[tag].tag, "Application")) + else if (!strcmp(table[tag].tag, "Application")) tag = BT_SWITCHAPP; } break; @@ -600,23 +620,25 @@ static void _end_handler(void *user_data, const XML_Char *el) enum basil_element end_tag; --ud->depth; - for (end_tag = BT_MESSAGE; table[end_tag].tag; end_tag++) + + for (end_tag = BT_MESSAGE; end_tag < BT_MAX; end_tag++) + if ( table[end_tag].tag ) if (strcmp(table[end_tag].tag, el) == 0) { /* since BM_inventory is returned for Arrays if the method is switch we need to "switch" it up here. */ if (ud->bp->method == BM_switch) { - if(!strcmp(table[end_tag].tag, + if (!strcmp(table[end_tag].tag, "ReservationArray")) end_tag = BT_SWITCHRESARRAY; - else if(!strcmp(table[end_tag].tag, + else if (!strcmp(table[end_tag].tag, "Reservation")) end_tag = BT_SWITCHRES; - else if(!strcmp(table[end_tag].tag, + else if (!strcmp(table[end_tag].tag, "ApplicationArray")) end_tag = BT_SWITCHAPPARRAY; - else if(!strcmp(table[end_tag].tag, + else if (!strcmp(table[end_tag].tag, "Application")) end_tag = BT_SWITCHAPP; } @@ -696,6 +718,7 @@ int parse_basil(struct basil_parse_data *bp, int fd) len = read(fd, xmlbuf, sizeof(xmlbuf)); if (len == -1) fatal("read error on stream: len=%d", len); + switch (XML_Parse(parser, xmlbuf, len, len == 0)) { case XML_STATUS_ERROR: xmlbuf[len] = '\0'; diff --git a/src/plugins/select/cray/libalps/parser_internal.h b/src/plugins/select/cray/libalps/parser_internal.h index b805e1ff9034c6ac2c9ba93850528a0a0d5eb9b6..f55ab47cb254824d7ef32b934c5d5eef99415cb5 100644 --- 
a/src/plugins/select/cray/libalps/parser_internal.h +++ b/src/plugins/select/cray/libalps/parser_internal.h @@ -37,7 +37,7 @@ struct ud { /* * Tag handler lookup * - * @tag: NUL-terminated tag name + * @tag: NULL-terminated tag name * @depth: depth at which this tag expected (not valid for all tags) * @uniq: whether @tag should be unique within document * @hnd: attribute-parsing function @@ -58,6 +58,7 @@ extern const struct element_handler basil_1_0_elements[]; extern const struct element_handler basil_1_1_elements[]; extern const struct element_handler basil_3_1_elements[]; extern const struct element_handler basil_4_0_elements[]; +extern const struct element_handler basil_5_1_elements[]; /* atoul.c */ extern int atou64(const char *str, uint64_t *value); extern int atou32(const char *str, uint32_t *value); @@ -103,9 +104,19 @@ extern void eh_node_3_1(struct ud *ud, const XML_Char **attrs); extern void eh_resv_3_1(struct ud *ud, const XML_Char **attrs); /* Basil 4.0 and above common handlers */ +extern void eh_released_4_0(struct ud *ud, const XML_Char **attrs); +extern void eh_node_array_4_0(struct ud *ud, const XML_Char **attrs); extern void eh_accel(struct ud *ud, const XML_Char **attrs); extern void eh_accel_alloc(struct ud *ud, const XML_Char **attrs); extern void eh_switch_res(struct ud *ud, const XML_Char **attrs); +/* st: I think the name of eh_switch_res above was a mistake, should be ...resv, + hence I added the new line after */ +extern void eh_switch_resv(struct ud *ud, const XML_Char **attrs); extern void eh_switch_app(struct ud *ud, const XML_Char **attrs); +/* Basil 5.1 and above common handlers */ +extern void eh_node_array_5_1(struct ud *ud, const XML_Char **attrs); +extern void eh_socket_5_1(struct ud *ud, const XML_Char **attrs); +extern void eh_compute_5_1(struct ud *ud, const XML_Char **attrs); + #endif /*__PARSER_INTERNAL_H__ */ diff --git a/src/plugins/select/cray/libemulate/Makefile.in b/src/plugins/select/cray/libemulate/Makefile.in index 
9297669ace04d17c3d5e0d9d375c66d880889bfa..df8ab9548a3da5304fbab8e3456f31137e8f9092 100644 --- a/src/plugins/select/cray/libemulate/Makefile.in +++ b/src/plugins/select/cray/libemulate/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/cray/libemulate DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -154,6 +158,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -174,6 +180,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = 
@GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -183,6 +192,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -190,6 +201,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -224,6 +244,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -251,6 +274,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/cray/libemulate/alps_emulate.c b/src/plugins/select/cray/libemulate/alps_emulate.c index ed5c898d0a753f1a88c2c581472a3ee413c62c37..4b8275eccd4162b876474b42b28d3a43dc28510d 100644 --- a/src/plugins/select/cray/libemulate/alps_emulate.c +++ b/src/plugins/select/cray/libemulate/alps_emulate.c @@ -6,7 +6,7 @@ * Written by Morris Jette <jette@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -490,7 +490,7 @@ extern void free_inv(struct basil_inventory *inv) extern long basil_reserve(const char *user, const char *batch_id, uint32_t width, uint32_t depth, uint32_t nppn, - uint32_t mem_mb, struct nodespec *ns_head, + uint32_t mem_mb, uint32_t nppcu, struct nodespec *ns_head, struct basil_accel_param *accel_head) { int i; @@ -499,8 +499,8 @@ extern long basil_reserve(const char *user, const char *batch_id, #if _DEBUG struct nodespec *my_node_spec; info("basil_reserve user:%s batch_id:%s width:%u depth:%u nppn:%u " - "mem_mb:%u", - user, batch_id, width, depth, nppn, mem_mb); + "mem_mb:%u nppcu:%u", + user, batch_id, width, depth, nppn, mem_mb, nppcu); my_node_spec = ns_head; while (my_node_spec) { info("basil_reserve node_spec:start:%u,end:%u", diff --git a/src/plugins/select/cray/libemulate/hilbert.c b/src/plugins/select/cray/libemulate/hilbert.c index 6a26e722f3d4289f95198625f04dc59e6df8295b..55471e9f3cca9bf93107a467683eb8873da4ab9a 100644 --- a/src/plugins/select/cray/libemulate/hilbert.c +++ b/src/plugins/select/cray/libemulate/hilbert.c @@ -53,9 +53,9 @@ int n) // I dimension { P = Q - 1; for( i = n-1; i; i-- ) - if( X[i] & Q ) X[0] ^= P; // invert + if ( X[i] & Q ) X[0] ^= P; // invert else{ t = (X[0] ^ X[i]) & P; X[0] ^= t; X[i] ^= t; } // exchange - if( X[0] & Q ) X[0] ^= P; // invert + if ( X[0] & Q ) X[0] ^= P; // invert } } extern void AxestoTranspose( @@ -70,9 +70,9 @@ int n) // I dimension for( Q = 1 << (b - 1); Q > 1; Q >>= 1 ) { P = Q - 1; - if( X[0] & Q ) X[0] ^= P; // invert + if ( X[0] & Q ) X[0] ^= P; // invert for( i = 1; i < n; i++ ) - if( X[i] & Q ) X[0] ^= P; // invert + if ( X[i] & Q ) X[0] ^= P; // invert else{ t = (X[0] ^ X[i]) & P; X[0] ^= t; X[i] ^= t; } // exchange } diff --git a/src/plugins/select/cray/nodespec.c b/src/plugins/select/cray/nodespec.c index 3bf8364dcb5bf8aa0ef0d8d1728b3b8d7a956b84..05016ddcba4031c0e1ee87afe58db6ef5a0a7b47 100644 --- 
a/src/plugins/select/cray/nodespec.c +++ b/src/plugins/select/cray/nodespec.c @@ -153,9 +153,6 @@ char *ns_to_string(const struct nodespec *head) size *= CRAY_MAX_DIGITS + 1; buf = xmalloc(size); - if (buf == NULL) - fatal("can not allocate %d", (int)size); - if (ns_ranged_string(head, buf, size) < 0) fatal("can not expand nodelist expression"); } diff --git a/src/plugins/select/cray/other_select.c b/src/plugins/select/cray/other_select.c index 686ddbbbd9458b71c838bb9b98983f935f1f9eee..23aa68805162398443caae2e897045a7f0b712ba 100644 --- a/src/plugins/select/cray/other_select.c +++ b/src/plugins/select/cray/other_select.c @@ -16,7 +16,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -743,7 +743,7 @@ extern int other_reconfigure (void) * RET - nodes selected for use by the reservation */ extern bitstr_t * other_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, bitstr_t **core_bitmap) + uint32_t *core_cnt, bitstr_t **core_bitmap) { if (other_select_init() < 0) return NULL; diff --git a/src/plugins/select/cray/other_select.h b/src/plugins/select/cray/other_select.h index 3842e96b449e69055c916a2571daf6a68eb28ed6..4137a5d454061447c210537b4a1fb3e5c3d2a00b 100644 --- a/src/plugins/select/cray/other_select.h +++ b/src/plugins/select/cray/other_select.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -380,7 +380,7 @@ extern int other_pack_select_info(time_t last_query_time, uint16_t show_flags, extern int other_reconfigure(void); extern bitstr_t * other_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, bitstr_t **core_bitmap); + uint32_t *core_cnt, bitstr_t **core_bitmap); extern void other_ba_init(node_info_msg_t *node_info_ptr, bool sanity_check); extern void other_ba_fini(void); diff --git a/src/plugins/select/cray/parser_common.h b/src/plugins/select/cray/parser_common.h index 4ad70daef5bb6c9a9ec6b3765d82fe93bc87b0f7..2f2c4ecf415888b539c4d2bce3911b9f7f187904 100644 --- a/src/plugins/select/cray/parser_common.h +++ b/src/plugins/select/cray/parser_common.h @@ -22,7 +22,7 @@ const char *bv_names[BV_MAX] = { /* Basil Protocol version */ [BV_4_0] = "1.2", [BV_4_1] = "1.2", [BV_5_0] = "1.2", - [BV_5_1] = "1.2" + [BV_5_1] = "1.3" }; const char *bv_names_long[BV_MAX] = { /* Actual version name */ diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index 0760292ce06e52db0746a38f4dfe90f2f8dc2611..913106a267d86c68e75f293c4cecfe1c721f7c83 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -74,7 +74,6 @@ int switch_record_cnt __attribute__((weak_import)); slurmdb_cluster_rec_t *working_cluster_rec __attribute__((weak_import)) = NULL; void *acct_db_conn __attribute__((weak_import)) = NULL; bitstr_t *avail_node_bitmap __attribute__((weak_import)) = NULL; -char *default_slurm_config_file __attribute__((weak_import)) = NULL; #else slurm_ctl_conf_t slurmctld_conf; struct node_record *node_record_table_ptr; @@ -87,7 +86,6 @@ int switch_record_cnt; slurmdb_cluster_rec_t *working_cluster_rec = NULL; void *acct_db_conn = NULL; bitstr_t *avail_node_bitmap = NULL; -char *default_slurm_config_file = NULL; int clusteracct_storage_g_node_down(void *db_conn, struct node_record *node_ptr, time_t event_time, char *reason, @@ -864,7 +862,7 @@ extern int select_p_reconfigure(void) } extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt, - uint32_t core_cnt, bitstr_t **core_bitmap) + uint32_t *core_cnt, bitstr_t **core_bitmap) { return other_resv_test(avail_bitmap, node_cnt, core_cnt, core_bitmap); } diff --git a/src/plugins/select/linear/Makefile.in b/src/plugins/select/linear/Makefile.in index d05f200d95eaea000d28fd0a3f5dfe2bccf85b84..40dcce415ff69f342ebbd417bcfd21723899f776 100644 --- a/src/plugins/select/linear/Makefile.in +++ b/src/plugins/select/linear/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/linear DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = 
@NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 9bbd2bc7a2a9fb651904a2b67a95a055f07955bb..2e4cc34c9f056abadf764be9b1d188c277ca527b 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -108,6 +108,7 @@ int switch_record_cnt; struct select_nodeinfo { uint16_t magic; /* magic number */ uint16_t alloc_cpus; + uint32_t alloc_memory; }; static int _add_job_to_nodes(struct cr_record *cr_ptr, @@ -126,7 +127,7 @@ static int _find_job_mate(struct job_record *job_ptr, bitstr_t *bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes); static void _free_cr(struct cr_record *cr_ptr); -static uint16_t _get_avail_cpus(struct job_record *job_ptr, int index); +static int _get_avail_cpus(struct job_record *job_ptr, int index); static uint16_t _get_total_cpus(int index); static void _init_node_cr(void); static int _job_count_bitmap(struct cr_record *cr_ptr, @@ -448,14 +449,15 @@ static bool _enough_nodes(int avail_nodes, int rem_nodes, * IN job_ptr - pointer to job being scheduled * IN index - index of node's configuration information in select_node_ptr */ -static uint16_t _get_avail_cpus(struct job_record *job_ptr, int index) +static int _get_avail_cpus(struct job_record *job_ptr, int index) { struct node_record *node_ptr; - uint16_t avail_cpus; - uint16_t cpus, boards, sockets, cores, threads; - uint16_t cpus_per_task = 1; - uint16_t ntasks_per_node = 0, ntasks_per_socket, ntasks_per_core; - uint16_t min_sockets, min_cores, min_threads; + int avail_cpus; + uint16_t boards_per_node, sockets_per_board; + uint16_t cores_per_socket, thread_per_core; + uint16_t cpus_per_node, cpus_per_task = 1; + uint16_t ntasks_per_node = 0, ntasks_per_core; + uint32_t nppcu, total_threads; multi_core_data_t *mc_ptr = NULL; if (job_ptr->details == NULL) @@ -465,53 +467,58 @@ static uint16_t _get_avail_cpus(struct job_record *job_ptr, int index) cpus_per_task = job_ptr->details->cpus_per_task; if (job_ptr->details->ntasks_per_node) ntasks_per_node = job_ptr->details->ntasks_per_node; - if ((mc_ptr = job_ptr->details->mc_ptr)) { - ntasks_per_socket = mc_ptr->ntasks_per_socket; + if ((mc_ptr = 
job_ptr->details->mc_ptr)) ntasks_per_core = mc_ptr->ntasks_per_core; - min_sockets = mc_ptr->sockets_per_node; - min_cores = mc_ptr->cores_per_socket; - min_threads = mc_ptr->threads_per_core; - } else { - ntasks_per_socket = 0; + else ntasks_per_core = 0; - min_sockets = (uint16_t) NO_VAL; - min_cores = (uint16_t) NO_VAL; - min_threads = (uint16_t) NO_VAL; - } node_ptr = select_node_ptr + index; if (select_fast_schedule) { /* don't bother checking each node */ - cpus = node_ptr->config_ptr->cpus; - boards = node_ptr->config_ptr->boards; - sockets = node_ptr->config_ptr->sockets; - cores = node_ptr->config_ptr->cores; - threads = node_ptr->config_ptr->threads; + cpus_per_node = node_ptr->config_ptr->cpus; + boards_per_node = node_ptr->config_ptr->boards; + sockets_per_board = node_ptr->config_ptr->sockets; + cores_per_socket = node_ptr->config_ptr->cores; + thread_per_core = node_ptr->config_ptr->threads; } else { - cpus = node_ptr->cpus; - boards = node_ptr->boards; - sockets = node_ptr->sockets; - cores = node_ptr->cores; - threads = node_ptr->threads; + cpus_per_node = node_ptr->cpus; + boards_per_node = node_ptr->boards; + sockets_per_board = node_ptr->sockets; + cores_per_socket = node_ptr->cores; + thread_per_core = node_ptr->threads; } #if SELECT_DEBUG - info("host %s HW_ cpus %u boards %u sockets %u cores %u threads %u ", - node_ptr->name, cpus, boards, sockets, cores, threads); -#else - /* Largely to avoid warning about unused variable "boards" */ - debug2("host %s HW_ cpus %u boards %u sockets %u cores %u threads %u ", - node_ptr->name, cpus, boards, sockets, cores, threads); + info("host:%s HW_ cpus_per_node:%u boards_per_node:%u " + "sockets_per_boards:%u cores_per_socket:%u thread_per_core:%u ", + node_ptr->name, cpus_per_node, boards_per_node, sockets_per_board, + cores_per_socket, thread_per_core); #endif - - avail_cpus = slurm_get_avail_procs( - min_sockets, min_cores, min_threads, cpus_per_task, - ntasks_per_node, ntasks_per_socket, 
ntasks_per_core, - &cpus, &sockets, &cores, &threads, NULL, - CR_CPU, job_ptr->job_id, node_ptr->name); - + /* pick defaults for any unspecified items */ + if (cpus_per_task <= 0) + cpus_per_task = 1; + if (thread_per_core <= 0) + thread_per_core = 1; + if (cores_per_socket <= 0) + cores_per_socket = 1; + if (boards_per_node <= 0) + boards_per_node = 1; + if (sockets_per_board <= 0) { + sockets_per_board = cpus_per_node / boards_per_node / + cores_per_socket / thread_per_core; + } + + nppcu = ntasks_per_core; + total_threads = boards_per_node * sockets_per_board * + cores_per_socket * thread_per_core; + avail_cpus = adjust_cpus_nppcu(nppcu, thread_per_core, total_threads); + + if (ntasks_per_node > 0) + avail_cpus = MIN(avail_cpus, ntasks_per_node * cpus_per_task); #if SELECT_DEBUG - debug("avail_cpus index %d = %d (out of %d %d %d %d)", - index, avail_cpus, cpus, sockets, cores, threads); + debug("avail_cpus index %d = %u (out of boards_per_node:%u " + "sockets_per_boards:%u cores_per_socket:%u thread_per_core:%u)", + index, avail_cpus, boards_per_node, sockets_per_board, + cores_per_socket, thread_per_core); #endif return(avail_cpus); } @@ -531,6 +538,28 @@ static uint16_t _get_total_cpus(int index) return node_ptr->cpus; } +static uint16_t _get_total_threads(int index) +{ + struct node_record *node_ptr = &(select_node_ptr[index]); + if (select_fast_schedule) + return node_ptr->config_ptr->threads; + else + return node_ptr->threads; +} + +/* + * _get_ntasks_per_core - Retrieve the value of ntasks_per_core from + * the given job_details record. If it wasn't set, return 0xffff. + * Intended for use with the adjust_cpus_nppcu function. 
+ */ + +static uint16_t _get_ntasks_per_core(struct job_details *details) { + if (details->mc_ptr) + return details->mc_ptr->ntasks_per_core; + else + return 0xffff; +} + static job_resources_t *_create_job_resources(int node_cnt) { job_resources_t *job_resrcs_ptr; @@ -552,7 +581,7 @@ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) { int i, j, k; int first_bit, last_bit; - uint32_t node_cpus, total_cpus = 0, node_cnt; + uint32_t node_cpus, total_cpus = 0, node_cnt, node_threads; struct node_record *node_ptr; uint32_t job_memory_cpu = 0, job_memory_node = 0; job_resources_t *job_resrcs_ptr; @@ -572,8 +601,6 @@ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) job_ptr->job_resrcs = job_resrcs_ptr = _create_job_resources(node_cnt); job_resrcs_ptr->node_bitmap = bit_copy(bitmap); job_resrcs_ptr->nodes = bitmap2node_name(bitmap); - if (job_resrcs_ptr->node_bitmap == NULL) - fatal("bit_copy malloc failure"); job_resrcs_ptr->ncpus = job_ptr->total_cpus; if (build_job_resources(job_resrcs_ptr, (void *)select_node_ptr, select_fast_schedule)) @@ -587,10 +614,17 @@ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) if (!bit_test(bitmap, i)) continue; node_ptr = &(select_node_ptr[i]); - if (select_fast_schedule) - node_cpus = node_ptr->config_ptr->cpus; - else - node_cpus = node_ptr->cpus; + if (select_fast_schedule) { + node_cpus = node_ptr->config_ptr->cpus; + node_threads = node_ptr->config_ptr->threads; + } else { + node_cpus = node_ptr->cpus; + node_threads = node_ptr->threads; + } + + node_cpus = adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + node_threads, node_cpus); job_resrcs_ptr->cpus[j] = node_cpus; if ((k == -1) || (job_resrcs_ptr->cpu_array_value[k] != node_cpus)) { @@ -862,7 +896,10 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, rem_nodes--; max_nodes--; rem_cpus -= avail_cpus; - total_cpus += _get_total_cpus(index); + total_cpus += 
adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(index), + _get_total_cpus(index)); } else { /* node not required (yet) */ bit_clear(bitmap, index); consec_cpus[consec_index] += avail_cpus; @@ -969,7 +1006,7 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, if (best_fit_nodes == 0) break; if (job_ptr->details->contiguous && - ((best_fit_cpus < rem_cpus) || + ((best_fit_cpus < rem_cpus) || (!_enough_nodes(best_fit_nodes, rem_nodes, min_nodes, req_nodes)))) break; /* no hole large enough */ @@ -989,7 +1026,10 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, max_nodes--; avail_cpus = _get_avail_cpus(job_ptr, i); rem_cpus -= avail_cpus; - total_cpus += _get_total_cpus(i); + total_cpus += adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(i), + _get_total_cpus(i)); } for (i = (best_fit_req - 1); i >= consec_start[best_fit_location]; i--) { @@ -1003,7 +1043,10 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, max_nodes--; avail_cpus = _get_avail_cpus(job_ptr, i); rem_cpus -= avail_cpus; - total_cpus += _get_total_cpus(i); + total_cpus += adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(i), + _get_total_cpus(i)); } } else { for (i = consec_start[best_fit_location]; @@ -1018,7 +1061,10 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, max_nodes--; avail_cpus = _get_avail_cpus(job_ptr, i); rem_cpus -= avail_cpus; - total_cpus += _get_total_cpus(i); + total_cpus += adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(i), + _get_total_cpus(i)); } } if (job_ptr->details->contiguous || @@ -1244,7 +1290,10 @@ static int _job_test_topo(struct job_record *job_ptr, bitstr_t *bitmap, bit_set(bitmap, i); alloc_nodes++; rem_cpus -= avail_cpus; - total_cpus += _get_total_cpus(i); + total_cpus += adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(i), + 
_get_total_cpus(i)); } } /* Accumulate additional resources from leafs that @@ -1279,7 +1328,10 @@ static int _job_test_topo(struct job_record *job_ptr, bitstr_t *bitmap, bit_set(bitmap, i); alloc_nodes++; rem_cpus -= _get_avail_cpus(job_ptr, i); - total_cpus += _get_total_cpus(i); + total_cpus += adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(i), + _get_total_cpus(i)); if ((alloc_nodes > max_nodes) || ((alloc_nodes >= want_nodes) && (rem_cpus <= 0))) @@ -1349,7 +1401,10 @@ static int _job_test_topo(struct job_record *job_ptr, bitstr_t *bitmap, bit_set(bitmap, i); alloc_nodes++; rem_cpus -= _get_avail_cpus(job_ptr, i); - total_cpus += _get_total_cpus(i); + total_cpus += adjust_cpus_nppcu( + _get_ntasks_per_core(job_ptr->details), + _get_total_threads(i), + _get_total_cpus(i)); if ((alloc_nodes > max_nodes) || ((alloc_nodes >= want_nodes) && (rem_cpus <= 0))) break; @@ -1625,12 +1680,8 @@ static int _job_expand(struct job_record *from_job_ptr, } tmp_bitmap = bit_copy(to_job_resrcs_ptr->node_bitmap); - if (!tmp_bitmap) - fatal("bit_copy: malloc failure"); bit_or(tmp_bitmap, from_job_resrcs_ptr->node_bitmap); tmp_bitmap2 = bit_copy(to_job_ptr->node_bitmap); - if (!tmp_bitmap) - fatal("bit_copy: malloc failure"); bit_or(tmp_bitmap2, from_job_ptr->node_bitmap); bit_and(tmp_bitmap, tmp_bitmap2); bit_free(tmp_bitmap2); @@ -1766,9 +1817,10 @@ static int _decr_node_job_cnt(int node_inx, struct job_record *job_ptr, { struct node_record *node_ptr = node_record_table_ptr + node_inx; struct part_cr_record *part_cr_ptr; - bool exclusive, is_job_running; + bool exclusive = false, is_job_running; - exclusive = (job_ptr->details->shared == 0); + if (job_ptr->details) + exclusive = (job_ptr->details->shared == 0); if (exclusive) { if (cr_ptr->nodes[node_inx].exclusive_cnt) cr_ptr->nodes[node_inx].exclusive_cnt--; @@ -2154,8 +2206,6 @@ static void _init_node_cr(void) /* build partition records */ part_iterator = list_iterator_create(part_list); - if 
(part_iterator == NULL) - fatal("list_iterator_create: malloc failure"); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { for (i = 0; i < select_node_cnt; i++) { if (part_ptr->node_bitmap == NULL) @@ -2212,7 +2262,10 @@ static void _init_node_cr(void) if (job_resrcs_ptr->node_bitmap == NULL) continue; - exclusive = (job_ptr->details->shared == 0); + if (job_ptr->details) + exclusive = (job_ptr->details->shared == 0); + else + exclusive = 0; node_offset = -1; i_first = bit_ffs(job_resrcs_ptr->node_bitmap); i_last = bit_fls(job_resrcs_ptr->node_bitmap); @@ -2310,8 +2363,6 @@ static int _test_only(struct job_record *job_ptr, bitstr_t *bitmap, uint32_t save_mem; orig_map = bit_copy(bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); /* Try to run with currently available nodes */ i = _job_count_bitmap(cr_ptr, job_ptr, orig_map, bitmap, @@ -2360,8 +2411,6 @@ static int _run_now(struct job_record *job_ptr, bitstr_t *bitmap, uint16_t pass_count = 0; orig_map = bit_copy(bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); for (max_run_job=0; ((max_run_job<max_share) && (rc != SLURM_SUCCESS)); max_run_job++) { @@ -2405,8 +2454,6 @@ top: if ((rc != SLURM_SUCCESS) && preemptee_candidates && (exp_cr = _dup_cr(cr_ptr))) { /* Remove all preemptable jobs from simulated environment */ job_iterator = list_iterator_create(preemptee_candidates); - if (job_iterator == NULL) - fatal ("memory allocation failure in linear"); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { bool remove_all = false; @@ -2465,8 +2512,6 @@ top: if ((rc != SLURM_SUCCESS) && preemptee_candidates && * actually used */ if (*preemptee_job_list == NULL) { *preemptee_job_list = list_create(NULL); - if (*preemptee_job_list == NULL) - fatal("list_create malloc failure"); } preemptee_iterator = list_iterator_create( preemptee_candidates); @@ -2511,8 +2556,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, max_run_jobs = 
MAX((max_share - 1), 1); /* exclude this job */ orig_map = bit_copy(bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); /* Try to run with currently available nodes */ i = _job_count_bitmap(cr_ptr, job_ptr, orig_map, bitmap, @@ -2538,8 +2581,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, /* Build list of running and suspended jobs */ cr_job_list = list_create(NULL); - if (!cr_job_list) - fatal("list_create: memory allocation failure"); job_iterator = list_iterator_create(job_list); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(tmp_job_ptr) && @@ -2617,8 +2658,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, * in selected plugin, but by Moab or something else. */ if (*preemptee_job_list == NULL) { *preemptee_job_list = list_create(NULL); - if (*preemptee_job_list == NULL) - fatal("list_create malloc failure"); } preemptee_iterator =list_iterator_create(preemptee_candidates); while ((tmp_job_ptr = (struct job_record *) @@ -3055,7 +3094,12 @@ extern int select_p_select_nodeinfo_pack(select_nodeinfo_t *nodeinfo, Buf buffer, uint16_t protocol_version) { - pack16(nodeinfo->alloc_cpus, buffer); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + pack16(nodeinfo->alloc_cpus, buffer); + pack32(nodeinfo->alloc_memory, buffer); + } else { + pack16(nodeinfo->alloc_cpus, buffer); + } return SLURM_SUCCESS; } @@ -3069,7 +3113,12 @@ extern int select_p_select_nodeinfo_unpack(select_nodeinfo_t **nodeinfo, nodeinfo_ptr = select_p_select_nodeinfo_alloc(); *nodeinfo = nodeinfo_ptr; - safe_unpack16(&nodeinfo_ptr->alloc_cpus, buffer); + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack16(&nodeinfo_ptr->alloc_cpus, buffer); + safe_unpack32(&nodeinfo_ptr->alloc_memory, buffer); + } else { + safe_unpack16(&nodeinfo_ptr->alloc_cpus, buffer); + } return SLURM_SUCCESS; @@ -3092,7 +3141,7 @@ extern select_nodeinfo_t *select_p_select_nodeinfo_alloc(void) 
extern int select_p_select_nodeinfo_free(select_nodeinfo_t *nodeinfo) { - if(nodeinfo) { + if (nodeinfo) { if (nodeinfo->magic != NODEINFO_MAGIC) { error("select_p_select_nodeinfo_free: " "nodeinfo magic bad"); @@ -3107,7 +3156,7 @@ extern int select_p_select_nodeinfo_free(select_nodeinfo_t *nodeinfo) extern int select_p_select_nodeinfo_set_all(void) { struct node_record *node_ptr = NULL; - int i=0; + int n; static time_t last_set_all = 0; /* only set this once when the last_node_update is newer than @@ -3120,19 +3169,16 @@ extern int select_p_select_nodeinfo_set_all(void) } last_set_all = last_node_update; - for (i=0; i<node_record_count; i++) { + for (n = 0, node_ptr = node_record_table_ptr; + n < select_node_cnt; n++, node_ptr++) { select_nodeinfo_t *nodeinfo = NULL; - - node_ptr = node_record_table_ptr + i; - /* We have to use the '_g_' here to make sure we get - the correct data to work on. i.e. cray calls this - plugin from within select/cray which has it's own - struct. - */ + /* We have to use the '_g_' here to make sure we get the + * correct data to work on. i.e. cray calls this plugin + * from within select/cray which has it's own struct. 
*/ select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, SELECT_NODEDATA_PTR, 0, (void *)&nodeinfo); - if(!nodeinfo) { + if (!nodeinfo) { error("no nodeinfo returned from structure"); continue; } @@ -3146,6 +3192,11 @@ extern int select_p_select_nodeinfo_set_all(void) nodeinfo->alloc_cpus = node_ptr->cpus; } else nodeinfo->alloc_cpus = 0; + if (cr_ptr && cr_ptr->nodes) { + nodeinfo->alloc_memory = cr_ptr->nodes[n].alloc_memory; + } else { + nodeinfo->alloc_memory = 0; + } } return SLURM_SUCCESS; @@ -3170,6 +3221,7 @@ extern int select_p_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, { int rc = SLURM_SUCCESS; uint16_t *uint16 = (uint16_t *) data; + uint32_t *uint32 = (uint32_t *) data; char **tmp_char = (char **) data; select_nodeinfo_t **select_nodeinfo = (select_nodeinfo_t **) data; @@ -3200,6 +3252,9 @@ extern int select_p_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, case SELECT_NODEDATA_EXTRA_INFO: *tmp_char = NULL; break; + case SELECT_NODEDATA_MEM_ALLOC: + *uint32 = nodeinfo->alloc_memory; + break; default: error("Unsupported option %d for get_nodeinfo.", dinfo); rc = SLURM_ERROR; diff --git a/src/plugins/select/linear/select_linear.h b/src/plugins/select/linear/select_linear.h index db146f19c153d50552562f4e872c6d41122cf37f..0681fa468fcb4c301983515849590bb0734023fe 100644 --- a/src/plugins/select/linear/select_linear.h +++ b/src/plugins/select/linear/select_linear.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/serial/Makefile.in b/src/plugins/select/serial/Makefile.in index 7d3ccd079194aeaf3637294547e4e212d4f183a5..464ff7c1f4bdecb7481eac68335b064593109a39 100644 --- a/src/plugins/select/serial/Makefile.in +++ b/src/plugins/select/serial/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/select/serial DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/select/serial/dist_tasks.c b/src/plugins/select/serial/dist_tasks.c index 8ddef4efd47073b694d719256e9a7cad209e1d7b..30b61ed7a87603f25ebc1d24428a802a5670ac24 100644 --- a/src/plugins/select/serial/dist_tasks.c +++ b/src/plugins/select/serial/dist_tasks.c @@ -4,7 +4,7 @@ * Copyright (C) 2012 SchedMD LLC. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/serial/dist_tasks.h b/src/plugins/select/serial/dist_tasks.h index f2cbe28de36eb427a68561eba397b9fd78a7e0b3..93f0e89899fbc801af2727e9fb927aa3c62643d5 100644 --- a/src/plugins/select/serial/dist_tasks.h +++ b/src/plugins/select/serial/dist_tasks.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/serial/job_test.c b/src/plugins/select/serial/job_test.c index 78cf7e41e48b73065d4644663030ab7a83829fca..17fc1c4b29d89480cb9d79f0177ef9b27d97bd00 100644 --- a/src/plugins/select/serial/job_test.c +++ b/src/plugins/select/serial/job_test.c @@ -6,7 +6,7 @@ * from select/linear * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -119,6 +119,15 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, node_ptr = select_node_record[node_i].node_ptr; cpus_per_core = select_node_record[node_i].cpus / (core_end_bit - core_start_bit + 1); + if (node_usage[node_i].gres_list) + gres_list = node_usage[node_i].gres_list; + else + gres_list = node_ptr->gres_list; + + gres_plugin_job_core_filter(job_ptr->gres_list, gres_list, test_only, + core_map, core_start_bit, core_end_bit, + node_ptr->name); + if ((cr_type & CR_MEMORY) && cpus) { req_mem = job_ptr->details->pn_min_memory & ~MEM_PER_CPU; avail_mem = select_node_record[node_i].real_memory; @@ -128,10 +137,6 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, cpus = 0; } - if (node_usage[node_i].gres_list) - gres_list = node_usage[node_i].gres_list; - else - gres_list = node_ptr->gres_list; gres_cores = gres_plugin_job_test(job_ptr->gres_list, gres_list, test_only, core_map, core_start_bit, @@ -338,8 +343,6 @@ bitstr_t *_make_core_bitmap(bitstr_t *node_map) nodes = bit_size(node_map); size = cr_get_coremap_offset(nodes); bitstr_t *core_map = bit_alloc(size); - if (!core_map) - fatal("bit_alloc: malloc failure"); i_first = bit_ffs(node_map); if (i_first >= 0) @@ -731,7 +734,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, int mode, * avail_cores = static core_bitmap of all available cores */ - if (jp_ptr->row == NULL) { + if (!jp_ptr || !jp_ptr->row) { /* there's no existing jobs in this partition, so place * the job in avail_cores. 
FIXME: still need a good * placement algorithm here that optimizes "job overlap" @@ -864,8 +867,6 @@ alloc_job: job_res = create_job_resources(); job_res->node_bitmap = bit_copy(bitmap); job_res->nodes = bitmap2node_name(bitmap); - if (job_res->node_bitmap == NULL) - fatal("bit_copy malloc failure"); job_res->nhosts = bit_set_count(bitmap); job_res->ncpus = job_res->nhosts; if (job_ptr->details->ntasks_per_node) diff --git a/src/plugins/select/serial/job_test.h b/src/plugins/select/serial/job_test.h index 6cf9e73edd09f36e864e8370fc722f16c76d4f8f..bb9ae0cc0be7fd9cb946f786682a6ac9f66a4fdd 100644 --- a/src/plugins/select/serial/job_test.h +++ b/src/plugins/select/serial/job_test.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/select/serial/select_serial.c b/src/plugins/select/serial/select_serial.c index a2c81bf47682e96b7c448b1ff022b7db9174fb1b..6718b391cddd6266f119d3fd5f5d34a82ba0ef87 100644 --- a/src/plugins/select/serial/select_serial.c +++ b/src/plugins/select/serial/select_serial.c @@ -3,7 +3,7 @@ * job allocations. ***************************************************************************** * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -356,9 +356,6 @@ static void _create_part_data(void) this_ptr = select_part_record; part_iterator = list_iterator_create(part_list); - if (part_iterator == NULL) - fatal ("memory allocation failure"); - while ((p_ptr = (struct part_record *) list_next(part_iterator))) { this_ptr->part_ptr = p_ptr; this_ptr->num_rows = p_ptr->max_share; @@ -517,11 +514,20 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr, bit_nclear(this_row->row_bitmap, 0, size-1); } } else { - xassert(job_ptr); - xassert(job_ptr->job_resrcs); - remove_job_from_cores(job_ptr->job_resrcs, - &this_row->row_bitmap, - cr_node_num_cores); + if (job_ptr) { /* just remove the job */ + xassert(job_ptr->job_resrcs); + remove_job_from_cores(job_ptr->job_resrcs, + &(this_row->row_bitmap), + cr_node_num_cores); + } else { /* totally rebuild the bitmap */ + size = bit_size(this_row->row_bitmap); + bit_nclear(this_row->row_bitmap, 0, size-1); + for (j = 0; j < this_row->num_jobs; j++) { + add_job_to_cores(this_row->job_list[j], + &(this_row->row_bitmap), + cr_node_num_cores); + } + } } return; } @@ -1083,8 +1089,6 @@ static int _run_now(struct job_record *job_ptr, bitstr_t *bitmap, save_bitmap = bit_copy(bitmap); top: orig_map = bit_copy(save_bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); rc = cr_job_test(job_ptr, bitmap, SELECT_MODE_RUN_NOW, cr_type, job_node_share, @@ -1108,8 +1112,6 @@ top: orig_map = bit_copy(save_bitmap); } job_iterator = list_iterator_create(preemptee_candidates); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(tmp_job_ptr) && @@ -1172,13 +1174,9 @@ top: orig_map = bit_copy(save_bitmap); * actually used */ if (*preemptee_job_list == NULL) { *preemptee_job_list = list_create(NULL); - if (*preemptee_job_list == NULL) - fatal("list_create malloc failure"); } preemptee_iterator 
= list_iterator_create( preemptee_candidates); - if (preemptee_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(preemptee_iterator))) { mode = slurm_job_preempt_mode(tmp_job_ptr); @@ -1225,8 +1223,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, time_t now = time(NULL); orig_map = bit_copy(bitmap); - if (!orig_map) - fatal("bit_copy: malloc failure"); /* Try to run with currently available nodes */ rc = cr_job_test(job_ptr, bitmap, @@ -1258,8 +1254,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, if (!cr_job_list) fatal("list_create: memory allocation error"); job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(tmp_job_ptr) && !IS_JOB_SUSPENDED(tmp_job_ptr)) @@ -1304,8 +1298,6 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, if (rc != SLURM_SUCCESS) { list_sort(cr_job_list, _cr_job_list_sort); job_iterator = list_iterator_create(cr_job_list); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = list_next(job_iterator))) { int ovrlap; bit_or(bitmap, orig_map); @@ -1339,12 +1331,8 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, * in selected plugin, but by Moab or something else. */ if (*preemptee_job_list == NULL) { *preemptee_job_list = list_create(NULL); - if (*preemptee_job_list == NULL) - fatal("list_create malloc failure"); } preemptee_iterator =list_iterator_create(preemptee_candidates); - if (preemptee_iterator == NULL) - fatal ("memory allocation failure"); while ((tmp_job_ptr = (struct job_record *) list_next(preemptee_iterator))) { if (bit_overlap(bitmap, @@ -1882,7 +1870,7 @@ extern int select_p_select_nodeinfo_set_all(void) tmp++; } /* get the row with the largest cpu - count on it. 
*/ + * count on it. */ if (tmp > tmp_16) tmp_16 = tmp; } @@ -1921,6 +1909,7 @@ extern int select_p_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, { int rc = SLURM_SUCCESS; uint16_t *uint16 = (uint16_t *) data; + uint32_t *uint32 = (uint32_t *) data; char **tmp_char = (char **) data; select_nodeinfo_t **select_nodeinfo = (select_nodeinfo_t **) data; @@ -1951,6 +1940,9 @@ extern int select_p_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, case SELECT_NODEDATA_EXTRA_INFO: *tmp_char = NULL; break; + case SELECT_NODEDATA_MEM_ALLOC: + *uint32 = 0; + break; default: error("Unsupported option %d for get_nodeinfo.", dinfo); rc = SLURM_ERROR; @@ -2105,8 +2097,6 @@ extern int select_p_reconfigure(void) /* reload job data */ job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal ("memory allocation failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (IS_JOB_RUNNING(job_ptr)) { /* add the job */ diff --git a/src/plugins/select/serial/select_serial.h b/src/plugins/select/serial/select_serial.h index b6b9dd81da04763c376c487cc2770c048a7be222..7c03e1ac8bb267b6832b178db29ef7908d4b9a8b 100644 --- a/src/plugins/select/serial/select_serial.h +++ b/src/plugins/select/serial/select_serial.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/slurmctld/Makefile.am b/src/plugins/slurmctld/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..571d33883f63bb653f456d23fdf875b16af56586 --- /dev/null +++ b/src/plugins/slurmctld/Makefile.am @@ -0,0 +1,3 @@ +# Makefile for slurmctld plugins + +SUBDIRS = dynalloc diff --git a/src/plugins/slurmctld/Makefile.in b/src/plugins/slurmctld/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..65e769c4a19a7b41dc6074dd9614934a6ecf6cfc --- /dev/null +++ b/src/plugins/slurmctld/Makefile.in @@ -0,0 +1,735 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Makefile for slurmctld plugins +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/slurmctld +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + 
$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = 
$(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ 
+GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = 
@PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ 
+builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = dynalloc +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/plugins/slurmctld/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/plugins/slurmctld/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ 
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/slurmctld/dynalloc/Makefile.am b/src/plugins/slurmctld/dynalloc/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..691891b1684373a9a9ccee16e34a5d93e8889557 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/Makefile.am @@ -0,0 +1,39 @@ +# Makefile for dynalloc (resource dynamic allocation) plugin + +AUTOMAKE_OPTIONS = foreign + +AM_CXXFLAGS = -fexceptions + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common + +AS_DYNALLOC_SOURCES = \ + allocate.c \ + allocate.h \ + allocator.c \ + allocator.h \ + argv.c \ + argv.h \ + deallocate.c \ + deallocate.h \ + info.c \ + info.h \ + job_ports_list.c \ + job_ports_list.h \ + msg.c \ + msg.h \ + slurmctld_dynalloc.c + +if SLURM_ENABLE_DYNAMIC_ALLOCATION + +pkglib_LTLIBRARIES = slurmctld_dynalloc.la +slurmctld_dynalloc_la_SOURCES = $(AS_DYNALLOC_SOURCES) +slurmctld_dynalloc_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) + +else + +EXTRA_slurmctld_dynalloc_la_SOURCES = $(AS_DYNALLOC_SOURCES) + +endif + diff --git a/src/plugins/slurmctld/dynalloc/Makefile.in b/src/plugins/slurmctld/dynalloc/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..6edeae02273a4ee04445fc95e48bca0262ab23fc --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/Makefile.in @@ -0,0 +1,754 @@ +# Makefile.in generated by automake 1.11.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for dynalloc (resource dynamic allocation) plugin + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/slurmctld/dynalloc +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 
\ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e 
"s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkglibdir)" +LTLIBRARIES = $(pkglib_LTLIBRARIES) +slurmctld_dynalloc_la_LIBADD = +am__slurmctld_dynalloc_la_SOURCES_DIST = allocate.c allocate.h \ + allocator.c allocator.h argv.c argv.h deallocate.c \ + deallocate.h info.c info.h job_ports_list.c job_ports_list.h \ + msg.c msg.h slurmctld_dynalloc.c +am__objects_1 = allocate.lo allocator.lo argv.lo deallocate.lo info.lo \ + job_ports_list.lo msg.lo slurmctld_dynalloc.lo +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@am_slurmctld_dynalloc_la_OBJECTS = \ +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@ $(am__objects_1) +am__EXTRA_slurmctld_dynalloc_la_SOURCES_DIST = allocate.c allocate.h \ + allocator.c allocator.h argv.c argv.h deallocate.c \ + deallocate.h info.c info.h job_ports_list.c job_ports_list.h \ + msg.c msg.h slurmctld_dynalloc.c +slurmctld_dynalloc_la_OBJECTS = $(am_slurmctld_dynalloc_la_OBJECTS) +slurmctld_dynalloc_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(slurmctld_dynalloc_la_LDFLAGS) $(LDFLAGS) -o $@ +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@am_slurmctld_dynalloc_la_rpath = \ +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@ -rpath $(pkglibdir) +DEFAULT_INCLUDES = -I.@am__isrc@ 
-I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(slurmctld_dynalloc_la_SOURCES) \ + $(EXTRA_slurmctld_dynalloc_la_SOURCES) +DIST_SOURCES = $(am__slurmctld_dynalloc_la_SOURCES_DIST) \ + $(am__EXTRA_slurmctld_dynalloc_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ 
+EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ 
+PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = 
@am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +AM_CXXFLAGS = -fexceptions +PLUGIN_FLAGS = -module -avoid-version --export-dynamic +AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common +AS_DYNALLOC_SOURCES = \ + allocate.c \ + allocate.h \ + allocator.c \ + allocator.h \ + argv.c \ + argv.h \ + deallocate.c \ + deallocate.h \ + info.c \ + info.h \ + job_ports_list.c \ + job_ports_list.h \ + msg.c \ + msg.h \ + slurmctld_dynalloc.c + +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@pkglib_LTLIBRARIES = slurmctld_dynalloc.la +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@slurmctld_dynalloc_la_SOURCES = $(AS_DYNALLOC_SOURCES) +@SLURM_ENABLE_DYNAMIC_ALLOCATION_TRUE@slurmctld_dynalloc_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) 
+@SLURM_ENABLE_DYNAMIC_ALLOCATION_FALSE@EXTRA_slurmctld_dynalloc_la_SOURCES = $(AS_DYNALLOC_SOURCES) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/slurmctld/dynalloc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/slurmctld/dynalloc/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + 
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +slurmctld_dynalloc.la: $(slurmctld_dynalloc_la_OBJECTS) $(slurmctld_dynalloc_la_DEPENDENCIES) $(EXTRA_slurmctld_dynalloc_la_DEPENDENCIES) + $(slurmctld_dynalloc_la_LINK) $(am_slurmctld_dynalloc_la_rpath) $(slurmctld_dynalloc_la_OBJECTS) $(slurmctld_dynalloc_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocate.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/argv.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/deallocate.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/job_ports_list.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/msg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurmctld_dynalloc.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" 
$$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(pkglibdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pkglibLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkglibLTLIBRARIES \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/plugins/slurmctld/dynalloc/allocate.c b/src/plugins/slurmctld/dynalloc/allocate.c new file mode 100644 index 0000000000000000000000000000000000000000..0a98e652edb159b1865be00c63bb6bfe8b3d1b2c --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/allocate.c @@ -0,0 +1,728 @@ +/*****************************************************************************\ + * allocate.c - dynamic resource allocation + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <signal.h> + +#include "slurm/slurm.h" +#include "slurm/slurm_errno.h" + +#include "src/common/log.h" +#include "src/common/bitstring.h" +#include "src/common/node_conf.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/common/node_select.h" + +#include "src/slurmctld/slurmctld.h" +#include "src/slurmctld/node_scheduler.h" +#include "src/slurmctld/locks.h" +#include "src/slurmctld/state_save.h" +#include "src/slurmctld/port_mgr.h" + +#include "allocate.h" +#include "info.h" +#include "constants.h" +#include "job_ports_list.h" + + +static int _get_nodelist_optional(uint16_t request_node_num, + const char *node_range_list, + char *final_req_node_list); + +static int _get_nodelist_mandatory(uint16_t request_node_num, + const char *node_range_list, + char *final_req_node_list); + +static int _get_tasks_per_node( + const resource_allocation_response_msg_t *alloc, + const job_desc_msg_t *desc, char *tasks_per_node); + +static char *_uint16_array_to_str_xmalloc(int array_len, + const uint16_t *array); + +static int _setup_job_desc_msg(uint32_t np, uint32_t request_node_num, + char *node_range_list, const char *flag, + time_t timeout, const char *cpu_bind, + uint32_t mem_per_cpu, job_desc_msg_t *job_desc_msg); + +/** + * select n nodes from the given node_range_list. 
+ *
+ * "optional" means best effort: the allocation should include every node
+ * of node_range_list that is currently available. If that is not enough
+ * to reach request_node_num, any other available nodes are added to
+ * fill out the requested number.
+ *
+ * IN:
+ *	request_node_num: requested node number
+ *	node_range_list: specified node range to select from
+ * OUT Parameter:
+ *	final_req_node_list: ranged host string of the selected nodes; the
+ *		buffer is supplied by the caller and is written with strcpy()
+ *		NOTE(review): no capacity is passed in, so the caller's buffer
+ *		is assumed to be large enough -- confirm against callers
+ * RET OUT
+ *	-1 if requested node number is larger than available
+ *	0  successful, final_req_node_list is returned
+ */
+static int _get_nodelist_optional(uint16_t request_node_num,
+				  const char *node_range_list,
+				  char *final_req_node_list)
+{
+	hostlist_t avail_hl_system = NULL;	/* available hostlist in slurm */
+	hostlist_t avail_hl_pool = NULL;	/* available hostlist in the given node pool */
+	hostlist_t hostlist = NULL;
+	char *avail_pool_range = NULL;
+	char *subset = NULL;
+	char *hostname = NULL;
+	char *tmp = NULL;
+	int avail_pool_num;
+	int extra_needed_num;
+	int rc = SLURM_FAILURE;
+	int i;
+
+	/* get all available hostlist in SLURM system */
+	avail_hl_system = get_available_host_list_system_m();
+
+	if (request_node_num > slurm_hostlist_count(avail_hl_system)) {
+		slurm_hostlist_destroy(avail_hl_system);
+		return SLURM_FAILURE;
+	}
+
+	avail_hl_pool = choose_available_from_node_list_m(node_range_list);
+	avail_pool_range = slurm_hostlist_ranged_string_malloc(avail_hl_pool);
+	avail_pool_num = slurm_hostlist_count(avail_hl_pool);
+
+	if (request_node_num <= avail_pool_num) {
+		/* the pool alone can satisfy the request */
+		subset = get_hostlist_subset_m(avail_pool_range,
+					       request_node_num);
+		if (subset != NULL) {
+			strcpy(final_req_node_list, subset);
+			free(subset);
+			rc = SLURM_SUCCESS;
+		}
+	} else { /* avail_pool_num < request_node_num <= avail_node_num_system */
+		hostlist = slurm_hostlist_create(avail_pool_range);
+		extra_needed_num = request_node_num - avail_pool_num;
+
+		/* top up with system-wide hosts that are not already chosen */
+		i = 0;
+		while (i < extra_needed_num) {
+			hostname = slurm_hostlist_shift(avail_hl_system);
+			if (hostname == NULL)
+				break;	/* system list exhausted: give up */
+			if (slurm_hostlist_find(hostlist, hostname) == -1) {
+				slurm_hostlist_push_host(hostlist, hostname);
+				i++;
+			}
+			free(hostname);
+		}
+
+		/* only report success when the full count was reached */
+		if (i == extra_needed_num) {
+			tmp = slurm_hostlist_ranged_string_xmalloc(hostlist);
+			strcpy(final_req_node_list, tmp);
+			xfree(tmp);
+			rc = SLURM_SUCCESS;
+		}
+		slurm_hostlist_destroy(hostlist);
+	}
+
+	free(avail_pool_range);
+	slurm_hostlist_destroy(avail_hl_system);
+	slurm_hostlist_destroy(avail_hl_pool);
+
+	return rc;
+}
+
+/**
+ * select n nodes from the given node_range_list
+ *
+ * "mandatory" means all nodes must be allocated
+ * from node_range_list
+ *
+ * IN:
+ *	request_node_num: requested node number
+ *	node_range_list: specified node range to select from
+ * OUT Parameter:
+ *	final_req_node_list: ranged host string of the selected nodes,
+ *		written with strcpy() into the caller's buffer
+ * RET OUT
+ *	-1 if requested node number is larger than available
+ *	0  successful, final_req_node_list is returned
+ */
+static int _get_nodelist_mandatory(uint16_t request_node_num,
+				   const char *node_range_list,
+				   char *final_req_node_list)
+{
+	hostlist_t avail_hl = NULL;
+	char *avail_node_range = NULL;
+	char *subset = NULL;
+	int rc = SLURM_FAILURE;
+
+	/* select n (request_node_num) available nodes from node_range_list */
+	avail_hl = choose_available_from_node_list_m(node_range_list);
+	avail_node_range = slurm_hostlist_ranged_string_malloc(avail_hl);
+
+	if (request_node_num <= slurm_hostlist_count(avail_hl)) {
+		subset = get_hostlist_subset_m(avail_node_range,
+					       request_node_num);
+		if (subset != NULL) {
+			strcpy(final_req_node_list, subset);
+			free(subset);
+			rc = SLURM_SUCCESS;
+		}
+	}
+
+	free(avail_node_range);
+	slurm_hostlist_destroy(avail_hl);
+	return rc;
+}
+
+/*
+ * Render a uint16_t array as a comma separated string in which a run of
+ * equal consecutive values is written once as "value(xcount)",
+ * e.g. {4, 4, 3, 2} becomes "4(x2),3,2".
+ *
+ * A NULL array yields an empty string.
+ *
+ * Note: the return should be xfree(str)
+ */
+static char* _uint16_array_to_str_xmalloc(int array_len,
+					  const uint16_t *array)
+{
+	int i;
+	int run_len = 1;	/* length of the run ending at array[i] */
+	const char *sep;	/* separator */
+	char *str = xstrdup("");
+
+	if (NULL == array)
+		return str;
+
+	for (i = 0; i < array_len; i++) {
+		/* still inside a run of equal values: keep counting */
+		if ((i + 1 < array_len) && (array[i] == array[i + 1])) {
+			run_len++;
+			continue;
+		}
+
+		/* no trailing separator after the last entry */
+		sep = (i == array_len - 1) ? "" : ",";
+
+		if (run_len > 1) {
+			xstrfmtcat(str, "%u(x%u)%s",
+				   array[i], run_len, sep);
+		} else {
+			xstrfmtcat(str, "%u%s", array[i], sep);
+		}
+		run_len = 1;
+	}
+
+	return str;
+}
+
+/**
+ * get tasks_per_node
+ *
+ * IN:
+ *	alloc: resource allocation response
+ *	desc: job resource requirement
+ * OUT Parameter:
+ *	tasks_per_node: task count per node in the compressed form built by
+ *		_uint16_array_to_str_xmalloc(), written with strcpy() into
+ *		the caller's buffer
+ * RET OUT
+ *	-1 if failed
+ *	0  successful, tasks_per_node is returned
+ */
+static int _get_tasks_per_node(
+			const resource_allocation_response_msg_t *alloc,
+			const job_desc_msg_t *desc, char *tasks_per_node)
+{
+	uint32_t num_tasks = desc->num_tasks;
+	slurm_step_layout_t *step_layout = NULL;
+	uint32_t node_cnt = alloc->node_cnt;
+	char *tmp = NULL;
+	int i;
+
+	/* If no tasks were given we will figure it out here
+	 * by totalling up the cpus and then dividing by the
+	 * number of cpus per task */
+	if (NO_VAL == num_tasks) {
+		num_tasks = 0;
+		for (i = 0; i < alloc->num_cpu_groups; i++) {
+			num_tasks += alloc->cpu_count_reps[i]
+				     * alloc->cpus_per_node[i];
+		}
+		if ((int)desc->cpus_per_task > 1
+		    && desc->cpus_per_task != (uint16_t)NO_VAL)
+			num_tasks /= desc->cpus_per_task;
+	}
+
+	step_layout = slurm_step_layout_create(alloc->node_list,
+					       alloc->cpus_per_node,
+					       alloc->cpu_count_reps,
+					       node_cnt,
+					       num_tasks,
+					       desc->cpus_per_task,
+					       desc->task_dist,
+					       desc->plane_size);
+	if (!step_layout)
+		return SLURM_FAILURE;
+
+	tmp = _uint16_array_to_str_xmalloc(step_layout->node_cnt,
+					   step_layout->tasks);
+
+	slurm_step_layout_destroy(step_layout);
+
+	if (NULL != tmp)
+		strcpy(tasks_per_node, tmp);
+
+	xfree(tmp);
+	return SLURM_SUCCESS;
+}
+
+/**
+ * after the caller has initialized it, set up job_desc_msg_t with the
+ * specific requirements
+ *
+ * IN:
+ *	np: number of processes to run
+ *	request_node_num: the number of requested nodes
+ *	node_range_list: requested node pool
+ *	flag: optional or mandatory
+ *	timeout:
+ *	cpu_bind: e.g., cores, sockets, threads
+ *	mem_per_cpu: memory size per CPU (MB)
+ * OUT Parameter:
+ *	job_desc_msg
+ * RET OUT
+ *	-1 if failed
+ *	0
successful, job_desc_msg is returned
+ *
+ * NOTE: whenever this function sets job_desc_msg->req_nodes, the value is
+ * an xstrdup() copy (never a pointer to a local buffer or to the caller's
+ * node_range_list). The caller owns that copy and should xfree() it when
+ * it is done with the request.
+ */
+static int _setup_job_desc_msg(uint32_t np, uint32_t request_node_num,
+			       char *node_range_list, const char *flag,
+			       time_t timeout, const char *cpu_bind,
+			       uint32_t mem_per_cpu, job_desc_msg_t *job_desc_msg)
+{
+	char final_req_node_list[SIZE] = "";
+	/* a NULL flag or node list is treated like an empty string */
+	const char *mode = flag ? flag : "";
+	int have_list = (node_range_list != NULL)
+			&& (node_range_list[0] != '\0');
+	hostlist_t hostlist = NULL;
+	int rc;
+
+	job_desc_msg->user_id = getuid();
+	job_desc_msg->group_id = getgid();
+	job_desc_msg->contiguous = 0;
+
+	/* set np */
+	if (0 != np) {
+		job_desc_msg->num_tasks = np;
+		job_desc_msg->min_cpus = np;
+	}
+
+	if (!have_list) {
+		/* node_list == "": only a node count (if any) can be requested;
+		 * if N == 0 as well, do nothing */
+		if (0 != request_node_num)
+			job_desc_msg->min_nodes = request_node_num;
+	} else if ((0 == request_node_num) && (0 != strcmp(mode, "optional"))) {
+		/* N == 0 && node_list != "" && flag == "mandatory":
+		 * request exactly the given nodes */
+		job_desc_msg->req_nodes = xstrdup(node_range_list);
+	} else if ((0 != request_node_num) && (0 == strcmp(mode, "mandatory"))) {
+		/* N != 0 && node_list != "" && flag == "mandatory" */
+		rc = _get_nodelist_mandatory(request_node_num,
+					     node_range_list,
+					     final_req_node_list);
+		if (SLURM_SUCCESS != rc) {
+			error ("can not meet mandatory requirement");
+			return SLURM_FAILURE;
+		}
+		/* final_req_node_list lives on this stack frame, so the
+		 * request must carry its own copy of the string */
+		if ('\0' != final_req_node_list[0])
+			job_desc_msg->req_nodes = xstrdup(final_req_node_list);
+		else
+			job_desc_msg->min_nodes = request_node_num;
+	} else {
+		/* node_list != "" && flag == "optional" */
+		if (0 == request_node_num) {
+			/* N == 0: ask for as many nodes as the list names */
+			hostlist = slurm_hostlist_create(node_range_list);
+			request_node_num = slurm_hostlist_count(hostlist);
+			slurm_hostlist_destroy(hostlist);
+		}
+		rc = _get_nodelist_optional(request_node_num,
+					    node_range_list,
+					    final_req_node_list);
+		/* on failure, or when no list was produced, fall back to a
+		 * plain node count and let the scheduler choose */
+		if ((SLURM_SUCCESS == rc) && ('\0' != final_req_node_list[0]))
+			job_desc_msg->req_nodes = xstrdup(final_req_node_list);
+		else
+			job_desc_msg->min_nodes = request_node_num;
+	}
+
+	/* for cgroup */
+	if (mem_per_cpu > 0)
+		job_desc_msg->pn_min_memory = mem_per_cpu | MEM_PER_CPU;
+
+	/* both tests are required: strlen()/strcmp() on NULL is undefined */
+	if ((NULL != cpu_bind) && ('\0' != cpu_bind[0])) {
+		if (0 == strcmp(cpu_bind, "cores"))
+			job_desc_msg->cpu_bind_type = CPU_BIND_TO_CORES;
+		else if (0 == strcmp(cpu_bind, "sockets"))
+			job_desc_msg->cpu_bind_type = CPU_BIND_TO_SOCKETS;
+		else if (0 == strcmp(cpu_bind, "threads"))
+			job_desc_msg->cpu_bind_type = CPU_BIND_TO_THREADS;
+	}
+	return SLURM_SUCCESS;
+}
+
+
+/**
+ * select n nodes from the given node_range_list through rpc
+ *
+ * if (flag == mandatory), all requested nodes must be allocated
+ * from node_list; else if (flag == optional), try best to allocate
+ * node from node_list, and the allocation should include all
+ * nodes in the given list that are currently available. If that
+ * isn't enough to meet the node_num_request, then take any other
+ * nodes that are available to fill out the requested number.
+ *
+ * IN:
+ *	np: number of process to run
+ *	request_node_num: requested node number
+ *	node_range_list: specified node range to select from
+ *	flag: optional or mandatory
+ *	timeout: timeout
+ *	cpu_bind:e.g., cores, threads, sockets
+ *	mem_per_cpu: memory size per CPU (MB)
+ *	resv_port_cnt: number of ports to reserve (0 is treated as 1)
+ * OUT Parameter:
+ *	slurm_jobid: slurm jobid
+ *	reponse_node_list:
+ *	tasks_per_node: like 4(x2) 3,2
+ *	resv_ports: reserved ports, as returned in step.resv_ports
+ * RET OUT:
+ *	-1 if requested node number is larger than available or timeout
+ *	0  successful
+ *
+ * NOTE(review): the OUT strings are written with strcpy(); the caller must
+ * supply buffers large enough for the result (the caller in allocator.c
+ * passes SIZE bytes).
+ */
+int allocate_node_rpc(uint32_t np, uint32_t request_node_num,
+		      char *node_range_list, const char *flag,
+		      time_t timeout, const char *cpu_bind,
+		      uint32_t mem_per_cpu, uint32_t resv_port_cnt,
+		      uint32_t *slurm_jobid, char *reponse_node_list,
+		      char *tasks_per_node, char *resv_ports)
+{
+	job_desc_msg_t job_desc_msg;
+	resource_allocation_response_msg_t *job_alloc_resp_msg = NULL;
+	struct job_record *job_ptr = NULL;
+	struct step_record step;
+	uid_t uid = getuid();
+	uint32_t job_id;
+	int rc, i;
+
+	slurm_init_job_desc_msg (&job_desc_msg);
+	rc = _setup_job_desc_msg(np, request_node_num, node_range_list, flag,
+				 timeout, cpu_bind, mem_per_cpu, &job_desc_msg);
+	if (rc)
+		return SLURM_FAILURE;
+
+	job_alloc_resp_msg = slurm_allocate_resources_blocking(&job_desc_msg,
+							       timeout, NULL);
+	if (!job_alloc_resp_msg) {
+		error("allocate failure, timeout or request too many nodes");
+		return SLURM_FAILURE;
+	}
+
+	/* OUT: slurm_jobid, reponse_node_list, tasks_per_node */
+	/* keep our own copy of the job id: the response is freed below */
+	job_id = job_alloc_resp_msg->job_id;
+	*slurm_jobid = job_id;
+	strcpy(reponse_node_list, job_alloc_resp_msg->node_list);
+	_get_tasks_per_node(job_alloc_resp_msg, &job_desc_msg, tasks_per_node);
+
+	info("allocate [ node_list = %s ] to [ job_id = %u ]",
+	     job_alloc_resp_msg->node_list, job_alloc_resp_msg->job_id);
+
+	/* free the allocated resource msg */
+	slurm_free_resource_allocation_response_msg(job_alloc_resp_msg);
+	job_alloc_resp_msg = NULL;
+
+	job_ptr = find_job_record(job_id);
+	if (NULL == job_ptr) {
+		error("can not find job record of job %u", job_id);
+		return SLURM_FAILURE;
+	}
+
+	/**************************\
+	 *       resv port        *
+	\**************************/
+	/* zero the step so the xfree() calls on the failure path never
+	 * see uninitialized pointers */
+	memset(&step, 0, sizeof(step));
+	if (0 == resv_port_cnt)
+		resv_port_cnt = 1;
+	step.resv_port_cnt = resv_port_cnt;
+	step.job_ptr = job_ptr;
+	step.step_node_bitmap = job_ptr->node_bitmap;
+	rc = resv_port_alloc(&step);
+	if (SLURM_SUCCESS != rc) {
+		cancel_job(job_ptr->job_id, uid);
+		xfree(step.resv_ports);
+		xfree(step.resv_port_array);
+		return SLURM_FAILURE;
+	}
+	strcpy(resv_ports, step.resv_ports);
+	for (i = 0; i < step.resv_port_cnt; i++) {
+		info("reserved ports %s for job %u : resv_port_array[%d]=%u",
+		     step.resv_ports, step.job_ptr->job_id,
+		     i, step.resv_port_array[i]);
+	}
+
+	/* keep slurm_jobid and resv_port_array in a List
+	 * for future release port */
+	append_job_ports_item(job_ptr->job_id, step.resv_port_cnt,
+			      step.resv_ports, step.resv_port_array);
+
+	xfree(step.resv_ports);
+	xfree(step.resv_port_array);
+
+#if 0
+	//kill the job, release the resource, just for test
+	if (slurm_kill_job(job_id, SIGKILL, 0)) {
+		error ("ERROR: kill job %d\n", slurm_get_errno());
+		return SLURM_FAILURE;
+	}
+#endif
+
+	return SLURM_SUCCESS;
+}
+
+/**
+ *	select n nodes from the given node_range_list directly through
+ *	"job_allocate" in slurmctld/job_mgr.c
+ *
+ *  if (flag == mandatory), all requested nodes must be allocated
+ *  from node_list; else if (flag == optional), try best to allocate
+ *  node from node_list, and the allocation should include all
+ *  nodes in the given list that are currently available. If that
+ *  isn't enough to meet the node_num_request, then take any other
+ *  nodes that are available to fill out the requested number.
+ *
+ * IN:
+ *	np: number of process to run
+ *	request_node_num: requested node number
+ *	node_range_list: specified node range to select from
+ *	flag: optional or mandatory
+ *	timeout: timeout
+ *	cpu_bind: cpu bind type, e.g., cores, socket
+ *	mem_per_cpu: memory size per cpu (MB)
+ *	resv_port_cnt: number of ports to reserve (0 is treated as 1)
+ * OUT Parameter:
+ *	slurm_jobid: slurm jobid
+ *	reponse_node_list:
+ *	tasks_per_node: like 4(x2) 3,2
+ *	resv_ports: reserved ports, as returned in step.resv_ports
+ * RET OUT:
+ *	-1 if requested node number is larger than available or timeout
+ *	0  successful, final_req_node_list is returned
+ *
+ * A job that cannot start right away (no node list assigned yet) is
+ * cancelled and reported as a failure.
+ *
+ * NOTE(review): the OUT strings are written with strcpy(); the caller must
+ * supply buffers large enough for the result (the caller in allocator.c
+ * passes SIZE bytes).
+ * NOTE(review): job_ptr is read after unlock_slurmctld(); confirm that the
+ * record cannot be purged concurrently.
+ */
+int allocate_node(uint32_t np, uint32_t request_node_num,
+		  char *node_range_list, const char *flag,
+		  time_t timeout, const char *cpu_bind,
+		  uint32_t mem_per_cpu, uint32_t resv_port_cnt,
+		  uint32_t *slurm_jobid, char *reponse_node_list,
+		  char *tasks_per_node, char *resv_ports)
+{
+	/* Locks: Read config, write job, write node, read partition */
+	slurmctld_lock_t job_write_lock = {
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+	resource_allocation_response_msg_t alloc_msg;
+	job_desc_msg_t job_desc_msg;
+	struct job_record *job_ptr = NULL;
+	struct step_record step;
+	bool job_waiting = false;
+	uid_t uid = getuid();
+	int rc, error_code, i;
+
+	slurm_init_job_desc_msg (&job_desc_msg);
+	rc = _setup_job_desc_msg(np, request_node_num, node_range_list, flag,
+				 timeout, cpu_bind, mem_per_cpu, &job_desc_msg);
+	if (rc)
+		return SLURM_FAILURE;
+
+	job_desc_msg.immediate = 0;
+	rc = validate_job_create_req(&job_desc_msg);
+	if (rc) {
+		error("invalid job request.");
+		return SLURM_FAILURE;
+	}
+
+	lock_slurmctld(job_write_lock);
+	error_code = job_allocate(&job_desc_msg, job_desc_msg.immediate,
+				  false, //will run
+				  NULL, // will_run_response_msg_t
+				  true, //allocate
+				  job_desc_msg.user_id, &job_ptr);
+	unlock_slurmctld(job_write_lock);
+
+	/* cleanup */
+	xfree(job_desc_msg.partition);
+
+	/* these codes mean the job was accepted but has to wait */
+	if ((error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) ||
+	    (error_code == ESLURM_RESERVATION_NOT_USABLE) ||
+	    (error_code == ESLURM_QOS_THRES) ||
+	    (error_code == ESLURM_NODE_NOT_AVAIL) ||
+	    (error_code == ESLURM_JOB_HELD))
+		job_waiting = true;
+
+	if ((SLURM_SUCCESS != error_code) &&
+	    !((0 == job_desc_msg.immediate) && job_waiting))
+		return SLURM_FAILURE;
+
+	xassert(job_ptr);
+	if (NULL == job_ptr)	/* xassert() may be compiled out */
+		return SLURM_FAILURE;
+
+	/* note: allocated node list is in 'job_ptr->nodes' */
+	/*       not 'job_ptr->alloc_node' */
+
+	if (NULL == job_ptr->nodes) {
+		/* job is pending, so cancel the job */
+		if (0 < job_ptr->job_id)
+			cancel_job(job_ptr->job_id, uid);
+		return SLURM_FAILURE;
+	}
+
+	/* allocate successful */
+	strcpy(reponse_node_list, job_ptr->nodes);
+	*slurm_jobid = job_ptr->job_id;
+	info("allocate [ allocated_node_list=%s ] to [ slurm_jobid=%u ]",
+	     job_ptr->nodes, job_ptr->job_id);
+
+	/* transform job_ptr to alloc_msg for further use; start from an
+	 * all-zero message so no field is left uninitialized */
+	memset(&alloc_msg, 0, sizeof(alloc_msg));
+	if (job_ptr->job_resrcs && job_ptr->job_resrcs->cpu_array_cnt) {
+		alloc_msg.num_cpu_groups = job_ptr->job_resrcs->cpu_array_cnt;
+		i = sizeof(uint32_t) * alloc_msg.num_cpu_groups;
+		alloc_msg.cpu_count_reps = xmalloc(i);
+		memcpy(alloc_msg.cpu_count_reps,
+		       job_ptr->job_resrcs->cpu_array_reps, i);
+		i = sizeof(uint16_t) * alloc_msg.num_cpu_groups;
+		alloc_msg.cpus_per_node = xmalloc(i);
+		memcpy(alloc_msg.cpus_per_node,
+		       job_ptr->job_resrcs->cpu_array_value, i);
+	}
+	alloc_msg.error_code = error_code;
+	alloc_msg.job_id = job_ptr->job_id;
+	alloc_msg.node_cnt = job_ptr->node_cnt;
+	alloc_msg.node_list = xstrdup(job_ptr->nodes);
+	alloc_msg.alias_list = xstrdup(job_ptr->alias_list);
+	alloc_msg.select_jobinfo =
+		select_g_select_jobinfo_copy(job_ptr->select_jobinfo);
+	if (job_ptr->details)
+		alloc_msg.pn_min_memory = job_ptr->details->pn_min_memory;
+
+	/* to get tasks_per_node */
+	_get_tasks_per_node(&alloc_msg, &job_desc_msg, tasks_per_node);
+
+	/* cleanup */
+	xfree(alloc_msg.cpu_count_reps);
+	xfree(alloc_msg.cpus_per_node);
+	xfree(alloc_msg.node_list);
+	xfree(alloc_msg.alias_list);
+
+	select_g_select_jobinfo_free(alloc_msg.select_jobinfo);
+	schedule_job_save();	/* has own locks */
+	schedule_node_save();	/* has own locks */
+
+	/**************************\
+	 *       resv port        *
+	\**************************/
+	/* zero the step so the xfree() calls on the failure path never
+	 * see uninitialized pointers */
+	memset(&step, 0, sizeof(step));
+	if (0 == resv_port_cnt)
+		resv_port_cnt = 1;
+	step.resv_port_cnt = resv_port_cnt;
+	step.job_ptr = job_ptr;
+	step.step_node_bitmap = job_ptr->node_bitmap;
+	rc = resv_port_alloc(&step);
+	if (SLURM_SUCCESS != rc) {
+		cancel_job(job_ptr->job_id, uid);
+		xfree(step.resv_ports);
+		xfree(step.resv_port_array);
+		return SLURM_FAILURE;
+	}
+	strcpy(resv_ports, step.resv_ports);
+	for (i = 0; i < step.resv_port_cnt; i++) {
+		info("reserved ports %s for job %u : resv_port_array[%d]=%u",
+		     step.resv_ports, step.job_ptr->job_id,
+		     i, step.resv_port_array[i]);
+	}
+
+	/* keep slurm_jobid and resv_port_array in a List */
+	append_job_ports_item(job_ptr->job_id, step.resv_port_cnt,
+			      step.resv_ports, step.resv_port_array);
+
+	xfree(step.resv_ports);
+	xfree(step.resv_port_array);
+
+#if 0
+	/* only for test */
+	cancel_job(job_ptr->job_id, uid);
+#endif
+	return SLURM_SUCCESS;
+}
+
+/**
+ *	cancel a job
+ *
+ *	IN:
+ *		job_id: slurm jobid
+ *		uid: user id
+ *	OUT Parameter:
+ *	RET OUT:
+ *		-1 failed
+ *		0  successful
+ */
+int cancel_job(uint32_t job_id, uid_t uid)
+{
+	int rc;
+	/* Locks: Read config, write job, write node */
+	slurmctld_lock_t job_write_lock = {
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
+
+	lock_slurmctld(job_write_lock);
+	rc = job_signal(job_id, SIGKILL, 0, uid, false);
+	unlock_slurmctld(job_write_lock);
+
+	if (rc) { /* cancel failure */
+		info("Signal %u JobId=%u by UID=%u: %s",
+		     SIGKILL, job_id, uid, slurm_strerror(rc));
+		return SLURM_FAILURE;
+	} else { /* cancel successful */
+		info("sched: Cancel of JobId=%u by UID=%u", job_id, uid);
+		slurmctld_diag_stats.jobs_canceled++;
+
+		/* Below function provides its own locking */
+		schedule_job_save();
+		return SLURM_SUCCESS;
+	}
+}
diff --git a/src/plugins/slurmctld/dynalloc/allocate.h b/src/plugins/slurmctld/dynalloc/allocate.h
new file mode 100644
index 0000000000000000000000000000000000000000..f4014d67fecea573d0e4b7857776bb02638142a6
--- /dev/null
+++ b/src/plugins/slurmctld/dynalloc/allocate.h
@@ -0,0 +1,135 @@
+/*****************************************************************************\
+ * allocate.h - dynamic resource allocation
+ *****************************************************************************
+ * Copyright (C) 2012-2013 Los Alamos National Security, LLC.
+ * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org>
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://slurm.schedmd.com/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef DYNALLOC_ALLOCATE_H_ +#define DYNALLOC_ALLOCATE_H_ + + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +/** + * select n nodes from the given node_range_list through rpc + * + * if (flag == mandatory), all requested nodes must be allocated + * from node_list; else if (flag == optional), try best to allocate + * node from node_list, and the allocation should include all + * nodes in the given list that are currently available. If that + * isn't enough to meet the node_num_request, then take any other + * nodes that are available to fill out the requested number. 
+ * + * IN: + * np: number of process to run + * request_node_num: requested node number + * node_range_list: specified node range to select from + * flag: optional or mandatory + * timeout: timeout + * cpu_bind:e.g., cores, threads, sockets + * mem_per_cpu: memory size per CPU (MB) + * OUT Parameter: + * jobid: slurm jobid + * reponse_node_list: + * tasks_per_node: like 4(x2) 3,2 + * RET OUT: + * -1 if requested node number is larger than available or timeout + * 0 successful + */ +int allocate_node_rpc(uint32_t np, uint32_t request_node_num, + char *node_range_list, const char *flag, + time_t timeout, const char *cpu_bind, + uint32_t mem_per_cpu, uint32_t resv_port_cnt, + uint32_t *slurm_jobid, char *reponse_node_list, + char *tasks_per_node, char *resv_ports); + +/** + * select n nodes from the given node_range_list directly through + * "job_allocate" in slurmctld/job_mgr.c + * + * if (flag == mandatory), all requested nodes must be allocated + * from node_list; else if (flag == optional), try best to allocate + * node from node_list, and the allocation should include all + * nodes in the given list that are currently available. If that + * isn't enough to meet the node_num_request, then take any other + * nodes that are available to fill out the requested number. 
+ * + * IN: + * np: number of process to run + * request_node_num: requested node number + * node_range_list: specified node range to select from + * flag: optional or mandatory + * timeout: timeout + * cpu_bind: cpu bind type, e.g., cores, socket + * mem_per_cpu: memory size per cpu (MB) + * OUT Parameter: + * slurm_jobid: slurm jobid + * reponse_node_list: + * tasks_per_node: like 4(x2) 3,2 + * RET OUT: + * -1 if requested node number is larger than available or timeout + * 0 successful, final_req_node_list is returned + */ +int allocate_node(uint32_t np, uint32_t request_node_num, + char *node_range_list, const char *flag, + time_t timeout, const char *cpu_bind, + uint32_t mem_per_cpu, uint32_t resv_port_cnt, + uint32_t *slurm_jobid, char *reponse_node_list, + char *tasks_per_node, char *resv_ports); + +/** + * cancel a job + * + * IN: + * job_id: slurm jobid + * uid: user id + * OUT Parameter: + * RET OUT: + * -1 failed + * 0 successful + */ +extern int cancel_job(uint32_t job_id, uid_t uid); + +#endif /* DYNALLOC_ALLOCATE_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/allocator.c b/src/plugins/slurmctld/dynalloc/allocator.c new file mode 100644 index 0000000000000000000000000000000000000000..768dd9a73f66bc74de9ec82c83247d039d6ad18d --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/allocator.c @@ -0,0 +1,293 @@ +/*****************************************************************************\ + * allocator.c - dynamic resource allocation + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "allocator.h"
+#include "allocate.h"
+#include "info.h"
+#include "argv.h"
+#include "msg.h"
+#include "constants.h"
+
+/* Capacity of the orte_jobid / return_flag buffers that allocate_job_op()
+ * passes to _parse_job_params() (both are declared as char[16] there). */
+#define JOB_PARAM_LEN 16
+
+static void _parse_job_params(const char *cmd, char *orte_jobid,
+			      char *return_flag, size_t *job_timeout);
+
+static void _parse_app_params(const char *cmd, char *appid,
+			      uint32_t *np, uint32_t *request_node_num,
+			      char *node_range_list, char *flag,
+			      char *cpu_bind, uint32_t *mem_per_cpu,
+			      uint32_t *resv_port_cnt);
+
+static void _allocate_app_op(const char *msg_app,
+			     size_t app_timeout,
+			     char *app_resp_msg);
+
+/*
+ * Parse the job part of msg(cmd) to obtain job parameters
+ *
+ * e.g., if a allocate request is like "allocate jobid=100
+ * return=all timeout=10:app=0 np=5 N=2 node_list=vm2,vm3
+ * flag=mandatory:app=1 N=2", then the job part of msg is
+ * "jobid=100 return=all timeout=10".
+ *
+ * The command is a blank-separated list of key=value tokens. Tokens
+ * without '=' (such as the leading "allocate" keyword) and unknown keys
+ * are ignored; an OUT parameter whose key is absent is left untouched.
+ * Values longer than JOB_PARAM_LEN - 1 characters are truncated.
+ *
+ * IN:
+ * 	cmd: job part of msg
+ * OUT Parameter:
+ * 	orte_jobid: value of "jobid"
+ * 	return_flag: value of "return"
+ * 	job_timeout: timeout of resource allocation for the whole job;
+ * 		     only updated when "timeout" is a non-negative number
+ */
+static void _parse_job_params(const char *cmd, char *orte_jobid,
+			      char *return_flag, size_t *job_timeout)
+{
+	char *tmp = NULL;
+	char *save_ptr = NULL;
+	char *key = NULL;
+	char *val = NULL;
+	char *end = NULL;
+	unsigned long timeout;
+
+	if (NULL == cmd)
+		return;
+
+	/* strtok_r() writes into its input, so work on a private copy */
+	tmp = xstrdup(cmd);
+	for (key = strtok_r(tmp, " ", &save_ptr); key;
+	     key = strtok_r(NULL, " ", &save_ptr)) {
+		val = strchr(key, '=');
+		if (NULL == val)
+			continue;	/* not a key=value token */
+		*val++ = '\0';		/* split the token at the '=' */
+
+		if (0 == strcmp(key, "jobid")) {
+			snprintf(orte_jobid, JOB_PARAM_LEN, "%s", val);
+		} else if (0 == strcmp(key, "return")) {
+			snprintf(return_flag, JOB_PARAM_LEN, "%s", val);
+		} else if (0 == strcmp(key, "timeout")) {
+			timeout = strtoul(val, &end, 10);
+			/* keep the caller's default on garbage or on a
+			 * negative number (strtoul() would wrap it) */
+			if ((end != val) && ('-' != val[0]))
+				*job_timeout = timeout;
+		}
+	}
+
+	/* cleanup */
+	xfree(tmp);
+}
+
+/*
+ * Parse the app part of msg(cmd) to obtain app parameters
+ *
+ * e.g., if a allocate request is like "allocate jobid=100
+ * return=all timeout=10:app=0 np=5
N=2 node_list=vm2,vm3
+ * flag=mandatory:app=1 N=2", then the app part of msg is
+ * "app=0 np=5 N=2 node_list=vm2,vm3 flag=mandatory:app=1 N=2".
+ *
+ * The command is a blank-separated list of key=value tokens. Tokens
+ * without '=' and unknown keys are ignored; an OUT parameter whose key
+ * is absent is left untouched. String values are truncated to the size
+ * of the buffers declared in _allocate_app_op().
+ *
+ * IN:
+ * 	cmd: app part of msg
+ * OUT Parameter:
+ * 	appid: value of "app"
+ * 	np: number of process
+ * 	request_node_num: value of "N"
+ * 	node_range_list: value of "node_list"
+ * 	flag: mandatory or optional
+ * 	cpu_bind: cpu bind type, e.g., cores
+ * 	mem_per_cpu: memory per cpu (MB)
+ * 	resv_port_cnt: value of "resv_port_cnt"
+ */
+static void _parse_app_params(const char *cmd, char *appid,
+			      uint32_t *np, uint32_t *request_node_num,
+			      char *node_range_list, char *flag,
+			      char *cpu_bind, uint32_t *mem_per_cpu,
+			      uint32_t *resv_port_cnt)
+{
+	/* capacities of the caller's buffers; they must match the arrays
+	 * declared in _allocate_app_op() */
+	enum { APPID_LEN = 16, FLAG_LEN = 16, CPU_BIND_LEN = 32 };
+	char *tmp = NULL;
+	char *save_ptr = NULL;
+	char *key = NULL;
+	char *val = NULL;
+
+	if (NULL == cmd)
+		return;
+
+	/* strtok_r() writes into its input, so work on a private copy */
+	tmp = xstrdup(cmd);
+	for (key = strtok_r(tmp, " ", &save_ptr); key;
+	     key = strtok_r(NULL, " ", &save_ptr)) {
+		val = strchr(key, '=');
+		if (NULL == val)
+			continue;	/* not a key=value token */
+		*val++ = '\0';		/* split the token at the '=' */
+
+		/* Compare the whole key: a substring search over the token
+		 * would also hit values, e.g. "node_list=app1". */
+		if (0 == strcmp(key, "app")) {
+			snprintf(appid, APPID_LEN, "%s", val);
+		} else if (0 == strcmp(key, "np")) {
+			*np = strtoul(val, NULL, 10);
+		} else if (0 == strcmp(key, "N")) {
+			*request_node_num = strtoul(val, NULL, 10);
+		} else if (0 == strcmp(key, "node_list")) {
+			snprintf(node_range_list, SIZE, "%s", val);
+		} else if (0 == strcmp(key, "flag")) {
+			snprintf(flag, FLAG_LEN, "%s", val);
+		} else if (0 == strcmp(key, "cpu_bind")) {
+			snprintf(cpu_bind, CPU_BIND_LEN, "%s", val);
+		} else if (0 == strcmp(key, "mem_per_cpu")) {
+			*mem_per_cpu = strtoul(val, NULL, 10);
+		} else if (0 == strcmp(key, "resv_port_cnt")) {
+			*resv_port_cnt = strtoul(val, NULL, 10);
+		}
+	}
+
+	/* cleanup */
+	xfree(tmp);
+}
+
+/*
+ * allocate resource for an app
+ *
+ * IN:
+ * 	msg_app: cmd of allocation requirement
+ * 	app_timeout: timeout handed to allocate_node() for this app
+ * OUT Parameter:
+ * 	app_resp_msg: allocation result, at most SIZE bytes including the
+ * 		      terminating NUL (longer results are truncated)
+ */
+static void _allocate_app_op(const char *msg_app,
+			     size_t app_timeout,
+			     char *app_resp_msg)
+{
+	/* in params, preset to the values used when a key is absent */
+	char appid[16] = "";
+	uint32_t np = 0;
+	uint32_t request_node_num = 0;
+	char node_range_list[SIZE] = "";
+	char flag[16] = "mandatory";  /* if not specified, by default */
+
+	char cpu_bind[32] = "";
+	uint32_t mem_per_cpu = 0;
+	uint32_t resv_port_cnt = 1;
+	/* out params */
+	uint32_t slurm_jobid = 0;
+	char resp_node_list[SIZE] = "";
+	char tasks_per_node[SIZE] = "";
+	char resv_ports[SIZE] = "";
+	int rc;
+
+	_parse_app_params(msg_app, appid, &np, &request_node_num,
+			  node_range_list, flag, cpu_bind, &mem_per_cpu,
+			  &resv_port_cnt);
+
+	rc = allocate_node(np, request_node_num, node_range_list, flag,
+			   app_timeout, cpu_bind, mem_per_cpu, resv_port_cnt,
+			   &slurm_jobid, resp_node_list, tasks_per_node,
+			   resv_ports);
+
+	/* the three SIZE-byte strings together can exceed SIZE, so the
+	 * reply has to be formatted with an explicit bound */
+	if (SLURM_SUCCESS == rc) {
+		snprintf(app_resp_msg, SIZE,
+			 "app=%s slurm_jobid=%u allocated_node_list=%s tasks_per_node=%s resv_ports=%s",
+			 appid, slurm_jobid, resp_node_list, tasks_per_node,
+			 resv_ports);
+	} else {
+		snprintf(app_resp_msg, SIZE, "app=%s allocate_failure", appid);
+	}
+}
+
+/*
+ * allocate resources for a job.
+ *
+ * The job will consist of at least one app, e.g., "allocate
+ * jobid=100 return=all timeout=10:app=0 np=5 N=2
+ * node_list=vm2,vm3 flag=mandatory:app=1 N=2".
+ *
+ * With "return=all" the per-app results are collected and sent as one
+ * reply "jobid=<id>:<app result>:<app result>..."; otherwise every app
+ * result is sent on its own as "jobid=<id>:<app result>". Replies that do
+ * not fit in SIZE bytes are truncated. Nothing is sent when msg is empty.
+ *
+ * IN:
+ *	new_fd: send allocation result to socket_fd
+ * 	msg: resource requirement cmd
+ */
+extern void allocate_job_op(slurm_fd_t new_fd, const char *msg)
+{
+	char orte_jobid[16] = "";
+	char return_flag[16] = "";
+	size_t job_timeout = 15; /* if not specified, by default */
+
+	char send_buf[SIZE];
+	char **app_argv = NULL, **tmp_app_argv;
+	size_t app_timeout;
+	uint32_t app_count = 1;
+	char app_resp_msg[SIZE] = "";
+	char **all_resp_msg_argv = NULL, **tmp_all_resp_msg_argv;
+	size_t used = 0;
+	int part_count;
+	int n;
+
+	app_argv = argv_split(msg, ':');
+	if (NULL == app_argv)
+		return;		/* empty request (or out of memory) */
+
+	/* app_count does not include the first part (job info); never let
+	 * it drop to 0 because it is used as a divisor below */
+	part_count = argv_count(app_argv);
+	if (part_count > 1)
+		app_count = part_count - 1;
+
+	/* app_argv will be freed */
+	tmp_app_argv = app_argv;
+	while (*tmp_app_argv) {
+		if (strstr(*tmp_app_argv, "allocate")) {
+			_parse_job_params(*tmp_app_argv, orte_jobid,
+					  return_flag, &job_timeout);
+		} else if (strstr(*tmp_app_argv, "app")) {
+			/* every app gets an equal share of the job timeout */
+			app_timeout = job_timeout / app_count;
+
+			app_resp_msg[0] = '\0';
+			_allocate_app_op(*tmp_app_argv, app_timeout,
+					 app_resp_msg);
+
+			if (0 == strcmp(return_flag, "all")
+			    && 0 != strlen(app_resp_msg)) {
+				argv_append_nosize(&all_resp_msg_argv,
+						   app_resp_msg);
+			} else if (0 != strlen(app_resp_msg)) {
+				/* if return_flag != "all",
+				 * each app's allocation will be sent individually */
+				snprintf(send_buf, sizeof(send_buf),
+					 "jobid=%s:%s", orte_jobid,
+					 app_resp_msg);
+				info("BBB: send to client: %s", send_buf);
+				send_reply(new_fd, send_buf);
+			}
+		}
+		tmp_app_argv++;
+	}
+	/* free app_argv */
+	argv_free(app_argv);
+
+	if (0 == strcmp(return_flag, "all")) {
+		/* Build the reply by appending at a running offset; the
+		 * buffer must never be its own printf argument. */
+		send_buf[0] = '\0';
+		n = snprintf(send_buf, sizeof(send_buf), "jobid=%s",
+			     orte_jobid);
+		if (n > 0)
+			used = ((size_t) n < sizeof(send_buf)) ?
+				(size_t) n : sizeof(send_buf) - 1;
+
+		/* all_resp_msg_argv will be freed; it is still NULL when
+		 * no app produced a result */
+		tmp_all_resp_msg_argv = all_resp_msg_argv;
+		while (tmp_all_resp_msg_argv && *tmp_all_resp_msg_argv) {
+			n = snprintf(send_buf + used, sizeof(send_buf) - used,
+				     ":%s", *tmp_all_resp_msg_argv);
+			if ((n < 0) ||
+			    ((size_t) n >= sizeof(send_buf) - used)) {
+				/* out of room: send what fits */
+				info("reply for jobid=%s truncated",
+				     orte_jobid);
+				break;
+			}
+			used += (size_t) n;
+			tmp_all_resp_msg_argv++;
+		}
+		/* free all_resp_msg_argv */
+		argv_free(all_resp_msg_argv);
+
+		info("BBB: send to client: %s", send_buf);
+		send_reply(new_fd, send_buf);
+	}
+}
diff --git a/src/plugins/slurmctld/dynalloc/allocator.h
b/src/plugins/slurmctld/dynalloc/allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..093424bb0a90925dae7cec243b5d09ace1eba936 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/allocator.h @@ -0,0 +1,70 @@ +/*****************************************************************************\ + * allocator.h - dynamic resource allocation + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef DYNALLOC_ALLOCATOR_H_ +#define DYNALLOC_ALLOCATOR_H_ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include "slurm/slurm.h" +#include "msg.h" + +/* + * allocate resources for a job. + * + * The job will consist of at least one app, e.g., "allocate + * jobid=100 return=all timeout=10:app=0 np=5 N=2 + * node_list=vm2,vm3 flag=mandatory:app=1 N=2". + * + * IN: + * new_fd: send allocation result to socket_fd + * msg: resource requirement cmd + */ +extern void allocate_job_op(slurm_fd_t new_fd, const char *msg); + +#endif /* DYNALLOC_ALLOCATOR_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/argv.c b/src/plugins/slurmctld/dynalloc/argv.c new file mode 100644 index 0000000000000000000000000000000000000000..7e152dfddc7e134e66491292843e14f16f01d143 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/argv.c @@ -0,0 +1,553 @@ +/*****************************************************************************\ + * argv.c - + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "slurm/slurm.h" + +#include "argv.h" +#include "constants.h" + +/* + * Append a string to the end of a new or existing argv array. 
+ *
+ * argc (OUT): number of entries in *argv after the append
+ * argv (IN/OUT): NULL-terminated array, *argv may be NULL to start a new one
+ * arg (IN): string to copy into the new last entry
+ * RET: SLURM_SUCCESS, or the error returned by argv_append_nosize()
+ */
+int argv_append(int *argc, char ***argv, const char *arg)
+{
+	int rc;
+
+	/* add the new element */
+	if (SLURM_SUCCESS != (rc = argv_append_nosize(argv, arg))) {
+		return rc;
+	}
+
+	*argc = argv_count(*argv);
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Append a copy of arg to a new or existing argv array.
+ *
+ * On failure (NULL argument or out of memory) SLURM_FAILURE is returned
+ * and *argv is left exactly as it was.
+ */
+int argv_append_nosize(char ***argv, const char *arg)
+{
+	char **grown;
+	char *copy;
+	int argc;
+
+	if ((NULL == argv) || (NULL == arg))
+		return SLURM_FAILURE;
+
+	/* duplicate first, so a failure cannot leave a half-updated array */
+	copy = strdup(arg);
+	if (NULL == copy)
+		return SLURM_FAILURE;
+
+	/* count how many entries currently exist (0 for a NULL array) */
+	argc = argv_count(*argv);
+
+	/* realloc(NULL, n) behaves like malloc(n), which covers the "create
+	 * new argv" case; going through a temporary keeps the old array
+	 * reachable if the allocation fails */
+	grown = realloc(*argv, (argc + 2) * sizeof(char *));
+	if (NULL == grown) {
+		free(copy);
+		return SLURM_FAILURE;
+	}
+
+	grown[argc] = copy;
+	grown[argc + 1] = NULL;
+	*argv = grown;
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Insert a copy of arg in front of a new or existing argv array.
+ *
+ * On failure (NULL argument or out of memory) SLURM_FAILURE is returned
+ * and *argv is left exactly as it was.
+ */
+int argv_prepend_nosize(char ***argv, const char *arg)
+{
+	char **grown;
+	char *copy;
+	int argc;
+	int i;
+
+	if ((NULL == argv) || (NULL == arg))
+		return SLURM_FAILURE;
+
+	copy = strdup(arg);
+	if (NULL == copy)
+		return SLURM_FAILURE;
+
+	/* count how many entries currently exist (0 for a NULL array) */
+	argc = argv_count(*argv);
+
+	grown = realloc(*argv, (argc + 2) * sizeof(char *));
+	if (NULL == grown) {
+		free(copy);
+		return SLURM_FAILURE;
+	}
+	grown[argc + 1] = NULL;
+
+	/* shift all existing elements down 1 */
+	for (i = argc; 0 < i; i--) {
+		grown[i] = grown[i - 1];
+	}
+	grown[0] = copy;
+	*argv = grown;
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Append arg unless an equal string is already in the array.
+ *
+ * If an equal string is present and overwrite is set, that entry is
+ * replaced by a fresh copy of arg; otherwise the array is not changed.
+ */
+int argv_append_unique_nosize(char ***argv, const char *arg, bool overwrite)
+{
+	char *copy;
+	int i;
+
+	if ((NULL == argv) || (NULL == arg))
+		return SLURM_FAILURE;
+
+	/* if the provided array is NULL, then the arg cannot be present,
+	 * so just go ahead and append
+	 */
+	if (NULL == *argv) {
+		return argv_append_nosize(argv, arg);
+	}
+
+	/* see if this arg is already present in the array */
+	for (i = 0; NULL != (*argv)[i]; i++) {
+		if (0 == strcmp(arg, (*argv)[i])) {
+			/* already exists - are we authorized to overwrite? */
+			if (overwrite) {
+				/* copy before freeing: a NULL entry would
+				 * cut the array short */
+				copy = strdup(arg);
+				if (NULL == copy)
+					return SLURM_FAILURE;
+				free((*argv)[i]);
+				(*argv)[i] = copy;
+			}
+			return SLURM_SUCCESS;
+		}
+	}
+
+	/* we get here if the arg is not in the array - so add it */
+	return argv_append_nosize(argv, arg);
+}
+
+/*
+ * Free a NULL-terminated argv array.
+ *
+ * Releases every entry and the array itself; a NULL argv is a no-op.
+ */
+void argv_free(char **argv)
+{
+	char **p;
+
+	if (NULL == argv)
+		return;
+
+	for (p = argv; NULL != *p; ++p) {
+		free(*p);
+	}
+
+	free(argv);
+}
+
+
+/*
+ * Split a string into a NULL-terminated argv array.
+ */ +static char **argv_split_inter(const char *src_string, int delimiter, + int include_empty) +{ + char arg[SIZE]; + char **argv = NULL; + const char *p; + char *argtemp; + int argc = 0; + size_t arglen; + + while (src_string && *src_string) { + p = src_string; + arglen = 0; + + while (('\0' != *p) && (*p != delimiter)) { + ++p; + ++arglen; + } + + /* zero length argument, skip */ + if (src_string == p) { + if (include_empty) { + arg[0] = '\0'; + if (SLURM_SUCCESS != argv_append(&argc, &argv, arg)) + return NULL; + } + } + + /* tail argument, add straight from the original string */ + else if ('\0' == *p) { + if (SLURM_SUCCESS != argv_append(&argc, &argv, src_string)) + return NULL; + src_string = p; + continue; + } + + /* long argument, malloc buffer, copy and add */ + else if (arglen > (SIZE - 1)) { + argtemp = (char*) malloc(arglen + 1); + if (NULL == argtemp) + return NULL; + + strncpy(argtemp, src_string, arglen); + argtemp[arglen] = '\0'; + + if (SLURM_SUCCESS != argv_append(&argc, &argv, argtemp)) { + free(argtemp); + return NULL; + } + + free(argtemp); + } + + /* short argument, copy to buffer and add */ + else { + strncpy(arg, src_string, arglen); + arg[arglen] = '\0'; + + if (SLURM_SUCCESS != argv_append(&argc, &argv, arg)) + return NULL; + } + + src_string = p + 1; + } + + /* All done */ + return argv; +} + +char **argv_split(const char *src_string, int delimiter) +{ + return argv_split_inter(src_string, delimiter, 0); +} + +char **argv_split_with_empty(const char *src_string, int delimiter) +{ + return argv_split_inter(src_string, delimiter, 1); +} + +/* + * Return the length of a NULL-terminated argv array. + */ +int argv_count(char **argv) +{ + char **p; + int i; + + if (NULL == argv) + return 0; + + for (i = 0, p = argv; *p; i++, p++) + continue; + + return i; +} + +/* + * Join all the elements of an argv array into a single + * newly-allocated string. 
+ */ +char *argv_join(char **argv, int delimiter) +{ + char **p; + char *pp; + char *str; + size_t str_len = 0; + size_t i; + + /* Bozo case */ + if (NULL == argv || NULL == argv[0]) { + return strdup(""); + } + + /* Find the total string length in argv including delimiters. The + last delimiter is replaced by the NULL character. */ + for (p = argv; *p; ++p) { + str_len += strlen(*p) + 1; + } + + /* Allocate the string. */ + if (NULL == (str = (char*) malloc(str_len))) + return NULL; + + /* Loop filling in the string. */ + str[--str_len] = '\0'; + p = argv; + pp = *p; + + for (i = 0; i < str_len; ++i) { + if ('\0' == *pp) { + /* End of a string, fill in a delimiter + * and go to the next string. */ + str[i] = (char) delimiter; + ++p; + pp = *p; + } else { + str[i] = *pp++; + } + } + + /* All done */ + return str; +} + +/* + * Join all the elements of an argv array from within a + * specified range into a single newly-allocated string. + */ +char *argv_join_range(char **argv, size_t start, size_t end, int delimiter) +{ + char **p; + char *pp; + char *str; + size_t str_len = 0; + size_t i; + + /* Bozo case */ + if (NULL == argv || NULL == argv[0] || (int)start > argv_count(argv)) { + return strdup(""); + } + + /* Find the total string length in argv including delimiters. The + * last delimiter is replaced by the NULL character. */ + for (p = &argv[start], i=start; *p && i < end; ++p, ++i) { + str_len += strlen(*p) + 1; + } + + /* Allocate the string. */ + if (NULL == (str = (char*) malloc(str_len))) + return NULL; + + /* Loop filling in the string. */ + str[--str_len] = '\0'; + p = &argv[start]; + pp = *p; + + for (i = 0; i < str_len; ++i) { + if ('\0' == *pp) { + /* End of a string, fill in a delimiter and go to the + * next string. */ + str[i] = (char) delimiter; + ++p; + pp = *p; + } else { + str[i] = *pp++; + } + } + + /* All done */ + return str; +} + +/* + * Return the number of bytes consumed by an argv array. 
+ */ +size_t argv_len(char **argv) +{ + char **p; + size_t length; + + if (NULL == argv) + return (size_t) 0; + + length = sizeof(char *); + + for (p = argv; *p; ++p) { + length += strlen(*p) + 1 + sizeof(char *); + } + + return length; +} + +/* + * Copy a NULL-terminated argv array. + */ +char **argv_copy(char **argv) +{ + char **dupv = NULL; + int dupc = 0; + + if (NULL == argv) + return NULL; + + /* create an "empty" list, so that we return something valid if we + * were passed a valid list with no contained elements */ + dupv = (char**) malloc(sizeof(char*)); + dupv[0] = NULL; + + while (NULL != *argv) { + if (SLURM_SUCCESS != argv_append(&dupc, &dupv, *argv)) { + argv_free(dupv); + return NULL; + } + + ++argv; + } + + /* All done */ + return dupv; +} + +int argv_delete(int *argc, char ***argv, int start, int num_to_delete) +{ + int i; + int count; + int suffix_count; + char **tmp; + + /* Check for the bozo cases */ + if (NULL == argv || NULL == *argv || 0 == num_to_delete) { + return SLURM_SUCCESS; + } + count = argv_count(*argv); + if (start > count) { + return SLURM_SUCCESS; + } else if (start < 0 || num_to_delete < 0) { + return SLURM_FAILURE; + } + + /* Ok, we have some tokens to delete. Calculate the new length of + * the argv array. 
*/ + suffix_count = count - (start + num_to_delete); + if (suffix_count < 0) { + suffix_count = 0; + } + + /* Free all items that are being deleted */ + for (i = start; i < count && i < start + num_to_delete; ++i) { + free((*argv)[i]); + } + + /* Copy the suffix over the deleted items */ + for (i = start; i < start + suffix_count; ++i) { + (*argv)[i] = (*argv)[i + num_to_delete]; + } + + /* Add the trailing NULL */ + (*argv)[i] = NULL; + + /* adjust the argv array */ + tmp = (char**)realloc(*argv, sizeof(char**) * (i + 1)); + if (NULL != tmp) *argv = tmp; + + /* adjust the argc */ + (*argc) -= num_to_delete; + + return SLURM_SUCCESS; +} + +int argv_insert(char ***target, int start, char **source) +{ + int i, source_count, target_count; + int suffix_count; + + /* Check for the bozo cases */ + if (NULL == target || NULL == *target || start < 0) { + return SLURM_FAILURE; + } else if (NULL == source) { + return SLURM_SUCCESS; + } + + /* Easy case: appending to the end */ + target_count = argv_count(*target); + source_count = argv_count(source); + if (start > target_count) { + for (i = 0; i < source_count; ++i) { + argv_append(&target_count, target, source[i]); + } + } + + /* Harder: insertting into the middle */ + else { + /* Alloc out new space */ + *target = (char**) realloc(*target, + sizeof(char *) * + (target_count + source_count + 1)); + + /* Move suffix items down to the end */ + suffix_count = target_count - start; + for (i = suffix_count - 1; i >= 0; --i) { + (*target)[start + source_count + i] = + (*target)[start + i]; + } + (*target)[start + suffix_count + source_count] = NULL; + + /* Strdup in the source argv */ + for (i = start; i < start + source_count; ++i) { + (*target)[i] = strdup(source[i - start]); + } + } + + /* All done */ + return SLURM_SUCCESS; +} + +int argv_insert_element(char ***target, int location, char *source) +{ + int i, target_count; + int suffix_count; + + /* Check for the bozo cases */ + if (NULL == target || NULL == *target || 
location < 0) { + return SLURM_FAILURE; + } else if (NULL == source) { + return SLURM_SUCCESS; + } + + /* Easy case: appending to the end */ + target_count = argv_count(*target); + if (location > target_count) { + argv_append(&target_count, target, source); + return SLURM_SUCCESS; + } + + /* Alloc out new space */ + *target = (char**) realloc(*target, + sizeof(char*) * (target_count + 2)); + + /* Move suffix items down to the end */ + suffix_count = target_count - location; + for (i = suffix_count - 1; i >= 0; --i) { + (*target)[location + 1 + i] = + (*target)[location + i]; + } + (*target)[location + suffix_count + 1] = NULL; + + /* Strdup in the source */ + (*target)[location] = strdup(source); + + /* All done */ + return SLURM_SUCCESS; +} diff --git a/src/plugins/slurmctld/dynalloc/argv.h b/src/plugins/slurmctld/dynalloc/argv.h new file mode 100644 index 0000000000000000000000000000000000000000..90335dfac693d8596ee64479fae0f4fa88e496af --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/argv.h @@ -0,0 +1,317 @@ +/*****************************************************************************\ + * argv.h - + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef DYNALLOC_ARGV_H_ +#define DYNALLOC_ARGV_H_ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +/** + * Append a string (by value) to an new or existing NULL-terminated + * argv array. + * + * @param argc Pointer to the length of the argv array. Must not be + * NULL. + * @param argv Pointer to an argv array. + * @param str Pointer to the string to append. 
+ * + * @retval SLURM_SUCCESS On success + * @retval SLURM_FAILURE On failure + * + * This function adds a string to an argv array of strings by value; + * it is permissible to pass a string on the stack as the arg + * argument to this function. + * + * To add the first entry to an argv array, call this function with + * (*argv == NULL). This function will allocate an array of length + * 2; the first entry will point to a copy of the string passed in + * arg, the second entry will be set to NULL. + * + * If (*argv != NULL), it will be realloc'ed to be 1 (char*) larger, + * and the next-to-last entry will point to a copy of the string + * passed in arg. The last entry will be set to NULL. + * + * Just to reinforce what was stated above: the string is copied by + * value into the argv array; there is no need to keep the original + * string (i.e., the arg parameter) after invoking this function. + */ +extern int argv_append(int *argc, char ***argv, const char *arg); + +/** + * Append to an argv-style array, but ignore the size of the array. + * + * @param argv Pointer to an argv array. + * @param arg Pointer to the string to append. + * + * @retval SLURM_SUCCESS On success + * @retval SLURM_FAILURE On failure + * + * This function is identical to the argv_append() function + * except that it does not take a pointer to an argc (integer + * representing the size of the array). This is handy for + * argv-style arrays that do not have integers that are actively + * maintaining their sizes. 
+ */ +extern int argv_append_nosize(char ***argv, const char *arg); + +/** + * Insert the provided arg at the beginning of the array + * + * @param argv Pointer to an argv array + * @param arg Pointer to the string to prepend + * + * @retval SLURM_SUCCESS On success + * @retval SLURM_FAILURE On failure + */ +extern int argv_prepend_nosize(char ***argv, const char *arg); + +/** + * Append to an argv-style array, but only if the provided argument + * doesn't already exist somewhere in the array. Ignore the size of the array. + * + * @param argv Pointer to an argv array. + * @param arg Pointer to the string to append. + * @param overwrite Whether or not to overwrite a matching value if found + * + * @retval SLURM_SUCCESS On success + * @retval SLURM_FAILURE On failure + * + * This function is identical to the argv_append_nosize() function + * except that it only appends the provided argument if it does not already + * exist in the provided array; if it does exist, the stored copy is + * replaced only when overwrite is true. + */ +extern int argv_append_unique_nosize(char ***argv, const char *arg, + bool overwrite); + +/** + * Free a NULL-terminated argv array. + * + * @param argv Argv array to free. + * + * This function frees an argv array and all of the strings that it + * contains. Since the argv parameter is passed by value, it is not + * set to NULL in the caller's scope upon return. + * + * It is safe to invoke this function with a NULL pointer. It is + * not safe to invoke this function with a non-NULL-terminated argv + * array. + */ +extern void argv_free(char **argv); + +/** + * Split a string into a NULL-terminated argv array. Do not include empty + * strings in result array. + * + * @param src_string Input string. + * @param delimiter Delimiter character. 
+ * + * @retval argv pointer to new argv array on success + * @retval NULL on error, or if src_string is NULL or yields no non-empty tokens + * + * All strings are inserted into the argv array by value; the + * newly-allocated array makes no references to the src_string + * argument (i.e., it can be freed after calling this function + * without invalidating the output argv). + */ +extern char **argv_split(const char *src_string, int delimiter); + +/** + * Split a string into a NULL-terminated argv array. Include empty + * strings in result array. + * + * @param src_string Input string. + * @param delimiter Delimiter character. + * + * @retval argv pointer to new argv array on success + * @retval NULL on error, or if src_string is NULL or empty + * + * All strings are inserted into the argv array by value; the + * newly-allocated array makes no references to the src_string + * argument (i.e., it can be freed after calling this function + * without invalidating the output argv). + */ +extern char **argv_split_with_empty(const char *src_string, int delimiter) ; + +/** + * Return the length of a NULL-terminated argv array. + * + * @param argv The input argv array. + * + * @retval 0 If NULL is passed as argv. + * @retval count Number of entries in the argv array. + * + * The argv array must be NULL-terminated. + */ +extern int argv_count(char **argv); + +/** + * Join all the elements of an argv array into a single + * newly-allocated string. + * + * @param argv The input argv array. + * @param delimiter Delimiter character placed between each argv string. + * + * @retval new_string Output string on success. + * @retval NULL On failure. + * + * Similar to the Perl join function, this function takes an input + * argv and joins its elements into a single string separated by the + * delimiter character. + * + * It is the caller's responsibility to free the returned string. 
+ */ +extern char *argv_join(char **argv, int delimiter) ; + +extern char *argv_join_range(char **argv, size_t start, size_t end, + int delimiter); + +/** + * Return the number of bytes consumed by an argv array. + * + * @param argv The input argv array. + * + * Count the number of bytes consumed by a NULL-terminated argv + * array. This includes the number of bytes used by each of the + * strings as well as the pointers used in the argv array. + */ +extern size_t argv_len(char **argv); + +/** + * Copy a NULL-terminated argv array. + * + * @param argv The input argv array. + * + * @retval argv Copied argv array on success. + * @retval NULL On failure, or if argv is NULL. + * + * Copy an argv array, including copying all of its strings. + * Specifically, the output argv will be an array of the same length + * as the input argv, and strcmp(argv_in[i], argv_out[i]) will be 0. + */ +extern char **argv_copy(char **argv); + +/** + * Delete one or more tokens from the middle of an argv. + * + * @param argc Pointer to the caller's count of entries in argv; it is reduced by the deletion + * @param argv The argv to delete from + * @param start The index of the first token to delete + * @param num_to_delete How many tokens to delete + * + * @retval SLURM_SUCCESS On success, including the no-op cases + * @retval SLURM_FAILURE If start or num_to_delete is negative + * + * Delete some tokens from within an existing argv. The start + * parameter specifies the first token to delete, and will delete + * (num_to_delete-1) tokens following it. argv will be realloc()ed + * to *argc - num_deleted size. + * + * If start is beyond the end of the argv array, this function is + * a no-op. + * + * If num_to_delete runs beyond the end of the argv array, this + * function will delete all tokens starting with start to the end + * of the array. + * + * All deleted items in the argv array will have their contents + * free()ed (it is assumed that the argv "owns" the memory that + * the pointer points to). 
+ */ +extern int argv_delete(int *argc, char ***argv, + int start, int num_to_delete); + +/** + * Insert one argv array into the middle of another + * + * @param target The argv to insert tokens into + * @param start Index where the first token will be placed in target + * @param source The argv to copy tokens from + * + * @retval SLURM_SUCCESS upon success + * @retval SLURM_FAILURE if any parameters are nonsensical + * + * This function takes one argv and inserts it in the middle of + * another. The first token in source will be inserted at index + * start in the target argv; all other tokens will follow it. + * Similar to argv_append(), the target may be realloc()'ed + * to accommodate the new storage requirements. + * + * The source array is left unaffected -- its contents are copied + * by value over to the target array (i.e., the strings that + * source points to are strdup'ed into the new locations in + * target). + */ +extern int argv_insert(char ***target, int start, char **source); + +/** + * Insert one argv element in front of a specific position in an array + * + * @param target The argv to insert tokens into + * @param location Index where the token will be placed in target + * @param source The token to be inserted + * + * @retval SLURM_SUCCESS upon success + * @retval SLURM_FAILURE if any parameters are nonsensical + * + * This function takes one token and inserts it into the middle of + * an argv array. The token will be inserted at the specified index + * in the target argv; all other tokens will be shifted down. + * Similar to argv_append(), the target may be realloc()'ed + * to accommodate the new storage requirements. + * + * The source token is left unaffected -- its contents are copied + * by value over to the target array (i.e., the string that + * source points to is strdup'ed into the new location in + * target). 
+ */ +extern int argv_insert_element(char ***target, int location, char *source); + + +#endif /* DYNALLOC_ARGV_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/constants.h b/src/plugins/slurmctld/dynalloc/constants.h new file mode 100644 index 0000000000000000000000000000000000000000..2a9da1a0ed5ee187654da001a5338739bc5f28b6 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/constants.h @@ -0,0 +1,51 @@ +/*****************************************************************************\ + * constants.h - + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef DYNALLOC_CONSTANTS_H_ +#define DYNALLOC_CONSTANTS_H_ + +#if HAVE_STDBOOL_H +# include <stdbool.h> +#else +typedef enum {false, true} bool; +#endif /* !HAVE_STDBOOL_H */ + + +#define SIZE 8192 + + +#endif /* DYNALLOC_CONSTANTS_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/deallocate.c b/src/plugins/slurmctld/dynalloc/deallocate.c new file mode 100644 index 0000000000000000000000000000000000000000..6da065179aada9dcae67ce9d3e857ef9b159f09d --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/deallocate.c @@ -0,0 +1,167 @@ +/*****************************************************************************\ + * deallocate.c - complete job resource allocation + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"

#include "src/common/log.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/port_mgr.h"
#include "src/slurmctld/state_save.h"

#include "deallocate.h"
#include "argv.h"
#include "constants.h"
#include "job_ports_list.h"


/**
 * deallocate the resources for slurm jobs.
 *
 * the deallocate msg can be like "deallocate slurm_jobid=123
 * job_return_code=0:slurm_jobid=124 job_return_code=0"
 *
 * The msg is split on ':' and every piece is handled on its own: the
 * job named by "slurm_jobid=" is completed with the value given by
 * "job_return_code=" (NO_VAL when the piece has none), and then its
 * reserved ports are released.  A piece without a parsable slurm_jobid
 * is logged and skipped.  A NULL or empty msg does nothing.
 *
 * IN:
 *	msg: the deallocate msg
 *
 */
extern void deallocate(const char *msg)
{
	char **jobid_argv = NULL, **tmp_jobid_argv;
	char *pos = NULL;
	/* params to complete a job allocation */
	uint32_t slurm_jobid;
	uid_t uid = 0;
	bool job_requeue = false;
	bool node_fail = false;
	uint32_t job_return_code;
	int rc = SLURM_SUCCESS;
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK
	};

	/* argv_split() returns NULL when msg is NULL or empty, when it
	 * holds nothing but delimiters, or when memory runs out */
	jobid_argv = argv_split(msg, ':');
	if (NULL == jobid_argv)
		return;

	for (tmp_jobid_argv = jobid_argv; *tmp_jobid_argv;
	     tmp_jobid_argv++) {
		/* to identify the slurm_job; without an id there is
		 * nothing that could safely be completed */
		pos = strstr(*tmp_jobid_argv, "slurm_jobid=");
		if ((NULL == pos) ||
		    (1 != sscanf(pos + strlen("slurm_jobid="), "%u",
				 &slurm_jobid))) {
			info("deallocate: no valid slurm_jobid in \"%s\"",
			     *tmp_jobid_argv);
			continue;
		}

		/* start from "unset" for every job, so that a return
		 * code is never inherited from the previous piece */
		job_return_code = NO_VAL;
		pos = strstr(*tmp_jobid_argv, "job_return_code=");
		if (NULL != pos) {
			pos = pos + strlen("job_return_code="); /* step over*/
			if (1 != sscanf(pos, "%u", &job_return_code))
				job_return_code = NO_VAL;
		}

		lock_slurmctld(job_write_lock);
		rc = job_complete(slurm_jobid, uid, job_requeue,
				  node_fail, job_return_code);
		unlock_slurmctld(job_write_lock);

		/* return result */
		if (rc) {
			info("deallocate JobId=%u: %s ",
			     slurm_jobid, slurm_strerror(rc));
		} else {
			debug2("deallocate JobId=%u ", slurm_jobid);
			(void) schedule_job_save();	/* Has own locking */
			(void) schedule_node_save();	/* Has own locking */
		}

		/* deallocate port */
		deallocate_port(slurm_jobid);
	}

	/* free the split msg */
	argv_free(jobid_argv);
}

/**
 * deallocate the ports for a slurm job.
 *
 * deallocate the ports and remove the entry from List.
+ * + * IN: + * slurm_jobid: slurm jobid + * + */ +extern void deallocate_port(uint32_t slurm_jobid) +{ + job_ports_t *item = NULL; + ListIterator it = NULL; + struct job_record *job_ptr = NULL; + struct step_record step; + + if (NULL == job_ports_list) + return; + + it = list_iterator_create(job_ports_list); + item = (job_ports_t *) list_find(it, find_job_ports_item_func, + &slurm_jobid); + if (NULL == item) { + info ("slurm_jobid = %u not found in List.", slurm_jobid); + return; + } + + job_ptr = find_job_record(slurm_jobid); + step.job_ptr = job_ptr; + step.step_node_bitmap = job_ptr->node_bitmap; + step.step_id = 0; + step.resv_port_cnt = item->port_cnt; + step.resv_ports =item->resv_ports; + step.resv_port_array = xmalloc(sizeof(int) * step.resv_port_cnt); + memcpy(step.resv_port_array, item->port_array, + sizeof(int) * step.resv_port_cnt); + /* call resv_port_free in port_mgr.c */ + resv_port_free(&step); + + /* delete the item from list and automatically + * call 'free_job_ports_item_func' */ + list_delete_item (it); + /* destroy iterator */ + list_iterator_destroy(it); +} diff --git a/src/plugins/slurmctld/dynalloc/deallocate.h b/src/plugins/slurmctld/dynalloc/deallocate.h new file mode 100644 index 0000000000000000000000000000000000000000..d71a18eed0bfcb717423e39cad49e89f42caf962 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/deallocate.h @@ -0,0 +1,83 @@ +/*****************************************************************************\ + * deallocate.h - complete job resource allocation + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef DYNALLOC_DEALLOCATE_H_ +#define DYNALLOC_DEALLOCATE_H_ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#include "msg.h" + +/** + * deallocate the resources for slurm jobs. 
+ * + * the deallocate msg can be like "deallocate slurm_jobid=123 + * job_return_code=0:slurm_jobid=124 job_return_code=0" + * + * IN: + * msg: the deallocate msg + * + */ +extern void deallocate(const char *msg); + +/** + * deallocate the ports for a slurm job. + * + * deallocate the ports and remove the entry from List. + * + * IN: + * slurm_jobid: slurm jobid + * + */ +extern void deallocate_port(uint32_t slurm_jobid); + +#endif /* DYNALLOC_DEALLOCATE_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/info.c b/src/plugins/slurmctld/dynalloc/info.c new file mode 100644 index 0000000000000000000000000000000000000000..b79027df3cddfd33eafbb39fdd124302a6defb1d --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/info.c @@ -0,0 +1,263 @@ +/*****************************************************************************\ + * info.c - get nodes information in slurm + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. 
If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "slurm/slurm.h" +#include "slurm/slurm_errno.h" +#include "src/common/log.h" +#include "src/common/node_conf.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" + +#include "src/slurmctld/locks.h" + +#include "info.h" + +static uint16_t fast_schedule = (uint16_t) NO_VAL; + +/** + * get total number of nodes and slots in slurm. 
+ * + * IN: + * OUT Parameter: + * nodes: number of nodes in slurm + * slots: number of slots in slurm + */ +void get_total_nodes_slots (uint16_t *nodes, uint16_t *slots) +{ + int i; + struct node_record *node_ptr; + /* Locks: Read node */ + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + + if (fast_schedule == (uint16_t) NO_VAL) + fast_schedule = slurm_get_fast_schedule(); + + *slots = 0; + lock_slurmctld(node_read_lock); + *nodes = node_record_count; + for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; + i++, node_ptr++) { + if (fast_schedule == 2) + (*slots) += node_ptr->config_ptr->cpus; + else + (*slots) += node_ptr->cpus; + } + unlock_slurmctld(node_read_lock); +} + +/** + * get number of available nodes and slots in slurm. + * + * IN: + * OUT Parameter: + * nodes: number of available nodes in slurm + * slots: number of available slots in slurm + */ +void get_free_nodes_slots (uint16_t *nodes, uint16_t *slots) +{ + int i; + struct node_record *node_ptr; + /* Locks: Read node */ + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + + if (fast_schedule == (uint16_t) NO_VAL) + fast_schedule = slurm_get_fast_schedule(); + + *nodes = 0; + *slots = 0; + lock_slurmctld(node_read_lock); + for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; + i++, node_ptr++) { + if (IS_NODE_IDLE(node_ptr)) { + (*nodes) ++; + if (fast_schedule == 2) + (*slots) += node_ptr->config_ptr->cpus; + else + (*slots) += node_ptr->cpus; + } + } + unlock_slurmctld(node_read_lock); +} + +/** + * get available node list in slurm. 
+ * + * IN: + * OUT Parameter: + * RET OUT: + * hostlist_t: available node list in slurm + * + * Note: the return result should be slurm_hostlist_destroy(hostlist) + */ +hostlist_t get_available_host_list_system_m(void) +{ + int i; + struct node_record *node_ptr; + hostlist_t hostlist = NULL; + + /* Locks: Read node */ + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + + hostlist = slurm_hostlist_create(NULL); + lock_slurmctld(node_read_lock); + for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; + i++, node_ptr++) { + if (IS_NODE_IDLE(node_ptr)) { + slurm_hostlist_push_host(hostlist, node_ptr->name); + } + } + unlock_slurmctld(node_read_lock); + + return hostlist; +} + +/** + * get the range of available node list in slurm. + * + * IN: + * OUT Parameter: + * RET OUT: + * a string indicating the range of available node list in slurm + * + * Note: the return result should be free(str) + */ +char* get_available_host_list_range_sytem_m(void) +{ + hostlist_t hostlist = NULL; + char *range = NULL; + + hostlist = get_available_host_list_system_m(); + range = slurm_hostlist_ranged_string_malloc (hostlist); + slurm_hostlist_destroy(hostlist); + return range; +} + +/** + * get available node list within a given node list range + * + * IN: + * node_list: the given node list range + * OUT Parameter: + * RET OUT + * available node list + * + * Note: the return result should be slurm_hostlist_destroy(hostlist) + */ +hostlist_t choose_available_from_node_list_m(const char *node_list) +{ + char *hostname = NULL; + hostlist_t given_hl = NULL; + hostlist_t avail_hl_system = NULL; + hostlist_t result_hl = NULL; + + given_hl = slurm_hostlist_create (node_list); + avail_hl_system = get_available_host_list_system_m(); + result_hl = slurm_hostlist_create(NULL); + + while ((hostname = slurm_hostlist_shift(given_hl))) { + if (-1 != slurm_hostlist_find (avail_hl_system, hostname)) { + slurm_hostlist_push_host(result_hl, hostname); + } + /* 
Note: to free memory after slurm_hostlist_shift(), + * remember to use free(str), not xfree(str) + */ + free(hostname); + } + + slurm_hostlist_destroy(given_hl); + slurm_hostlist_destroy(avail_hl_system); + return result_hl; +} + +/** + * get a subset node range with node_num nodes from a host_name_list + * + * IN: + * host_name_list: the given host_name_list + * node_num: the number of host to choose + * OUT Parameter: + * RET OUT + * the subset node range, NULL if the node number of subset is + * larger than the node number in host_name_list + * + * Note: the return should be free(str) + */ +char* get_hostlist_subset_m(const char *host_name_list, uint16_t node_num) +{ + hostlist_t hostlist = NULL; + hostlist_t temp_hl = NULL; + int sum; + char *hostname = NULL; + char *range = NULL; + int i; + + if(NULL == host_name_list) + return NULL; + + hostlist = slurm_hostlist_create(host_name_list); + sum = slurm_hostlist_count(hostlist); + + if (sum < node_num) { + error ("node_num > sum of host in hostlist"); + slurm_hostlist_destroy(hostlist); + return NULL; + } + + temp_hl = slurm_hostlist_create(NULL); + + for (i = 0; i < node_num; i++) { + hostname = slurm_hostlist_shift(hostlist); + if (NULL != hostname) { + slurm_hostlist_push_host(temp_hl, hostname); + free(hostname); + } + } + + range = slurm_hostlist_ranged_string_malloc(temp_hl); + + slurm_hostlist_destroy(temp_hl); + slurm_hostlist_destroy(hostlist); + return range; +} diff --git a/src/plugins/slurmctld/dynalloc/info.h b/src/plugins/slurmctld/dynalloc/info.h new file mode 100644 index 0000000000000000000000000000000000000000..e09b9aebb7eb5f8911131dbc0a8653cf2da9a85e --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/info.h @@ -0,0 +1,128 @@ +/*****************************************************************************\ + * info.h - get nodes information in slurm + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. 
+ * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#ifndef DYNALLOC_INFO_H_ +#define DYNALLOC_INFO_H_ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include "slurm/slurm.h" + +/** + * get total number of nodes and slots in slurm. + * + * IN: + * OUT Parameter: + * nodes: number of nodes in slurm + * slots: number of slots in slurm + */ +extern void get_total_nodes_slots(uint16_t *nodes, uint16_t *slots); + +/** + * get number of available nodes and slots in slurm. + * + * IN: + * OUT Parameter: + * nodes: number of available nodes in slurm + * slots: number of available slots in slurm + */ +extern void get_free_nodes_slots(uint16_t *nodes, uint16_t *slots); + +/** + * get available node list in slurm. + * + * IN: + * OUT Parameter: + * RET OUT: + * hostlist_t: available node list in slurm + * + * Note: the return result should be slurm_hostlist_destroy(hostlist) + */ +extern hostlist_t get_available_host_list_system_m(void); + +/** + * get the range of available node list in slurm. 
+ * + * IN: + * OUT Parameter: + * RET OUT: + * a string indicating the range of available node list in slurm + * + * Note: the return result should be free(str) + */ +extern char* get_available_host_list_range_sytem_m(void); + +/** + * get available node list within a given node list range + * + * IN: + * node_list: the given node list range + * OUT Parameter: + * RET OUT + * available node list + * + * Note: the return result should be slurm_hostlist_destroy(hostlist) + */ +extern hostlist_t choose_available_from_node_list_m(const char *node_list); + +/** + * get a subset node range with node_num nodes from a host_name_list + * + * IN: + * host_name_list: the given host_name_list + * node_num: the number of host to choose + * OUT Parameter: + * RET OUT + * the subset node range, NULL if the node number of subset is + * larger than the node number in host_name_list + * + * Note: the return should be free(str) + */ +extern char* get_hostlist_subset_m(const char *host_name_list, uint16_t node_num); + +#endif /* DYNALLOC_INFO_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/job_ports_list.c b/src/plugins/slurmctld/dynalloc/job_ports_list.c new file mode 100644 index 0000000000000000000000000000000000000000..7fd2efafcb27ea2877fe38365f795c590bbd8867 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/job_ports_list.c @@ -0,0 +1,112 @@ +/*****************************************************************************\ + * job_ports_list.c - keep the pair of (slurm_jobid, resv_ports) for future release + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" + +#include "job_ports_list.h" + +List job_ports_list = NULL; + +extern void append_job_ports_item(uint32_t slurm_jobid, uint16_t port_cnt, + char *resv_ports, int *port_array) +{ + job_ports_t *item = NULL; + + if (NULL == job_ports_list) + job_ports_list = list_create(free_job_ports_item_func); + + item = xmalloc(sizeof(job_ports_t)); + item->slurm_jobid = slurm_jobid; + item->port_cnt = port_cnt; + item->resv_ports = xstrdup(resv_ports); + item->port_array = xmalloc(sizeof(int) * port_cnt); + memcpy(item->port_array, port_array, sizeof(int)*port_cnt); + list_append (job_ports_list, item); +} + +extern void free_job_ports_item_func(void *voiditem) +{ + job_ports_t *item = (job_ports_t *) voiditem; + if (item) { + xfree(item->resv_ports); + xfree(item->port_array); + xfree(item); + } +} + +extern int find_job_ports_item_func(void *voiditem, void *key) +{ + job_ports_t *item = NULL; + uint32_t *jobid = NULL; + + item = (job_ports_t *)voiditem; + jobid = (uint32_t *)key; + + if (item->slurm_jobid == *jobid) + return 1; + else + return 0; +} + + +extern void print_list() +{ + int i, j; + ListIterator it = NULL; + job_ports_t *item = NULL; + + info("count = %d", list_count (job_ports_list)); + + /* create iterator! 
*/ + it = list_iterator_create (job_ports_list); + /* list_next until NULL */ + j = 0; + while ( NULL != (item = (job_ports_t*)list_next(it)) ) { + info("j = %d", j++); + info("item->slurm_jobid = %u", item->slurm_jobid); + info("item->port_cnt = %d", item->port_cnt); + info("item->resv_ports = %s", item->resv_ports); + for (i = 0; i < item->port_cnt; i++) { + info("item->port_array[i] = %d", item->port_array[i]); + } + } + list_iterator_destroy(it); +} diff --git a/src/plugins/slurmctld/dynalloc/job_ports_list.h b/src/plugins/slurmctld/dynalloc/job_ports_list.h new file mode 100644 index 0000000000000000000000000000000000000000..e64cc9a4a20f45f0a594c2e5adf870ee425a3539 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/job_ports_list.h @@ -0,0 +1,79 @@ +/*****************************************************************************\ + * job_ports_list.h - keep the pair of (slurm_jobid, resv_ports) for future release + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. 
You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef JOB_PORTS_LIST_H_ +#define JOB_PORTS_LIST_H_ + + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <string.h> + +#include "src/common/list.h" +#include "src/common/xmalloc.h" + + +typedef struct { + uint32_t slurm_jobid; + uint16_t port_cnt; + char *resv_ports; + int *port_array; +} job_ports_t; + +extern List job_ports_list; + +extern void append_job_ports_item(uint32_t slurm_jobid, uint16_t port_cnt, + char *resv_ports, int *port_array); + +extern void free_job_ports_item_func(void *voiditem); + +extern int find_job_ports_item_func(void *voiditem, void *key); + +extern void print_list(); + +#endif /* JOB_PORTS_LIST_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/msg.c b/src/plugins/slurmctld/dynalloc/msg.c new file mode 100644 index 
0000000000000000000000000000000000000000..be7fcd2f0c0c40103c75afa4b11a3d8e3095c018 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/msg.c @@ -0,0 +1,375 @@ +/*****************************************************************************\ + * msg.c - Message/communcation manager for dynalloc (resource dynamic + * allocation) plugin + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include "slurm/slurm.h" +#include "src/common/uid.h" +#include "src/slurmctld/locks.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/poll.h> + +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "info.h" +#include "allocate.h" +#include "allocator.h" +#include "deallocate.h" +#include "msg.h" +#include "argv.h" +#include "constants.h" + +#define _DEBUG 0 + +/* When a remote socket closes on AIX, we have seen poll() return EAGAIN + * indefinitely for a pending write request. Rather than locking up + * slurmctld's dynalloc interface, abort after MAX_RETRIES poll() failures. */ +#define MAX_RETRIES 10 + +static bool thread_running = false; +static bool thread_shutdown = false; +static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_t msg_thread_id; +static char *err_msg; +static int err_code; +static uint16_t sched_port; + +static void * _msg_thread(void *no_data); +static void _proc_msg(slurm_fd_t new_fd, char *msg); +static char * _recv_msg(slurm_fd_t new_fd); +static size_t _send_msg(slurm_fd_t new_fd, char *buf, size_t size); +static size_t _read_bytes(int fd, char *buf, size_t size); +static size_t _write_bytes(int fd, char *buf, size_t size); + + +/*****************************************************************************\ + * spawn message hander thread +\*****************************************************************************/ +extern int spawn_msg_thread(void) +{ + pthread_attr_t thread_attr_msg; + slurm_ctl_conf_t *conf; + /* Locks: Read configurationn */ + slurmctld_lock_t config_read_lock = { + READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; + + 
lock_slurmctld(config_read_lock); + conf = slurm_conf_lock(); + sched_port = conf->dynalloc_port; + slurm_conf_unlock(); + unlock_slurmctld(config_read_lock); + if (sched_port == 0) { + error("DynAllocPort == 0, not spawning communication thread"); + return SLURM_ERROR; + } + + pthread_mutex_lock( &thread_flag_mutex ); + if (thread_running) { + error("dynalloc thread already running, not starting another"); + pthread_mutex_unlock(&thread_flag_mutex); + return SLURM_ERROR; + } + + slurm_attr_init(&thread_attr_msg); + if (pthread_create(&msg_thread_id, &thread_attr_msg, + _msg_thread, NULL)) + fatal("pthread_create %m"); + else + info("dynalloc: msg thread create successful!"); + + + slurm_attr_destroy(&thread_attr_msg); + thread_running = true; + pthread_mutex_unlock(&thread_flag_mutex); + return SLURM_SUCCESS; +} + +/*****************************************************************************\ + * terminate message hander thread +\*****************************************************************************/ +extern void term_msg_thread(void) +{ + pthread_mutex_lock(&thread_flag_mutex); + if (thread_running) { + int fd; + slurm_addr_t addr; + + thread_shutdown = true; + + /* Open and close a connection to the listening port. + * Allows slurm_accept_msg_conn() to return in + * _msg_thread() so that it can check the thread_shutdown + * flag. 
+ */ + slurm_set_addr(&addr, sched_port, "localhost"); + fd = slurm_open_stream(&addr); + if (fd != -1) { + /* we don't care if the open failed */ + slurm_close_stream(fd); + } + + debug2("waiting for dynalloc thread to exit"); + pthread_join(msg_thread_id, NULL); + msg_thread_id = 0; + thread_shutdown = false; + thread_running = false; + debug2("join of dynalloc thread successful"); + } + pthread_mutex_unlock(&thread_flag_mutex); +} + +/*****************************************************************************\ + * message hander thread +\*****************************************************************************/ +static void *_msg_thread(void *no_data) +{ + slurm_fd_t sock_fd = -1, new_fd; + slurm_addr_t cli_addr; + char *msg; + int i; + + /* If JobSubmitDynAllocPort is already taken, keep trying to open it + * once per minute. Slurmctld will continue to function + * during this interval even if nothing can be scheduled. */ + for (i=0; (!thread_shutdown); i++) { + if (i > 0) + sleep(60); + sock_fd = slurm_init_msg_engine_port(sched_port); + if (sock_fd != SLURM_SOCKET_ERROR) + break; + error("dynalloc: slurm_init_msg_engine_port %u %m", + sched_port); + error("dynalloc: Unable to communicate with ORTE RAS"); + } + + /* Process incoming RPCs until told to shutdown */ + while (!thread_shutdown) { + if ((new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr)) + == SLURM_SOCKET_ERROR) { + if (errno != EINTR) + error("dyalloc: slurm_accept_msg_conn %m"); + continue; + } + + if (thread_shutdown) { + close(new_fd); + break; + } + + err_code = 0; + err_msg = ""; + msg = _recv_msg(new_fd); + if (msg) { + _proc_msg(new_fd, msg); + xfree(msg); + } + slurm_close_accepted_conn(new_fd); + } + verbose("dynalloc: message engine shutdown"); + if (sock_fd > 0) + (void) slurm_shutdown_msg_engine(sock_fd); + pthread_exit((void *) 0); + return NULL; +} + +static size_t _read_bytes(int fd, char *buf, size_t size) +{ + size_t bytes_remaining, bytes_read; + char *ptr; + struct pollfd 
ufds; + int rc; + + bytes_remaining = size; + size = 0; + ufds.fd = fd; + ufds.events = POLLIN; + ptr = buf; + while (bytes_remaining > 0) { +// rc = poll(&ufds, 1, 10000); /* 10 sec timeout */ + rc = poll(&ufds, 1, 100); //0.1sec + if (rc == 0) /* timed out */ + break; + if ((rc == -1) && /* some error */ + ((errno== EINTR) || (errno == EAGAIN))) + continue; + if ((ufds.revents & POLLIN) == 0) /* some poll error */ + break; + + bytes_read = read(fd, ptr, bytes_remaining); + if (bytes_read <= 0) + break; + bytes_remaining -= bytes_read; + size += bytes_read; + ptr += bytes_read; + } + + return size; +} + +static size_t _write_bytes(int fd, char *buf, size_t size) +{ + size_t bytes_remaining, bytes_written; + char *ptr; + struct pollfd ufds; + int rc, retry_cnt = 0; + + bytes_remaining = size; + size = 0; + ptr = buf; + ufds.fd = fd; + ufds.events = POLLOUT; + while (bytes_remaining > 0) { +// rc = poll(&ufds, 1, 10000); /* 10 sec timeout */ + rc = poll(&ufds, 1, 100); //0.1sec + if (rc == 0) /* timed out */ + break; + if ((rc == -1) && /* some error */ + ((errno== EINTR) || (errno == EAGAIN))) { + if ((retry_cnt++) >= MAX_RETRIES) { + error("dynalloc: repeated poll errors for " + "write: %m"); + break; + } + continue; + } + if ((ufds.revents & POLLOUT) == 0) /* some poll error */ + break; + + bytes_written = write(fd, ptr, bytes_remaining); + if (bytes_written <= 0) + break; + bytes_remaining -= bytes_written; + size += bytes_written; + ptr += bytes_written; + } + + return size; +} + +/*****************************************************************************\ + * Read a message (request) from specified file descriptor + * + * RET - The message which must be xfreed or + * NULL on error +\*****************************************************************************/ +static char * _recv_msg(slurm_fd_t new_fd) +{ + char *buf; + buf = xmalloc(SIZE + 1); /* need '\0' on end to print */ + if (_read_bytes((int) new_fd, buf, SIZE) <= 0) { + err_code = -246; + err_msg = 
"unable to read message data"; + error("dynalloc: unable to read data message"); + xfree(buf); + return NULL; + } + + info("-------------------------"); + info("dynalloc msg recv:%s", buf); + + return buf; +} + +/*****************************************************************************\ + * Send a message (response) to specified file descriptor + * + * RET - Number of data bytes written (excludes header) +\*****************************************************************************/ +static size_t _send_msg(slurm_fd_t new_fd, char *buf, size_t size) +{ + size_t data_sent; + + if (slurm_get_debug_flags()) + info("dynalloc msg send:%s", buf); + + data_sent = _write_bytes((int) new_fd, buf, size); + if (data_sent != size) { + error("dynalloc: unable to write data message (%lu of %lu) %m", + (long unsigned) data_sent, (long unsigned) size); + } + + return data_sent; +} + +/*****************************************************************************\ + * process and respond to a request +\*****************************************************************************/ +static void _proc_msg(slurm_fd_t new_fd, char *msg) +{ + char send_buf[SIZE]; + uint16_t nodes = 0, slots = 0; + + info("AAA: received from client: %s", msg); + + if (new_fd < 0) + return; + + if (!msg) { + strcpy(send_buf, "NULL request, failure"); + info("BBB: send to client: %s", send_buf); + send_reply(new_fd, send_buf); + } else { + //identify the cmd + if (0 == strcasecmp(msg, "get total nodes and slots")) { + get_total_nodes_slots(&nodes, &slots); + sprintf(send_buf, "total_nodes=%d total_slots=%d", + nodes, slots); + info("BBB: send to client: %s", send_buf); + send_reply(new_fd, send_buf); + } else if (0 == strcasecmp(msg, "get available nodes and slots")) { + get_free_nodes_slots(&nodes, &slots); + sprintf(send_buf, "avail_nodes=%d avail_slots=%d", + nodes, slots); + info("BBB: send to client: %s", send_buf); + send_reply(new_fd, send_buf); + } else if (0 == strncasecmp(msg, "allocate", 8)) 
{ + allocate_job_op(new_fd, msg); + } else if (0 == strncasecmp(msg, "deallocate", 10)) { + deallocate(msg); + } + } + return; +} + +extern void send_reply(slurm_fd_t new_fd, char *response) +{ + _send_msg(new_fd, response, strlen(response)+1); +} diff --git a/src/plugins/slurmctld/dynalloc/msg.h b/src/plugins/slurmctld/dynalloc/msg.h new file mode 100644 index 0000000000000000000000000000000000000000..08961759334e798ad30ea3d9f2faf3c2230a04fe --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/msg.h @@ -0,0 +1,92 @@ +/*****************************************************************************\ + * msg.h - Message/communcation manager for dynalloc (resource dynamic allocation) plugin + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. 
If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef DYNALLOC_MSG_H_ +#define DYNALLOC_MSG_H_ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "slurm/slurm_errno.h" + +#include "src/common/bitstring.h" +#include "src/common/hostlist.h" +#include "src/common/log.h" +#include "src/common/parse_config.h" +#include "src/common/read_config.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_interface.h" +#include "src/common/uid.h" +#include "src/common/xmalloc.h" +#include "src/common/xsignal.h" +#include "src/common/xstring.h" +#include "src/slurmctld/slurmctld.h" + +/* + * Spawn message hander thread + */ +extern int spawn_msg_thread(void); + +/* + * Terminate message hander thread + */ +extern void term_msg_thread(void); + +/* + * Send message + */ +extern void send_reply(slurm_fd_t new_fd, char *response); + +#endif /* DYNALLOC_MSG_H_ */ diff --git a/src/plugins/slurmctld/dynalloc/slurmctld_dynalloc.c b/src/plugins/slurmctld/dynalloc/slurmctld_dynalloc.c new file mode 
100644 index 0000000000000000000000000000000000000000..36191e73af164e78e7c1c2b7599ac851d05caa79 --- /dev/null +++ b/src/plugins/slurmctld/dynalloc/slurmctld_dynalloc.c @@ -0,0 +1,70 @@ +/*****************************************************************************\ + * slurmctld_dynalloc.c - plugin for dynalloc (resource dynamic allocation) + ***************************************************************************** + * Copyright (C) 2012-2013 Los Alamos National Security, LLC. + * Written by Jimmy Cao <Jimmy.Cao@emc.com>, Ralph Castain <rhc@open-mpi.org> + * All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include <stdio.h> + +#include "slurm/slurm_errno.h" +#include "slurm/slurm.h" + +#include "src/common/plugin.h" +#include "src/common/log.h" +#include "src/common/node_select.h" +#include "src/common/slurm_priority.h" +#include "src/slurmctld/slurmctld.h" + +#include "msg.h" + +const char plugin_name[] = "SLURM resource dynamic allocation plugin"; +const char plugin_type[] = "slurmctld/dynalloc"; +const uint32_t plugin_version = 100; + +/**************************************************************************/ +/* TAG( init ) */ +/**************************************************************************/ +extern int init( void ) +{ + verbose( "sched: resource dynamic allocation plugin loaded" ); + return spawn_msg_thread(); +} + +/**************************************************************************/ +/* TAG( fini ) */ +/**************************************************************************/ +extern void fini( void ) +{ + term_msg_thread(); +} diff --git a/src/plugins/switch/Makefile.in b/src/plugins/switch/Makefile.in index 9b52da2517d43196b25fe5230efe3595b1178ec7..b90c00217dcd4f4d89498e4e5c1fa4c213221b05 100644 --- a/src/plugins/switch/Makefile.in +++ b/src/plugins/switch/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/switch DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ 
$(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ 
+HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/switch/none/Makefile.in b/src/plugins/switch/none/Makefile.in index 73cd330ed5303217350ba5dbafe49393d47b5adc..b73871a43400c4c6e9b31ea143bf995de20f4bc7 100644 --- a/src/plugins/switch/none/Makefile.in +++ b/src/plugins/switch/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/switch/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ 
$(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ 
+RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/switch/none/switch_none.c b/src/plugins/switch/none/switch_none.c index fc1da857a6659f0e4b04e55b94b7dad128c42ca7..ee221d098044dbf75310d4a918bcfbef97fdc215 100644 --- a/src/plugins/switch/none/switch_none.c +++ b/src/plugins/switch/none/switch_none.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/switch/nrt/Makefile.in b/src/plugins/switch/nrt/Makefile.in index bd352f62cee68d69f534873485ce5ba9e7366b16..dd9b5a8bf4dcfb19f9a9e3e99868c8e00615444b 100644 --- a/src/plugins/switch/nrt/Makefile.in +++ b/src/plugins/switch/nrt/Makefile.in @@ -55,9 +55,10 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = src/plugins/switch/nrt -DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -225,6 +229,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -245,6 +251,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -254,6 +263,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -261,6 +272,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -295,6 +315,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -322,6 
+345,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/switch/nrt/README b/src/plugins/switch/nrt/README deleted file mode 100644 index a1c44db46c03e01c691c7f8c9515f2f01fd67ea3..0000000000000000000000000000000000000000 --- a/src/plugins/switch/nrt/README +++ /dev/null @@ -1,26 +0,0 @@ -Sample tests - -./srun -N2 --network="sn_all,devtype=ib" hostname -./srun -N2 --network="sn_all,devtype=iponly" hostname -./srun -N2 --network="sn_single,devtype=ib" hostname -./srun -N2 --network="sn_single,devtype=ib,us" hostname -./srun -N2 --network="sn_single,devtype=iponly,ipv4" hostname -./srun -N2 --network="sn_single,devtype=iponly,ipv6" hostname -./srun -N2 --network="sn_all,ipv6" hostname -./srun -N2 --network="mlx4_0" hostname -./srun -N2 --network="eth0" hostname -./srun -N2 --network="mlx4_0,mpi,lapi" hostname -./srun -N2 --network="mlx4_0,mpi,lapi,us" hostname -./srun -N2 --network="mlx4_0,mpi,lapi,instances=3" hostname -./srun -N2 -n4 --network="mlx4_0,mpi,lapi,us,instances=3" hostname -./srun -N2 --network="sn_all" hostname -./srun -N2 --network="sn_all,mpi,lapi" hostname -./srun -N2 --network="sn_all,mpi,lapi" hostname -./srun -N2 -n4 --network="sn_all,mpi,lapi,instances=2" hostname -./srun -N2 -n4 --network="sn_all,mpi,lapi,instances=3,us" hostname -./srun -N2 --network="mlx4_0,bulk_xfer=1000,us" hostname - -FAILURES -./srun -N2 --network="sn_all,devtype=hfi" hostname -./srun -N2 --network="sn_single,devtype=hfi" hostname -./srun -N2 --network="sn_single,devtype=iponly,us" hostname diff --git a/src/plugins/switch/nrt/libpermapi/Makefile.in b/src/plugins/switch/nrt/libpermapi/Makefile.in index b4706e70b25c242233caa550f93963050118d3d9..b6da436cb0b6b2685165d5d7da4bfa237f361b41 
100644 --- a/src/plugins/switch/nrt/libpermapi/Makefile.in +++ b/src/plugins/switch/nrt/libpermapi/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/switch/nrt/libpermapi DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = -DMYSELF_SO=\"$(libdir)/slurm/libpermapi.so\" @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 
+220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/switch/nrt/libpermapi/shr_64.c b/src/plugins/switch/nrt/libpermapi/shr_64.c index 9d25cc13b2b447edd61827d297e2af72d47fbcf8..4d03b7f65ab5248d2b0f9e547d67170c030ebd3a 100644 --- a/src/plugins/switch/nrt/libpermapi/shr_64.c +++ b/src/plugins/switch/nrt/libpermapi/shr_64.c @@ -6,7 +6,7 @@ * Written by Danny Auble <da@schedmd.com> et. al. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -461,7 +461,7 @@ static char *_uint16_array_to_str(int array_len, const uint16_t *array) char *sep = ","; /* seperator */ char *str = xstrdup(""); - if(array == NULL) + if (array == NULL) return str; for (i = 0; i < array_len; i++) { diff --git a/src/plugins/switch/nrt/nrt.c b/src/plugins/switch/nrt/nrt.c index 5bd751de0ffdaec325de32ea3f40e77acc6add95..45c3457d3fb6dac7438b0cc453bfb4e79e244d6e 100644 --- a/src/plugins/switch/nrt/nrt.c +++ b/src/plugins/switch/nrt/nrt.c @@ -9,7 +9,7 @@ * Largely re-written for NRT support by Morris Jette <jette@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -1332,10 +1332,8 @@ _allocate_window_single(char *adapter_name, slurm_nrt_jobinfo_t *jp, } continue; } -// if ((network_id >= 0) && (adapter->network_id != network_id)) -// continue; if ((adapter_type != NRT_MAX_ADAPTER_TYPES) && - (adapter->adapter_type == adapter_type)) { + (node->adapter_list[i].adapter_type == adapter_type)) { adapter = &node->adapter_list[i]; break; } @@ -1573,8 +1571,6 @@ _print_adapter_status(nrt_cmd_status_adapter_t *status_adapter) hostset_t hs; hs = hostset_create(""); - if (hs == NULL) - fatal("hostset_create malloc failure"); info("--Begin Adapter Status--"); info(" adapter_name: %s", status_adapter->adapter_name); info(" adapter_type: %s", @@ -1663,8 +1659,6 @@ _print_nodeinfo(slurm_nrt_nodeinfo_t *n) info(" window_count: %hu", a->window_count); hs = hostset_create(""); - if (hs == NULL) - fatal("hostset_create malloc failure"); w = a->window_list; for (j = 0; j < a->window_count; j++) { if ((w[j].state == NRT_WIN_AVAILABLE) && @@ -3062,7 +3056,11 @@ nrt_build_jobinfo(slurm_nrt_jobinfo_t *jp, hostlist_t hl, } 
hostlist_iterator_reset(hi); - if (adapter_type == NRT_IPONLY) { + if (nnodes < 2) { + /* Without more than one node, high-speed network access is + * unnecessary */ + jp->tables_per_task = 0; + } else if (adapter_type == NRT_IPONLY) { /* If tables_per_task != 0 for adapter_type == NRT_IPONLY * then the device's window count in NRT is incremented. * When we later read the adapter information, the adapter @@ -3530,8 +3528,8 @@ nrt_free_jobinfo(slurm_nrt_jobinfo_t *jp) tableinfo = &jp->tableinfo[i]; xfree(tableinfo->table); } - xfree(jp->tableinfo); } + xfree(jp->tableinfo); if (jp->nodenames) hostlist_destroy(jp->nodenames); @@ -3615,6 +3613,7 @@ _wait_for_window_unloaded(char *adapter_name, nrt_adapter_t adapter_type, _print_adapter_status(&status_adapter); } for (j = 0; j < window_count; j++) { + /* CLANG false positive here */ if (status_array[j].window_id == window_id) break; } @@ -4361,8 +4360,6 @@ nrt_clear_node_state(void) } hs = hostset_create(""); - if (hs == NULL) - fatal("hostset_create malloc failure"); } for (k = 0; k < window_count; k++) { if (debug_flags & DEBUG_FLAG_SWITCH) { diff --git a/src/plugins/switch/nrt/nrt_keys.h b/src/plugins/switch/nrt/nrt_keys.h index 9c53097ac56e581e433d69e6e6f76249c5866d43..f0f21cfe29a9df320f1564d05c3a5b041888f1c9 100644 --- a/src/plugins/switch/nrt/nrt_keys.h +++ b/src/plugins/switch/nrt/nrt_keys.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER.
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/switch/nrt/slurm_nrt.h b/src/plugins/switch/nrt/slurm_nrt.h index 0306f80aaa0f3c91059f9c20d9ecce73f8e90a6d..6030a13387c062e1a1501d17462d4fc5ffed7ffa 100644 --- a/src/plugins/switch/nrt/slurm_nrt.h +++ b/src/plugins/switch/nrt/slurm_nrt.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/switch/nrt/switch_nrt.c b/src/plugins/switch/nrt/switch_nrt.c index 3634bc39551e8d4ad9a8c54cc6324872cd1e44e2..daf045eb7512d20d0a202b858e08ea999312c635 100644 --- a/src/plugins/switch/nrt/switch_nrt.c +++ b/src/plugins/switch/nrt/switch_nrt.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/Makefile.in b/src/plugins/task/Makefile.in index 768df9bf7f41ed7d94abde4f0a36b0f85512d79d..0d58037444d55069e980cbace47fb01bc19be39b 100644 --- a/src/plugins/task/Makefile.in +++ b/src/plugins/task/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/task DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ 
+FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/task/affinity/Makefile.in b/src/plugins/task/affinity/Makefile.in index 358d3500ec71f7d7566ae76c99230bc07dc3ef0b..5cb972e93a3a4dc91a0dd98e7d94f80157b99c0a 100644 --- a/src/plugins/task/affinity/Makefile.in +++ b/src/plugins/task/affinity/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/task/affinity DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ 
$(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -192,6 +196,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -212,6 +218,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -221,6 +230,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -228,6 +239,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = 
@HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -262,6 +282,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -289,6 +312,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/task/affinity/affinity.c b/src/plugins/task/affinity/affinity.c index f8656a81b3a4c26b9d6700c5470e067e565bff16..81698763e2df83bfa87c18dfe6ac529e14660589 100644 --- a/src/plugins/task/affinity/affinity.c +++ b/src/plugins/task/affinity/affinity.c @@ -5,7 +5,7 @@ * Copyright (C) 2005-2006 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/affinity/affinity.h b/src/plugins/task/affinity/affinity.h index bfb5d2db66517eaa32e877a9eda2691f1f3f0cbc..9e33f7acb54c53760a1305ca39a286d11c041d02 100644 --- a/src/plugins/task/affinity/affinity.h +++ b/src/plugins/task/affinity/affinity.h @@ -5,7 +5,7 @@ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/affinity/cpuset.c b/src/plugins/task/affinity/cpuset.c index 976c860ce2b285415a33c6230b0b539bc67f7242..405c93015821bafceee185b189bfeb9149190de4 100644 --- a/src/plugins/task/affinity/cpuset.c +++ b/src/plugins/task/affinity/cpuset.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -38,6 +38,8 @@ \*****************************************************************************/ #include "affinity.h" +static bool cpuset_prefix_set = false; +static char *cpuset_prefix = ""; static void _cpuset_to_cpustr(const cpu_set_t *mask, char *str) { @@ -69,11 +71,26 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) /* Copy "cpus" contents from parent directory * "cpus" must be set before any tasks can be added. 
*/ - snprintf(file_path, sizeof(file_path), "%s/cpus", base); + snprintf(file_path, sizeof(file_path), "%s/%scpus", + base, cpuset_prefix); + fd = open(file_path, O_RDONLY); if (fd < 0) { - error("open(%s): %m", file_path); - return -1; + if (!cpuset_prefix_set) { + cpuset_prefix_set = 1; + cpuset_prefix = "cpuset."; + snprintf(file_path, sizeof(file_path), "%s/%scpus", + base, cpuset_prefix); + fd = open(file_path, O_RDONLY); + if (fd < 0) { + cpuset_prefix = ""; + error("open(%s): %m", file_path); + return -1; + } + } else { + error("open(%s): %m", file_path); + return -1; + } } rc = read(fd, mstr, sizeof(mstr)); close(fd); @@ -81,7 +98,8 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) error("read(%s): %m", file_path); return -1; } - snprintf(file_path, sizeof(file_path), "%s/cpus", path); + snprintf(file_path, sizeof(file_path), "%s/%scpus", + path, cpuset_prefix); fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); @@ -96,7 +114,8 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) /* Copy "mems" contents from parent directory, if it exists. * "mems" must be set before any tasks can be added. 
*/ - snprintf(file_path, sizeof(file_path), "%s/mems", base); + snprintf(file_path, sizeof(file_path), "%s/%smems", + base, cpuset_prefix); fd = open(file_path, O_RDONLY); if (fd < 0) { error("open(%s): %m", file_path); @@ -108,7 +127,8 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) error("read(%s): %m", file_path); return -1; } - snprintf(file_path, sizeof(file_path), "%s/mems", path); + snprintf(file_path, sizeof(file_path), "%s/%smems", + path, cpuset_prefix); fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); @@ -155,7 +175,8 @@ int slurm_set_cpuset(char *base, char *path, pid_t pid, size_t size, } /* Set "cpus" per user request */ - snprintf(file_path, sizeof(file_path), "%s/cpus", path); + snprintf(file_path, sizeof(file_path), "%s/%scpus", + path, cpuset_prefix); _cpuset_to_cpustr(mask, mstr); fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { @@ -171,7 +192,8 @@ int slurm_set_cpuset(char *base, char *path, pid_t pid, size_t size, /* copy "mems" contents from parent directory, if it exists. * "mems" must be set before any tasks can be added. 
*/ - snprintf(file_path, sizeof(file_path), "%s/mems", base); + snprintf(file_path, sizeof(file_path), "%s/%smems", + base, cpuset_prefix); fd = open(file_path, O_RDONLY); if (fd < 0) { error("open(%s): %m", file_path); @@ -182,7 +204,8 @@ int slurm_set_cpuset(char *base, char *path, pid_t pid, size_t size, error("read(%s): %m", file_path); return -1; } - snprintf(file_path, sizeof(file_path), "%s/mems", path); + snprintf(file_path, sizeof(file_path), "%s/%smems", + path, cpuset_prefix); fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); @@ -231,7 +254,8 @@ int slurm_get_cpuset(char *path, pid_t pid, size_t size, cpu_set_t *mask) char file_path[PATH_MAX]; char mstr[1 + CPU_SETSIZE * 4]; - snprintf(file_path, sizeof(file_path), "%s/cpus", path); + snprintf(file_path, sizeof(file_path), "%s/%scpus", + path, cpuset_prefix); fd = open(file_path, O_RDONLY); if (fd < 0) { error("open(%s): %m", file_path); @@ -269,7 +293,8 @@ int slurm_memset_available(void) char file_path[PATH_MAX]; struct stat buf; - snprintf(file_path, sizeof(file_path), "%s/mems", CPUSET_DIR); + snprintf(file_path, sizeof(file_path), "%s/%smems", + CPUSET_DIR, cpuset_prefix); return stat(file_path, &buf); } @@ -280,7 +305,8 @@ int slurm_set_memset(char *path, nodemask_t *new_mask) int fd, i, max_node; ssize_t rc; - snprintf(file_path, sizeof(file_path), "%s/mems", path); + snprintf(file_path, sizeof(file_path), "%s/%smems", + path, cpuset_prefix); fd = open(file_path, O_CREAT | O_RDWR, 0700); if (fd < 0) { error("open(%s): %m", file_path); diff --git a/src/plugins/task/affinity/dist_tasks.c b/src/plugins/task/affinity/dist_tasks.c index b05118ee486cf5102c16e42c76f201045a25b3b6..d3618b537dc7c2fc5077e25629a4ac0a2b1b63b1 100644 --- a/src/plugins/task/affinity/dist_tasks.c +++ b/src/plugins/task/affinity/dist_tasks.c @@ -5,7 +5,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -191,13 +191,6 @@ void batch_bind(batch_job_launch_msg_t *req) (conf->sockets * conf->cores)); req_map = (bitstr_t *) bit_alloc(num_cpus); hw_map = (bitstr_t *) bit_alloc(conf->block_map_size); - if (!req_map || !hw_map) { - error("task/affinity: malloc error"); - FREE_NULL_BITMAP(req_map); - FREE_NULL_BITMAP(hw_map); - slurm_cred_free_args(&arg); - return; - } #ifdef HAVE_FRONT_END { @@ -497,11 +490,6 @@ static char *_alloc_mask(launch_tasks_request_msg_t *req, return NULL; alloc_mask = bit_alloc(bit_size(alloc_bitmap)); - if (!alloc_mask) { - error("malloc error"); - FREE_NULL_BITMAP(alloc_bitmap); - return NULL; - } i = 0; for (s=0, s_miss=false; s<sockets; s++) { @@ -603,13 +591,6 @@ static bitstr_t *_get_avail_map(launch_tasks_request_msg_t *req, req_map = (bitstr_t *) bit_alloc(num_cpus); hw_map = (bitstr_t *) bit_alloc(conf->block_map_size); - if (!req_map || !hw_map) { - error("task/affinity: malloc error"); - FREE_NULL_BITMAP(req_map); - FREE_NULL_BITMAP(hw_map); - slurm_cred_free_args(&arg); - return NULL; - } /* Transfer core_bitmap data to local req_map. 
* The MOD function handles the case where fewer processes * physically exist than are configured (slurmd is out of @@ -774,7 +755,6 @@ static int _task_layout_lllp_multi(launch_tasks_request_msg_t *req, req->cpus_per_task = i; } - size = bit_size(avail_map); i = 0; while (taskcount < max_tasks) { if (taskcount == last_taskcount) @@ -873,7 +853,6 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req, req->cpus_per_task = i; } - size = bit_size(avail_map); i = 0; while (taskcount < max_tasks) { if (taskcount == last_taskcount) @@ -1082,7 +1061,7 @@ static bitstr_t *_lllp_map_abstract_mask(bitstr_t *bitmask) for (i = 0; i < num_bits; i++) { if (bit_test(bitmask,i)) { bit = BLOCK_MAP(i); - if(bit < bit_size(newmask)) + if (bit < bit_size(newmask)) bit_set(newmask, bit); else error("_lllp_map_abstract_mask: can't go from " diff --git a/src/plugins/task/affinity/dist_tasks.h b/src/plugins/task/affinity/dist_tasks.h index 48efce041db097f72ba14bb4578b75cd1503476f..3e4d995a470b08da920fdf03da94ba3645f70c29 100644 --- a/src/plugins/task/affinity/dist_tasks.h +++ b/src/plugins/task/affinity/dist_tasks.h @@ -4,7 +4,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/affinity/numa.c b/src/plugins/task/affinity/numa.c index 41b570c846ecde4baaab855d329a15523c05323d..b09b749e88badb1de0f2b26c4ea0a740a8ba6579 100644 --- a/src/plugins/task/affinity/numa.c +++ b/src/plugins/task/affinity/numa.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/affinity/schedutils.c b/src/plugins/task/affinity/schedutils.c index d4e15a73c688738bed202922c0ed4beff1e805a2..7f3a3b61adfe2d6fe7df017003caab6db54cb505 100644 --- a/src/plugins/task/affinity/schedutils.c +++ b/src/plugins/task/affinity/schedutils.c @@ -6,7 +6,7 @@ * Copyright (C) 2004 Robert Love * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/affinity/task_affinity.c b/src/plugins/task/affinity/task_affinity.c index 7792142133a0da31a9344e79dd2cfe847601f0e2..0f527264598f627df44ea0ce02301486fb9659ba 100644 --- a/src/plugins/task/affinity/task_affinity.c +++ b/src/plugins/task/affinity/task_affinity.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/cgroup/Makefile.in b/src/plugins/task/cgroup/Makefile.in index cf1ea53efde59d8c78f138c596e2354cdf0b8065..e1b855d9b56bad1c197a159a088ff2dbef5ac93e 100644 --- a/src/plugins/task/cgroup/Makefile.in +++ b/src/plugins/task/cgroup/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/task/cgroup DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -183,6 +187,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -203,6 +209,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ 
+FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -212,6 +221,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -219,6 +230,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -253,6 +273,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -280,6 +303,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/task/cgroup/task_cgroup.c b/src/plugins/task/cgroup/task_cgroup.c index 0113b12c564642df332263aa702be53a5df01d34..239b16aad4faec892d8d8da527ad212638b24875 100644 --- a/src/plugins/task/cgroup/task_cgroup.c +++ b/src/plugins/task/cgroup/task_cgroup.c @@ -6,7 +6,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/cgroup/task_cgroup.h b/src/plugins/task/cgroup/task_cgroup.h index a65d3a4f2edba0b55a54cb6f3afda0e5587be1e3..850e2ca1973dcf04c4d7a13aa498bcde8902c4b8 100644 --- a/src/plugins/task/cgroup/task_cgroup.h +++ b/src/plugins/task/cgroup/task_cgroup.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c index e569e80642964d73c934ddf267ce6628c4c98842..28369fa0d0875986bbd7d6c66c119883e8b5d84f 100644 --- a/src/plugins/task/cgroup/task_cgroup_cpuset.c +++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c @@ -7,7 +7,7 @@ * Written by Martin Perry <martin.perry@bull.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -98,6 +98,9 @@ static inline int hwloc_bitmap_asprintf(char **str, hwloc_bitmap_t bitmap) #define PATH_MAX 256 #endif +static bool cpuset_prefix_set = false; +static char *cpuset_prefix = ""; + static char user_cgroup_path[PATH_MAX]; static char job_cgroup_path[PATH_MAX]; static char jobstep_cgroup_path[PATH_MAX]; @@ -181,10 +184,10 @@ static int _xcgroup_cpuset_init(xcgroup_t* cg) int fstatus,i; char* cpuset_metafiles[] = { - "cpuset.cpus", - "cpuset.mems" + "cpus", + "mems" }; - char* cpuset_meta; + char cpuset_meta[PATH_MAX]; char* cpuset_conf; size_t csize; @@ -211,11 +214,19 @@ static int _xcgroup_cpuset_init(xcgroup_t* cg) /* inherits ancestor params */ for (i = 0 ; i < 2 ; i++) { - cpuset_meta = cpuset_metafiles[i]; + again: + snprintf(cpuset_meta, sizeof(cpuset_meta), "%s%s", + cpuset_prefix, cpuset_metafiles[i]); if (xcgroup_get_param(&acg,cpuset_meta, &cpuset_conf,&csize) != XCGROUP_SUCCESS) { - debug2("task/cgroup: assuming no cpuset cg " + if (!cpuset_prefix_set) { + cpuset_prefix_set = 1; + cpuset_prefix = "cpuset."; + goto again; + } + + debug("task/cgroup: assuming no cpuset cg " "support for '%s'",acg.path); xcgroup_destroy(&acg); return fstatus; @@ -224,7 +235,7 @@ static int _xcgroup_cpuset_init(xcgroup_t* cg) cpuset_conf[csize-1]='\0'; if (xcgroup_set_param(cg,cpuset_meta,cpuset_conf) != XCGROUP_SUCCESS) { - debug2("task/cgroup: unable to write %s configuration " + debug("task/cgroup: unable to write %s configuration " "(%s) for cpuset cg '%s'",cpuset_meta, cpuset_conf,cg->path); xcgroup_destroy(&acg); @@ -439,6 +450,7 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job) char* user_alloc_cores = NULL; char* job_alloc_cores = NULL; char* step_alloc_cores = NULL; + char cpuset_meta[PATH_MAX]; char* cpus = NULL; size_t cpus_size; @@ -459,8 +471,16 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job) xfree(slurm_cgpath); return SLURM_ERROR; } - rc = 
xcgroup_get_param(&slurm_cg,"cpuset.cpus",&cpus,&cpus_size); +again: + snprintf(cpuset_meta, sizeof(cpuset_meta), "%scpus", cpuset_prefix); + rc = xcgroup_get_param(&slurm_cg, cpuset_meta, &cpus,&cpus_size); if (rc != XCGROUP_SUCCESS || cpus_size == 1) { + if (!cpuset_prefix_set && (rc != XCGROUP_SUCCESS)) { + cpuset_prefix_set = 1; + cpuset_prefix = "cpuset."; + goto again; + } + /* initialize the cpusets as it was inexistant */ if (_xcgroup_cpuset_init(&slurm_cg) != XCGROUP_SUCCESS) { @@ -576,7 +596,7 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job) /* * check that user's cpuset cgroup is consistant and add the job cores */ - rc = xcgroup_get_param(&user_cpuset_cg,"cpuset.cpus",&cpus,&cpus_size); + rc = xcgroup_get_param(&user_cpuset_cg, cpuset_meta, &cpus,&cpus_size); if (rc != XCGROUP_SUCCESS || cpus_size == 1) { /* initialize the cpusets as it was inexistant */ if (_xcgroup_cpuset_init(&user_cpuset_cg) != @@ -592,7 +612,7 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job) xstrcat(user_alloc_cores,","); xstrcat(user_alloc_cores,cpus); } - xcgroup_set_param(&user_cpuset_cg,"cpuset.cpus",user_alloc_cores); + xcgroup_set_param(&user_cpuset_cg, cpuset_meta, user_alloc_cores); xfree(cpus); /* @@ -614,7 +634,7 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job) xcgroup_destroy(&job_cpuset_cg); goto error; } - xcgroup_set_param(&job_cpuset_cg,"cpuset.cpus",job_alloc_cores); + xcgroup_set_param(&job_cpuset_cg, cpuset_meta, job_alloc_cores); /* * create step cgroup in the cpuset ns (it should not exists) @@ -643,7 +663,7 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job) xcgroup_destroy(&step_cpuset_cg); goto error; } - xcgroup_set_param(&step_cpuset_cg,"cpuset.cpus",step_alloc_cores); + xcgroup_set_param(&step_cpuset_cg, cpuset_meta, step_alloc_cores); /* attach the slurmstepd to the step cpuset cgroup */ pid_t pid = getpid(); diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.h b/src/plugins/task/cgroup/task_cgroup_cpuset.h index 
a2452b55c1d81b424ec4e38df6e107b14a10c587..436bd4428929370a5a9b56e4d5de9140b8e90eeb 100644 --- a/src/plugins/task/cgroup/task_cgroup_cpuset.h +++ b/src/plugins/task/cgroup/task_cgroup_cpuset.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/cgroup/task_cgroup_devices.c b/src/plugins/task/cgroup/task_cgroup_devices.c index c24fdaf4690e06ab1743bb448317e79d8b852adb..a0f5bc82bc6f167fb42cc2cd14cc5a643ba85ce8 100644 --- a/src/plugins/task/cgroup/task_cgroup_devices.c +++ b/src/plugins/task/cgroup/task_cgroup_devices.c @@ -5,7 +5,7 @@ * Written by Yiannis Georgiou <yiannis.georgiou@bull.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -468,7 +468,7 @@ static int read_allowed_devices_file(char **allowed_devices) line[strlen(line)-1] = '\0'; /* global pattern matching and return the list of matches*/ - if(glob(line, GLOB_NOSORT, NULL, &globbuf) != 0){ + if (glob(line, GLOB_NOSORT, NULL, &globbuf) != 0){ debug3("Device %s does not exist", line); }else{ for(l=0; l < globbuf.gl_pathc; l++){ diff --git a/src/plugins/task/cgroup/task_cgroup_devices.h b/src/plugins/task/cgroup/task_cgroup_devices.h index f69a3d8097d5a6644fddf68fe1fd25ac055ddba0..ee18a8c176662a4af1b8a4e08f73bb3fcf2fbccf 100644 --- a/src/plugins/task/cgroup/task_cgroup_devices.h +++ b/src/plugins/task/cgroup/task_cgroup_devices.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/cgroup/task_cgroup_memory.c b/src/plugins/task/cgroup/task_cgroup_memory.c index f695e9bb293fa39e7addf8582e9c7640afe41779..4558ec37b269b17b952f37b3a2d71c2e49dde27a 100644 --- a/src/plugins/task/cgroup/task_cgroup_memory.c +++ b/src/plugins/task/cgroup/task_cgroup_memory.c @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/cgroup/task_cgroup_memory.h b/src/plugins/task/cgroup/task_cgroup_memory.h index 27ab417c91a05a1831e0b0021d3ae4d9b8bf346f..bda89804865c72aeeea0371800b2e09f7dbb041d 100644 --- a/src/plugins/task/cgroup/task_cgroup_memory.h +++ b/src/plugins/task/cgroup/task_cgroup_memory.h @@ -5,7 +5,7 @@ * Written by Matthieu Hautreux <matthieu.hautreux@cea.fr> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/task/none/Makefile.in b/src/plugins/task/none/Makefile.in index 255f7b56da00f4644efe6541f25f33555767acd5..0e97384bc595a9ac9db1b38467b0b4a5c0025bc8 100644 --- a/src/plugins/task/none/Makefile.in +++ b/src/plugins/task/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/task/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ 
$(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ 
+RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/task/none/task_none.c b/src/plugins/task/none/task_none.c index ec5a5042f885051b7c73f92cecb4b1269960091d..517ca94399705d010360e113600d4d97509686c6 100644 --- a/src/plugins/task/none/task_none.c +++ b/src/plugins/task/none/task_none.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/topology/3d_torus/Makefile.in b/src/plugins/topology/3d_torus/Makefile.in index a44829498f5b270fe9ab6150ced9ca7c357ba342..ca3246af7c3959aca23318c503d581cc80bd83e9 100644 --- a/src/plugins/topology/3d_torus/Makefile.in +++ b/src/plugins/topology/3d_torus/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/topology/3d_torus DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + 
$(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -181,6 +185,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -201,6 +207,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -210,6 +219,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -217,6 +228,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -251,6 +271,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -278,6 +301,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = 
@REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/topology/3d_torus/hilbert.c b/src/plugins/topology/3d_torus/hilbert.c index 6a26e722f3d4289f95198625f04dc59e6df8295b..da27fc184d97254563e7b9f97d07fc8320cc7915 100644 --- a/src/plugins/topology/3d_torus/hilbert.c +++ b/src/plugins/topology/3d_torus/hilbert.c @@ -53,9 +53,9 @@ int n) // I dimension { P = Q - 1; for( i = n-1; i; i-- ) - if( X[i] & Q ) X[0] ^= P; // invert + if ( X[i] & Q ) X[0] ^= P; // invert else{ t = (X[0] ^ X[i]) & P; X[0] ^= t; X[i] ^= t; } // exchange - if( X[0] & Q ) X[0] ^= P; // invert + if ( X[0] & Q ) X[0] ^= P; // invert } } extern void AxestoTranspose( @@ -70,9 +70,9 @@ int n) // I dimension for( Q = 1 << (b - 1); Q > 1; Q >>= 1 ) { P = Q - 1; - if( X[0] & Q ) X[0] ^= P; // invert + if ( X[0] & Q ) X[0] ^= P; // invert for( i = 1; i < n; i++ ) - if( X[i] & Q ) X[0] ^= P; // invert + if ( X[i] & Q ) X[0] ^= P; // invert else{ t = (X[0] ^ X[i]) & P; X[0] ^= t; X[i] ^= t; } // exchange } diff --git a/src/plugins/topology/3d_torus/hilbert_slurm.c b/src/plugins/topology/3d_torus/hilbert_slurm.c index f80984e41edd55cb1215b09c335363124ac08e23..124d3a564eba0ce45cdaef2bdb164f6c57888de6 100644 --- a/src/plugins/topology/3d_torus/hilbert_slurm.c +++ b/src/plugins/topology/3d_torus/hilbert_slurm.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/topology/3d_torus/topology_3d_torus.c b/src/plugins/topology/3d_torus/topology_3d_torus.c index 4f1d5102c0c5b2a8d7b460d48f892e6891d8b964..930abc8dd3ee7ef49fa2daea2fb2dfae4c760582 100644 --- a/src/plugins/topology/3d_torus/topology_3d_torus.c +++ b/src/plugins/topology/3d_torus/topology_3d_torus.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/topology/Makefile.in b/src/plugins/topology/Makefile.in index 38d3790c80ab00c2d29f8990d162db797a0507f9..ad8e802695f94908b476274f3fcd158a42e9cdaf 100644 --- a/src/plugins/topology/Makefile.in +++ b/src/plugins/topology/Makefile.in @@ -57,6 +57,7 @@ subdir = src/plugins/topology DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ 
$(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -169,6 +173,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -189,6 +195,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -198,6 +207,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -205,6 +216,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -239,6 +259,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -266,6 +289,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ 
+RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/topology/node_rank/Makefile.in b/src/plugins/topology/node_rank/Makefile.in index 18bbd6c59b92bdc93f5e98f5a2dff7d400080d9b..fcb8ad32ce537ccbea1281a6aa0e86cc52b58f31 100644 --- a/src/plugins/topology/node_rank/Makefile.in +++ b/src/plugins/topology/node_rank/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/topology/node_rank DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = 
@CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/topology/node_rank/topology_node_rank.c b/src/plugins/topology/node_rank/topology_node_rank.c index b9cabef6d3ae29ee769cbda2198cc9715e8eb4b7..f5ac057bbebd5b9519ca3e4cfbc07ad02864294b 100644 --- a/src/plugins/topology/node_rank/topology_node_rank.c +++ b/src/plugins/topology/node_rank/topology_node_rank.c @@ -9,7 +9,7 @@ * 
CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/topology/none/Makefile.in b/src/plugins/topology/none/Makefile.in index 888e292a6c1a3de832d8b627c7e443e5bee8b20e..037d1f944d54192b0fb14dc384eec9983a7e5037 100644 --- a/src/plugins/topology/none/Makefile.in +++ b/src/plugins/topology/none/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/topology/none DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = 
@CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 +300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/plugins/topology/none/topology_none.c b/src/plugins/topology/none/topology_none.c index 67ab6582f22201d3c053c975af9f401b34944777..e1460f638da0e8973786633ebaef1c7129ccf7a6 100644 --- a/src/plugins/topology/none/topology_none.c +++ 
b/src/plugins/topology/none/topology_none.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/plugins/topology/tree/Makefile.am b/src/plugins/topology/tree/Makefile.am index b739c9c474f1b2c59de3b3f84d6785c800eb3e46..a08f7ded871ca0841b8dd1c5bdfa9ae3cb384842 100644 --- a/src/plugins/topology/tree/Makefile.am +++ b/src/plugins/topology/tree/Makefile.am @@ -1,6 +1,5 @@ # Makefile for topology/tree plugin -AM_CPPFLAGS = -DTOPOLOGY_CONFIG_FILE=\"$(sysconfdir)/topology.conf\" AUTOMAKE_OPTIONS = foreign PLUGIN_FLAGS = -module -avoid-version --export-dynamic diff --git a/src/plugins/topology/tree/Makefile.in b/src/plugins/topology/tree/Makefile.in index 595cfa37e6b707b0e2baab4ae33eadeb7406ff8d..4074924729f028afd7ca89067330b5f512399a46 100644 --- a/src/plugins/topology/tree/Makefile.in +++ b/src/plugins/topology/tree/Makefile.in @@ -58,6 +58,7 @@ subdir = src/plugins/topology/tree DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -180,6 +184,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -200,6 +206,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -209,6 +218,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -216,6 +227,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -250,6 +270,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -277,6 
+300,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -365,7 +391,6 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -AM_CPPFLAGS = -DTOPOLOGY_CONFIG_FILE=\"$(sysconfdir)/topology.conf\" AUTOMAKE_OPTIONS = foreign PLUGIN_FLAGS = -module -avoid-version --export-dynamic INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common diff --git a/src/plugins/topology/tree/topology_tree.c b/src/plugins/topology/tree/topology_tree.c index 383d4729e7fedff04e904dccb4afc9e20b9e112e..6b24c0ecd501b03ca64a55e4350870f8796a84ae 100644 --- a/src/plugins/topology/tree/topology_tree.c +++ b/src/plugins/topology/tree/topology_tree.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -97,7 +97,6 @@ static char* topo_conf = NULL; static void _destroy_switches(void *ptr); static void _free_switch_record_table(void); static int _get_switch_inx(const char *name); -static char *_get_topo_conf(void); static void _log_switches(void); static int _node_name2bitmap(char *node_names, bitstr_t **bitmap, hostlist_t *invalid_hostlist); @@ -295,8 +294,10 @@ static void _validate_switches(void) if (switch_ptr->level != -1) continue; hl = hostlist_create(switch_ptr->switches); - if (!hl) - fatal("hostlist_create: malloc failure"); + if (!hl) { + fatal("Invalid switches: %s", + switch_ptr->switches); + } while ((child = hostlist_pop(hl))) { j = _get_switch_inx(child); if ((j < 0) || (j == i)) { @@ -427,28 +428,6 @@ static void _free_switch_record_table(void) } } -static char *_get_topo_conf(void) -{ - char *val = getenv("SLURM_CONF"); - char *rc; - int i; - - if (!val) - return xstrdup(TOPOLOGY_CONFIG_FILE); - - /* Replace file name on end of path */ - i = strlen(val) - strlen("slurm.conf") + strlen("topology.conf") + 1; - rc = xmalloc(i); - strcpy(rc, val); - val = strrchr(rc, (int)'/'); - if (val) /* absolute path */ - val++; - else /* not absolute path */ - val = rc; - strcpy(val, "topology.conf"); - return rc; -} - /* Return count of switch configuration entries read */ extern int _read_topo_file(slurm_conf_switches_t **ptr_array[]) { @@ -461,7 +440,7 @@ extern int _read_topo_file(slurm_conf_switches_t **ptr_array[]) debug("Reading the topology.conf file"); if (!topo_conf) - topo_conf = _get_topo_conf(); + topo_conf = get_extra_conf_path("topology.conf"); conf_hashtbl = s_p_hashtbl_create(switch_options); if (s_p_parse_file(conf_hashtbl, NULL, topo_conf, false) == @@ -548,8 +527,6 @@ static int _node_name2bitmap(char *node_names, bitstr_t **bitmap, hostlist_t host_list; my_bitmap = (bitstr_t *) bit_alloc(node_record_count); - if (my_bitmap == NULL) - fatal("bit_alloc malloc 
failure"); *bitmap = my_bitmap; if (node_names == NULL) { diff --git a/src/sacct/Makefile.in b/src/sacct/Makefile.in index 3001ede49090c851c7f4b505ed0ac2a3044c451e..d40323269836e6e569d575384d8cf310a1fab6b9 100644 --- a/src/sacct/Makefile.in +++ b/src/sacct/Makefile.in @@ -61,6 +61,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -78,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -86,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = 
@GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sacct/options.c b/src/sacct/options.c index f2cdb0fb73670eca6fdb93d9e390b6a348ee24c7..3af44fdb7d8120690aa4d24a3765f6f447f1d077 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -45,11 +45,7 @@ #include <time.h> /* getopt_long options, integers but not characters */ -#define OPT_LONG_ALLOCS 0x100 -#define OPT_LONG_DUP 0x101 -#define OPT_LONG_HELP 0x102 -#define OPT_LONG_USAGE 0x103 -#define OPT_LONG_NAME 0x104 +#define OPT_LONG_NAME 0x100 void _help_fields_msg(void); void _help_msg(void); @@ -73,9 +69,9 @@ void _help_fields_msg(void) for (i = 0; fields[i].name; i++) { if (i & 3) printf(" "); - else if(i) + else if (i) printf("\n"); - printf("%-13s", fields[i].name); + printf("%-17s", fields[i].name); } printf("\n"); return; @@ -113,13 +109,13 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -128,23 +124,23 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); //info("got %s %d", name, i-start); name = _convert_to_id( name, gid ); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -153,7 +149,7 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -162,17 +158,17 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); name = _convert_to_id(name, gid); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -193,13 +189,13 @@ static int _addto_state_char_list(List char_list, char *names) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -208,12 +204,12 @@ static int _addto_state_char_list(List char_list, char *names) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); c = job_state_num(name); @@ -224,11 +220,11 @@ static int _addto_state_char_list(List char_list, char *names) name = xstrdup_printf("%d", c); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -237,7 +233,7 @@ static int _addto_state_char_list(List char_list, char *names) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -246,7 +242,7 @@ static int _addto_state_char_list(List char_list, char *names) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); c = job_state_num(name); @@ -256,11 +252,11 @@ static int _addto_state_char_list(List char_list, char *names) name = xstrdup_printf("%d", c); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -284,13 +280,13 @@ static int _addto_step_list(List step_list, char *names) int quote = 0; int count = 0; - if(!step_list) { + if (!step_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(step_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -299,12 +295,12 @@ static int _addto_step_list(List step_list, char *names) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { char *dot = NULL; name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); @@ -330,7 +326,7 @@ static int _addto_step_list(List step_list, char *names) xfree(name); while((curr_step = list_next(itr))) { - if((curr_step->jobid + if ((curr_step->jobid == selected_step->jobid) && (curr_step->stepid == selected_step-> @@ -338,7 +334,7 @@ static int _addto_step_list(List step_list, char *names) break; } - if(!curr_step) { + if (!curr_step) { list_append(step_list, selected_step); count++; @@ -352,7 +348,7 @@ static int _addto_step_list(List step_list, char *names) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); 
memcpy(name, names+start, (i-start)); @@ -374,13 +370,13 @@ static int _addto_step_list(List step_list, char *names) xfree(name); while((curr_step = list_next(itr))) { - if((curr_step->jobid == selected_step->jobid) + if ((curr_step->jobid == selected_step->jobid) && (curr_step->stepid == selected_step->stepid)) break; } - if(!curr_step) { + if (!curr_step) { list_append(step_list, selected_step); count++; } else @@ -453,7 +449,10 @@ sacct [<OPTION>] \n\ maxrsstask,averss,maxpages,maxpagesnode, \n\ maxpagestask,avepages,mincpu,mincpunode, \n\ mincputask,avecpu,ntasks,alloccpus,elapsed, \n\ - state,exitcode,avecpufreq,consumedenergy' \n\ + state,exitcode,avecpufreq,consumedenergy, \n\ + maxdiskread,maxdiskreadnode,maxdiskreadtask, \n\ + avediskread,maxdiskwrite,maxdiskwritenode, \n\ + maxdiskwritetask,avediskread' \n\ -L, --allclusters: \n\ Display jobs ran on all clusters. By default, only jobs \n\ ran on the cluster from where sacct is called are \n\ @@ -539,28 +538,25 @@ int get_data(void) ListIterator itr_step = NULL; slurmdb_job_cond_t *job_cond = params.job_cond; - if(params.opt_completion) { + if (params.opt_completion) { jobs = g_slurm_jobcomp_get_jobs(job_cond); return SLURM_SUCCESS; } else { jobs = slurmdb_jobs_get(acct_db_conn, job_cond); } - if (params.opt_fdump) - return SLURM_SUCCESS; - - if(!jobs) + if (!jobs) return SLURM_ERROR; itr = list_iterator_create(jobs); while((job = list_next(itr))) { - if(job->user) { + if (job->user) { struct passwd *pw = NULL; if ((pw=getpwnam(job->user))) job->uid = pw->pw_uid; } - if(!job->steps || !list_count(job->steps)) + if (!job->steps || !list_count(job->steps)) continue; itr_step = list_iterator_create(job->steps); @@ -568,7 +564,7 @@ int get_data(void) /* now aggregate the aggregatable */ job->alloc_cpus = MAX(job->alloc_cpus, step->ncpus); - if(step->state < JOB_COMPLETE) + if (step->state < JOB_COMPLETE) continue; job->tot_cpu_sec += step->tot_cpu_sec; job->tot_cpu_usec += step->tot_cpu_usec; @@ -610,51 
+606,48 @@ void parse_command_line(int argc, char **argv) bool set; static struct option long_options[] = { - {"allusers", no_argument, 0, 'a'}, - {"accounts", required_argument, 0, 'A'}, - {"allocations", no_argument, &params.opt_allocs, OPT_LONG_ALLOCS}, - {"brief", no_argument, 0, 'b'}, - {"completion", no_argument, &params.opt_completion, 'c'}, - {"dump", no_argument, 0, 'd'}, - {"duplicates", no_argument, &params.opt_dup, OPT_LONG_DUP}, - {"helpformat", no_argument, 0, 'e'}, - {"help-fields", no_argument, 0, 'e'}, - {"endtime", required_argument, 0, 'E'}, - {"file", required_argument, 0, 'f'}, - {"gid", required_argument, 0, 'g'}, - {"group", required_argument, 0, 'g'}, - {"help", no_argument, 0, 'h'}, - {"helpformat", no_argument, &params.opt_help, OPT_LONG_HELP}, - {"name", required_argument, 0, OPT_LONG_NAME}, - {"nnodes", required_argument, 0, 'i'}, - {"ncpus", required_argument, 0, 'I'}, - {"jobs", required_argument, 0, 'j'}, - {"timelimit-min", required_argument, 0, 'k'}, - {"timelimit-max", required_argument, 0, 'K'}, - {"long", no_argument, 0, 'l'}, - {"allclusters", no_argument, 0, 'L'}, - {"cluster", required_argument, 0, 'M'}, - {"clusters", required_argument, 0, 'M'}, - {"nodelist", required_argument, 0, 'N'}, - {"noheader", no_argument, 0, 'n'}, - {"fields", required_argument, 0, 'o'}, - {"format", required_argument, 0, 'o'}, - {"formatted_dump", no_argument, 0, 'O'}, - {"parsable", no_argument, 0, 'p'}, - {"parsable2", no_argument, 0, 'P'}, - {"qos", required_argument, 0, 'q'}, - {"partition", required_argument, 0, 'r'}, - {"state", required_argument, 0, 's'}, - {"starttime", required_argument, 0, 'S'}, - {"truncate", no_argument, 0, 'T'}, - {"uid", required_argument, 0, 'u'}, - {"usage", no_argument, &params.opt_help, OPT_LONG_USAGE}, - {"user", required_argument, 0, 'u'}, - {"verbose", no_argument, 0, 'v'}, - {"version", no_argument, 0, 'V'}, - {"wckeys", required_argument, 0, 'W'}, - {"associations", required_argument, 0, 'x'}, - {0, 0, 0, 0}}; + {"allusers",
no_argument, 0, 'a'}, + {"accounts", required_argument, 0, 'A'}, + {"allocations", no_argument, 0, 'X'}, + {"brief", no_argument, 0, 'b'}, + {"completion", no_argument, 0, 'c'}, + {"duplicates", no_argument, 0, 'D'}, + {"helpformat", no_argument, 0, 'e'}, + {"help-fields", no_argument, 0, 'e'}, + {"endtime", required_argument, 0, 'E'}, + {"file", required_argument, 0, 'f'}, + {"gid", required_argument, 0, 'g'}, + {"group", required_argument, 0, 'g'}, + {"help", no_argument, 0, 'h'}, + {"name", required_argument, 0, OPT_LONG_NAME}, + {"nnodes", required_argument, 0, 'i'}, + {"ncpus", required_argument, 0, 'I'}, + {"jobs", required_argument, 0, 'j'}, + {"timelimit-min", required_argument, 0, 'k'}, + {"timelimit-max", required_argument, 0, 'K'}, + {"long", no_argument, 0, 'l'}, + {"allclusters", no_argument, 0, 'L'}, + {"cluster", required_argument, 0, 'M'}, + {"clusters", required_argument, 0, 'M'}, + {"nodelist", required_argument, 0, 'N'}, + {"noheader", no_argument, 0, 'n'}, + {"fields", required_argument, 0, 'o'}, + {"format", required_argument, 0, 'o'}, + {"parsable", no_argument, 0, 'p'}, + {"parsable2", no_argument, 0, 'P'}, + {"qos", required_argument, 0, 'q'}, + {"partition", required_argument, 0, 'r'}, + {"state", required_argument, 0, 's'}, + {"starttime", required_argument, 0, 'S'}, + {"truncate", no_argument, 0, 'T'}, + {"uid", required_argument, 0, 'u'}, + {"usage", no_argument, 0, 'U'}, + {"user", required_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"version", no_argument, 0, 'V'}, + {"wckeys", required_argument, 0, 'W'}, + {"associations", required_argument, 0, 'x'}, + {0, 0, 0, 0}}; params.opt_uid = getuid(); params.opt_gid = getgid(); @@ -665,7 +658,7 @@ void parse_command_line(int argc, char **argv) while (1) { /* now cycle through the command line */ c = getopt_long(argc, argv, - "aA:bcC:dDeE:f:g:hi:I:j:k:K:lLM:nN:o:OpPq:r:s:S:Ttu:vVW:x:X", + "aA:bcC:DeE:f:g:hi:I:j:k:K:lLM:nN:o:pPq:r:s:S:Ttu:UvVW:x:X", long_options, &optionIndex); if 
(c == -1) break; @@ -674,7 +667,7 @@ void parse_command_line(int argc, char **argv) all_users = 1; break; case 'A': - if(!job_cond->acct_list) + if (!job_cond->acct_list) job_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->acct_list, optarg); @@ -690,21 +683,16 @@ void parse_command_line(int argc, char **argv) everything else. */ case 'M': - if(!strcasecmp(optarg, "-1")) { + if (!strcasecmp(optarg, "-1")) { all_clusters = 1; break; } all_clusters=0; - if(!job_cond->cluster_list) + if (!job_cond->cluster_list) job_cond->cluster_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->cluster_list, optarg); break; - case 'd': - error("--dump has been depricated and will go away " - "in future releases."); - params.opt_dump = 1; - break; case 'D': params.opt_dup = 1; break; @@ -721,7 +709,7 @@ void parse_command_line(int argc, char **argv) params.opt_filein = xstrdup(optarg); break; case 'g': - if(!job_cond->groupid_list) + if (!job_cond->groupid_list) job_cond->groupid_list = list_create(slurm_destroy_char); _addto_id_char_list(job_cond->groupid_list, optarg, 1); @@ -766,7 +754,7 @@ void parse_command_line(int argc, char **argv) exit(1); } - if(!job_cond->step_list) + if (!job_cond->step_list) job_cond->step_list = list_create( slurmdb_destroy_selected_step); _addto_step_list(job_cond->step_list, optarg); @@ -793,7 +781,7 @@ void parse_command_line(int argc, char **argv) print_fields_have_header = 0; break; case 'N': - if(job_cond->used_nodes) { + if (job_cond->used_nodes) { error("Aleady asked for nodes '%s'", job_cond->used_nodes); break; @@ -801,7 +789,7 @@ void parse_command_line(int argc, char **argv) job_cond->used_nodes = xstrdup(optarg); break; case OPT_LONG_NAME: - if(!job_cond->jobname_list) + if (!job_cond->jobname_list) job_cond->jobname_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->jobname_list, optarg); @@ -809,11 +797,6 @@ void parse_command_line(int argc, char **argv) case 
'o': xstrfmtcat(params.opt_field_list, "%s,", optarg); break; - case 'O': - error("--formatted_dump has been depricated and " - "will go away in future releases."); - params.opt_fdump = 1; - break; case 'p': print_fields_parsable_print = PRINT_FIELDS_PARSABLE_ENDING; @@ -832,16 +815,16 @@ void parse_command_line(int argc, char **argv) acct_db_conn, &qos_cond); } - if(!job_cond->qos_list) + if (!job_cond->qos_list) job_cond->qos_list = list_create(slurm_destroy_char); - if(!slurmdb_addto_qos_char_list(job_cond->qos_list, + if (!slurmdb_addto_qos_char_list(job_cond->qos_list, g_qos_list, optarg, 0)) fatal("problem processing qos list"); break; case 'r': - if(!job_cond->partition_list) + if (!job_cond->partition_list) job_cond->partition_list = list_create(slurm_destroy_char); @@ -849,7 +832,7 @@ void parse_command_line(int argc, char **argv) optarg); break; case 's': - if(!job_cond->state_list) + if (!job_cond->state_list) job_cond->state_list = list_create(slurm_destroy_char); @@ -867,12 +850,12 @@ void parse_command_line(int argc, char **argv) params.opt_help = 3; break; case 'u': - if(!strcmp(optarg, "-1")) { + if (!strcmp(optarg, "-1")) { all_users = 1; break; } all_users = 0; - if(!job_cond->userid_list) + if (!job_cond->userid_list) job_cond->userid_list = list_create(slurm_destroy_char); _addto_id_char_list(job_cond->userid_list, optarg, 0); @@ -883,7 +866,7 @@ void parse_command_line(int argc, char **argv) verbosity++; break; case 'W': - if(!job_cond->wckey_list) + if (!job_cond->wckey_list) job_cond->wckey_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->wckey_list, optarg); @@ -892,12 +875,13 @@ void parse_command_line(int argc, char **argv) print_slurm_version(); exit(0); case 'x': - if(!job_cond->associd_list) + if (!job_cond->associd_list) job_cond->associd_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->associd_list, optarg); break; case 't': + /* 't' is deprecated and was replaced with 'X'. 
*/ case 'X': params.opt_allocs = 1; break; @@ -918,13 +902,10 @@ void parse_command_line(int argc, char **argv) if (params.opt_dup < 0) /* not already set explicitly */ params.opt_dup = 0; - if (params.opt_fdump) - params.opt_dup |= FDUMP_FLAG; - job_cond->duplicates = params.opt_dup; job_cond->without_steps = params.opt_allocs; - if(!job_cond->usage_start && !job_cond->step_list) { + if (!job_cond->usage_start && !job_cond->step_list) { struct tm start_tm; job_cond->usage_start = time(NULL); /* If we are looking for job states default to now. @@ -945,13 +926,13 @@ void parse_command_line(int argc, char **argv) } } - if(verbosity > 0) { + if (verbosity > 0) { char *start_char =NULL, *end_char = NULL; start_char = xstrdup(ctime(&job_cond->usage_start)); /* remove the new line */ start_char[strlen(start_char)-1] = '\0'; - if(job_cond->usage_end) { + if (job_cond->usage_end) { end_char = xstrdup(ctime(&job_cond->usage_end)); /* remove the new line */ end_char[strlen(end_char)-1] = '\0'; @@ -974,7 +955,7 @@ void parse_command_line(int argc, char **argv) params.opt_help, params.opt_allocs); - if(params.opt_completion) { + if (params.opt_completion) { g_slurm_jobcomp_init(params.opt_filein); acct_type = slurm_get_jobcomp_type(); @@ -996,15 +977,15 @@ void parse_command_line(int argc, char **argv) } xfree(acct_type); acct_db_conn = slurmdb_connection_get(); - if(errno != SLURM_SUCCESS) { + if (errno != SLURM_SUCCESS) { error("Problem talking to the database: %m"); exit(1); } } /* specific clusters requested? 
*/ - if(all_clusters) { - if(job_cond->cluster_list + if (all_clusters) { + if (job_cond->cluster_list && list_count(job_cond->cluster_list)) { list_destroy(job_cond->cluster_list); job_cond->cluster_list = NULL; @@ -1017,12 +998,12 @@ void parse_command_line(int argc, char **argv) while((start = list_next(itr))) debug2("\t: %s", start); list_iterator_destroy(itr); - } else if(!job_cond->cluster_list + } else if (!job_cond->cluster_list || !list_count(job_cond->cluster_list)) { - if(!job_cond->cluster_list) + if (!job_cond->cluster_list) job_cond->cluster_list = list_create(slurm_destroy_char); - if((start = slurm_get_cluster_name())) { + if ((start = slurm_get_cluster_name())) { list_append(job_cond->cluster_list, start); debug2("Clusters requested:\t%s", start); } @@ -1030,17 +1011,17 @@ void parse_command_line(int argc, char **argv) /* if any jobs or nodes are specified set to look for all users if none are set */ - if(!job_cond->userid_list || !list_count(job_cond->userid_list)) - if((job_cond->step_list && list_count(job_cond->step_list)) + if (!job_cond->userid_list || !list_count(job_cond->userid_list)) + if ((job_cond->step_list && list_count(job_cond->step_list)) || job_cond->used_nodes) all_users=1; /* set all_users for user root if not requesting any */ - if(!job_cond->userid_list && !params.opt_uid) + if (!job_cond->userid_list && !params.opt_uid) all_users = 1; - if(all_users) { - if(job_cond->userid_list && list_count(job_cond->userid_list)) { + if (all_users) { + if (job_cond->userid_list && list_count(job_cond->userid_list)) { list_destroy(job_cond->userid_list); job_cond->userid_list = NULL; } @@ -1051,9 +1032,9 @@ void parse_command_line(int argc, char **argv) while((start = list_next(itr))) debug2("\t: %s", start); list_iterator_destroy(itr); - } else if(!job_cond->userid_list + } else if (!job_cond->userid_list || !list_count(job_cond->userid_list)) { - if(!job_cond->userid_list) + if (!job_cond->userid_list) job_cond->userid_list = 
list_create(slurm_destroy_char); start = xstrdup_printf("%u", params.opt_uid); list_append(job_cond->userid_list, start); @@ -1089,7 +1070,7 @@ void parse_command_line(int argc, char **argv) debug2("Jobs requested:"); itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if(selected_step->stepid != NO_VAL) + if (selected_step->stepid != NO_VAL) debug2("\t: %d.%d", selected_step->jobid, selected_step->stepid); @@ -1123,7 +1104,7 @@ void parse_command_line(int argc, char **argv) char time_str[128], tmp1[32], tmp2[32]; mins2time_str(job_cond->timelimit_min, tmp1, sizeof(tmp1)); sprintf(time_str, "%s", tmp1); - if(job_cond->timelimit_max) { + if (job_cond->timelimit_max) { int len = strlen(tmp1); mins2time_str(job_cond->timelimit_max, tmp2, sizeof(tmp2)); @@ -1143,8 +1124,8 @@ void parse_command_line(int argc, char **argv) } /* select the output fields */ - if(brief_output) { - if(params.opt_completion) + if (brief_output) { + if (params.opt_completion) dot = BRIEF_COMP_FIELDS; else dot = BRIEF_FIELDS; @@ -1152,8 +1133,8 @@ void parse_command_line(int argc, char **argv) xstrfmtcat(params.opt_field_list, "%s,", dot); } - if(long_output) { - if(params.opt_completion) + if (long_output) { + if (params.opt_completion) dot = LONG_COMP_FIELDS; else dot = LONG_FIELDS; @@ -1162,9 +1143,7 @@ void parse_command_line(int argc, char **argv) } if (params.opt_field_list==NULL) { - if (params.opt_dump) - goto endopt; - if(params.opt_completion) + if (params.opt_completion) dot = DEFAULT_COMP_FIELDS; else if ( ( env_val = getenv("SACCT_FORMAT") ) ) dot = xstrdup(env_val); @@ -1179,20 +1158,33 @@ void parse_command_line(int argc, char **argv) char *tmp_char = NULL; int command_len = 0; int newlen = 0; + bool newlen_set = false; *end = 0; while (isspace(*start)) start++; /* discard whitespace */ - if(!(int)*start) + if (!(int)*start) continue; - if((tmp_char = strstr(start, "\%"))) { + if ((tmp_char = strstr(start, "\%"))) { + newlen_set = true; 
newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } command_len = strlen(start); + if (!strncasecmp("ALL", start, command_len)) { + for (i = 0; fields[i].name; i++) { + if (newlen_set) + fields[i].len = newlen; + list_append(print_fields_list, &fields[i]); + start = end + 1; + } + start = end + 1; + continue; + } + for (i = 0; fields[i].name; i++) { if (!strncasecmp(fields[i].name, start, command_len)) goto foundfield; @@ -1200,13 +1192,13 @@ void parse_command_line(int argc, char **argv) error("Invalid field requested: \"%s\"", start); exit(1); foundfield: - if(newlen) + if (newlen_set) fields[i].len = newlen; list_append(print_fields_list, &fields[i]); start = end + 1; } field_count = list_count(print_fields_list); -endopt: + if (optind < argc) { debug2("Error: Unknown arguments:"); for (i=optind; i<argc; i++) @@ -1217,210 +1209,6 @@ endopt: return; } -/* Note: do_dump() strives to present data in an upward-compatible - * manner so that apps written to use data from `sacct -d` in slurm - * v1.0 will continue to work in v1.1 and later. - * - * To help ensure this compatibility, - * a. The meaning of an existing field never changes - * b. New fields are appended to the end of a record - * - * The "numfields" field of the record can be used as a sub-version - * number, as it will never decrease for the life of the current - * record version number (currently 1). For example, if your app needs - * to use field 28, a record with numfields<28 is too old a version - * for you, while numfields>=28 will provide what you are expecting. 
- */ -void do_dump(void) -{ - ListIterator itr = NULL; - ListIterator itr_step = NULL; - slurmdb_job_rec_t *job = NULL; - slurmdb_step_rec_t *step = NULL; - struct tm ts; - - itr = list_iterator_create(jobs); - while((job = list_next(itr))) { - - if(list_count(job->steps)) { - job->stats.cpu_ave /= list_count(job->steps); - job->stats.rss_ave /= list_count(job->steps); - job->stats.vsize_ave /= list_count(job->steps); - job->stats.pages_ave /= list_count(job->steps); - } - - /* JOB_START */ - if (job->show_full) { - gmtime_r(&job->start, &ts); - printf("%u %s %04d%02d%02d%02d%02d%02d %d %s %s ", - job->jobid, - job->partition, - 1900+(ts.tm_year), - 1+(ts.tm_mon), - ts.tm_mday, - ts.tm_hour, - ts.tm_min, - ts.tm_sec, - (int)job->submit, - job->blockid, /* block id */ - "-"); /* reserved 1 */ - - printf("JOB_START 1 16 %d %d %s %d %d %d %s %s\n", - job->uid, - job->gid, - job->jobname, - job->track_steps, - job->priority, - job->alloc_cpus, - job->nodes, - job->account); - } - /* JOB_STEP */ - itr_step = list_iterator_create(job->steps); - while((step = list_next(itr_step))) { - gmtime_r(&step->start, &ts); - printf("%u %s %04d%02d%02d%02d%02d%02d %d %s %s ", - job->jobid, - job->partition, - 1900+(ts.tm_year), - 1+(ts.tm_mon), - ts.tm_mday, - ts.tm_hour, - ts.tm_min, - ts.tm_sec, - (int)job->submit, - job->blockid, /* block id */ - "-"); /* reserved 1 */ - if(step->end == 0) - step->end = job->end; - - gmtime_r(&step->end, &ts); - printf("JOB_STEP 1 50 %u %04d%02d%02d%02d%02d%02d ", - step->stepid, - 1900+(ts.tm_year), 1+(ts.tm_mon), ts.tm_mday, - ts.tm_hour, ts.tm_min, ts.tm_sec); - printf("%s %d %d %d %d ", - job_state_string_compact(step->state), - step->exitcode, - step->ncpus, - step->ncpus, - step->elapsed); - printf("%d %d %d %d %d %d %d %d ", - step->tot_cpu_sec, - step->tot_cpu_usec, - (int)step->user_cpu_sec, - (int)step->user_cpu_usec, - (int)step->sys_cpu_sec, - (int)step->sys_cpu_usec, - step->stats.vsize_max/1024, - step->stats.rss_max/1024); - /* 
Data added in Slurm v1.1 */ - printf("%u %u %.2f %u %u %.2f %d %u %u %.2f " - "%.u %u %u %.2f %s %s %s\n", - step->stats.vsize_max_nodeid, - step->stats.vsize_max_taskid, - step->stats.vsize_ave/1024, - step->stats.rss_max_nodeid, - step->stats.rss_max_taskid, - step->stats.rss_ave/1024, - step->stats.pages_max, - step->stats.pages_max_nodeid, - step->stats.pages_max_taskid, - step->stats.pages_ave, - step->stats.cpu_min, - step->stats.cpu_min_nodeid, - step->stats.cpu_min_taskid, - step->stats.cpu_ave, - step->stepname, - step->nodes, - job->account); - } - list_iterator_destroy(itr_step); - /* JOB_TERMINATED */ - if (job->show_full) { - gmtime_r(&job->start, &ts); - printf("%u %s %04d%02d%02d%02d%02d%02d %d %s %s ", - job->jobid, - job->partition, - 1900+(ts.tm_year), - 1+(ts.tm_mon), - ts.tm_mday, - ts.tm_hour, - ts.tm_min, - ts.tm_sec, - (int)job->submit, - job->blockid, /* block id */ - "-"); /* reserved 1 */ - gmtime_r(&job->end, &ts); - printf("JOB_TERMINATED 1 50 %d ", - job->elapsed); - printf("%04d%02d%02d%02d%02d%02d ", - 1900+(ts.tm_year), 1+(ts.tm_mon), ts.tm_mday, - ts.tm_hour, ts.tm_min, ts.tm_sec); - printf("%s %d %d %d %d ", - job_state_string_compact(job->state), - job->exitcode, - job->alloc_cpus, - job->alloc_cpus, - job->elapsed); - printf("%d %d %d %d %d %d %d %d ", - job->tot_cpu_sec, - job->tot_cpu_usec, - (int)job->user_cpu_sec, - (int)job->user_cpu_usec, - (int)job->sys_cpu_sec, - (int)job->sys_cpu_usec, - job->stats.vsize_max/1024, - job->stats.rss_max/1024); - /* Data added in Slurm v1.1 */ - printf("%u %u %.2f %u %u %.2f %d %u %u %.2f " - "%.u %u %u %.2f %s %s %s %d\n", - job->stats.vsize_max_nodeid, - job->stats.vsize_max_taskid, - job->stats.vsize_ave/1024, - job->stats.rss_max_nodeid, - job->stats.rss_max_taskid, - job->stats.rss_ave/1024, - job->stats.pages_max, - job->stats.pages_max_nodeid, - job->stats.pages_max_taskid, - job->stats.pages_ave, - job->stats.cpu_min, - job->stats.cpu_min_nodeid, - job->stats.cpu_min_taskid, - 
job->stats.cpu_ave, - "-", - job->nodes, - job->account, - job->requid); - } - } - list_iterator_destroy(itr); -} - -void do_dump_completion(void) -{ - ListIterator itr = NULL; - jobcomp_job_rec_t *job = NULL; - - itr = list_iterator_create(jobs); - while((job = list_next(itr))) { - printf("JOB %u %s %s %s %s(%u) %u(%s) %u %s %s %s %s", - job->jobid, job->partition, job->start_time, - job->end_time, job->uid_name, job->uid, job->gid, - job->gid_name, job->node_cnt, job->nodelist, - job->jobname, job->state, - job->timelimit); - if(job->blockid) - printf(" %s %s %s %s %u %s %s", - job->blockid, job->connection, job->reboot, - job->rotate, job->max_procs, job->geo, - job->bg_start_point); - printf("\n"); - } - list_iterator_destroy(itr); -} - void do_help(void) { switch (params.opt_help) { @@ -1454,17 +1242,19 @@ void do_list(void) slurmdb_job_rec_t *job = NULL; slurmdb_step_rec_t *step = NULL; - if(!jobs) + if (!jobs) return; itr = list_iterator_create(jobs); while((job = list_next(itr))) { - if(list_count(job->steps)) { + if (list_count(job->steps)) { int cnt = list_count(job->steps); job->stats.cpu_ave /= (double)cnt; job->stats.rss_ave /= (double)cnt; job->stats.vsize_ave /= (double)cnt; job->stats.pages_ave /= (double)cnt; + job->stats.disk_read_ave /= (double)cnt; + job->stats.disk_write_ave /= (double)cnt; } if (job->show_full) @@ -1474,7 +1264,7 @@ void do_list(void) && (job->track_steps || !job->show_full)) { itr_step = list_iterator_create(job->steps); while((step = list_next(itr_step))) { - if(step->end == 0) + if (step->end == 0) step->end = job->end; print_fields(JOBSTEP, step); } @@ -1497,7 +1287,7 @@ void do_list_completion(void) ListIterator itr = NULL; jobcomp_job_rec_t *job = NULL; - if(!jobs) + if (!jobs) return; itr = list_iterator_create(jobs); @@ -1516,16 +1306,16 @@ void sacct_init() void sacct_fini() { - if(print_fields_itr) + if (print_fields_itr) list_iterator_destroy(print_fields_itr); - if(print_fields_list) + if (print_fields_list) 
list_destroy(print_fields_list); - if(jobs) + if (jobs) list_destroy(jobs); - if(g_qos_list) + if (g_qos_list) list_destroy(g_qos_list); - if(params.opt_completion) + if (params.opt_completion) g_slurm_jobcomp_fini(); else { slurmdb_connection_close(&acct_db_conn); diff --git a/src/sacct/print.c b/src/sacct/print.c index 60da32b3de51107756e4a65f20757e6502b183e3..f700d5c9ff31399c64670a382cc1b9003f4dbee6 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -50,7 +50,7 @@ char *_elapsed_time(long secs, long usecs) long subsec = 0; char *str = NULL; - if(secs < 0 || secs == (long)NO_VAL) + if (secs < 0 || secs == (long)NO_VAL) return NULL; @@ -58,7 +58,7 @@ char *_elapsed_time(long secs, long usecs) secs++; usecs -= 1E6; } - if(usecs > 0) { + if (usecs > 0) { /* give me 3 significant digits to tack onto the sec */ subsec = (usecs/1000); } @@ -73,7 +73,7 @@ char *_elapsed_time(long secs, long usecs) else if (hours) str = xstrdup_printf("%2.2ld:%2.2ld:%2.2ld", hours, minutes, seconds); - else if(subsec) + else if (subsec) str = xstrdup_printf("%2.2ld:%2.2ld.%3.3ld", minutes, seconds, subsec); else @@ -88,23 +88,35 @@ static char *_find_qos_name_from_list( ListIterator itr = NULL; slurmdb_qos_rec_t *qos = NULL; - if(!qos_list || qosid == NO_VAL) + if (!qos_list || qosid == NO_VAL) return NULL; itr = list_iterator_create(qos_list); while((qos = list_next(itr))) { - if(qosid == qos->id) + if (qosid == qos->id) break; } list_iterator_destroy(itr); - if(qos) + if (qos) return qos->name; else return "Unknown"; } +static void _print_small_double( + char *outbuf, int buf_size, double dub, int units) +{ + if (fuzzy_equal(dub, 
NO_VAL)) + return; + if (dub > 1) + convert_num_unit((float)dub, outbuf, buf_size, units); + else if (dub > 0) + snprintf(outbuf, buf_size, "%.2fM", dub); + else + snprintf(outbuf, buf_size, "0"); +} void print_fields(type_t type, void *object) { @@ -120,13 +132,13 @@ void print_fields(type_t type, void *object) switch(type) { case JOB: step = NULL; - if(!job->track_steps) + if (!job->track_steps) step = (slurmdb_step_rec_t *)job->first_step_ptr; /* set this to avoid printing out info for things that don't mean anything. Like an allocation that never ran anything. */ - if(!step) + if (!step) job->track_steps = 1; break; @@ -139,6 +151,7 @@ void print_fields(type_t type, void *object) char *tmp_char = NULL; int tmp_int = NO_VAL, tmp_int2 = NO_VAL; double tmp_dub = (double)NO_VAL; + uint32_t tmp_uint32 = (uint32_t)NO_VAL; uint64_t tmp_uint64 = (uint64_t)NO_VAL; memset(&outbuf, 0, sizeof(outbuf)); @@ -148,7 +161,7 @@ void print_fields(type_t type, void *object) case JOB: tmp_int = job->alloc_cpus; // we want to use the step info - if(!step) + if (!step) break; case JOBSTEP: tmp_int = step->ncpus; @@ -192,9 +205,9 @@ void print_fields(type_t type, void *object) break; } if (!fuzzy_equal(tmp_dub, NO_VAL)) - convert_num_unit((float)tmp_dub, - outbuf, sizeof(outbuf), - UNIT_NONE); + convert_num_unit2((float)tmp_dub, + outbuf, sizeof(outbuf), + UNIT_KILO, 1000, false); field->print_routine(field, outbuf, @@ -220,7 +233,7 @@ void print_fields(type_t type, void *object) case PRINT_AVECPU: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_dub = job->stats.cpu_ave; break; case JOBSTEP: @@ -239,10 +252,50 @@ void print_fields(type_t type, void *object) (curr_inx == field_count)); xfree(tmp_char); break; + case PRINT_AVEDISKREAD: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_dub = job->stats.disk_read_ave; + break; + case JOBSTEP: + tmp_dub = step->stats.disk_read_ave; + break; + case JOBCOMP: + default: + break; + } + 
_print_small_double(outbuf, sizeof(outbuf), + tmp_dub, UNIT_MEGA); + + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; + case PRINT_AVEDISKWRITE: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_dub = job->stats.disk_write_ave; + break; + case JOBSTEP: + tmp_dub = step->stats.disk_write_ave; + break; + case JOBCOMP: + default: + break; + } + _print_small_double(outbuf, sizeof(outbuf), + tmp_dub, UNIT_MEGA); + + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; case PRINT_AVEPAGES: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_dub = job->stats.pages_ave; break; case JOBSTEP: @@ -264,7 +317,7 @@ void print_fields(type_t type, void *object) case PRINT_AVERSS: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_dub = job->stats.rss_ave; break; case JOBSTEP: @@ -286,7 +339,7 @@ void print_fields(type_t type, void *object) case PRINT_AVEVSIZE: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_dub = job->stats.vsize_ave; break; case JOBSTEP: @@ -366,14 +419,31 @@ void print_fields(type_t type, void *object) break; } if (!fuzzy_equal(tmp_dub, NO_VAL)) - convert_num_unit((float)tmp_dub, - outbuf, sizeof(outbuf), - UNIT_NONE); + convert_num_unit2((float)tmp_dub, + outbuf, sizeof(outbuf), + UNIT_NONE, 1000, false); field->print_routine(field, outbuf, (curr_inx == field_count)); break; + case PRINT_CONSUMED_ENERGY_RAW: + switch (type) { + case JOB: + if (!job->track_steps) + tmp_dub = step->stats.consumed_energy; + break; + case JOBSTEP: + tmp_dub = step->stats.consumed_energy; + break; + default: + break; + } + + field->print_routine(field, + tmp_dub, + (curr_inx == field_count)); + break; case PRINT_CPU_TIME: switch(type) { case JOB: @@ -611,7 +681,7 @@ void print_fields(type_t type, void *object) case JOB: /* below really should be step. 
It is not a typo */ - if(!job->track_steps) + if (!job->track_steps) tmp_char = slurm_step_layout_type_name( step->task_dist); break; @@ -629,10 +699,138 @@ void print_fields(type_t type, void *object) tmp_char, (curr_inx == field_count)); break; + case PRINT_MAXDISKREAD: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_dub = job->stats.disk_read_max; + break; + case JOBSTEP: + tmp_dub = step->stats.disk_read_max; + break; + case JOBCOMP: + default: + break; + } + _print_small_double(outbuf, sizeof(outbuf), + tmp_dub, UNIT_MEGA); + + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; + case PRINT_MAXDISKREADNODE: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_char = find_hostname( + job->stats.disk_read_max_nodeid, + job->nodes); + break; + case JOBSTEP: + tmp_char = find_hostname( + step->stats.disk_read_max_nodeid, + step->nodes); + break; + case JOBCOMP: + default: + tmp_char = NULL; + break; + } + field->print_routine(field, + tmp_char, + (curr_inx == field_count)); + xfree(tmp_char); + break; + case PRINT_MAXDISKREADTASK: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_uint32 = + job->stats.disk_read_max_taskid; + break; + case JOBSTEP: + tmp_uint32 = step->stats.disk_read_max_taskid; + break; + case JOBCOMP: + default: + tmp_uint32 = NO_VAL; + break; + } + if (tmp_uint32 == (uint32_t)NO_VAL) + tmp_uint32 = NO_VAL; + field->print_routine(field, + tmp_uint32, + (curr_inx == field_count)); + break; + case PRINT_MAXDISKWRITE: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_dub = job->stats.disk_write_max; + break; + case JOBSTEP: + tmp_dub = step->stats.disk_write_max; + break; + case JOBCOMP: + default: + break; + } + _print_small_double(outbuf, sizeof(outbuf), + tmp_dub, UNIT_MEGA); + + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; + case PRINT_MAXDISKWRITENODE: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_char = find_hostname( + 
job->stats.disk_write_max_nodeid, + job->nodes); + break; + case JOBSTEP: + tmp_char = find_hostname( + step->stats.disk_write_max_nodeid, + step->nodes); + break; + case JOBCOMP: + default: + tmp_char = NULL; + break; + } + field->print_routine(field, + tmp_char, + (curr_inx == field_count)); + xfree(tmp_char); + break; + case PRINT_MAXDISKWRITETASK: + switch(type) { + case JOB: + if (!job->track_steps) + tmp_uint32 = + job->stats.disk_write_max_taskid; + break; + case JOBSTEP: + tmp_uint32 = step->stats.disk_write_max_taskid; + break; + case JOBCOMP: + default: + tmp_uint32 = NO_VAL; + break; + } + if (tmp_uint32 == (uint32_t)NO_VAL) + tmp_uint32 = NO_VAL; + field->print_routine(field, + tmp_uint32, + (curr_inx == field_count)); + break; case PRINT_MAXPAGES: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_int = job->stats.pages_max; break; case JOBSTEP: @@ -642,7 +840,7 @@ void print_fields(type_t type, void *object) default: break; } - if(tmp_int != NO_VAL) + if (tmp_int != NO_VAL) convert_num_unit((float)tmp_int, outbuf, sizeof(outbuf), UNIT_KILO); @@ -654,7 +852,7 @@ void print_fields(type_t type, void *object) case PRINT_MAXPAGESNODE: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_char = find_hostname( job->stats.pages_max_nodeid, job->nodes); @@ -677,28 +875,28 @@ void print_fields(type_t type, void *object) case PRINT_MAXPAGESTASK: switch(type) { case JOB: - if(!job->track_steps) - tmp_int = + if (!job->track_steps) + tmp_uint32 = job->stats.pages_max_taskid; break; case JOBSTEP: - tmp_int = step->stats.pages_max_taskid; + tmp_uint32 = step->stats.pages_max_taskid; break; case JOBCOMP: default: - tmp_int = NO_VAL; + tmp_uint32 = NO_VAL; break; } - if (tmp_int == (uint16_t)NO_VAL) - tmp_int = NO_VAL; + if (tmp_uint32 == (uint32_t)NO_VAL) + tmp_uint32 = NO_VAL; field->print_routine(field, - tmp_int, + tmp_uint32, (curr_inx == field_count)); break; case PRINT_MAXRSS: switch(type) { case JOB: - 
if(!job->track_steps) + if (!job->track_steps) tmp_int = job->stats.rss_max; break; case JOBSTEP: @@ -708,7 +906,7 @@ void print_fields(type_t type, void *object) default: break; } - if(tmp_int != NO_VAL) + if (tmp_int != NO_VAL) convert_num_unit((float)tmp_int, outbuf, sizeof(outbuf), UNIT_KILO); @@ -720,7 +918,7 @@ void print_fields(type_t type, void *object) case PRINT_MAXRSSNODE: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_char = find_hostname( job->stats.rss_max_nodeid, job->nodes); @@ -743,27 +941,27 @@ void print_fields(type_t type, void *object) case PRINT_MAXRSSTASK: switch(type) { case JOB: - if(!job->track_steps) - tmp_int = job->stats.rss_max_taskid; + if (!job->track_steps) + tmp_uint32 = job->stats.rss_max_taskid; break; case JOBSTEP: - tmp_int = step->stats.rss_max_taskid; + tmp_uint32 = step->stats.rss_max_taskid; break; case JOBCOMP: default: - tmp_int = NO_VAL; + tmp_uint32 = NO_VAL; break; } - if (tmp_int == (uint16_t)NO_VAL) - tmp_int = NO_VAL; + if (tmp_uint32 == (uint32_t)NO_VAL) + tmp_uint32 = NO_VAL; field->print_routine(field, - tmp_int, + tmp_uint32, (curr_inx == field_count)); break; case PRINT_MAXVSIZE: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_int = job->stats.vsize_max; break; case JOBSTEP: @@ -774,7 +972,7 @@ void print_fields(type_t type, void *object) tmp_int = NO_VAL; break; } - if(tmp_int != NO_VAL) + if (tmp_int != NO_VAL) convert_num_unit((float)tmp_int, outbuf, sizeof(outbuf), UNIT_KILO); @@ -786,7 +984,7 @@ void print_fields(type_t type, void *object) case PRINT_MAXVSIZENODE: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_char = find_hostname( job->stats.vsize_max_nodeid, job->nodes); @@ -809,28 +1007,28 @@ void print_fields(type_t type, void *object) case PRINT_MAXVSIZETASK: switch(type) { case JOB: - if(!job->track_steps) - tmp_int = + if (!job->track_steps) + tmp_uint32 = job->stats.vsize_max_taskid; break; case JOBSTEP: - 
tmp_int = step->stats.vsize_max_taskid; + tmp_uint32 = step->stats.vsize_max_taskid; break; case JOBCOMP: default: - tmp_int = NO_VAL; + tmp_uint32 = NO_VAL; break; } - if (tmp_int == (uint16_t)NO_VAL) - tmp_int = NO_VAL; + if (tmp_uint32 == (uint32_t)NO_VAL) + tmp_uint32 = NO_VAL; field->print_routine(field, - tmp_int, + tmp_uint32, (curr_inx == field_count)); break; case PRINT_MINCPU: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_dub = job->stats.cpu_min; break; case JOBSTEP: @@ -850,7 +1048,7 @@ void print_fields(type_t type, void *object) case PRINT_MINCPUNODE: switch(type) { case JOB: - if(!job->track_steps) + if (!job->track_steps) tmp_char = find_hostname( job->stats.cpu_min_nodeid, job->nodes); @@ -873,21 +1071,21 @@ void print_fields(type_t type, void *object) case PRINT_MINCPUTASK: switch(type) { case JOB: - if(!job->track_steps) - tmp_int = job->stats.cpu_min_taskid; + if (!job->track_steps) + tmp_uint32 = job->stats.cpu_min_taskid; break; case JOBSTEP: - tmp_int = step->stats.cpu_min_taskid; + tmp_uint32 = step->stats.cpu_min_taskid; break; case JOBCOMP: default: - tmp_int = NO_VAL; + tmp_uint32 = NO_VAL; break; } - if (tmp_int == (uint16_t)NO_VAL) - tmp_int = NO_VAL; + if (tmp_uint32 == (uint32_t)NO_VAL) + tmp_uint32 = NO_VAL; field->print_routine(field, - tmp_int, + tmp_uint32, (curr_inx == field_count)); break; case PRINT_NODELIST: @@ -926,7 +1124,7 @@ void print_fields(type_t type, void *object) break; } - if(!tmp_int) { + if (!tmp_int) { hostlist_t hl = hostlist_create(tmp_char); tmp_int = hostlist_count(hl); hostlist_destroy(hl); @@ -940,10 +1138,10 @@ void print_fields(type_t type, void *object) case PRINT_NTASKS: switch(type) { case JOB: - if(!job->track_steps && !step) + if (!job->track_steps && !step) tmp_int = job->alloc_cpus; // we want to use the step info - if(!step) + if (!step) break; case JOBSTEP: tmp_int = step->ntasks; @@ -1044,6 +1242,34 @@ void print_fields(type_t type, void *object) tmp_int, 
(curr_inx == field_count)); break; + case PRINT_REQ_CPUFREQ: + switch (type) { + case JOB: + if (!job->track_steps && !step) + tmp_dub = NO_VAL; + // we want to use the step info + if (!step) + break; + case JOBSTEP: + tmp_dub = step->req_cpufreq; + break; + default: + break; + } + if (tmp_dub == CPU_FREQ_LOW) + snprintf(outbuf, sizeof(outbuf), "Low"); + else if (tmp_dub == CPU_FREQ_MEDIUM) + snprintf(outbuf, sizeof(outbuf), "Medium"); + else if (tmp_dub == CPU_FREQ_HIGH) + snprintf(outbuf, sizeof(outbuf), "High"); + else if (!fuzzy_equal(tmp_dub, NO_VAL)) + convert_num_unit2((float)tmp_dub, + outbuf, sizeof(outbuf), + UNIT_KILO, 1000, false); + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; case PRINT_REQ_CPUS: switch(type) { case JOB: @@ -1063,10 +1289,42 @@ void print_fields(type_t type, void *object) tmp_int, (curr_inx == field_count)); break; + case PRINT_REQ_MEM: + switch(type) { + case JOB: + tmp_uint32 = job->req_mem; + break; + case JOBSTEP: + tmp_uint32 = step->job_ptr->req_mem; + break; + case JOBCOMP: + default: + tmp_uint32 = NO_VAL; + break; + } + + if (tmp_uint32 != (uint32_t)NO_VAL) { + bool per_cpu = false; + if (tmp_uint32 & MEM_PER_CPU) { + tmp_uint32 &= (~MEM_PER_CPU); + per_cpu = true; + } + convert_num_unit((float)tmp_uint32, + outbuf, sizeof(outbuf), + UNIT_MEGA); + if (per_cpu) + sprintf(outbuf+strlen(outbuf), "c"); + else + sprintf(outbuf+strlen(outbuf), "n"); + } + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; case PRINT_RESV: switch(type) { case JOB: - if(job->start) + if (job->start) tmp_int = job->start - job->eligible; else tmp_int = time(NULL) - job->eligible; @@ -1087,7 +1345,7 @@ void print_fields(type_t type, void *object) case PRINT_RESV_CPU: switch(type) { case JOB: - if(job->start) + if (job->start) tmp_int = (job->start - job->eligible) * job->req_cpus; else @@ -1110,7 +1368,7 @@ void print_fields(type_t type, void *object) case PRINT_RESV_CPU_RAW: switch(type) { case 
JOB: - if(job->start) + if (job->start) tmp_int = (job->start - job->eligible) * job->req_cpus; else @@ -1173,11 +1431,11 @@ void print_fields(type_t type, void *object) "%s by %d", job_state_string(tmp_int), tmp_int2); - else if(tmp_int != NO_VAL) + else if (tmp_int != NO_VAL) snprintf(outbuf, FORMAT_STRING_SIZE, "%s", job_state_string(tmp_int)); - else if(tmp_char) + else if (tmp_char) snprintf(outbuf, FORMAT_STRING_SIZE, "%s", tmp_char); @@ -1255,7 +1513,7 @@ void print_fields(type_t type, void *object) tmp_char = "UNLIMITED"; else if (job->timelimit == NO_VAL) tmp_char = "Partition_Limit"; - else if(job->timelimit) { + else if (job->timelimit) { char tmp1[128]; mins2time_str(job->timelimit, tmp1, sizeof(tmp1)); @@ -1302,7 +1560,7 @@ void print_fields(type_t type, void *object) case PRINT_UID: switch(type) { case JOB: - if(job->user) { + if (job->user) { if ((pw=getpwnam(job->user))) tmp_int = pw->pw_uid; } else @@ -1325,9 +1583,9 @@ void print_fields(type_t type, void *object) case PRINT_USER: switch(type) { case JOB: - if(job->user) + if (job->user) tmp_char = job->user; - else if(job->uid != -1) { + else if (job->uid != -1) { if ((pw=getpwuid(job->uid))) tmp_char = pw->pw_name; } diff --git a/src/sacct/process.c b/src/sacct/process.c index 070ab03aa50ccb89038516f8d531250f2bc8ae3e..a909ec30ce9657eb1b1936a9e7914c1a1866cfba 100644 --- a/src/sacct/process.c +++ b/src/sacct/process.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -46,12 +46,12 @@ char *find_hostname(uint32_t pos, char *hosts) hostlist_t hostlist = NULL; char *temp = NULL, *host = NULL; - if(!hosts || (pos == (uint32_t)NO_VAL)) + if (!hosts || (pos == (uint32_t)NO_VAL)) return NULL; hostlist = hostlist_create(hosts); temp = hostlist_nth(hostlist, pos); - if(temp) { + if (temp) { host = xstrdup(temp); free(temp); } @@ -98,4 +98,16 @@ void aggregate_stats(slurmdb_stats_t *dest, slurmdb_stats_t *from) else dest->consumed_energy += from->consumed_energy; dest->act_cpufreq += from->act_cpufreq; + if (dest->disk_read_max < from->disk_read_max) { + dest->disk_read_max = from->disk_read_max; + dest->disk_read_max_nodeid = from->disk_read_max_nodeid; + dest->disk_read_max_taskid = from->disk_read_max_taskid; + } + dest->disk_read_ave += from->disk_read_ave; + if (dest->disk_write_max < from->disk_write_max) { + dest->disk_write_max = from->disk_write_max; + dest->disk_write_max_nodeid = from->disk_write_max_nodeid; + dest->disk_write_max_taskid = from->disk_write_max_taskid; + } + dest->disk_write_ave += from->disk_write_ave; } diff --git a/src/sacct/sacct.c b/src/sacct/sacct.c index 6bb1befdb07b06a1ea33e719d210ad40b7a9643e..bd935826d2224ff47ea83267094a8ebaca59cb38 100644 --- a/src/sacct/sacct.c +++ b/src/sacct/sacct.c @@ -6,7 +6,7 @@ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -49,6 +49,8 @@ print_field_t fields[] = { {7, "AssocID", print_fields_uint, PRINT_ASSOCID}, {10, "AveCPU", print_fields_str, PRINT_AVECPU}, {10, "AveCPUFreq", print_fields_str, PRINT_ACT_CPUFREQ}, + {14, "AveDiskRead", print_fields_str, PRINT_AVEDISKREAD}, + {14, "AveDiskWrite", print_fields_str, PRINT_AVEDISKWRITE}, {10, "AvePages", print_fields_str, PRINT_AVEPAGES}, {10, "AveRSS", print_fields_str, PRINT_AVERSS}, {10, "AveVMSize", print_fields_str, PRINT_AVEVSIZE}, @@ -56,6 +58,8 @@ print_field_t fields[] = { {10, "Cluster", print_fields_str, PRINT_CLUSTER}, {14, "Comment", print_fields_str, PRINT_COMMENT}, {14, "ConsumedEnergy", print_fields_str, PRINT_CONSUMED_ENERGY}, + {17, "ConsumedEnergyRaw", print_fields_double, + PRINT_CONSUMED_ENERGY_RAW}, {10, "CPUTime", print_fields_time_from_secs, PRINT_CPU_TIME}, {10, "CPUTimeRAW", print_fields_uint64, PRINT_CPU_TIME_RAW}, {15, "DerivedExitCode", print_fields_str, PRINT_DERIVED_EC}, @@ -68,6 +72,12 @@ print_field_t fields[] = { {-12, "JobID", print_fields_str, PRINT_JOBID}, {10, "JobName", print_fields_str, PRINT_JOBNAME}, {9, "Layout", print_fields_str, PRINT_LAYOUT}, + {12, "MaxDiskRead", print_fields_str, PRINT_MAXDISKREAD}, + {15, "MaxDiskReadNode", print_fields_str, PRINT_MAXDISKREADNODE}, + {15, "MaxDiskReadTask", print_fields_uint, PRINT_MAXDISKREADTASK}, + {12, "MaxDiskWrite", print_fields_str, PRINT_MAXDISKWRITE}, + {16, "MaxDiskWriteNode", print_fields_str, PRINT_MAXDISKWRITENODE}, + {16, "MaxDiskWriteTask", print_fields_uint, PRINT_MAXDISKWRITETASK}, {8, "MaxPages", print_fields_str, PRINT_MAXPAGES}, {12, "MaxPagesNode", print_fields_str, PRINT_MAXPAGESNODE}, {14, "MaxPagesTask", print_fields_uint, PRINT_MAXPAGESTASK}, @@ -88,7 +98,9 @@ print_field_t fields[] = { {10, "Partition", print_fields_str, PRINT_PARTITION}, {10, "QOS", print_fields_str, PRINT_QOS}, {6, "QOSRAW", print_fields_uint, PRINT_QOSRAW}, + {10, "ReqCPUFreq", 
print_fields_str, PRINT_REQ_CPUFREQ}, {8, "ReqCPUS", print_fields_uint, PRINT_REQ_CPUS}, + {10, "ReqMem", print_fields_str, PRINT_REQ_MEM}, {10, "Reserved", print_fields_time_from_secs, PRINT_RESV}, {10, "ResvCPU", print_fields_time_from_secs, PRINT_RESV_CPU}, {10, "ResvCPURAW", print_fields_uint, PRINT_RESV_CPU}, @@ -111,8 +123,6 @@ List jobs = NULL; int main(int argc, char **argv) { enum { - SACCT_DUMP, - SACCT_FDUMP, SACCT_LIST, SACCT_HELP, SACCT_USAGE @@ -128,32 +138,16 @@ int main(int argc, char **argv) if (params.opt_help) op = SACCT_HELP; - else if (params.opt_dump) { - op = SACCT_DUMP; - } else if (params.opt_fdump) { - op = SACCT_FDUMP; - } else + else op = SACCT_LIST; switch (op) { - case SACCT_DUMP: - if(get_data() == SLURM_ERROR) - exit(errno); - if(params.opt_completion) - do_dump_completion(); - else - do_dump(); - break; - case SACCT_FDUMP: - if(get_data() == SLURM_ERROR) - exit(errno); - break; case SACCT_LIST: print_fields_header(print_fields_list); - if(get_data() == SLURM_ERROR) + if (get_data() == SLURM_ERROR) exit(errno); - if(params.opt_completion) + if (params.opt_completion) do_list_completion(); else do_list(); diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index 293d18655593c13c1a8907959ee9c02bd548651a..aefcae3c2775d1839f6ac6cfd38df69783a64f21 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -69,7 +69,7 @@ #define BRIEF_COMP_FIELDS "jobid,uid,state" #define DEFAULT_FIELDS "jobid,jobname,partition,account,alloccpus,state,exitcode" #define DEFAULT_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,end" -#define LONG_FIELDS "jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode,avecpufreq,consumedenergy" +#define LONG_FIELDS "jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode,avecpufreq,reqcpufreq,reqmem,consumedenergy,maxdiskread,maxdiskreadnode,maxdiskreadtask,avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite" #define LONG_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,start,end,timelimit" @@ -97,13 +97,16 @@ typedef enum { PRINT_ASSOCID, PRINT_AVECPU, PRINT_ACT_CPUFREQ, - PRINT_CONSUMED_ENERGY, + PRINT_AVEDISKREAD, + PRINT_AVEDISKWRITE, PRINT_AVEPAGES, PRINT_AVERSS, PRINT_AVEVSIZE, PRINT_BLOCKID, PRINT_CLUSTER, PRINT_COMMENT, + PRINT_CONSUMED_ENERGY, + PRINT_CONSUMED_ENERGY_RAW, PRINT_CPU_TIME, PRINT_CPU_TIME_RAW, PRINT_DERIVED_EC, @@ -116,6 +119,12 @@ typedef enum { PRINT_JOBID, PRINT_JOBNAME, PRINT_LAYOUT, + PRINT_MAXDISKREAD, + PRINT_MAXDISKREADNODE, + PRINT_MAXDISKREADTASK, + PRINT_MAXDISKWRITE, + PRINT_MAXDISKWRITENODE, + PRINT_MAXDISKWRITETASK, PRINT_MAXPAGES, PRINT_MAXPAGESNODE, PRINT_MAXPAGESTASK, @@ -135,7 +144,9 @@ typedef enum { PRINT_PRIO, PRINT_QOS, PRINT_QOSRAW, + PRINT_REQ_CPUFREQ, PRINT_REQ_CPUS, + PRINT_REQ_MEM, PRINT_RESV, PRINT_RESV_CPU, PRINT_RESV_CPU_RAW, @@ -156,9 +167,7 @@ typedef enum { typedef struct { slurmdb_job_cond_t *job_cond; int opt_completion; /* 
--completion */ - int opt_dump; /* --dump */ int opt_dup; /* --duplicates; +1 = explicitly set */ - int opt_fdump; /* --formattted_dump */ char *opt_field_list; /* --fields= */ int opt_gid; /* running persons gid */ int opt_help; /* --help */ @@ -188,8 +197,6 @@ void print_fields(type_t type, void *object); /* options.c */ int get_data(void); void parse_command_line(int argc, char **argv); -void do_dump(void); -void do_dump_completion(void); void do_help(void); void do_list(void); void do_list_completion(void); diff --git a/src/sacctmgr/Makefile.in b/src/sacctmgr/Makefile.in index 81258bf288a4484d495e68fd601eb081ef3671c8..9dd996ba5156ff0e7bccce288b9a7087c903ecdc 100644 --- a/src/sacctmgr/Makefile.in +++ b/src/sacctmgr/Makefile.in @@ -59,6 +59,7 @@ subdir = src/sacctmgr DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ 
$(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -163,6 +167,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -183,6 +189,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -192,6 +201,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -199,6 +210,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -233,6 +253,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -260,6 +283,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index 
6bb1252656d1455cf98cff13261e532cd337c2b9..39ec6bb070561661f83bfad43f73265ef0853539 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -53,13 +53,13 @@ static int _set_cond(int *start, int argc, char *argv[], int command_len = 0; int option = 0; - if(!acct_cond) { + if (!acct_cond) { exit_code=1; fprintf(stderr, "No acct_cond given"); return -1; } - if(!acct_cond->assoc_cond) { + if (!acct_cond->assoc_cond) { acct_cond->assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); } @@ -68,11 +68,11 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } @@ -101,47 +101,47 @@ static int _set_cond(int *start, int argc, char *argv[], } else if (!end && !strncasecmp(argv[i], "WOPLimits", MAX(command_len, 4))) { assoc_cond->without_parent_limits = 1; - } else if(!end && !strncasecmp(argv[i], "where", + } else if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end + } else if (!end || !strncasecmp(argv[i], "Names", MAX(command_len, 1)) || !strncasecmp(argv[i], "Accounts", MAX(command_len, 1)) || !strncasecmp(argv[i], "Acct", MAX(command_len, 4))) { - if(!assoc_cond->acct_list) { + if (!assoc_cond->acct_list) { assoc_cond->acct_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list( + if (slurm_addto_char_list( assoc_cond->acct_list, argv[i]+end)) u_set = 1; } else if (!strncasecmp(argv[i], 
"Descriptions", MAX(command_len, 1))) { - if(!acct_cond->description_list) { + if (!acct_cond->description_list) { acct_cond->description_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(acct_cond->description_list, + if (slurm_addto_char_list(acct_cond->description_list, argv[i]+end)) u_set = 1; } else if (!strncasecmp(argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp(argv[i], "Organizations", MAX(command_len, 1))) { - if(!acct_cond->organization_list) { + if (!acct_cond->organization_list) { acct_cond->organization_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(acct_cond->organization_list, + if (slurm_addto_char_list(acct_cond->organization_list, argv[i]+end)) u_set = 1; - } else if(!(a_set = sacctmgr_set_association_cond( + } else if (!(a_set = sacctmgr_set_association_cond( assoc_cond, argv[i], argv[i]+end, command_len, option))) { exit_code=1; @@ -153,11 +153,11 @@ static int _set_cond(int *start, int argc, char *argv[], (*start) = i; - if(u_set && a_set) + if (u_set && a_set) return 3; - else if(a_set) + else if (a_set) return 2; - else if(u_set) + else if (u_set) return 1; return 0; @@ -178,11 +178,11 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } @@ -191,17 +191,17 @@ static int _set_rec(int *start, int argc, char *argv[], if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) { i--; break; - } else if(!end && !strncasecmp(argv[i], "set", + } else if (!end && !strncasecmp(argv[i], "set", MAX(command_len, 3))) { continue; - } else if(!end + } else if (!end || !strncasecmp(argv[i], "Accounts", MAX(command_len, 1)) || !strncasecmp(argv[i], "Names", MAX(command_len, 1)) 
|| !strncasecmp(argv[i], "Acct", MAX(command_len, 4))) { - if(acct_list) + if (acct_list) slurm_addto_char_list(acct_list, argv[i]+end); else { exit_code=1; @@ -211,7 +211,7 @@ static int _set_rec(int *start, int argc, char *argv[], } } else if (!strncasecmp(argv[i], "Clusters", MAX(command_len, 1))) { - if(cluster_list) + if (cluster_list) slurm_addto_char_list(cluster_list, argv[i]+end); else { @@ -231,7 +231,7 @@ static int _set_rec(int *start, int argc, char *argv[], } else if (!strncasecmp (argv[i], "RawUsage", MAX(command_len, 7))) { uint32_t usage; - if(!assoc) + if (!assoc) continue; assoc->usage = xmalloc(sizeof( assoc_mgr_association_usage_t)); @@ -240,7 +240,7 @@ static int _set_rec(int *start, int argc, char *argv[], assoc->usage->usage_raw = usage; a_set = 1; } - } else if(!assoc || + } else if (!assoc || (assoc && !(a_set = sacctmgr_set_association_rec( assoc, argv[i], argv[i]+end, command_len, option)))) { @@ -253,11 +253,11 @@ static int _set_rec(int *start, int argc, char *argv[], (*start) = i; - if(u_set && a_set) + if (u_set && a_set) return 3; - else if(a_set) + else if (a_set) return 2; - else if(u_set) + else if (u_set) return 1; return 0; @@ -269,14 +269,14 @@ static int _isdefault_old(List acct_list) slurmdb_user_cond_t user_cond; List ret_list = NULL; - if(!acct_list || !list_count(acct_list)) + if (!acct_list || !list_count(acct_list)) return rc; memset(&user_cond, 0, sizeof(slurmdb_user_cond_t)); user_cond.def_acct_list = acct_list; ret_list = acct_storage_g_get_users(db_conn, my_uid, &user_cond); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { ListIterator itr = list_iterator_create(ret_list); slurmdb_user_rec_t *user = NULL; fprintf(stderr," Users listed below have these " @@ -289,7 +289,7 @@ static int _isdefault_old(List acct_list) rc = 1; } - if(ret_list) + if (ret_list) list_destroy(ret_list); return rc; @@ -387,10 +387,10 @@ extern int sacctmgr_add_account(int argc, char *argv[]) limit_set += 
_set_rec(&i, argc, argv, name_list, cluster_list, start_acct, start_assoc); } - if(exit_code) + if (exit_code) return SLURM_ERROR; - if(!name_list || !list_count(name_list)) { + if (!name_list || !list_count(name_list)) { list_destroy(name_list); list_destroy(cluster_list); slurmdb_destroy_association_rec(start_assoc); @@ -410,7 +410,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) db_conn, my_uid, &account_cond); } - if(!local_account_list) { + if (!local_account_list) { exit_code=1; fprintf(stderr, " Problem getting accounts from database. " "Contact your admin.\n"); @@ -421,14 +421,14 @@ extern int sacctmgr_add_account(int argc, char *argv[]) return SLURM_ERROR; } - if(!start_assoc->parent_acct) + if (!start_assoc->parent_acct) start_assoc->parent_acct = xstrdup("root"); - if(!cluster_list || !list_count(cluster_list)) { + if (!cluster_list || !list_count(cluster_list)) { slurmdb_cluster_rec_t *cluster_rec = NULL; List tmp_list = acct_storage_g_get_clusters(db_conn, my_uid, NULL); - if(!tmp_list) { + if (!tmp_list) { exit_code=1; fprintf(stderr, " Problem getting clusters from database. 
" @@ -441,7 +441,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) return SLURM_ERROR; } - if(!list_count(tmp_list)) { + if (!list_count(tmp_list)) { exit_code=1; fprintf(stderr, " Can't add accounts, no cluster " @@ -454,7 +454,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) list_destroy(local_account_list); return SLURM_ERROR; } - if(!cluster_list) + if (!cluster_list) list_create(slurm_destroy_char); else list_flush(cluster_list); @@ -482,10 +482,10 @@ extern int sacctmgr_add_account(int argc, char *argv[]) list_iterator_reset(itr); while((cluster_rec = list_next(itr))) { - if(!strcasecmp(cluster_rec->name, cluster)) + if (!strcasecmp(cluster_rec->name, cluster)) break; } - if(!cluster_rec) { + if (!cluster_rec) { exit_code=1; fprintf(stderr, " This cluster '%s' " "doesn't exist.\n" @@ -499,7 +499,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) list_iterator_destroy(itr_c); list_destroy(temp_list); - if(!list_count(cluster_list)) { + if (!list_count(cluster_list)) { slurmdb_destroy_association_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); list_destroy(local_account_list); @@ -524,7 +524,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) local_assoc_list = acct_storage_g_get_associations( db_conn, my_uid, &assoc_cond); list_destroy(assoc_cond.acct_list); - if(!local_assoc_list) { + if (!local_assoc_list) { exit_code=1; fprintf(stderr, " Problem getting associations from database. 
" "Contact your admin.\n"); @@ -538,7 +538,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) itr = list_iterator_create(name_list); while((name = list_next(itr))) { - if(!name[0]) { + if (!name[0]) { exit_code=1; fprintf(stderr, " No blank names are " "allowed when adding.\n"); @@ -547,21 +547,21 @@ extern int sacctmgr_add_account(int argc, char *argv[]) } acct = NULL; - if(!sacctmgr_find_account_from_list(local_account_list, name)) { + if (!sacctmgr_find_account_from_list(local_account_list, name)) { acct = xmalloc(sizeof(slurmdb_account_rec_t)); acct->assoc_list = list_create(slurmdb_destroy_association_rec); acct->name = xstrdup(name); - if(start_acct->description) + if (start_acct->description) acct->description = xstrdup(start_acct->description); else acct->description = xstrdup(name); - if(start_acct->organization) + if (start_acct->organization) acct->organization = xstrdup(start_acct->organization); - else if(strcmp(start_assoc->parent_acct, "root")) + else if (strcmp(start_assoc->parent_acct, "root")) acct->organization = xstrdup(start_assoc->parent_acct); else @@ -573,12 +573,12 @@ extern int sacctmgr_add_account(int argc, char *argv[]) itr_c = list_iterator_create(cluster_list); while((cluster = list_next(itr_c))) { - if(sacctmgr_find_account_base_assoc_from_list( + if (sacctmgr_find_account_base_assoc_from_list( local_assoc_list, name, cluster)) { //printf(" already have this assoc\n"); continue; } - if(!sacctmgr_find_account_base_assoc_from_list( + if (!sacctmgr_find_account_base_assoc_from_list( local_assoc_list, start_assoc->parent_acct, cluster)) { exit_code=1; @@ -600,6 +600,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) assoc->shares_raw = start_assoc->shares_raw; assoc->grp_cpu_mins = start_assoc->grp_cpu_mins; + assoc->grp_cpu_run_mins = start_assoc->grp_cpu_run_mins; assoc->grp_cpus = start_assoc->grp_cpus; assoc->grp_jobs = start_assoc->grp_jobs; assoc->grp_mem = start_assoc->grp_mem; @@ -616,7 +617,7 @@ extern int 
sacctmgr_add_account(int argc, char *argv[]) assoc->qos_list = copy_char_list(start_assoc->qos_list); - if(acct) + if (acct) list_append(acct->assoc_list, assoc); else list_append(assoc_list, assoc); @@ -634,25 +635,26 @@ extern int sacctmgr_add_account(int argc, char *argv[]) list_destroy(local_assoc_list); - if(!list_count(acct_list) && !list_count(assoc_list)) { + if (!list_count(acct_list) && !list_count(assoc_list)) { printf(" Nothing new added.\n"); + rc = SLURM_ERROR; goto end_it; - } else if(!assoc_str) { + } else if (!assoc_str) { exit_code=1; fprintf(stderr, " No associations created.\n"); goto end_it; } - if(acct_str) { + if (acct_str) { printf(" Adding Account(s)\n%s", acct_str); printf(" Settings\n"); - if(start_acct->description) + if (start_acct->description) printf(" Description = %s\n", start_acct->description); else printf(" Description = %s\n", "Account Name"); - if(start_acct->organization) + if (start_acct->organization) printf(" Organization = %s\n", start_acct->organization); else @@ -662,23 +664,23 @@ extern int sacctmgr_add_account(int argc, char *argv[]) xfree(acct_str); } - if(assoc_str) { + if (assoc_str) { printf(" Associations\n%s", assoc_str); xfree(assoc_str); } - if(limit_set) { + if (limit_set) { printf(" Settings\n"); sacctmgr_print_assoc_limits(start_assoc); } notice_thread_init(); - if(list_count(acct_list)) + if (list_count(acct_list)) rc = acct_storage_g_add_accounts(db_conn, my_uid, acct_list); - if(rc == SLURM_SUCCESS) { - if(list_count(assoc_list)) + if (rc == SLURM_SUCCESS) { + if (list_count(assoc_list)) rc = acct_storage_g_add_associations(db_conn, my_uid, assoc_list); } else { @@ -691,8 +693,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) } notice_thread_fini(); - if(rc == SLURM_SUCCESS) { - if(commit_check("Would you like to commit changes?")) { + if (rc == SLURM_SUCCESS) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); 
@@ -747,26 +749,26 @@ extern int sacctmgr_list_account(int argc, char *argv[]) cond_set |= prev_set; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_account_cond(acct_cond); list_destroy(format_list); return SLURM_ERROR; - } else if(!list_count(format_list)) { + } else if (!list_count(format_list)) { slurm_addto_char_list(format_list, "Acc,Des,O"); - if(acct_cond->with_assocs) + if (acct_cond->with_assocs) slurm_addto_char_list(format_list, "Cl,ParentN,U,Share,GrpJ,GrpN," "GrpCPUs,GrpMEM,GrpS,GrpWall,GrpCPUMins," "MaxJ,MaxN,MaxCPUs,MaxS,MaxW," "MaxCPUMins,QOS,DefaultQOS"); - if(acct_cond->with_coords) + if (acct_cond->with_coords) slurm_addto_char_list(format_list, "Coord"); } - if(!acct_cond->with_assocs && cond_set > 1) { - if(!commit_check("You requested options that are only vaild " + if (!acct_cond->with_assocs && cond_set > 1) { + if (!commit_check("You requested options that are only vaild " "when querying with the withassoc option.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); @@ -779,7 +781,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); - if(exit_code) { + if (exit_code) { slurmdb_destroy_account_cond(acct_cond); list_destroy(print_fields_list); return SLURM_ERROR; @@ -788,7 +790,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) acct_list = acct_storage_g_get_accounts(db_conn, my_uid, acct_cond); slurmdb_destroy_account_cond(acct_cond); - if(!acct_list) { + if (!acct_list) { exit_code=1; fprintf(stderr, " Problem with query.\n"); list_destroy(print_fields_list); @@ -802,7 +804,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field_count = list_count(print_fields_list); while((acct = list_next(itr))) { - if(acct->assoc_list) { + if (acct->assoc_list) { ListIterator itr3 = list_iterator_create(acct->assoc_list); while((assoc = list_next(itr3))) { @@ -937,20 +939,20 @@ extern int 
sacctmgr_modify_account(int argc, char *argv[]) } } - if(exit_code) { + if (exit_code) { slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_association_rec(assoc); return SLURM_ERROR; - } else if(!rec_set) { + } else if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_association_rec(assoc); return SLURM_ERROR; - } else if(!cond_set) { - if(!commit_check("You didn't set any conditions with 'WHERE'.\n" + } else if (!cond_set) { + if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); slurmdb_destroy_account_cond(acct_cond); @@ -975,8 +977,8 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) } notice_thread_init(); - if(rec_set & 1) { // process the account changes - if(cond_set == 2) { + if (rec_set & 1) { // process the account changes + if (cond_set == 2) { exit_code=1; fprintf(stderr, " There was a problem with your " @@ -986,7 +988,7 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) } ret_list = acct_storage_g_modify_accounts( db_conn, my_uid, acct_cond, acct); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified accounts...\n"); @@ -995,7 +997,7 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) } list_iterator_destroy(itr); set = 1; - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { @@ -1006,13 +1008,13 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); } assoc_start: - if(rec_set == 3 || rec_set == 2) { // process the association changes - if(cond_set == 1 && !acct_cond->assoc_cond->acct_list) { + if (rec_set == 3 || rec_set == 2) { 
// process the association changes + if (cond_set == 1 && !acct_cond->assoc_cond->acct_list) { rc = SLURM_ERROR; exit_code=1; fprintf(stderr, @@ -1021,10 +1023,10 @@ assoc_start: goto assoc_end; } - if(assoc->parent_acct) { + if (assoc->parent_acct) { slurmdb_account_rec_t *acct_rec = sacctmgr_find_account(assoc->parent_acct); - if(!acct_rec) { + if (!acct_rec) { exit_code=1; fprintf(stderr, " Parent Account %s doesn't exist.\n", @@ -1037,7 +1039,7 @@ assoc_start: ret_list = acct_storage_g_modify_associations( db_conn, my_uid, acct_cond->assoc_cond, assoc); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { set = 1; if (assoc->def_qos_id != NO_VAL) set = sacctmgr_check_default_qos( @@ -1058,8 +1060,9 @@ assoc_start: list_iterator_destroy(itr); set = 1; } - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing modified\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -1068,15 +1071,15 @@ assoc_start: rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); } assoc_end: notice_thread_fini(); - if(set) { - if(commit_check("Would you like to commit changes?")) + if (set) { + if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); @@ -1109,7 +1112,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) cond_set |= prev_set; } - if(!cond_set) { + if (!cond_set) { exit_code=1; fprintf(stderr, " No conditions given to remove, not executing.\n"); @@ -1117,24 +1120,24 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) return SLURM_ERROR; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_account_cond(acct_cond); return SLURM_ERROR; } /* check to see if person is trying to remove root account. This is * bad, and should not be allowed outside of deleting a cluster. 
*/ - if(acct_cond->assoc_cond + if (acct_cond->assoc_cond && acct_cond->assoc_cond->acct_list && list_count(acct_cond->assoc_cond->acct_list)) { char *tmp_char = NULL; itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, "root")) + if (!strcasecmp(tmp_char, "root")) break; } list_iterator_destroy(itr); - if(tmp_char) { + if (tmp_char) { exit_code=1; fprintf(stderr, " You are not allowed to remove " "the root account.\n" @@ -1150,10 +1153,10 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) acct_cond->assoc_cond->only_defs = 0; notice_thread_init(); - if(cond_set == 1) { + if (cond_set == 1) { ret_list = acct_storage_g_remove_accounts( db_conn, my_uid, acct_cond); - } else if(cond_set & 2) { + } else if (cond_set & 2) { ret_list = acct_storage_g_remove_associations( db_conn, my_uid, acct_cond->assoc_cond); } @@ -1161,7 +1164,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) notice_thread_fini(); slurmdb_destroy_account_cond(acct_cond); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = NULL; @@ -1170,7 +1173,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) * output from acct_storage_g_remove_accounts, and * with a previously got assoc_list. */ - if(_isdefault(cond_set, ret_list, local_assoc_list)) { + if (_isdefault(cond_set, ret_list, local_assoc_list)) { exit_code=1; fprintf(stderr, " Please either remove the " "accounts listed " @@ -1185,7 +1188,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) /* If there were jobs running with an association to be deleted, don't. 
*/ - if(rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { + if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { fprintf(stderr, " Error with request: %s\n", slurm_strerror(rc)); while((object = list_next(itr))) { @@ -1195,23 +1198,24 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) goto end_it; } - if(cond_set == 1) { + if (cond_set == 1) { printf(" Deleting accounts...\n"); - } else if(cond_set & 2) { + } else if (cond_set & 2) { printf(" Deleting account associations...\n"); } while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); - if(commit_check("Would you like to commit changes?")) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing deleted\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -1222,7 +1226,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) end_it: - if(ret_list) + if (ret_list) list_destroy(ret_list); if (local_assoc_list) list_destroy(local_assoc_list); diff --git a/src/sacctmgr/archive_functions.c b/src/sacctmgr/archive_functions.c index dacdb7039d62303d05040be5fb948661ce8f8e85..5a8757d3aaecdfdcb251c97646f3aa9598a57441 100644 --- a/src/sacctmgr/archive_functions.c +++ b/src/sacctmgr/archive_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -66,13 +66,13 @@ extern int _addto_uid_char_list(List char_list, char *names) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -81,23 +81,23 @@ extern int _addto_uid_char_list(List char_list, char *names) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); //info("got %s %d", name, i-start); name = _string_to_uid( name ); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -106,7 +106,7 @@ extern int _addto_uid_char_list(List char_list, char *names) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -115,17 +115,17 @@ extern int _addto_uid_char_list(List char_list, char *names) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); name = _string_to_uid( name ); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -146,17 +146,17 @@ static int _set_cond(int *start, int argc, char *argv[], uint32_t tmp; slurmdb_job_cond_t *job_cond = NULL; - if(!arch_cond) { + if (!arch_cond) { error("No arch_cond given"); return -1; } - if(!arch_cond->job_cond) + if (!arch_cond->job_cond) arch_cond->job_cond = xmalloc(sizeof(slurmdb_job_cond_t)); job_cond = arch_cond->job_cond; for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -165,29 +165,33 @@ static int _set_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "where", + if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end && !strncasecmp(argv[i], "events", + } else if (!end && !strncasecmp(argv[i], "events", MAX(command_len, 1))) { arch_cond->purge_event |= SLURMDB_PURGE_ARCHIVE; set = 1; - } else if(!end && !strncasecmp(argv[i], "jobs", + } else if (!end && !strncasecmp(argv[i], "jobs", MAX(command_len, 1))) { arch_cond->purge_job |= SLURMDB_PURGE_ARCHIVE; set = 1; - } else if(!end && !strncasecmp(argv[i], "steps", + } else if (!end && !strncasecmp(argv[i], "reservations", + MAX(command_len, 1))) { + arch_cond->purge_resv |= SLURMDB_PURGE_ARCHIVE; + set = 1; + } else if (!end && !strncasecmp(argv[i], "steps", MAX(command_len, 1))) { arch_cond->purge_step |= SLURMDB_PURGE_ARCHIVE; set = 1; - } else if(!end && !strncasecmp(argv[i], "suspend", + } else if (!end && !strncasecmp(argv[i], "suspend", 
MAX(command_len, 1))) { arch_cond->purge_suspend |= SLURMDB_PURGE_ARCHIVE; set = 1; - } else if(!end + } else if (!end || !strncasecmp(argv[i], "Clusters", MAX(command_len, 1))) { - if(!job_cond->cluster_list) + if (!job_cond->cluster_list) job_cond->cluster_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->cluster_list, @@ -195,7 +199,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Accounts", MAX(command_len, 2))) { - if(!job_cond->acct_list) + if (!job_cond->acct_list) job_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->acct_list, @@ -203,7 +207,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Associations", MAX(command_len, 2))) { - if(!job_cond->associd_list) + if (!job_cond->associd_list) job_cond->associd_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->associd_list, @@ -218,7 +222,7 @@ static int _set_cond(int *start, int argc, char *argv[], job_cond->usage_end = parse_time(argv[i]+end, 1); set = 1; } else if (!strncasecmp (argv[i], "Gid", MAX(command_len, 2))) { - if(!job_cond->groupid_list) + if (!job_cond->groupid_list) job_cond->groupid_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->groupid_list, @@ -229,7 +233,7 @@ static int _set_cond(int *start, int argc, char *argv[], char *end_char = NULL, *start_char = argv[i]+end; slurmdb_selected_step_t *selected_step = NULL; char *dot = NULL; - if(!job_cond->step_list) + if (!job_cond->step_list) job_cond->step_list = list_create(slurm_destroy_char); @@ -238,7 +242,7 @@ static int _set_cond(int *start, int argc, char *argv[], *end_char = 0; while (isspace(*start_char)) start_char++; /* discard whitespace */ - if(!(int)*start_char) + if (!(int)*start_char) continue; selected_step = xmalloc( sizeof(slurmdb_selected_step_t)); @@ -259,7 +263,7 @@ static int _set_cond(int *start, int argc, char 
*argv[], set = 1; } else if (!strncasecmp (argv[i], "Partitions", MAX(command_len, 2))) { - if(!job_cond->partition_list) + if (!job_cond->partition_list) job_cond->partition_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->partition_list, @@ -283,6 +287,15 @@ static int _set_cond(int *start, int argc, char *argv[], arch_cond->purge_job |= tmp; set = 1; } + } else if (!strncasecmp (argv[i], "PurgeResvAfter", + MAX(command_len, 10))) { + if ((tmp = slurmdb_parse_purge(argv[i]+end)) + == NO_VAL) { + exit_code = 1; + } else { + arch_cond->purge_resv |= tmp; + set = 1; + } } else if (!strncasecmp (argv[i], "PurgeStepAfter", MAX(command_len, 10))) { if ((tmp = slurmdb_parse_purge(argv[i]+end)) @@ -321,6 +334,16 @@ static int _set_cond(int *start, int argc, char *argv[], arch_cond->purge_job |= SLURMDB_PURGE_MONTHS; set = 1; } + } else if (!strncasecmp (argv[i], "PurgeResvMonths", + MAX(command_len, 6))) { + if (get_uint(argv[i]+end, &tmp, "PurgeResvMonths") + != SLURM_SUCCESS) { + exit_code = 1; + } else { + arch_cond->purge_resv |= tmp; + arch_cond->purge_resv |= SLURMDB_PURGE_MONTHS; + set = 1; + } } else if (!strncasecmp (argv[i], "PurgeStepMonths", MAX(command_len, 7))) { if (get_uint(argv[i]+end, &tmp, "PurgeStepMonths") @@ -353,7 +376,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Users", MAX(command_len, 1))) { - if(!job_cond->userid_list) + if (!job_cond->userid_list) job_cond->userid_list = list_create(slurm_destroy_char); _addto_uid_char_list(job_cond->userid_list, @@ -386,22 +409,24 @@ extern int sacctmgr_archive_dump(int argc, char *argv[]) _set_cond(&i, argc, argv, arch_cond); } - if(!arch_cond->purge_event) + if (!arch_cond->purge_event) arch_cond->purge_event = NO_VAL; - if(!arch_cond->purge_job) + if (!arch_cond->purge_job) arch_cond->purge_job = NO_VAL; - if(!arch_cond->purge_step) + if (!arch_cond->purge_resv) + arch_cond->purge_resv = NO_VAL; + if 
(!arch_cond->purge_step) arch_cond->purge_step = NO_VAL; - if(!arch_cond->purge_suspend) + if (!arch_cond->purge_suspend) arch_cond->purge_suspend = NO_VAL; - if(exit_code) { + if (exit_code) { slurmdb_destroy_archive_cond(arch_cond); return SLURM_ERROR; } if (arch_cond->archive_dir) { - if(stat(arch_cond->archive_dir, &st) < 0) { + if (stat(arch_cond->archive_dir, &st) < 0) { exit_code = errno; fprintf(stderr, " dump: Failed to stat %s: %m\n " "Note: For archive dump, " @@ -429,7 +454,7 @@ extern int sacctmgr_archive_dump(int argc, char *argv[]) } if (arch_cond->archive_script) { - if(stat(arch_cond->archive_script, &st) < 0) { + if (stat(arch_cond->archive_script, &st) < 0) { exit_code = errno; fprintf(stderr, " dump: Failed to stat %s: %m\n " "Note: For archive dump, the script must be on " @@ -455,8 +480,8 @@ extern int sacctmgr_archive_dump(int argc, char *argv[]) } rc = jobacct_storage_g_archive(db_conn, arch_cond); - if(rc == SLURM_SUCCESS) { - if(commit_check("Would you like to commit changes?")) { + if (rc == SLURM_SUCCESS) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); @@ -483,7 +508,7 @@ extern int sacctmgr_archive_load(int argc, char *argv[]) for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -492,7 +517,7 @@ extern int sacctmgr_archive_load(int argc, char *argv[]) } } - if(!end + if (!end || !strncasecmp (argv[i], "File", MAX(command_len, 1))) { arch_rec->archive_file = strip_quotes(argv[i]+end, NULL, 0); @@ -505,7 +530,7 @@ extern int sacctmgr_archive_load(int argc, char *argv[]) } } - if(exit_code) { + if (exit_code) { slurmdb_destroy_archive_rec(arch_rec); return SLURM_ERROR; } @@ -524,7 +549,7 @@ extern int sacctmgr_archive_load(int argc, char *argv[]) arch_rec->archive_file = fullpath; } - if(stat(arch_rec->archive_file, &st) < 0) { + if (stat(arch_rec->archive_file, 
&st) < 0) { exit_code = errno; fprintf(stderr, " load: Failed to stat %s: %m\n " "Note: For archive load, the file must be on " @@ -535,8 +560,8 @@ extern int sacctmgr_archive_load(int argc, char *argv[]) } rc = jobacct_storage_g_archive_load(db_conn, arch_rec); - if(rc == SLURM_SUCCESS) { - if(commit_check("Would you like to commit changes?")) { + if (rc == SLURM_SUCCESS) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); diff --git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c index b5e9600bbc915cca6d121752ac533a3f4688607c..69eada523b7691314a96cd6ce692734683e51b06 100644 --- a/src/sacctmgr/association_functions.c +++ b/src/sacctmgr/association_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -119,7 +119,7 @@ static int _set_cond(int *start, int argc, char *argv[], if (format_list) slurm_addto_char_list(format_list, argv[i]+end); - } else if(!(set = sacctmgr_set_association_cond( + } else if (!(set = sacctmgr_set_association_cond( assoc_cond, argv[i], argv[i]+end, command_len, option)) || exit_code) { exit_code = 1; @@ -406,14 +406,14 @@ extern int sacctmgr_set_association_cond(slurmdb_association_cond_t *assoc_cond, if (slurm_addto_char_list(assoc_cond->parent_acct_list, value)) set = 1; } else if (!strncasecmp (type, "QosLevel", MAX(command_len, 1))) { - if(!assoc_cond->qos_list) + if (!assoc_cond->qos_list) assoc_cond->qos_list = list_create(slurm_destroy_char); - if(!g_qos_list) + if (!g_qos_list) g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, NULL); - if(slurmdb_addto_qos_char_list(assoc_cond->qos_list, g_qos_list, + if (slurmdb_addto_qos_char_list(assoc_cond->qos_list, g_qos_list, value, option)) set = 1; } else if (!strncasecmp (type, "Users", MAX(command_len, 1))) { @@ -437,17 +437,17 @@ extern int sacctmgr_set_association_rec(slurmdb_association_rec_t *assoc, return set; if (!strncasecmp (type, "DefaultQOS", MAX(command_len, 8))) { - if(!g_qos_list) + if (!g_qos_list) g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, NULL); - if(atoi(value) == -1) + if (atoi(value) == -1) assoc->def_qos_id = -1; else assoc->def_qos_id = str_2_slurmdb_qos( g_qos_list, value); - if(assoc->def_qos_id == NO_VAL) { + if (assoc->def_qos_id == NO_VAL) { fprintf(stderr, "You gave a bad default qos '%s'. 
" "Use 'list qos' to get " @@ -569,7 +569,7 @@ extern void sacctmgr_print_association_rec(slurmdb_association_rec_t *assoc, xassert(field); - if(!assoc) { + if (!assoc) { field->print_routine(field, NULL, last); return; } diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index 26ded4d08c9d740657fd6d3833704bd7d7d3ba3b..4016cb44b2f5adaca0b8231bf35f64d6fce3b137 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -59,7 +59,7 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -71,7 +71,7 @@ static int _set_cond(int *start, int argc, char *argv[], if (!strncasecmp(argv[i], "Set", MAX(command_len, 3))) { i--; break; - } else if(!end && !strncasecmp(argv[i], "where", + } else if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; } else if (!end && @@ -81,21 +81,21 @@ static int _set_cond(int *start, int argc, char *argv[], } else if (!end && !strncasecmp(argv[i], "WOLimits", MAX(command_len, 3))) { without_limits = 1; - } else if(!end || !strncasecmp(argv[i], "Names", + } else if (!end || !strncasecmp(argv[i], "Names", MAX(command_len, 1)) || !strncasecmp(argv[i], "Clusters", MAX(command_len, 3))) { - if(!cluster_cond->cluster_list) + if (!cluster_cond->cluster_list) cluster_cond->cluster_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(cluster_cond->cluster_list, + if (slurm_addto_char_list(cluster_cond->cluster_list, argv[i]+end)) a_set = 1; } else if 
(!strncasecmp(argv[i], "Classification", MAX(command_len, 3))) { cluster_cond->classification = str_2_classification(argv[i]+end); - if(cluster_cond->classification) + if (cluster_cond->classification) c_set = 1; } else if (!strncasecmp(argv[i], "flags", MAX(command_len, 2))) { @@ -104,23 +104,23 @@ static int _set_cond(int *start, int argc, char *argv[], c_set = 1; } else if (!strncasecmp(argv[i], "Format", MAX(command_len, 2))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); - } else if(!end || !strncasecmp(argv[i], "PluginIDSelect", + } else if (!end || !strncasecmp(argv[i], "PluginIDSelect", MAX(command_len, 1))) { - if(!cluster_cond->plugin_id_select_list) + if (!cluster_cond->plugin_id_select_list) cluster_cond->plugin_id_select_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list( + if (slurm_addto_char_list( cluster_cond->plugin_id_select_list, argv[i]+end)) c_set = 1; - } else if(!end || !strncasecmp(argv[i], "RPCVersions", + } else if (!end || !strncasecmp(argv[i], "RPCVersions", MAX(command_len, 1))) { - if(!cluster_cond->rpc_version_list) + if (!cluster_cond->rpc_version_list) cluster_cond->rpc_version_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(cluster_cond->rpc_version_list, + if (slurm_addto_char_list(cluster_cond->rpc_version_list, argv[i]+end)) c_set = 1; } else { @@ -133,11 +133,11 @@ static int _set_cond(int *start, int argc, char *argv[], } (*start) = i; - if(c_set && a_set) + if (c_set && a_set) return 3; - else if(a_set) { + else if (a_set) { return 2; - } else if(c_set) + } else if (c_set) return 1; return 0; } @@ -155,11 +155,11 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } @@ -168,23 +168,23 @@ static int _set_rec(int 
*start, int argc, char *argv[], if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) { i--; break; - } else if(!end && !strncasecmp(argv[i], "set", + } else if (!end && !strncasecmp(argv[i], "set", MAX(command_len, 3))) { continue; - } else if(!end + } else if (!end || !strncasecmp(argv[i], "Names", MAX(command_len, 1)) || !strncasecmp(argv[i], "Clusters", MAX(command_len, 3))) { - if(name_list) + if (name_list) slurm_addto_char_list(name_list, argv[i]+end); } else if (!strncasecmp(argv[i], "Classification", MAX(command_len, 3))) { - if(classification) { + if (classification) { *classification = str_2_classification(argv[i]+end); - if(*classification) + if (*classification) set = 1; } } else if (!strncasecmp(argv[i], "GrpCPUMins", @@ -198,7 +198,7 @@ static int _set_rec(int *start, int argc, char *argv[], exit_code=1; fprintf(stderr, "GrpWall is not a valid option " "for the root association of a cluster.\n"); - } else if(!assoc || + } else if (!assoc || (assoc && !(set = sacctmgr_set_association_rec( assoc, argv[i], argv[i]+end, command_len, option)))) { @@ -239,10 +239,10 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) limit_set += _set_rec(&i, argc, argv, name_list, &start_assoc, &class); } - if(exit_code) { + if (exit_code) { list_destroy(name_list); return SLURM_ERROR; - } else if(!list_count(name_list)) { + } else if (!list_count(name_list)) { list_destroy(name_list); exit_code=1; fprintf(stderr, " Need name of cluster to add.\n"); @@ -257,7 +257,7 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) temp_list = acct_storage_g_get_clusters(db_conn, my_uid, &cluster_cond); - if(!temp_list) { + if (!temp_list) { exit_code=1; fprintf(stderr, " Problem getting clusters from database. 
" @@ -272,10 +272,10 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) list_iterator_reset(itr); while((cluster_rec = list_next(itr))) { - if(!strcasecmp(cluster_rec->name, name)) + if (!strcasecmp(cluster_rec->name, name)) break; } - if(cluster_rec) { + if (cluster_rec) { printf(" This cluster %s already exists. " "Not adding.\n", name); list_delete_item(itr_c); @@ -284,7 +284,7 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) list_iterator_destroy(itr); list_iterator_destroy(itr_c); list_destroy(temp_list); - if(!list_count(name_list)) { + if (!list_count(name_list)) { list_destroy(name_list); return SLURM_ERROR; } @@ -294,7 +294,7 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) cluster_list = list_create(slurmdb_destroy_cluster_rec); itr = list_iterator_create(name_list); while((name = list_next(itr))) { - if(!name[0]) { + if (!name[0]) { exit_code=1; fprintf(stderr, " No blank names are " "allowed when adding.\n"); @@ -312,7 +312,7 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) xmalloc(sizeof(slurmdb_association_rec_t)); slurmdb_init_association_rec(cluster->root_assoc, 0); printf(" Name = %s\n", cluster->name); - if(cluster->classification) + if (cluster->classification) printf(" Classification= %s\n", get_classification_str(cluster->classification)); @@ -341,15 +341,16 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(name_list); - if(limit_set) { + if (limit_set) { printf(" Default Limits\n"); sacctmgr_print_assoc_limits(&start_assoc); - if(start_assoc.qos_list) + if (start_assoc.qos_list) list_destroy(start_assoc.qos_list); } - if(!list_count(cluster_list)) { + if (!list_count(cluster_list)) { printf(" Nothing new added.\n"); + rc = SLURM_ERROR; goto end_it; } @@ -357,11 +358,11 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) rolled back. So we ask before hand if they are serious about it so we can rollback if needed. 
*/ - if(commit_check("Would you like to commit changes?")) { + if (commit_check("Would you like to commit changes?")) { notice_thread_init(); rc = acct_storage_g_add_clusters(db_conn, my_uid, cluster_list); notice_thread_fini(); - if(rc == SLURM_SUCCESS) { + if (rc == SLURM_SUCCESS) { acct_storage_g_commit(db_conn, 1); } else { exit_code=1; @@ -411,16 +412,16 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) _set_cond(&i, argc, argv, cluster_cond, format_list); } - if(exit_code) { + if (exit_code) { slurmdb_destroy_cluster_cond(cluster_cond); list_destroy(format_list); return SLURM_ERROR; } - if(!list_count(format_list)) { + if (!list_count(format_list)) { slurm_addto_char_list(format_list, "Cl,Controlh,Controlp,RPC"); - if(!without_limits) + if (!without_limits) slurm_addto_char_list(format_list, "Fa,GrpJ,GrpN,GrpS,MaxJ,MaxN," "MaxS,MaxW,QOS,DefaultQOS"); @@ -431,7 +432,7 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); - if(exit_code) { + if (exit_code) { slurmdb_destroy_cluster_cond(cluster_cond); list_destroy(print_fields_list); return SLURM_ERROR; @@ -441,7 +442,7 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) cluster_cond); slurmdb_destroy_cluster_cond(cluster_cond); - if(!cluster_list) { + if (!cluster_list) { exit_code=1; fprintf(stderr, " Problem with query.\n"); list_destroy(print_fields_list); @@ -496,7 +497,7 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) break; } case PRINT_DQOS: - if(!g_qos_list) { + if (!g_qos_list) { g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, @@ -588,7 +589,7 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) { hostlist_t hl = hostlist_create(cluster->nodes); int cnt = 0; - if(hl) { + if (hl) { cnt = hostlist_count(hl); hostlist_destroy(hl); } @@ -605,7 +606,7 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_QOS: - 
if(!g_qos_list) + if (!g_qos_list) g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, NULL); @@ -693,36 +694,36 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } } - if(!rec_set) { + if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); rc = SLURM_ERROR; goto end_it; - } else if(!cond_set) { - if(!commit_check("You didn't set any conditions with 'WHERE'.\n" + } else if (!cond_set) { + if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); rc = SLURM_SUCCESS; goto end_it; } - } else if(exit_code) { + } else if (exit_code) { rc = SLURM_ERROR; goto end_it; } - if(cond_set & 1) { + if (cond_set & 1) { List temp_list = NULL; temp_list = acct_storage_g_get_clusters(db_conn, my_uid, &cluster_cond); - if(!temp_list) { + if (!temp_list) { exit_code=1; fprintf(stderr, " Problem getting clusters from database. " "Contact your admin.\n"); rc = SLURM_ERROR; goto end_it; - } else if(!list_count(temp_list)) { + } else if (!list_count(temp_list)) { fprintf(stderr, " Query didn't return any clusters.\n"); rc = SLURM_ERROR; @@ -731,16 +732,16 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) /* we are only looking for the clusters returned from this query, so we free the cluster_list and replace it */ - if(assoc_cond->cluster_list) + if (assoc_cond->cluster_list) list_destroy(assoc_cond->cluster_list); assoc_cond->cluster_list = temp_list; } printf(" Setting\n"); - if(rec_set) { + if (rec_set) { printf(" Default Limits =\n"); sacctmgr_print_assoc_limits(assoc); - if(class_rec) + if (class_rec) printf(" Cluster Classification = %s\n", get_classification_str(class_rec)); } @@ -750,7 +751,7 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) ret_list = acct_storage_g_modify_associations( db_conn, my_uid, assoc_cond, assoc); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr 
= list_iterator_create(ret_list); printf(" Modified cluster defaults for associations...\n"); @@ -759,8 +760,9 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } list_iterator_destroy(itr); set = 1; - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing modified\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -768,10 +770,10 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); - if(class_rec) { + if (class_rec) { slurmdb_cluster_rec_t cluster_rec; slurmdb_init_cluster_rec(&cluster_rec, 0); @@ -782,7 +784,7 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) ret_list = acct_storage_g_modify_clusters( db_conn, my_uid, &cluster_cond, &cluster_rec); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified cluster classifications...\n"); @@ -791,8 +793,9 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } list_iterator_destroy(itr); set = 1; - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing modified\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -800,14 +803,14 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); } notice_thread_fini(); - if(set) { - if(commit_check("Would you like to commit changes?")) + if (set) { + if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); @@ -842,10 +845,10 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) cond_set |= prev_set; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_cluster_cond(cluster_cond); return SLURM_ERROR; - } else if(!cond_set) { + } else if (!cond_set) { exit_code=1; fprintf(stderr, " No 
conditions given to remove, not executing.\n"); @@ -853,7 +856,7 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) return SLURM_ERROR; } - if(!list_count(cluster_cond->cluster_list) + if (!list_count(cluster_cond->cluster_list) && !cluster_cond->classification) { exit_code=1; fprintf(stderr, @@ -870,13 +873,13 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) slurmdb_destroy_cluster_cond(cluster_cond); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); /* If there were jobs running with an association to be deleted, don't. */ - if(rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { + if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { fprintf(stderr, " Error with request: %s\n", slurm_strerror(rc)); while((object = list_next(itr))) { @@ -891,14 +894,15 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) printf(" %s\n", object); } list_iterator_destroy(itr); - if(commit_check("Would you like to commit changes?")) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing deleted\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -906,7 +910,7 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); return rc; diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c index 20bf18daffa6da98f971e6007fdfb1573be6d40e..ac487f022e6ad646ec1969554ec5993499d55ecc 100644 --- a/src/sacctmgr/common.c +++ b/src/sacctmgr/common.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -155,7 +155,7 @@ static print_field_t *_get_print_field(char *object) char *tmp_char = NULL; int command_len, field_len = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { field_len = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -248,22 +248,22 @@ static print_field_t *_get_print_field(char *object) field->name = xstrdup("Descr"); field->len = 20; field->print_routine = print_fields_str; - } else if(!strncasecmp("Duration", object, MAX(command_len, 2))) { + } else if (!strncasecmp("Duration", object, MAX(command_len, 2))) { field->type = PRINT_DURATION; field->name = xstrdup("Duration"); field->len = 13; field->print_routine = print_fields_time_from_secs; - } else if(!strncasecmp("End", object, MAX(command_len, 2))) { + } else if (!strncasecmp("End", object, MAX(command_len, 2))) { field->type = PRINT_END; field->name = xstrdup("End"); field->len = 19; field->print_routine = print_fields_date; - } else if(!strncasecmp("EventRaw", object, MAX(command_len, 6))) { + } else if (!strncasecmp("EventRaw", object, MAX(command_len, 6))) { field->type = PRINT_EVENTRAW; field->name = xstrdup("EventRaw"); field->len = 8; field->print_routine = print_fields_uint; - } else if(!strncasecmp("Event", object, MAX(command_len, 2))) { + } else if (!strncasecmp("Event", object, MAX(command_len, 2))) { field->type = PRINT_EVENT; field->name = xstrdup("Event"); field->len = 7; @@ -617,7 +617,7 @@ extern int sacctmgr_remove_assoc_usage(slurmdb_association_cond_t *assoc_cond) } } - if(!commit_check("Would you like to reset usage?")) { + if (!commit_check("Would you like to reset usage?")) { printf(" Changes Discarded\n"); return rc; } @@ -1263,7 +1263,7 @@ extern void sacctmgr_print_coord_list( printf("%s", print_this); else if (print_fields_parsable_print) printf("%s|", print_this); - else { + else if (print_this) { if 
(strlen(print_this) > abs_len) print_this[abs_len-1] = '+'; @@ -1573,7 +1573,7 @@ extern List sacctmgr_process_format_list(List format_list) char *object = NULL; while((object = list_next(itr))) { - if(!(field = _get_print_field(object))) + if (!(field = _get_print_field(object))) exit(1); list_append(print_fields_list, field); diff --git a/src/sacctmgr/config_functions.c b/src/sacctmgr/config_functions.c index 163d321d1c117d79079815c5c727950f5942a162..cc6374aa9c06a0951e90d72dc130aff990f2a3c0 100644 --- a/src/sacctmgr/config_functions.c +++ b/src/sacctmgr/config_functions.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sacctmgr/event_functions.c b/src/sacctmgr/event_functions.c index 67f371e8ee21f692224c838fcc5e54be808082a9..9066ef644b8ac096a42d48138ff804924e438867 100644 --- a/src/sacctmgr/event_functions.c +++ b/src/sacctmgr/event_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -89,13 +89,13 @@ static int _addto_state_char_list(List char_list, char *names) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -104,12 +104,12 @@ static int _addto_state_char_list(List char_list, char *names) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); c = _decode_node_state(name); @@ -120,11 +120,11 @@ static int _addto_state_char_list(List char_list, char *names) name = xstrdup_printf("%u", c); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -133,7 +133,7 @@ static int _addto_state_char_list(List char_list, char *names) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -142,7 +142,7 @@ static int _addto_state_char_list(List char_list, char *names) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); c = _decode_node_state(name); @@ -152,11 +152,11 @@ static int _addto_state_char_list(List char_list, char *names) name = xstrdup_printf("%u", c); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -199,13 +199,13 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -214,23 +214,23 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); //info("got %s %d", name, i-start); name = _convert_to_id( name, gid ); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -239,7 +239,7 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -248,17 +248,17 @@ static int _addto_id_char_list(List char_list, char *names, bool gid) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); name = _convert_to_id(name, gid); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -279,11 +279,11 @@ static int _set_cond(int *start, int argc, char *argv[], int local_cluster_flag = 0; int all_time_flag = 0; - if(!event_cond->cluster_list) + if (!event_cond->cluster_list) event_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -292,39 +292,39 @@ static int _set_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && !strncasecmp(argv[i], "all_clusters", MAX(command_len, 5))) { local_cluster_flag = 1; - } else if(!end && !strncasecmp(argv[i], "all_time", + } else if (!end && !strncasecmp(argv[i], "all_time", MAX(command_len, 5))) { all_time_flag = 1; - } else if(!end && !strncasecmp(argv[i], "where", + } else if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end || (!strncasecmp (argv[i], "Events", + } else if (!end || (!strncasecmp (argv[i], "Events", MAX(command_len, 1)))) { ListIterator itr = NULL; List tmp_list = list_create(slurm_destroy_char); char *temp = NULL; - if(slurm_addto_char_list(tmp_list, + if (slurm_addto_char_list(tmp_list, argv[i]+end)) set = 1; /* check to make sure user gave ints here */ itr = list_iterator_create(tmp_list); while ((temp = list_next(itr))) { - if(!strncasecmp("Node", temp, + if (!strncasecmp("Node", temp, MAX(strlen(temp), 1))) { - if(event_cond->event_type) + if 
(event_cond->event_type) event_cond->event_type = SLURMDB_EVENT_ALL; else event_cond->event_type = SLURMDB_EVENT_NODE; - } else if(!strncasecmp("Cluster", temp, + } else if (!strncasecmp("Cluster", temp, MAX(strlen(temp), 1))) { - if(event_cond->event_type) + if (event_cond->event_type) event_cond->event_type = SLURMDB_EVENT_ALL; else @@ -344,10 +344,10 @@ static int _set_cond(int *start, int argc, char *argv[], list_destroy(tmp_list); } else if (!strncasecmp (argv[i], "Clusters", MAX(command_len, 1))) { - if(!event_cond->cluster_list) + if (!event_cond->cluster_list) event_cond->cluster_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(event_cond->cluster_list, + if (slurm_addto_char_list(event_cond->cluster_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "End", MAX(command_len, 1))) { @@ -355,7 +355,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "MinCpus", MAX(command_len, 2))) { @@ -369,18 +369,18 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Nodes", MAX(command_len, 1))) { - if(!event_cond->node_list) + if (!event_cond->node_list) event_cond->node_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(event_cond->node_list, + if (slurm_addto_char_list(event_cond->node_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Reason", MAX(command_len, 1))) { - if(!event_cond->reason_list) + if (!event_cond->reason_list) event_cond->reason_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(event_cond->reason_list, + if (slurm_addto_char_list(event_cond->reason_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Start", @@ -389,20 +389,20 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if 
(!strncasecmp (argv[i], "States", MAX(command_len, 4))) { - if(!event_cond->state_list) + if (!event_cond->state_list) event_cond->state_list = list_create(slurm_destroy_char); - if(_addto_state_char_list(event_cond->state_list, + if (_addto_state_char_list(event_cond->state_list, argv[i]+end)) { event_cond->event_type = SLURMDB_EVENT_NODE; set = 1; } } else if (!strncasecmp (argv[i], "User", MAX(command_len, 1))) { - if(!event_cond->reason_uid_list) + if (!event_cond->reason_uid_list) event_cond->reason_uid_list = list_create(slurm_destroy_char); - if(_addto_id_char_list(event_cond->reason_uid_list, + if (_addto_id_char_list(event_cond->reason_uid_list, argv[i]+end, 0)) { event_cond->event_type = SLURMDB_EVENT_NODE; set = 1; @@ -414,18 +414,18 @@ static int _set_cond(int *start, int argc, char *argv[], } (*start) = i; - if(!local_cluster_flag && !list_count(event_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(event_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(event_cond->cluster_list, temp); } - if(!all_time_flag && !event_cond->period_start) { + if (!all_time_flag && !event_cond->period_start) { event_cond->period_start = time(NULL); - if(!event_cond->state_list) { + if (!event_cond->state_list) { struct tm start_tm; - if(!localtime_r(&event_cond->period_start, &start_tm)) { + if (!localtime_r(&event_cond->period_start, &start_tm)) { fprintf(stderr, " Couldn't get localtime from %ld", (long)event_cond->period_start); @@ -470,7 +470,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) struct tm start_tm; event_cond->period_start = time(NULL); - if(!localtime_r(&event_cond->period_start, &start_tm)) { + if (!localtime_r(&event_cond->period_start, &start_tm)) { fprintf(stderr, " Couldn't get localtime from %ld", (long)event_cond->period_start); @@ -493,7 +493,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) _set_cond(&i, argc, argv, event_cond, format_list); } - if(exit_code) { + 
if (exit_code) { slurmdb_destroy_event_cond(event_cond); list_destroy(format_list); return SLURM_ERROR; @@ -501,8 +501,8 @@ extern int sacctmgr_list_event(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); - if(!list_count(format_list)) { - if(event_cond->event_type == SLURMDB_EVENT_CLUSTER) + if (!list_count(format_list)) { + if (event_cond->event_type == SLURMDB_EVENT_CLUSTER) slurm_addto_char_list(format_list, "Cluster,Cpus,Start,End," "ClusterNodes"); @@ -518,7 +518,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) int command_len = 0; int newlen = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -526,76 +526,76 @@ extern int sacctmgr_list_event(int argc, char *argv[]) command_len = strlen(object); field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("ClusterNodes", object, + if (!strncasecmp("ClusterNodes", object, MAX(command_len, 8))) { field->type = PRINT_CLUSTER_NODES; field->name = xstrdup("Cluster Nodes"); field->len = 20; field->print_routine = print_fields_str; - } else if(!strncasecmp("Cluster", object, + } else if (!strncasecmp("Cluster", object, MAX(command_len, 1))) { field->type = PRINT_CLUSTER; field->name = xstrdup("Cluster"); field->len = 10; field->print_routine = print_fields_str; - } else if(!strncasecmp("CPUs", object, + } else if (!strncasecmp("CPUs", object, MAX(command_len, 2))) { field->type = PRINT_CPUS; field->name = xstrdup("CPUs"); field->len = 7; field->print_routine = print_fields_str; - } else if(!strncasecmp("Duration", object, + } else if (!strncasecmp("Duration", object, MAX(command_len, 2))) { field->type = PRINT_DURATION; field->name = xstrdup("Duration"); field->len = 13; field->print_routine = print_fields_time_from_secs; - } else if(!strncasecmp("End", object, MAX(command_len, 2))) { + } else if (!strncasecmp("End", object, MAX(command_len, 2))) { field->type = PRINT_END; field->name = 
xstrdup("End"); field->len = 19; field->print_routine = print_fields_date; - } else if(!strncasecmp("EventRaw", object, + } else if (!strncasecmp("EventRaw", object, MAX(command_len, 6))) { field->type = PRINT_EVENTRAW; field->name = xstrdup("EventRaw"); field->len = 8; field->print_routine = print_fields_uint; - } else if(!strncasecmp("Event", object, + } else if (!strncasecmp("Event", object, MAX(command_len, 2))) { field->type = PRINT_EVENT; field->name = xstrdup("Event"); field->len = 7; field->print_routine = print_fields_str; - } else if(!strncasecmp("NodeName", object, + } else if (!strncasecmp("NodeName", object, MAX(command_len, 1))) { field->type = PRINT_NODENAME; field->name = xstrdup("Node Name"); field->len = -15; field->print_routine = print_fields_str; - } else if(!strncasecmp("Reason", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Reason", object, MAX(command_len, 1))) { field->type = PRINT_REASON; field->name = xstrdup("Reason"); field->len = 30; field->print_routine = print_fields_str; - } else if(!strncasecmp("Start", object, + } else if (!strncasecmp("Start", object, MAX(command_len, 1))) { field->type = PRINT_START; field->name = xstrdup("Start"); field->len = 19; field->print_routine = print_fields_date; - } else if(!strncasecmp("StateRaw", object, + } else if (!strncasecmp("StateRaw", object, MAX(command_len, 6))) { field->type = PRINT_STATERAW; field->name = xstrdup("StateRaw"); field->len = 8; field->print_routine = print_fields_uint; - } else if(!strncasecmp("State", object, MAX(command_len, 1))) { + } else if (!strncasecmp("State", object, MAX(command_len, 1))) { field->type = PRINT_STATE; field->name = xstrdup("State"); field->len = 6; field->print_routine = print_fields_str; - } else if(!strncasecmp("User", object, MAX(command_len, 1))) { + } else if (!strncasecmp("User", object, MAX(command_len, 1))) { field->type = PRINT_USER; field->name = xstrdup("User"); field->len = 15; @@ -607,7 +607,7 @@ extern int 
sacctmgr_list_event(int argc, char *argv[]) continue; } - if(newlen) + if (newlen) field->len = newlen; list_append(print_fields_list, field); @@ -615,7 +615,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); - if(exit_code) { + if (exit_code) { list_destroy(print_fields_list); return SLURM_ERROR; } @@ -623,7 +623,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) event_list = acct_storage_g_get_events(db_conn, my_uid, event_cond); slurmdb_destroy_event_cond(event_cond); - if(!event_list) { + if (!event_list) { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); @@ -661,7 +661,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_DURATION: - if(!newend) + if (!newend) newend = time(NULL); field->print_routine( field, @@ -679,9 +679,9 @@ extern int sacctmgr_list_event(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_EVENT: - if(event->event_type == SLURMDB_EVENT_CLUSTER) + if (event->event_type == SLURMDB_EVENT_CLUSTER) tmp_char = "Cluster"; - else if(event->event_type == SLURMDB_EVENT_NODE) + else if (event->event_type == SLURMDB_EVENT_NODE) tmp_char = "Node"; else tmp_char = "Unknown"; @@ -708,7 +708,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_STATE: - if(event->event_type == SLURMDB_EVENT_CLUSTER) + if (event->event_type == SLURMDB_EVENT_CLUSTER) tmp_char = NULL; else tmp_char = node_state_string_compact( @@ -719,7 +719,7 @@ extern int sacctmgr_list_event(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_USER: - if(event->reason_uid != NO_VAL) { + if (event->reason_uid != NO_VAL) { tmp_char = uid_to_string( event->reason_uid); snprintf(tmp, sizeof(tmp), "%s(%u)", diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index 
9aa00a137f9f09f109c21d300d70f976b359c5be..43732845e1c75d5ce8d9b483d3814f82d78bcda3 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -785,7 +785,7 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->grp_jobs); } - if((file_opts->grp_mem != NO_VAL) + if ((file_opts->grp_mem != NO_VAL) && (assoc->grp_mem != file_opts->grp_mem)) { mod_assoc.grp_mem = file_opts->grp_mem; changed = 1; @@ -1758,7 +1758,7 @@ extern int print_file_add_limits_to_line(char **line, if (assoc->grp_jobs != INFINITE) xstrfmtcat(*line, ":GrpJobs=%u", assoc->grp_jobs); - if(assoc->grp_mem != INFINITE) + if (assoc->grp_mem != INFINITE) xstrfmtcat(*line, ":GrpMemory=%u", assoc->grp_mem); if (assoc->grp_nodes != INFINITE) diff --git a/src/sacctmgr/job_functions.c b/src/sacctmgr/job_functions.c index 5ccfe6104c4a92ac24bfcc79877df581d881c642..c1aeb5aaef12602f8b03b433edaddf3c7c7ce4c4 100644 --- a/src/sacctmgr/job_functions.c +++ b/src/sacctmgr/job_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -112,7 +112,7 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -124,10 +124,10 @@ static int _set_rec(int *start, int argc, char *argv[], if (!strncasecmp (argv[i], "Where", MAX(command_len, 5))) { i--; break; - } else if(!end && !strncasecmp(argv[i], "set", + } else if (!end && !strncasecmp(argv[i], "set", MAX(command_len, 3))) { continue; - } else if(!end) { + } else if (!end) { exit_code=1; fprintf(stderr, " Bad format on %s: End your option with " @@ -146,7 +146,7 @@ static int _set_rec(int *start, int argc, char *argv[], MAX(command_len, 12))) || (!strncasecmp(argv[i], "DerivedES", MAX(command_len, 9)))) { - if(job->derived_es) + if (job->derived_es) xfree(job->derived_es); job->derived_es = strip_quotes(argv[i]+end, NULL, 1); set = 1; diff --git a/src/sacctmgr/problem_functions.c b/src/sacctmgr/problem_functions.c index 653f385f29aa8efa786c81a4a2e94bc592eb0c35..362c048ad135e31fe6f2ec7583058950182d745a 100644 --- a/src/sacctmgr/problem_functions.c +++ b/src/sacctmgr/problem_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -49,7 +49,7 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -61,14 +61,14 @@ static int _set_cond(int *start, int argc, char *argv[], if (!end && !strncasecmp (argv[i], "Tree", MAX(command_len, 4))) { tree_display = 1; - } else if(!end && !strncasecmp(argv[i], "where", + } else if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end || !strncasecmp (argv[i], "Ids", + } else if (!end || !strncasecmp (argv[i], "Ids", MAX(command_len, 1)) || !strncasecmp (argv[i], "Problems", MAX(command_len, 2))) { - if(!assoc_cond->id_list) + if (!assoc_cond->id_list) assoc_cond->id_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->id_list, @@ -78,7 +78,7 @@ static int _set_cond(int *start, int argc, char *argv[], MAX(command_len, 2)) || !strncasecmp (argv[i], "Acct", MAX(command_len, 4))) { - if(!assoc_cond->acct_list) + if (!assoc_cond->acct_list) assoc_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->acct_list, @@ -86,7 +86,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Clusters", MAX(command_len, 1))) { - if(!assoc_cond->cluster_list) + if (!assoc_cond->cluster_list) assoc_cond->cluster_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->cluster_list, @@ -94,12 +94,12 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Partitions", MAX(command_len, 4))) { - if(!assoc_cond->partition_list) + if (!assoc_cond->partition_list) assoc_cond->partition_list = 
list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->partition_list, @@ -107,7 +107,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Users", MAX(command_len, 1))) { - if(!assoc_cond->user_list) + if (!assoc_cond->user_list) assoc_cond->user_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->user_list, @@ -151,17 +151,17 @@ extern int sacctmgr_list_problem(int argc, char *argv[]) _set_cond(&i, argc, argv, assoc_cond, format_list); } - if(exit_code) { + if (exit_code) { slurmdb_destroy_association_cond(assoc_cond); list_destroy(format_list); return SLURM_ERROR; - } else if(!list_count(format_list)) + } else if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,Acct,User,Problem"); print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); - if(exit_code) { + if (exit_code) { slurmdb_destroy_association_cond(assoc_cond); list_destroy(print_fields_list); return SLURM_ERROR; @@ -170,7 +170,7 @@ extern int sacctmgr_list_problem(int argc, char *argv[]) assoc_list = acct_storage_g_get_problems(db_conn, my_uid, assoc_cond); slurmdb_destroy_association_cond(assoc_cond); - if(!assoc_list) { + if (!assoc_list) { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); @@ -228,7 +228,7 @@ extern int sacctmgr_list_problem(int argc, char *argv[]) printf("\n"); } - if(tree_list) + if (tree_list) list_destroy(tree_list); list_iterator_destroy(itr2); diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c index b6d6c59043aeae5546edbf0d90c6ca318a3a7cda..16f97e9f7816d5af861200e93f0548d1c4af2680 100644 --- a/src/sacctmgr/qos_functions.c +++ b/src/sacctmgr/qos_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -49,7 +49,7 @@ static uint16_t _parse_preempt_modes(char *names) uint16_t preempt_mode = 0; uint16_t ret_mode = 0; - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -58,17 +58,17 @@ static uint16_t _parse_preempt_modes(char *names) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { + else if (names[i] == ',') { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); //info("got %s %d", name, i-start); ret_mode = preempt_mode_num(name); - if(ret_mode == (uint16_t)NO_VAL) { + if (ret_mode == (uint16_t)NO_VAL) { error("Unknown preempt_mode given '%s'", name); xfree(name); @@ -81,7 +81,7 @@ static uint16_t _parse_preempt_modes(char *names) i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. 
It appears you " "have spaces inside your list."); @@ -96,7 +96,7 @@ static uint16_t _parse_preempt_modes(char *names) //info("got %s %d", name, i-start); ret_mode = preempt_mode_num(name); - if(ret_mode == (uint16_t)NO_VAL) { + if (ret_mode == (uint16_t)NO_VAL) { error("Unknown preempt_mode given '%s'", name); xfree(name); @@ -119,14 +119,14 @@ static int _set_cond(int *start, int argc, char *argv[], int end = 0; int command_len = 0; - if(!qos_cond) { + if (!qos_cond) { error("No qos_cond given"); return -1; } for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -141,44 +141,44 @@ static int _set_cond(int *start, int argc, char *argv[], } else if (!end && !strncasecmp (argv[i], "WithDeleted", MAX(command_len, 5))) { qos_cond->with_deleted = 1; - } else if(!end && !strncasecmp(argv[i], "where", + } else if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "Names", MAX(command_len, 1)) || !strncasecmp (argv[i], "QOSLevel", MAX(command_len, 1))) { - if(!qos_cond->name_list) { + if (!qos_cond->name_list) { qos_cond->name_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(qos_cond->name_list, + if (slurm_addto_char_list(qos_cond->name_list, argv[i]+end)) set = 1; - } else if(!strncasecmp (argv[i], "Descriptions", + } else if (!strncasecmp (argv[i], "Descriptions", MAX(command_len, 1))) { - if(!qos_cond->description_list) { + if (!qos_cond->description_list) { qos_cond->description_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(qos_cond->description_list, + if (slurm_addto_char_list(qos_cond->description_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); - } else if(!strncasecmp (argv[i], "Ids", MAX(command_len, 1))) { + } 
else if (!strncasecmp (argv[i], "Ids", MAX(command_len, 1))) { ListIterator itr = NULL; char *temp = NULL; uint32_t id = 0; - if(!qos_cond->id_list) { + if (!qos_cond->id_list) { qos_cond->id_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(qos_cond->id_list, + if (slurm_addto_char_list(qos_cond->id_list, argv[i]+end)) set = 1; @@ -194,11 +194,11 @@ static int _set_cond(int *start, int argc, char *argv[], list_iterator_destroy(itr); } else if (!strncasecmp (argv[i], "PreemptMode", MAX(command_len, 8))) { - if(!qos_cond) + if (!qos_cond) continue; qos_cond->preempt_mode |= _parse_preempt_modes(argv[i]+end); - if(qos_cond->preempt_mode == (uint16_t)NO_VAL) { + if (qos_cond->preempt_mode == (uint16_t)NO_VAL) { fprintf(stderr, " Bad Preempt Mode given: %s\n", argv[i]); @@ -234,11 +234,11 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } @@ -247,23 +247,25 @@ static int _set_rec(int *start, int argc, char *argv[], if (!strncasecmp (argv[i], "Where", MAX(command_len, 5))) { i--; break; - } else if(!end && !strncasecmp(argv[i], "set", + } else if (!end && !strncasecmp(argv[i], "set", MAX(command_len, 3))) { continue; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "Name", MAX(command_len, 1))) { - if(name_list) + if (name_list) slurm_addto_char_list(name_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Description", MAX(command_len, 1))) { - if(!qos->description) + if (!qos) + continue; + if (!qos->description) qos->description = strip_quotes(argv[i]+end, NULL, 1); set = 1; } else if (!strncasecmp (argv[i], "Flags", MAX(command_len, 2))) { - if(!qos) + if (!qos) continue; qos->flags = str_2_qos_flags(argv[i]+end, option); if (qos->flags == QOS_FLAG_NOTSET) { @@ -290,7 +292,7 @@ static int 
_set_rec(int *start, int argc, char *argv[], } } else if (!strncasecmp (argv[i], "GrpCPUMins", MAX(command_len, 7))) { - if(!qos) + if (!qos) continue; if (get_uint64(argv[i]+end, &qos->grp_cpu_mins, @@ -298,49 +300,49 @@ static int _set_rec(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "GrpCPURunMins", MAX(command_len, 7))) { - if(!qos) + if (!qos) continue; if (get_uint64(argv[i]+end, &qos->grp_cpu_run_mins, "GrpCPURunMins") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "GrpCPUs", MAX(command_len, 7))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->grp_cpus, "GrpCPUs") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "GrpJobs", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->grp_jobs, "GrpJobs") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "GrpMemory", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->grp_mem, "GrpMemory") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "GrpNodes", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->grp_nodes, "GrpNodes") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "GrpSubmitJobs", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->grp_submit_jobs, "GrpSubmitJobs") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "GrpWall", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { @@ -354,7 +356,7 @@ static int _set_rec(int *start, int argc, char *argv[], } } else if (!strncasecmp (argv[i], "MaxCPUMinsPerJob", MAX(command_len, 7))) { - if(!qos) + if (!qos) continue; if (get_uint64(argv[i]+end, &qos->max_cpu_mins_pj, @@ -362,28 +364,28 @@ static int _set_rec(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "MaxCPUsPerJob", MAX(command_len, 7))) { - if(!qos) + 
if (!qos) continue; if (get_uint(argv[i]+end, &qos->max_cpus_pj, "MaxCPUs") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "MaxCPUsPerUser", MAX(command_len, 11))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->max_cpus_pu, "MaxCPUsPerUser") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "MaxJobsPerUser", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->max_jobs_pu, "MaxJobs") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "MaxNodesPerJob", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->max_nodes_pj, @@ -391,7 +393,7 @@ static int _set_rec(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "MaxNodesPerUser", MAX(command_len, 8))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->max_nodes_pu, @@ -399,14 +401,14 @@ static int _set_rec(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "MaxSubmitJobsPerUser", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->max_submit_jobs_pu, "MaxSubmitJobs") == SLURM_SUCCESS) set = 1; } else if (!strncasecmp (argv[i], "MaxWallDurationPerJob", MAX(command_len, 4))) { - if(!qos) + if (!qos) continue; mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { @@ -420,10 +422,10 @@ static int _set_rec(int *start, int argc, char *argv[], } } else if (!strncasecmp (argv[i], "PreemptMode", MAX(command_len, 8))) { - if(!qos) + if (!qos) continue; qos->preempt_mode = preempt_mode_num(argv[i]+end); - if(qos->preempt_mode == (uint16_t)NO_VAL) { + if (qos->preempt_mode == (uint16_t)NO_VAL) { fprintf(stderr, " Bad Preempt Mode given: %s\n", argv[i]); @@ -437,18 +439,18 @@ static int _set_rec(int *start, int argc, char *argv[], /* Preempt needs to follow PreemptMode */ } else if (!strncasecmp (argv[i], "Preempt", MAX(command_len, 7))) { - if(!qos) + if (!qos) continue; - if(!qos->preempt_list) + 
if (!qos->preempt_list) qos->preempt_list = list_create(slurm_destroy_char); - if(!g_qos_list) + if (!g_qos_list) g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, NULL); - if(slurmdb_addto_qos_char_list(qos->preempt_list, + if (slurmdb_addto_qos_char_list(qos->preempt_list, g_qos_list, argv[i]+end, option)) set = 1; @@ -456,7 +458,7 @@ static int _set_rec(int *start, int argc, char *argv[], exit_code = 1; } else if (!strncasecmp (argv[i], "Priority", MAX(command_len, 3))) { - if(!qos) + if (!qos) continue; if (get_uint(argv[i]+end, &qos->priority, @@ -464,7 +466,7 @@ static int _set_rec(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "UsageFactor", MAX(command_len, 6))) { - if(!qos) + if (!qos) continue; if (get_double(argv[i]+end, &qos->usage_factor, @@ -472,7 +474,7 @@ static int _set_rec(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "UsageThreshold", MAX(command_len, 6))) { - if(!qos) + if (!qos) continue; if (get_double(argv[i]+end, &qos->usage_thres, "UsageThreshold") == SLURM_SUCCESS) @@ -499,7 +501,7 @@ static bool _isdefault(List qos_list) List ret_list = NULL; char *name = NULL; - if(!qos_list || !list_count(qos_list)) + if (!qos_list || !list_count(qos_list)) return rc; /* this needs to happen before any removing takes place so we @@ -513,7 +515,7 @@ static bool _isdefault(List qos_list) itr = list_iterator_create(qos_list); while ((name = list_next(itr))) { uint32_t id = str_2_slurmdb_qos(g_qos_list, name); - if(id == NO_VAL) + if (id == NO_VAL) continue; list_append(assoc_cond.def_qos_id_list, xstrdup_printf("%u", id)); @@ -524,7 +526,7 @@ static bool _isdefault(List qos_list) db_conn, my_uid, &assoc_cond); list_destroy(assoc_cond.def_qos_id_list); - if(!ret_list || !list_count(ret_list)) + if (!ret_list || !list_count(ret_list)) goto end_it; fprintf(stderr," Associations listed below have these " @@ -554,7 +556,7 @@ static bool _isdefault(List qos_list) list_iterator_destroy(itr); rc 
= 1; end_it: - if(ret_list) + if (ret_list) list_destroy(ret_list); return rc; @@ -585,11 +587,11 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) limit_set += _set_rec(&i, argc, argv, name_list, start_qos); } - if(exit_code) { + if (exit_code) { list_destroy(name_list); xfree(description); return SLURM_ERROR; - } else if(!list_count(name_list)) { + } else if (!list_count(name_list)) { list_destroy(name_list); slurmdb_destroy_qos_rec(start_qos); exit_code=1; @@ -597,10 +599,10 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) return SLURM_SUCCESS; } - if(!g_qos_list) { + if (!g_qos_list) { g_qos_list = acct_storage_g_get_qos(db_conn, my_uid, NULL); - if(!g_qos_list) { + if (!g_qos_list) { exit_code=1; fprintf(stderr, " Problem getting qos's " "from database. " @@ -616,11 +618,11 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) itr = list_iterator_create(name_list); while((name = list_next(itr))) { qos = NULL; - if(!sacctmgr_find_qos_from_list(g_qos_list, name)) { + if (!sacctmgr_find_qos_from_list(g_qos_list, name)) { qos = xmalloc(sizeof(slurmdb_qos_rec_t)); slurmdb_init_qos_rec(qos, 0); qos->name = xstrdup(name); - if(start_qos->description) + if (start_qos->description) qos->description = xstrdup(start_qos->description); else @@ -629,6 +631,7 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) qos->flags = start_qos->flags; qos->grace_time = start_qos->grace_time; qos->grp_cpu_mins = start_qos->grp_cpu_mins; + qos->grp_cpu_run_mins = start_qos->grp_cpu_run_mins; qos->grp_cpus = start_qos->grp_cpus; qos->grp_jobs = start_qos->grp_jobs; qos->grp_mem = start_qos->grp_mem; @@ -668,15 +671,16 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) g_qos_list = NULL; } - if(!list_count(qos_list)) { + if (!list_count(qos_list)) { printf(" Nothing new added.\n"); + rc = SLURM_ERROR; goto end_it; } - if(qos_str) { + if (qos_str) { printf(" Adding QOS(s)\n%s", qos_str); printf(" Settings\n"); - if(description) + if (description) printf(" Description = 
%s\n", description); else printf(" Description = %s\n", "QOS Name"); @@ -687,15 +691,15 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) } notice_thread_init(); - if(list_count(qos_list)) + if (list_count(qos_list)) rc = acct_storage_g_add_qos(db_conn, my_uid, qos_list); else goto end_it; notice_thread_fini(); - if(rc == SLURM_SUCCESS) { - if(commit_check("Would you like to commit changes?")) { + if (rc == SLURM_SUCCESS) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); @@ -739,11 +743,11 @@ extern int sacctmgr_list_qos(int argc, char *argv[]) _set_cond(&i, argc, argv, qos_cond, format_list); } - if(exit_code) { + if (exit_code) { slurmdb_destroy_qos_cond(qos_cond); list_destroy(format_list); return SLURM_ERROR; - } else if(!list_count(format_list)) { + } else if (!list_count(format_list)) { slurm_addto_char_list(format_list, "Name,Prio,GraceT,Preempt,PreemptM," "Flags%40,UsageThres,UsageFactor," @@ -758,14 +762,14 @@ extern int sacctmgr_list_qos(int argc, char *argv[]) print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); - if(exit_code) { + if (exit_code) { list_destroy(print_fields_list); return SLURM_ERROR; } qos_list = acct_storage_g_get_qos(db_conn, my_uid, qos_cond); slurmdb_destroy_qos_cond(qos_cond); - if(!qos_list) { + if (!qos_list) { exit_code=1; fprintf(stderr, " Problem with query.\n"); list_destroy(print_fields_list); @@ -909,7 +913,7 @@ extern int sacctmgr_list_qos(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_PREE: - if(!g_qos_list) + if (!g_qos_list) g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, NULL); @@ -920,7 +924,7 @@ extern int sacctmgr_list_qos(int argc, char *argv[]) case PRINT_PREEM: { char *tmp_char = "cluster"; - if(qos->preempt_mode) + if (qos->preempt_mode) tmp_char = xstrtolower( preempt_mode_string( qos->preempt_mode)); @@ -984,18 +988,18 @@ extern int sacctmgr_modify_qos(int 
argc, char *argv[]) } } - if(exit_code) { + if (exit_code) { slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return SLURM_ERROR; - } else if(!rec_set) { + } else if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return SLURM_ERROR; - } else if(!cond_set) { - if(!commit_check("You didn't set any conditions with 'WHERE'.\n" + } else if (!cond_set) { + if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); slurmdb_destroy_qos_cond(qos_cond); @@ -1007,7 +1011,7 @@ extern int sacctmgr_modify_qos(int argc, char *argv[]) notice_thread_init(); ret_list = acct_storage_g_modify_qos(db_conn, my_uid, qos_cond, qos); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified qos...\n"); @@ -1016,8 +1020,9 @@ extern int sacctmgr_modify_qos(int argc, char *argv[]) } list_iterator_destroy(itr); set = 1; - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing modified\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -1025,13 +1030,13 @@ extern int sacctmgr_modify_qos(int argc, char *argv[]) rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); notice_thread_fini(); - if(set) { - if(commit_check("Would you like to commit changes?")) + if (set) { + if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); @@ -1062,13 +1067,13 @@ extern int sacctmgr_delete_qos(int argc, char *argv[]) set += _set_cond(&i, argc, argv, qos_cond, NULL); } - if(!set) { + if (!set) { exit_code=1; fprintf(stderr, " No conditions given to remove, not executing.\n"); slurmdb_destroy_qos_cond(qos_cond); return SLURM_ERROR; - } else if(set == -1) { + } else if (set 
== -1) { slurmdb_destroy_qos_cond(qos_cond); return SLURM_ERROR; } @@ -1082,7 +1087,7 @@ extern int sacctmgr_delete_qos(int argc, char *argv[]) notice_thread_fini(); slurmdb_destroy_qos_cond(qos_cond); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = NULL; @@ -1109,14 +1114,15 @@ extern int sacctmgr_delete_qos(int argc, char *argv[]) printf(" %s\n", object); } list_iterator_destroy(itr); - if(commit_check("Would you like to commit changes?")) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing deleted\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -1125,7 +1131,7 @@ extern int sacctmgr_delete_qos(int argc, char *argv[]) } end_it: - if(ret_list) + if (ret_list) list_destroy(ret_list); return rc; diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index 69efec54a27a777564b29ad365f8bc339c45b3ba..65bfb902d18191863c8c95f7178d4c06e45870df 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -178,7 +178,7 @@ main (int argc, char *argv[]) /* Check to see if we are running a supported accounting plugin */ temp = slurm_get_accounting_storage_type(); - if(strcasecmp(temp, "accounting_storage/slurmdbd") + if (strcasecmp(temp, "accounting_storage/slurmdbd") && strcasecmp(temp, "accounting_storage/mysql")) { fprintf (stderr, "You are not running a supported " "accounting_storage plugin\n(%s).\n" @@ -192,20 +192,20 @@ main (int argc, char *argv[]) errno = 0; db_conn = slurmdb_connection_get(); - if(errno != SLURM_SUCCESS) { + if (errno != SLURM_SUCCESS) { int tmp_errno = errno; - if((input_field_count == 2) && + if ((input_field_count == 2) && (!strncasecmp(argv[2], "Configuration", strlen(argv[1]))) && ((!strncasecmp(argv[1], "list", strlen(argv[0]))) || (!strncasecmp(argv[1], "show", strlen(argv[0]))))) { - if(tmp_errno == ESLURM_DB_CONNECTION) { + if (tmp_errno == ESLURM_DB_CONNECTION) { tmp_errno = 0; sacctmgr_list_config(true); } else sacctmgr_list_config(false); } errno = tmp_errno; - if(errno) + if (errno) error("Problem talking to the database: %m"); exit(1); } @@ -225,7 +225,7 @@ main (int argc, char *argv[]) * them to fix it and let the process happen since there * are checks for global exit_code we need to reset it. */ - if(exit_code) { + if (exit_code) { local_exit_code = exit_code; exit_code = 0; } @@ -236,11 +236,11 @@ main (int argc, char *argv[]) */ if (exit_flag == 2) putchar('\n'); - if(local_exit_code) + if (local_exit_code) exit_code = local_exit_code; acct_storage_g_close_connection(&db_conn); slurm_acct_storage_fini(); - if(g_qos_list) + if (g_qos_list) list_destroy(g_qos_list); exit(exit_code); } @@ -254,18 +254,23 @@ static char *_getline(const char *prompt) char buf[4096]; char *line; int len; + printf("%s", prompt); - /* Set "line" here to avoid a warning and discard it later. 
*/ + /* Set "line" here to avoid a warning, discard later */ line = fgets(buf, 4096, stdin); if (line == NULL) return NULL; len = strlen(buf); - if ((len > 0) && (buf[len-1] == '\n')) + if ((len == 0) || (len >= 4096)) + return NULL; + if (buf[len-1] == '\n') buf[len-1] = '\0'; else len++; - line = malloc (len * sizeof(char)); + line = malloc(len * sizeof(char)); + if (!line) + return NULL; return strncpy(line, buf, len); } #endif @@ -467,16 +472,16 @@ _process_command (int argc, char *argv[]) argv[0]); } - if(argc > 1) + if (argc > 1) my_start = parse_time(argv[1], 1); - if(argc > 2) + if (argc > 2) my_end = parse_time(argv[2], 1); - if(argc > 3) + if (argc > 3) archive_data = atoi(argv[3]); - if(acct_storage_g_roll_usage(db_conn, my_start, + if (acct_storage_g_roll_usage(db_conn, my_start, my_end, archive_data) == SLURM_SUCCESS) { - if(commit_check("Would you like to commit rollup?")) { + if (commit_check("Would you like to commit rollup?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Rollup Discarded\n"); @@ -509,13 +514,13 @@ static void _add_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; int command_len = 0; - if(readonly_flag) { + if (readonly_flag) { exit_code = 1; fprintf(stderr, "Can't run this command in readonly mode.\n"); return; } - if(!argv[0]) + if (!argv[0]) goto helpme; command_len = strlen(argv[0]); @@ -544,7 +549,7 @@ static void _add_it (int argc, char *argv[]) fprintf(stderr, "\"QOS\", or \"User\"\n"); } - if (error_code == SLURM_ERROR) { + if (error_code != SLURM_SUCCESS) { exit_code = 1; } } @@ -559,13 +564,13 @@ static void _archive_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; int command_len = 0; - if(readonly_flag) { + if (readonly_flag) { exit_code = 1; fprintf(stderr, "Can't run this command in readonly mode.\n"); return; } - if(!argv[0]) + if (!argv[0]) goto helpme; command_len = strlen(argv[0]); @@ -585,7 +590,7 @@ static void _archive_it (int argc, char *argv[]) fprintf(stderr, "\"Dump\", or 
\"load\"\n"); } - if (error_code == SLURM_ERROR) { + if (error_code != SLURM_SUCCESS) { exit_code = 1; } } @@ -602,7 +607,7 @@ static void _show_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; int command_len = 0; - if(!argv[0]) + if (!argv[0]) goto helpme; command_len = strlen(argv[0]); @@ -648,7 +653,7 @@ static void _show_it (int argc, char *argv[]) "\"QOS\", \"Transaction\", \"User\", or \"WCKey\"\n"); } - if (error_code == SLURM_ERROR) { + if (error_code != SLURM_SUCCESS) { exit_code = 1; } } @@ -664,13 +669,13 @@ static void _modify_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; int command_len = 0; - if(readonly_flag) { + if (readonly_flag) { exit_code = 1; fprintf(stderr, "Can't run this command in readonly mode.\n"); return; } - if(!argv[0]) + if (!argv[0]) goto helpme; command_len = strlen(argv[0]); @@ -699,7 +704,7 @@ static void _modify_it (int argc, char *argv[]) "or \"User\"\n"); } - if (error_code == SLURM_ERROR) { + if (error_code != SLURM_SUCCESS) { exit_code = 1; } } @@ -714,13 +719,13 @@ static void _delete_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; int command_len = 0; - if(readonly_flag) { + if (readonly_flag) { exit_code = 1; fprintf(stderr, "Can't run this command in readonly mode.\n"); return; } - if(!argv[0]) + if (!argv[0]) goto helpme; command_len = strlen(argv[0]); @@ -750,7 +755,7 @@ static void _delete_it (int argc, char *argv[]) fprintf(stderr, "\"QOS\", or \"User\"\n"); } - if (error_code == SLURM_ERROR) { + if (error_code != SLURM_SUCCESS) { exit_code = 1; } } diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h index c535f2265086a7ecbe9d93102ab3eb1b38bb46c6..69bce88bb6854b262415511bcde993288e539908 100644 --- a/src/sacctmgr/sacctmgr.h +++ b/src/sacctmgr/sacctmgr.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sacctmgr/txn_functions.c b/src/sacctmgr/txn_functions.c index a44cab648853837de90af8cb8bbb9ea57ac66a8c..f81d14e89770de7e581f34f14b96d67defd2bc94 100644 --- a/src/sacctmgr/txn_functions.c +++ b/src/sacctmgr/txn_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -51,7 +51,7 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -60,14 +60,14 @@ static int _set_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "where", + if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end && !strncasecmp(argv[i], "withassocinfo", + } else if (!end && !strncasecmp(argv[i], "withassocinfo", MAX(command_len, 5))) { txn_cond->with_assoc_info = 1; set = 1; - } else if(!end + } else if (!end || (!strncasecmp (argv[i], "Ids", MAX(command_len, 1))) || (!strncasecmp (argv[i], "Txn", @@ -76,11 +76,11 @@ static int _set_cond(int *start, int argc, char *argv[], char *temp = NULL; uint32_t id = 0; - if(!txn_cond->id_list) + if (!txn_cond->id_list) txn_cond->id_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(txn_cond->id_list, + if (slurm_addto_char_list(txn_cond->id_list, argv[i]+end)) set = 1; @@ -96,37 +96,37 @@ static int _set_cond(int *start, int argc, char *argv[], list_iterator_destroy(itr); } else if (!strncasecmp (argv[i], "Accounts", MAX(command_len, 
3))) { - if(!txn_cond->acct_list) + if (!txn_cond->acct_list) txn_cond->acct_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(txn_cond->acct_list, + if (slurm_addto_char_list(txn_cond->acct_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Action", MAX(command_len, 4))) { - if(!txn_cond->action_list) + if (!txn_cond->action_list) txn_cond->action_list = list_create(slurm_destroy_char); - if(addto_action_char_list(txn_cond->action_list, + if (addto_action_char_list(txn_cond->action_list, argv[i]+end)) set = 1; else exit_code=1; } else if (!strncasecmp (argv[i], "Actors", MAX(command_len, 4))) { - if(!txn_cond->actor_list) + if (!txn_cond->actor_list) txn_cond->actor_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(txn_cond->actor_list, + if (slurm_addto_char_list(txn_cond->actor_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Clusters", MAX(command_len, 3))) { - if(!txn_cond->cluster_list) + if (!txn_cond->cluster_list) txn_cond->cluster_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(txn_cond->cluster_list, + if (slurm_addto_char_list(txn_cond->cluster_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "End", MAX(command_len, 1))) { @@ -134,7 +134,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Start", MAX(command_len, 1))) { @@ -142,10 +142,10 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Users", MAX(command_len, 1))) { - if(!txn_cond->user_list) + if (!txn_cond->user_list) txn_cond->user_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(txn_cond->user_list, + if (slurm_addto_char_list(txn_cond->user_list, argv[i]+end)) set = 1; } else { @@ -183,15 +183,15 @@ extern int 
sacctmgr_list_txn(int argc, char *argv[]) _set_cond(&i, argc, argv, txn_cond, format_list); } - if(exit_code) { + if (exit_code) { slurmdb_destroy_txn_cond(txn_cond); list_destroy(format_list); return SLURM_ERROR; } - if(!list_count(format_list)) { + if (!list_count(format_list)) { slurm_addto_char_list(format_list, "T,Action,Actor,Where,Info"); - if(txn_cond->with_assoc_info) + if (txn_cond->with_assoc_info) slurm_addto_char_list(format_list, "User,Account,Cluster"); } @@ -199,7 +199,7 @@ extern int sacctmgr_list_txn(int argc, char *argv[]) print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); - if(exit_code) { + if (exit_code) { list_destroy(print_fields_list); return SLURM_ERROR; } @@ -207,7 +207,7 @@ extern int sacctmgr_list_txn(int argc, char *argv[]) txn_list = acct_storage_g_get_txn(db_conn, my_uid, txn_cond); slurmdb_destroy_txn_cond(txn_cond); - if(!txn_list) { + if (!txn_list) { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index e26d84549fc7c69bddea3a15226c5a8716f252eb..198e3b4bf807f7886cd72756f77ad6928a7df3d0 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -59,7 +59,7 @@ static int _set_cond(int *start, int argc, char *argv[], int command_len = 0; int option = 0; - if(!user_cond) { + if (!user_cond) { error("No user_cond given"); return -1; } @@ -75,16 +75,16 @@ static int _set_cond(int *start, int argc, char *argv[], * accounts. Having it blank is fine, it just needs to * exist. 
*/ - if(!assoc_cond->user_list) + if (!assoc_cond->user_list) assoc_cond->user_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } @@ -112,15 +112,15 @@ static int _set_cond(int *start, int argc, char *argv[], } else if (!end && !strncasecmp(argv[i], "WOPLimits", MAX(command_len, 4))) { assoc_cond->without_parent_limits = 1; - } else if(!end && !strncasecmp(argv[i], "where", + } else if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end + } else if (!end || !strncasecmp(argv[i], "Names", MAX(command_len, 1)) || !strncasecmp(argv[i], "Users", MAX(command_len, 1))) { - if(slurm_addto_char_list(assoc_cond->user_list, + if (slurm_addto_char_list(assoc_cond->user_list, argv[i]+end)) u_set = 1; else @@ -145,29 +145,29 @@ static int _set_cond(int *start, int argc, char *argv[], } } else if (!strncasecmp(argv[i], "DefaultAccount", MAX(command_len, 8))) { - if(!user_cond->def_acct_list) { + if (!user_cond->def_acct_list) { user_cond->def_acct_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(user_cond->def_acct_list, + if (slurm_addto_char_list(user_cond->def_acct_list, argv[i]+end)) u_set = 1; else exit_code=1; } else if (!strncasecmp(argv[i], "DefaultWCKey", MAX(command_len, 8))) { - if(!user_cond->def_wckey_list) { + if (!user_cond->def_wckey_list) { user_cond->def_wckey_list = list_create(slurm_destroy_char); } - if(slurm_addto_char_list(user_cond->def_wckey_list, + if (slurm_addto_char_list(user_cond->def_wckey_list, argv[i]+end)) u_set = 1; else exit_code=1; } else if (!strncasecmp(argv[i], "Format", MAX(command_len, 1))) { - if(format_list) { + if (format_list) { /* We need this to get the defaults. 
(Usually * only for the calling cluster) */ if (slurm_strcasestr(argv[i]+end, "default")) @@ -175,7 +175,7 @@ static int _set_cond(int *start, int argc, char *argv[], slurm_addto_char_list(format_list, argv[i]+end); } - } else if(!(a_set = sacctmgr_set_association_cond( + } else if (!(a_set = sacctmgr_set_association_cond( assoc_cond, argv[i], argv[i]+end, command_len, option))) { exit_code=1; @@ -187,11 +187,11 @@ static int _set_cond(int *start, int argc, char *argv[], (*start) = i; - if(u_set && a_set) + if (u_set && a_set) return 3; - else if(a_set) { + else if (a_set) { return 2; - } else if(u_set) + } else if (u_set) return 1; return 0; @@ -210,11 +210,11 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } @@ -223,10 +223,10 @@ static int _set_rec(int *start, int argc, char *argv[], if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) { i--; break; - } else if(!end && !strncasecmp(argv[i], "set", + } else if (!end && !strncasecmp(argv[i], "set", MAX(command_len, 3))) { continue; - } else if(!end) { + } else if (!end) { exit_code=1; fprintf(stderr, " Bad format on %s: End your option with " @@ -238,27 +238,27 @@ static int _set_rec(int *start, int argc, char *argv[], u_set = 1; } else if (!strncasecmp(argv[i], "DefaultAccount", MAX(command_len, 8))) { - if(user->default_acct) + if (user->default_acct) xfree(user->default_acct); user->default_acct = strip_quotes(argv[i]+end, NULL, 1); u_set = 1; } else if (!strncasecmp(argv[i], "DefaultWCKey", MAX(command_len, 8))) { - if(user->default_wckey) + if (user->default_wckey) xfree(user->default_wckey); user->default_wckey = strip_quotes(argv[i]+end, NULL, 1); u_set = 1; } else if (!strncasecmp(argv[i], "NewName", MAX(command_len, 1))) { - if(user->name) + if (user->name) 
xfree(user->name); user->name = strip_quotes(argv[i]+end, NULL, 1); u_set = 1; } else if (!strncasecmp (argv[i], "RawUsage", MAX(command_len, 7))) { uint32_t usage; - if(!assoc) + if (!assoc) continue; assoc->usage = xmalloc(sizeof( assoc_mgr_association_usage_t)); @@ -267,7 +267,7 @@ static int _set_rec(int *start, int argc, char *argv[], assoc->usage->usage_raw = usage; a_set = 1; } - } else if(!assoc || + } else if (!assoc || (assoc && !(a_set = sacctmgr_set_association_rec( assoc, argv[i], argv[i]+end, command_len, option)))) { @@ -280,11 +280,11 @@ static int _set_rec(int *start, int argc, char *argv[], (*start) = i; - if(u_set && a_set) + if (u_set && a_set) return 3; - else if(u_set) + else if (u_set) return 1; - else if(a_set) + else if (a_set) return 2; return 0; } @@ -373,7 +373,7 @@ static int _check_default_associations(char *def_acct, */ while((user = list_next(itr))) { while((cluster = list_next(itr_c))) { - if(!sacctmgr_find_association_from_list( + if (!sacctmgr_find_association_from_list( local_assoc_list, user, def_acct, cluster, "*")) { regret = xmalloc(sizeof(regret_t)); @@ -383,7 +383,7 @@ static int _check_default_associations(char *def_acct, xfree so we can override it here since we aren't allocating any extra memory */ - if(!regret_list) + if (!regret_list) regret_list = list_create(slurm_destroy_char); list_append(regret_list, regret); @@ -396,7 +396,7 @@ static int _check_default_associations(char *def_acct, list_iterator_destroy(itr_c); list_destroy(local_assoc_list); - if(regret_list) { + if (regret_list) { itr = list_iterator_create(regret_list); printf(" Can't modify because these users " "aren't associated with new " @@ -459,7 +459,7 @@ static int _check_default_wckeys(char *def_wckey, */ while((user = list_next(itr))) { while((cluster = list_next(itr_c))) { - if(!sacctmgr_find_wckey_from_list( + if (!sacctmgr_find_wckey_from_list( local_wckey_list, user, def_wckey, cluster)) { regret = xmalloc(sizeof(regret_t)); @@ -469,7 +469,7 @@ 
static int _check_default_wckeys(char *def_wckey, xfree so we can override it here since we aren't allocating any extra memory */ - if(!regret_list) + if (!regret_list) regret_list = list_create(slurm_destroy_char); list_append(regret_list, regret); @@ -482,7 +482,7 @@ static int _check_default_wckeys(char *def_wckey, list_iterator_destroy(itr_c); list_destroy(local_wckey_list); - if(regret_list) { + if (regret_list) { itr = list_iterator_create(regret_list); printf(" Can't modify because these users " "aren't associated with new " @@ -517,20 +517,20 @@ static int _check_coord_request(slurmdb_user_cond_t *user_cond, bool check) List local_user_list = NULL; int rc = SLURM_SUCCESS; - if(!user_cond) { + if (!user_cond) { exit_code=1; fprintf(stderr, " You need to specify the user_cond here.\n"); return SLURM_ERROR; } - if(check && (!user_cond->assoc_cond->user_list + if (check && (!user_cond->assoc_cond->user_list || !list_count(user_cond->assoc_cond->user_list))) { exit_code=1; fprintf(stderr, " You need to specify a user list here.\n"); return SLURM_ERROR; } - if(check && (!user_cond->assoc_cond->acct_list + if (check && (!user_cond->assoc_cond->acct_list || !list_count(user_cond->assoc_cond->acct_list))) { exit_code=1; fprintf(stderr, " You need to specify an account list here.\n"); @@ -541,14 +541,14 @@ static int _check_coord_request(slurmdb_user_cond_t *user_cond, bool check) account_cond.assoc_cond = user_cond->assoc_cond; local_acct_list = acct_storage_g_get_accounts(db_conn, my_uid, &account_cond); - if(!local_acct_list) { + if (!local_acct_list) { exit_code=1; fprintf(stderr, " Problem getting accounts from database. 
" "Contact your admin.\n"); return SLURM_ERROR; } - if(user_cond->assoc_cond->acct_list && + if (user_cond->assoc_cond->acct_list && (list_count(local_acct_list) != list_count(user_cond->assoc_cond->acct_list))) { @@ -557,11 +557,11 @@ static int _check_coord_request(slurmdb_user_cond_t *user_cond, bool check) while((name = list_next(itr))) { while((acct_rec = list_next(itr2))) { - if(!strcmp(name, acct_rec->name)) + if (!strcmp(name, acct_rec->name)) break; } list_iterator_reset(itr2); - if(!acct_rec) { + if (!acct_rec) { fprintf(stderr, " You specified a non-existant " "account '%s'.\n", name); @@ -574,16 +574,16 @@ static int _check_coord_request(slurmdb_user_cond_t *user_cond, bool check) } local_user_list = acct_storage_g_get_users(db_conn, my_uid, user_cond); - if(!local_user_list) { + if (!local_user_list) { exit_code=1; fprintf(stderr, " Problem getting users from database. " "Contact your admin.\n"); - if(local_acct_list) + if (local_acct_list) list_destroy(local_acct_list); return SLURM_ERROR; } - if(user_cond->assoc_cond->user_list && + if (user_cond->assoc_cond->user_list && (list_count(local_user_list) != list_count(user_cond->assoc_cond->user_list))) { @@ -592,11 +592,11 @@ static int _check_coord_request(slurmdb_user_cond_t *user_cond, bool check) while((name = list_next(itr))) { while((user_rec = list_next(itr2))) { - if(!strcmp(name, user_rec->name)) + if (!strcmp(name, user_rec->name)) break; } list_iterator_reset(itr2); - if(!user_rec) { + if (!user_rec) { fprintf(stderr, " You specified a non-existant " "user '%s'.\n", name); @@ -608,9 +608,9 @@ static int _check_coord_request(slurmdb_user_cond_t *user_cond, bool check) list_iterator_destroy(itr2); } - if(local_acct_list) + if (local_acct_list) list_destroy(local_acct_list); - if(local_user_list) + if (local_user_list) list_destroy(local_user_list); return rc; @@ -653,7 +653,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) int option = 0; uint16_t track_wckey = slurm_get_track_wckey(); 
-/* if(!list_count(sacctmgr_cluster_list)) { */ +/* if (!list_count(sacctmgr_cluster_list)) { */ /* printf(" Can't add users, no cluster defined yet.\n" */ /* " Please contact your administrator.\n"); */ /* return SLURM_ERROR; */ @@ -673,20 +673,20 @@ extern int sacctmgr_add_user(int argc, char *argv[]) for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; - if(argv[i][end] == '=') { + if (argv[i][end] == '=') { option = (int)argv[i][end-1]; end++; } } - if(!end + if (!end || !strncasecmp(argv[i], "Names", MAX(command_len, 1)) || !strncasecmp(argv[i], "Users", MAX(command_len, 1))) { - if(!slurm_addto_char_list(assoc_cond->user_list, + if (!slurm_addto_char_list(assoc_cond->user_list, argv[i]+end)) exit_code=1; } else if (!strncasecmp(argv[i], "AdminLevel", @@ -694,7 +694,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) admin_level = str_2_slurmdb_admin_level(argv[i]+end); } else if (!strncasecmp(argv[i], "DefaultAccount", MAX(command_len, 8))) { - if(default_acct) { + if (default_acct) { fprintf(stderr, " Already listed DefaultAccount %s\n", default_acct); @@ -706,7 +706,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) default_acct); } else if (!strncasecmp(argv[i], "DefaultWCKey", MAX(command_len, 8))) { - if(default_wckey) { + if (default_wckey) { fprintf(stderr, " Already listed DefaultWCKey %s\n", default_wckey); @@ -720,7 +720,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) MAX(command_len, 1))) { slurm_addto_char_list(wckey_cond->name_list, argv[i]+end); - } else if(!(limit_set = sacctmgr_set_association_rec( + } else if (!(limit_set = sacctmgr_set_association_rec( &start_assoc, argv[i], argv[i]+end, command_len, option)) && !(limit_set = sacctmgr_set_association_cond( @@ -731,11 +731,11 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } } - if(exit_code) { + if (exit_code) { slurmdb_destroy_wckey_cond(wckey_cond); 
slurmdb_destroy_association_cond(assoc_cond); return SLURM_ERROR; - } else if(!list_count(assoc_cond->user_list)) { + } else if (!list_count(assoc_cond->user_list)) { slurmdb_destroy_wckey_cond(wckey_cond); slurmdb_destroy_association_cond(assoc_cond); exit_code=1; @@ -758,7 +758,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) db_conn, my_uid, &user_cond); } - if(!local_user_list) { + if (!local_user_list) { exit_code=1; fprintf(stderr, " Problem getting users from database. " "Contact your admin.\n"); @@ -768,13 +768,13 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } - if(!list_count(assoc_cond->cluster_list)) { + if (!list_count(assoc_cond->cluster_list)) { if (_check_and_set_cluster_list(assoc_cond->cluster_list) != SLURM_SUCCESS) { slurmdb_destroy_wckey_cond(wckey_cond); slurmdb_destroy_association_cond(assoc_cond); list_destroy(local_user_list); - if(local_acct_list) + if (local_acct_list) list_destroy(local_acct_list); return SLURM_ERROR; } @@ -795,10 +795,10 @@ extern int sacctmgr_add_user(int argc, char *argv[]) list_iterator_reset(itr); while((cluster_rec = list_next(itr))) { - if(!strcasecmp(cluster_rec->name, cluster)) + if (!strcasecmp(cluster_rec->name, cluster)) break; } - if(!cluster_rec) { + if (!cluster_rec) { exit_code=1; fprintf(stderr, " This cluster '%s' " "doesn't exist.\n" @@ -812,18 +812,18 @@ extern int sacctmgr_add_user(int argc, char *argv[]) list_iterator_destroy(itr_c); list_destroy(temp_list); - if(!list_count(assoc_cond->cluster_list)) { + if (!list_count(assoc_cond->cluster_list)) { slurmdb_destroy_wckey_cond(wckey_cond); slurmdb_destroy_association_cond(assoc_cond); list_destroy(local_user_list); - if(local_acct_list) + if (local_acct_list) list_destroy(local_acct_list); return SLURM_ERROR; } } - if(!list_count(assoc_cond->acct_list)) { - if(!list_count(wckey_cond->name_list)) { + if (!list_count(assoc_cond->acct_list)) { + if (!list_count(wckey_cond->name_list)) { slurmdb_destroy_wckey_cond(wckey_cond); 
slurmdb_destroy_association_cond(assoc_cond); exit_code=1; @@ -841,7 +841,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) local_acct_list = acct_storage_g_get_accounts( db_conn, my_uid, &account_cond); - if(!local_acct_list) { + if (!local_acct_list) { exit_code=1; fprintf(stderr, " Problem getting accounts " "from database. Contact your admin.\n"); @@ -870,10 +870,10 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } } - if(track_wckey || default_wckey) { + if (track_wckey || default_wckey) { wckey_cond->cluster_list = assoc_cond->cluster_list; wckey_cond->user_list = assoc_cond->user_list; - if(!(local_wckey_list = acct_storage_g_get_wckeys( + if (!(local_wckey_list = acct_storage_g_get_wckeys( db_conn, my_uid, wckey_cond))) info("If you are a coordinator ignore " "the previous error"); @@ -895,7 +895,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) char *local_def_acct = NULL; char *local_def_wckey = NULL; - if(!name[0]) { + if (!name[0]) { exit_code=1; fprintf(stderr, " No blank names are " "allowed when adding.\n"); @@ -907,7 +907,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) local_def_wckey = xstrdup(default_wckey); user = NULL; - if(!(user_rec = sacctmgr_find_user_from_list( + if (!(user_rec = sacctmgr_find_user_from_list( local_user_list, name))) { uid_t pw_uid; @@ -917,13 +917,13 @@ extern int sacctmgr_add_user(int argc, char *argv[]) local_def_acct = xstrdup( list_peek(assoc_cond->acct_list)); - if(!local_def_wckey + if (!local_def_wckey && wckey_cond->name_list && list_count(wckey_cond->name_list)) local_def_wckey = xstrdup( list_peek(wckey_cond->name_list)); - if(!local_def_acct || !local_def_acct[0]) { + if (!local_def_acct || !local_def_acct[0]) { exit_code=1; fprintf(stderr, " Need a default account for " "these users to add.\n"); @@ -932,8 +932,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) xfree(local_def_wckey); goto no_default; } - if(first) { - if(!sacctmgr_find_account_from_list( + if (first) { + 
if (!sacctmgr_find_account_from_list( local_acct_list, local_def_acct)) { exit_code=1; fprintf(stderr, " This account '%s' " @@ -953,7 +953,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) "\nAre you sure you want to continue?", name); - if(!commit_check(warning)) { + if (!commit_check(warning)) { xfree(warning); rc = SLURM_ERROR; list_flush(user_list); @@ -1039,7 +1039,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) assoc_cond->partition_list); while((partition = list_next(itr_p))) { partition_set = 1; - if(sacctmgr_find_association_from_list( + if (sacctmgr_find_association_from_list( local_assoc_list, name, account, cluster, partition)) @@ -1063,6 +1063,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) assoc->grp_cpu_mins = start_assoc.grp_cpu_mins; + assoc->grp_cpu_run_mins = + start_assoc.grp_cpu_run_mins; assoc->grp_cpus = start_assoc.grp_cpus; assoc->grp_jobs = start_assoc.grp_jobs; assoc->grp_mem = start_assoc.grp_mem; @@ -1102,13 +1104,13 @@ extern int sacctmgr_add_user(int argc, char *argv[]) assoc->partition); } list_iterator_destroy(itr_p); - if(partition_set) { + if (partition_set) { if (!default_acct && local_def_acct) xfree(local_def_acct); continue; } - if(sacctmgr_find_association_from_list( + if (sacctmgr_find_association_from_list( local_assoc_list, name, account, cluster, NULL)) { if (!default_acct && local_def_acct) @@ -1120,7 +1122,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) sizeof(slurmdb_association_rec_t)); slurmdb_init_association_rec(assoc, 0); assoc->user = xstrdup(name); - if(local_def_acct + if (local_def_acct && !strcmp(local_def_acct, account)) assoc->is_def = 1; assoc->acct = xstrdup(account); @@ -1132,6 +1134,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) assoc->grp_cpu_mins = start_assoc.grp_cpu_mins; + assoc->grp_cpu_run_mins = + start_assoc.grp_cpu_run_mins; assoc->grp_cpus = start_assoc.grp_cpus; assoc->grp_jobs = start_assoc.grp_jobs; assoc->grp_mem = start_assoc.grp_mem; @@ 
-1152,7 +1156,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) assoc->qos_list = copy_char_list(start_assoc.qos_list); - if(user) + if (user) list_append(user->assoc_list, assoc); else list_append(assoc_list, assoc); @@ -1172,14 +1176,14 @@ extern int sacctmgr_add_user(int argc, char *argv[]) xfree(local_def_acct); /* continue here if not doing wckeys */ - if(!track_wckey && !local_def_wckey) + if (!track_wckey && !local_def_wckey) continue; itr_w = list_iterator_create(wckey_cond->name_list); while((account = list_next(itr_w))) { itr_c = list_iterator_create(assoc_cond->cluster_list); while((cluster = list_next(itr_c))) { - if(sacctmgr_find_wckey_from_list( + if (sacctmgr_find_wckey_from_list( local_wckey_list, name, account, cluster)) { continue; @@ -1204,10 +1208,10 @@ extern int sacctmgr_add_user(int argc, char *argv[]) wckey->user = xstrdup(name); wckey->name = xstrdup(account); wckey->cluster = xstrdup(cluster); - if(local_def_wckey + if (local_def_wckey && !strcmp(local_def_wckey, account)) wckey->is_def = 1; - if(user) + if (user) list_append(user->wckey_list, wckey); else list_append(wckey_list, wckey); @@ -1228,68 +1232,69 @@ extern int sacctmgr_add_user(int argc, char *argv[]) no_default: list_iterator_destroy(itr); list_destroy(local_user_list); - if(local_acct_list) + if (local_acct_list) list_destroy(local_acct_list); - if(local_assoc_list) + if (local_assoc_list) list_destroy(local_assoc_list); - if(local_wckey_list) + if (local_wckey_list) list_destroy(local_wckey_list); slurmdb_destroy_wckey_cond(wckey_cond); slurmdb_destroy_association_cond(assoc_cond); - if(!list_count(user_list) && !list_count(assoc_list) + if (!list_count(user_list) && !list_count(assoc_list) && !list_count(wckey_list)) { printf(" Nothing new added.\n"); + rc = SLURM_ERROR; goto end_it; - } else if(!assoc_str && !wckey_str) { + } else if (!assoc_str && !wckey_str) { exit_code=1; fprintf(stderr, " No associations or wckeys created.\n"); goto end_it; } - if(user_str) { + 
if (user_str) { printf(" Adding User(s)\n%s", user_str); printf(" Settings =\n"); printf(" Default Account = %s\n", default_acct); - if(default_wckey) + if (default_wckey) printf(" Default WCKey = %s\n", default_wckey); - if(admin_level != SLURMDB_ADMIN_NOTSET) + if (admin_level != SLURMDB_ADMIN_NOTSET) printf(" Admin Level = %s\n", slurmdb_admin_level_str(admin_level)); xfree(user_str); } - if(assoc_str) { + if (assoc_str) { printf(" Associations =\n%s", assoc_str); xfree(assoc_str); } - if(wckey_str) { + if (wckey_str) { printf(" WCKeys =\n%s", wckey_str); xfree(wckey_str); } - if(limit_set) { + if (limit_set) { printf(" Non Default Settings\n"); sacctmgr_print_assoc_limits(&start_assoc); - if(start_assoc.qos_list) + if (start_assoc.qos_list) list_destroy(start_assoc.qos_list); } notice_thread_init(); - if(list_count(user_list)) { + if (list_count(user_list)) { rc = acct_storage_g_add_users(db_conn, my_uid, user_list); } - if(rc == SLURM_SUCCESS) { - if(list_count(assoc_list)) + if (rc == SLURM_SUCCESS) { + if (list_count(assoc_list)) rc = acct_storage_g_add_associations(db_conn, my_uid, assoc_list); } - if(rc == SLURM_SUCCESS) { - if(list_count(wckey_list)) + if (rc == SLURM_SUCCESS) { + if (list_count(wckey_list)) rc = acct_storage_g_add_wckeys(db_conn, my_uid, wckey_list); } else { @@ -1303,8 +1308,8 @@ no_default: notice_thread_fini(); - if(rc == SLURM_SUCCESS) { - if(commit_check("Would you like to commit changes?")) { + if (rc == SLURM_SUCCESS) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); @@ -1347,10 +1352,10 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) cond_set |= prev_set; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_user_cond(user_cond); return SLURM_ERROR; - } else if(!cond_set) { + } else if (!cond_set) { exit_code=1; fprintf(stderr, " You need to specify conditions to " "to add the coordinator.\n"); @@ -1358,7 +1363,7 @@ extern int 
sacctmgr_add_coord(int argc, char *argv[]) return SLURM_ERROR; } - if((_check_coord_request(user_cond, true) == SLURM_ERROR) + if ((_check_coord_request(user_cond, true) == SLURM_ERROR) || exit_code) { slurmdb_destroy_user_cond(user_cond); return SLURM_ERROR; @@ -1386,8 +1391,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) notice_thread_fini(); slurmdb_destroy_user_cond(user_cond); - if(rc == SLURM_SUCCESS) { - if(commit_check("Would you like to commit changes?")) { + if (rc == SLURM_SUCCESS) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); @@ -1432,14 +1437,14 @@ extern int sacctmgr_list_user(int argc, char *argv[]) cond_set |= prev_set; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_user_cond(user_cond); list_destroy(format_list); return SLURM_ERROR; } - if(!list_count(format_list)) { - if(slurm_get_track_wckey()) + if (!list_count(format_list)) { + if (slurm_get_track_wckey()) slurm_addto_char_list(format_list, "U,DefaultA,DefaultW,Ad"); else @@ -1460,8 +1465,8 @@ extern int sacctmgr_list_user(int argc, char *argv[]) user_cond->with_wckeys = 1; } - if(!user_cond->with_assocs && cond_set > 1) { - if(!commit_check("You requested options that are only vaild " + if (!user_cond->with_assocs && cond_set > 1) { + if (!commit_check("You requested options that are only vaild " "when querying with the withassoc option.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); @@ -1474,7 +1479,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); - if(exit_code) { + if (exit_code) { slurmdb_destroy_user_cond(user_cond); list_destroy(print_fields_list); return SLURM_ERROR; @@ -1483,7 +1488,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) user_list = acct_storage_g_get_users(db_conn, my_uid, user_cond); slurmdb_destroy_user_cond(user_cond); - if(!user_list) { 
+ if (!user_list) { exit_code=1; fprintf(stderr, " Problem with query.\n"); list_destroy(print_fields_list); @@ -1497,7 +1502,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field_count = list_count(print_fields_list); while((user = list_next(itr))) { - if(user->assoc_list) { + if (user->assoc_list) { char *curr_cluster = NULL; ListIterator itr3 = list_iterator_create(user->assoc_list); @@ -1709,20 +1714,20 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) } } - if(exit_code) { + if (exit_code) { slurmdb_destroy_user_cond(user_cond); slurmdb_destroy_user_rec(user); slurmdb_destroy_association_rec(assoc); return SLURM_ERROR; - } else if(!rec_set) { + } else if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); slurmdb_destroy_user_cond(user_cond); slurmdb_destroy_user_rec(user); slurmdb_destroy_association_rec(assoc); return SLURM_ERROR; - } else if(!cond_set) { - if(!commit_check("You didn't set any conditions with 'WHERE'.\n" + } else if (!cond_set) { + if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); slurmdb_destroy_user_cond(user_cond); @@ -1754,8 +1759,8 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) _check_and_set_cluster_list(user_cond->assoc_cond->cluster_list); notice_thread_init(); - if(rec_set & 1) { // process the account changes - if(cond_set == 2) { + if (rec_set & 1) { // process the account changes + if (cond_set == 2) { rc = SLURM_ERROR; exit_code=1; fprintf(stderr, @@ -1764,15 +1769,15 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) goto assoc_start; } - if(user_cond->assoc_cond + if (user_cond->assoc_cond && user_cond->assoc_cond->acct_list && list_count(user_cond->assoc_cond->acct_list)) { notice_thread_fini(); - if(commit_check( + if (commit_check( " You specified Accounts in your " "request. 
Did you mean " "DefaultAccounts?\n")) { - if(!user_cond->def_acct_list) + if (!user_cond->def_acct_list) user_cond->def_acct_list = list_create(slurm_destroy_char); list_transfer(user_cond->def_acct_list, @@ -1813,24 +1818,25 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) } } else if (ret_list) { printf(" Nothing modified\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); - if(errno == ESLURM_ONE_CHANGE) + if (errno == ESLURM_ONE_CHANGE) fprintf(stderr, " If you are changing a users " "name you can only specify 1 user " "at a time.\n"); rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); } assoc_start: - if(rec_set & 2) { // process the association changes - if(cond_set == 1 + if (rec_set & 2) { // process the association changes + if (cond_set == 1 && !list_count(user_cond->assoc_cond->user_list)) { rc = SLURM_ERROR; exit_code=1; @@ -1843,7 +1849,7 @@ assoc_start: ret_list = acct_storage_g_modify_associations( db_conn, my_uid, user_cond->assoc_cond, assoc); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr; set = 1; @@ -1864,8 +1870,9 @@ assoc_start: } list_iterator_destroy(itr); } - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing modified\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -1873,14 +1880,14 @@ assoc_start: rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); } assoc_end: notice_thread_fini(); - if(set) { - if(commit_check("Would you like to commit changes?")) + if (set) { + if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); @@ -1919,7 +1926,7 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) && list_count(user_cond->assoc_cond->cluster_list)) cond_set |= 2; - if(!cond_set) { + if (!cond_set) { exit_code=1; 
fprintf(stderr, " No conditions given to remove, not executing.\n"); @@ -1927,7 +1934,7 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) return SLURM_ERROR; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_user_cond(user_cond); return SLURM_ERROR; } @@ -1946,14 +1953,14 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) slurmdb_destroy_user_cond(user_cond); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; List del_user_list = NULL; ListIterator itr = list_iterator_create(ret_list); /* If there were jobs running with an association to be deleted, don't. */ - if(rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { + if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { fprintf(stderr, " Error with request: %s\n", slurm_strerror(rc)); while((object = list_next(itr))) { @@ -1964,9 +1971,9 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); return rc; } - if(cond_set == 1) { + if (cond_set == 1) { printf(" Deleting users...\n"); - } else if(cond_set & 2) { + } else if (cond_set & 2) { printf(" Deleting user associations...\n"); } while((object = list_next(itr))) { @@ -2047,14 +2054,15 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) } } - if(commit_check("Would you like to commit changes?")) { + if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing deleted\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -2089,23 +2097,23 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) cond_set |= prev_set; } - if(exit_code) { + if (exit_code) { slurmdb_destroy_user_cond(user_cond); return SLURM_ERROR; - } else if(!cond_set) { + } else if (!cond_set) { exit_code=1; fprintf(stderr, " You need to specify a user list " "or account list here.\n"); 
slurmdb_destroy_user_cond(user_cond); return SLURM_ERROR; } - if((_check_coord_request(user_cond, false) == SLURM_ERROR) + if ((_check_coord_request(user_cond, false) == SLURM_ERROR) || exit_code) { slurmdb_destroy_user_cond(user_cond); return SLURM_ERROR; } - if(user_cond->assoc_cond->user_list) { + if (user_cond->assoc_cond->user_list) { itr = list_iterator_create(user_cond->assoc_cond->user_list); while((name = list_next(itr))) { xstrfmtcat(user_str, " %s\n", name); @@ -2114,7 +2122,7 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) list_iterator_destroy(itr); } - if(user_cond->assoc_cond->acct_list) { + if (user_cond->assoc_cond->acct_list) { itr = list_iterator_create(user_cond->assoc_cond->acct_list); while((name = list_next(itr))) { xstrfmtcat(acct_str, " %s\n", name); @@ -2123,7 +2131,7 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) list_iterator_destroy(itr); } - if(!user_str && !acct_str) { + if (!user_str && !acct_str) { exit_code=1; fprintf(stderr, " You need to specify a user list " "or an account list here.\n"); @@ -2134,9 +2142,9 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) * just assumed. Right now it doesn't do it correctly though. * This is why we are doing it this way. 
*/ - if(user_str) { + if (user_str) { printf(" Removing Coordinators with user name\n%s", user_str); - if(acct_str) + if (acct_str) printf(" From Account(s)\n%s", acct_str); else printf(" From all accounts\n"); @@ -2150,7 +2158,7 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) slurmdb_destroy_user_cond(user_cond); - if(ret_list && list_count(ret_list)) { + if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Removed Coordinators (sub accounts not listed)...\n"); @@ -2159,8 +2167,9 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) } list_iterator_destroy(itr); set = 1; - } else if(ret_list) { + } else if (ret_list) { printf(" Nothing removed\n"); + rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", @@ -2168,11 +2177,11 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) rc = SLURM_ERROR; } - if(ret_list) + if (ret_list) list_destroy(ret_list); notice_thread_fini(); - if(set) { - if(commit_check("Would you like to commit changes?")) + if (set) { + if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); diff --git a/src/sacctmgr/wckey_functions.c b/src/sacctmgr/wckey_functions.c index 31a9e958ae594bfe0f6e6d55464b3b7667bcfa55..c2fc6fc857f802b9236ec2e405980756283b3dc1 100644 --- a/src/sacctmgr/wckey_functions.c +++ b/src/sacctmgr/wckey_functions.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -49,14 +49,14 @@ static int _set_cond(int *start, int argc, char *argv[], int end = 0; int command_len = 0; - if(!wckey_cond) { + if (!wckey_cond) { error("No wckey_cond given"); return -1; } for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -65,22 +65,22 @@ static int _set_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "where", + if (!end && !strncasecmp(argv[i], "where", MAX(command_len, 5))) { continue; - } else if(!end && !strncasecmp(argv[i], "withdeleted", + } else if (!end && !strncasecmp(argv[i], "withdeleted", MAX(command_len, 5))) { wckey_cond->with_deleted = 1; set = 1; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "WCKeys", MAX(command_len, 3)) || !strncasecmp (argv[i], "Names", MAX(command_len, 3))) { - if(!wckey_cond->name_list) + if (!wckey_cond->name_list) wckey_cond->name_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->name_list, + if (slurm_addto_char_list(wckey_cond->name_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Ids", @@ -89,11 +89,11 @@ static int _set_cond(int *start, int argc, char *argv[], char *temp = NULL; uint32_t id = 0; - if(!wckey_cond->id_list) + if (!wckey_cond->id_list) wckey_cond->id_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->id_list, + if (slurm_addto_char_list(wckey_cond->id_list, argv[i]+end)) set = 1; @@ -109,10 +109,10 @@ static int _set_cond(int *start, int argc, char *argv[], list_iterator_destroy(itr); } else if (!strncasecmp (argv[i], "Clusters", MAX(command_len, 3))) { - if(!wckey_cond->cluster_list) + if (!wckey_cond->cluster_list) wckey_cond->cluster_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->cluster_list, + if (slurm_addto_char_list(wckey_cond->cluster_list, argv[i]+end)) set = 
1; } else if (!strncasecmp (argv[i], "End", MAX(command_len, 1))) { @@ -120,7 +120,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Start", MAX(command_len, 1))) { @@ -128,10 +128,10 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Users", MAX(command_len, 1))) { - if(!wckey_cond->user_list) + if (!wckey_cond->user_list) wckey_cond->user_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->user_list, + if (slurm_addto_char_list(wckey_cond->user_list, argv[i]+end)) set = 1; } else { @@ -178,13 +178,13 @@ extern int sacctmgr_list_wckey(int argc, char *argv[]) _set_cond(&i, argc, argv, wckey_cond, format_list); } - if(exit_code) { + if (exit_code) { slurmdb_destroy_wckey_cond(wckey_cond); list_destroy(format_list); return SLURM_ERROR; } - if(!list_count(format_list)) { + if (!list_count(format_list)) { slurm_addto_char_list(format_list, "Name,Cluster,User"); } @@ -197,7 +197,7 @@ extern int sacctmgr_list_wckey(int argc, char *argv[]) int command_len = 0; int newlen = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -205,24 +205,24 @@ extern int sacctmgr_list_wckey(int argc, char *argv[]) command_len = strlen(object); field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("WCKeys", object, MAX(command_len, 1)) + if (!strncasecmp("WCKeys", object, MAX(command_len, 1)) || !strncasecmp("Names", object, MAX(command_len, 1))) { field->type = PRINT_NAME; field->name = xstrdup("WCKey"); field->len = 10; field->print_routine = print_fields_str; - } else if(!strncasecmp("Clusters", object, + } else if (!strncasecmp("Clusters", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER; field->name 
= xstrdup("Cluster"); field->len = 10; field->print_routine = print_fields_str; - } else if(!strncasecmp("IDs", object, MAX(command_len, 1))) { + } else if (!strncasecmp("IDs", object, MAX(command_len, 1))) { field->type = PRINT_ID; field->name = xstrdup("ID"); field->len = 6; field->print_routine = print_fields_uint; - } else if(!strncasecmp("Users", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Users", object, MAX(command_len, 1))) { field->type = PRINT_USER; field->name = xstrdup("User"); field->len = 10; @@ -234,7 +234,7 @@ extern int sacctmgr_list_wckey(int argc, char *argv[]) continue; } - if(newlen) + if (newlen) field->len = newlen; list_append(print_fields_list, field); @@ -242,7 +242,7 @@ extern int sacctmgr_list_wckey(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); - if(exit_code) { + if (exit_code) { slurmdb_destroy_wckey_cond(wckey_cond); list_destroy(print_fields_list); return SLURM_ERROR; @@ -251,7 +251,7 @@ extern int sacctmgr_list_wckey(int argc, char *argv[]) wckey_list = acct_storage_g_get_wckeys(db_conn, my_uid, wckey_cond); slurmdb_destroy_wckey_cond(wckey_cond); - if(!wckey_list) { + if (!wckey_list) { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in index 5377e99e309d21328f50b7ab97d5fa9667793034..df6909d69e35dd9dc37428be46d4464cb1cc9c9a 100644 --- a/src/salloc/Makefile.in +++ b/src/salloc/Makefile.in @@ -60,6 +60,7 @@ subdir = src/salloc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ 
$(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -158,6 +162,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -178,6 +184,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -187,6 +196,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -194,6 +205,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = 
@HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -228,6 +248,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -255,6 +278,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/salloc/opt.c b/src/salloc/opt.c index c2aa64f86b215eacd463d054099514728805e3c5..4d78d63471867f203503fb59a2681120daa0c6f1 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -83,6 +83,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_resource_info.h" #include "src/common/slurm_rlimits_info.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -114,6 +115,7 @@ #define OPT_SIGNAL 0x15 #define OPT_KILL_CMD 0x16 #define OPT_TIME_VAL 0x17 +#define OPT_PROFILE 0x18 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_CPU_BIND 0x101 @@ -163,6 +165,7 @@ #define LONG_OPT_GRES 0x141 #define LONG_OPT_WAIT_ALL_NODES 0x142 #define LONG_OPT_REQ_SWITCH 0x143 +#define LONG_OPT_PROFILE 0x144 /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -229,12 +232,12 @@ static bool _valid_node_list(char **node_list_pptr) procs to use then we need exactly this many since we are saying, lay it out this way! Same for max and min nodes. Other than that just read in as many in the hostfile */ - if(opt.ntasks_set) + if (opt.ntasks_set) count = opt.ntasks; - else if(opt.nodes_set) { - if(opt.max_nodes) + else if (opt.nodes_set) { + if (opt.max_nodes) count = opt.max_nodes; - else if(opt.min_nodes) + else if (opt.min_nodes) count = opt.min_nodes; } @@ -304,6 +307,7 @@ static void _opt_default() opt.time_min = NO_VAL; opt.time_min_str = NULL; opt.partition = NULL; + opt.profile = ACCT_GATHER_PROFILE_NOT_SET; opt.job_name = NULL; opt.jobid = NO_VAL; @@ -343,7 +347,7 @@ static void _opt_default() for (i=0; i<HIGHEST_DIMENSIONS; i++) { opt.conn_type[i] = (uint16_t) NO_VAL; - opt.geometry[i] = (uint16_t) NO_VAL; + opt.geometry[i] = 0; } opt.reboot = false; opt.no_rotate = false; @@ -352,7 +356,7 @@ static void _opt_default() opt.egid = (gid_t) -1; opt.bell = BELL_AFTER_DELAY; - opt.acctg_freq = -1; + opt.acctg_freq = NULL; opt.no_shell = false; opt.get_user_env_time = -1; opt.get_user_env_mode = -1; @@ -383,7 +387,7 @@ struct env_vars { 
env_vars_t env_vars[] = { {"SALLOC_ACCOUNT", OPT_STRING, &opt.account, NULL }, - {"SALLOC_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, + {"SALLOC_ACCTG_FREQ", OPT_STRING, &opt.acctg_freq, NULL }, {"SALLOC_BELL", OPT_BELL, NULL, NULL }, {"SALLOC_CONN_TYPE", OPT_CONN_TYPE, NULL, NULL }, {"SALLOC_CPU_BIND", OPT_CPU_BIND, NULL, NULL }, @@ -399,6 +403,7 @@ env_vars_t env_vars[] = { {"SALLOC_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SALLOC_PARTITION", OPT_STRING, &opt.partition, NULL }, + {"SALLOC_PROFILE", OPT_PROFILE, NULL, NULL }, {"SALLOC_QOS", OPT_STRING, &opt.qos, NULL }, {"SALLOC_RESERVATION", OPT_STRING, &opt.reservation, NULL }, {"SALLOC_SIGNAL", OPT_SIGNAL, NULL, NULL }, @@ -564,7 +569,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_TIME_VAL: opt.wait4switch = time_str2secs(val); break; - + case OPT_PROFILE: + opt.profile = acct_gather_profile_from_string((char *)val); + break; default: /* do nothing */ break; @@ -667,6 +674,7 @@ void set_options(const int argc, char **argv) {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"qos", required_argument, 0, LONG_OPT_QOS}, + {"profile", required_argument, 0, LONG_OPT_PROFILE}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, @@ -1008,6 +1016,9 @@ void set_options(const int argc, char **argv) case LONG_OPT_JOBID: opt.jobid = _get_int(optarg, "jobid"); break; + case LONG_OPT_PROFILE: + opt.profile = acct_gather_profile_from_string(optarg); + break; case LONG_OPT_COMMENT: xfree(opt.comment); opt.comment = xstrdup(optarg); @@ -1093,7 +1104,8 @@ void set_options(const int argc, char **argv) opt.ramdiskimage = xstrdup(optarg); break; case LONG_OPT_ACCTG_FREQ: - opt.acctg_freq = _get_int(optarg, "acctg-freq"); + xfree(opt.acctg_freq); + 
opt.acctg_freq = xstrdup(optarg); break; case LONG_OPT_NOSHELL: opt.no_shell = true; @@ -1398,7 +1410,7 @@ static bool _opt_verify(void) if (opt.distribution == SLURM_DIST_PLANE && opt.plane_size) { if ((opt.ntasks/opt.plane_size) < opt.min_nodes) { if (((opt.min_nodes-1)*opt.plane_size) >= opt.ntasks) { -#if(0) +#if (0) info("Too few processes ((n/plane_size) %d < N %d) " "and ((N-1)*(plane_size) %d >= n %d)) ", opt.ntasks/opt.plane_size, opt.min_nodes, @@ -1474,12 +1486,12 @@ static bool _opt_verify(void) } /* else if (opt.ntasks_set && !opt.nodes_set) */ - if(!opt.nodelist) { - if((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) { + if (!opt.nodelist) { + if ((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) { /* make sure the file being read in has a / in it to make sure it is a file in the valid_node_list function */ - if(!strstr(opt.nodelist, "/")) { + if (!strstr(opt.nodelist, "/")) { char *add_slash = xstrdup("./"); xstrcat(add_slash, opt.nodelist); xfree(opt.nodelist); @@ -1502,14 +1514,14 @@ static bool _opt_verify(void) /* set up the proc and node counts based on the arbitrary list of nodes */ - if((opt.distribution == SLURM_DIST_ARBITRARY) + if ((opt.distribution == SLURM_DIST_ARBITRARY) && (!opt.nodes_set || !opt.ntasks_set)) { hostlist_t hl = hostlist_create(opt.nodelist); - if(!opt.ntasks_set) { + if (!opt.ntasks_set) { opt.ntasks_set = 1; opt.ntasks = hostlist_count(hl); } - if(!opt.nodes_set) { + if (!opt.nodes_set) { opt.nodes_set = 1; hostlist_uniq(hl); opt.min_nodes = opt.max_nodes = hostlist_count(hl); @@ -1570,6 +1582,10 @@ static bool _opt_verify(void) opt.ntasks_per_node); } + if (opt.profile) + setenvfs("SLURM_PROFILE=%s", + acct_gather_profile_to_string(opt.profile)); + return verified; } @@ -1704,7 +1720,6 @@ static char *print_constraints() static void _opt_list(void) { - int i; char *str; info("defined options for program `%s'", opt.progname); @@ -1731,7 +1746,7 @@ static void _opt_list(void) if (opt.jobid != NO_VAL) info("jobid : 
%u", opt.jobid); info("distribution : %s", format_task_dist_states(opt.distribution)); - if(opt.distribution == SLURM_DIST_PLANE) + if (opt.distribution == SLURM_DIST_PLANE) info("plane size : %u", opt.plane_size); info("verbose : %d", opt.verbose); if (opt.immediate <= 1) @@ -1753,14 +1768,16 @@ static void _opt_list(void) if (opt.gres != NULL) info("gres : %s", opt.gres); info("network : %s", opt.network); + info("profile : `%s'", + acct_gather_profile_to_string(opt.profile)); info("qos : %s", opt.qos); str = print_constraints(); info("constraints : %s", str); xfree(str); - for (i = 0; i < HIGHEST_DIMENSIONS; i++) { - if (opt.conn_type[i] == (uint16_t) NO_VAL) - break; - info("conn_type[%d] : %u", i, opt.conn_type[i]); + if (opt.conn_type[0] != (uint16_t) NO_VAL) { + str = conn_type_string_full(opt.conn_type); + info("conn_type : %s", str); + xfree(str); } str = print_geometry(opt.geometry); info("geometry : %s", str); @@ -1842,7 +1859,7 @@ static void _usage(void) " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" " [--network=type] [--mem-per-cpu=MB] [--qos=qos]\n" " [--cpu_bind=...] [--mem_bind=...] 
[--reservation=name]\n" -" [--time-min=minutes] [--gres=list]\n" +" [--time-min=minutes] [--gres=list] [--profile=...]\n" " [--switches=max-switches[@max-time-to-wait]]\n" " [executable [args...]]\n"); } @@ -1883,6 +1900,9 @@ static void _help(void) " --ntasks-per-node=n number of tasks to invoke on each node\n" " -N, --nodes=N number of nodes on which to run (N = min[-max])\n" " -O, --overcommit overcommit resources\n" +" --profile=value enable acct_gather_profile for detailed data\n" +" value is all or none or any combination of\n" +" energy, lustre, network or task\n" " -p, --partition=partition partition requested\n" " --qos=qos quality of service\n" " -Q, --quiet quiet mode (suppress informational messages)\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 119d9a57c17c8a5ec731817c780558c4fd705371..dddf118918b52fd40f4bde469a156fad16031f4f 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -94,6 +94,7 @@ typedef struct salloc_options { int time_min; /* --min-time (int minutes) */ char *time_min_str; /* --min-time (string) */ char *partition; /* --partition=n, -p n */ + uint32_t profile; /* --profile=[all | none} */ enum task_dist_states distribution; /* --distribution=, -m dist */ uint32_t plane_size; /* lllp distribution -> plane_size for @@ -112,7 +113,8 @@ typedef struct salloc_options { bool hold; /* --hold, -H */ bool no_kill; /* --no-kill, -k */ - int acctg_freq; /* --acctg-freq=secs */ + char *acctg_freq; /* --acctg-freq=<type1>=<freq1>,*/ + /* <type2>=<freq2>,... 
*/ char *licenses; /* --licenses, -L */ bool overcommit; /* --overcommit -O */ int kill_command_signal;/* --kill-command, -K */ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 46991d1e1a0d2a05f587048cf5df316b0ed5fc1a..85c7cd94d287d2c3d3af7a6f61cfc9f59d11d5f9 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -312,8 +312,8 @@ int main(int argc, char *argv[]) before = time(NULL); while ((alloc = slurm_allocate_resources_blocking(&desc, opt.immediate, _pending_callback)) == NULL) { - if ((errno != ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) || - (retries >= MAX_RETRIES)) + if (((errno != ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) && + (errno != EAGAIN)) || (retries >= MAX_RETRIES)) break; if (retries == 0) error("%s", msg); @@ -407,8 +407,8 @@ int main(int argc, char *argv[]) env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d", opt.overcommit); } - if (opt.acctg_freq >= 0) { - env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d", + if (opt.acctg_freq) { + env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%s", opt.acctg_freq); } if (opt.network) @@ -567,19 +567,22 @@ static void _set_spank_env(void) } } -/* Set SLURM_SUBMIT_DIR environment variable with current state */ +/* Set SLURM_SUBMIT_DIR and SLURM_SUBMIT_HOST environment variables within + * current state */ static void _set_submit_dir_env(void) { + char host[256]; + work_dir = xmalloc(MAXPATHLEN + 1); - if ((getcwd(work_dir, MAXPATHLEN)) == NULL) { + if ((getcwd(work_dir, MAXPATHLEN)) == NULL) error("getcwd failed: %m"); - exit(error_exit); - } - - if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", work_dir) < 0) { + else if (setenvf(NULL, "SLURM_SUBMIT_DIR", 
"%s", work_dir) < 0) error("unable to set SLURM_SUBMIT_DIR in environment"); - return; - } + + if ((gethostname(host, sizeof(host)))) + error("gethostname_short failed: %m"); + else if (setenvf(NULL, "SLURM_SUBMIT_HOST", "%s", host) < 0) + error("unable to set SLURM_SUBMIT_HOST in environment"); } /* Returns 0 on success, -1 on failure */ @@ -611,6 +614,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->immediate = 1; desc->name = xstrdup(opt.job_name); desc->reservation = xstrdup(opt.reservation); + desc->profile = opt.profile; desc->wckey = xstrdup(opt.wckey); if (opt.req_switch >= 0) desc->req_switch = opt.req_switch; @@ -654,8 +658,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->begin_time = opt.begin; if (opt.account) desc->account = xstrdup(opt.account); - if (opt.acctg_freq >= 0) - desc->acctg_freq = opt.acctg_freq; + if (opt.acctg_freq) + desc->acctg_freq = xstrdup(opt.acctg_freq); if (opt.comment) desc->comment = xstrdup(opt.comment); if (opt.qos) @@ -1055,7 +1059,7 @@ static int _blocks_dealloc(void) return -1; } for (i=0; i<new_bg_ptr->record_count; i++) { - if(new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { + if (new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { rc = 1; break; } diff --git a/src/salloc/salloc.h b/src/salloc/salloc.h index fe3b8bba85e11c302de2df4304ed911db41485cf..eaa88c8e62301e1ca3f9ff6be4617153216d4abe 100644 --- a/src/salloc/salloc.h +++ b/src/salloc/salloc.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sattach/Makefile.in b/src/sattach/Makefile.in index 7d275309f3875a7e451f22168d6dc3674fb25d08..03b350cef8de441e503fbcea362dd4335f233aff 100644 --- a/src/sattach/Makefile.in +++ b/src/sattach/Makefile.in @@ -59,6 +59,7 @@ subdir = src/sattach DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -158,6 +162,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -178,6 +184,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ 
GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -187,6 +196,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -194,6 +205,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -228,6 +248,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -255,6 +278,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sattach/attach.c b/src/sattach/attach.c index 5ca53f9440109840439b4bd9aae0b5e126e7e607..4a8329456933a2cf43bbb24fe737248d7b484db9 100644 --- a/src/sattach/attach.c +++ b/src/sattach/attach.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sattach/opt.c b/src/sattach/opt.c index 80668841101f8916910e3677f9633fbafc96d7b2..ee651129ef4221da350cc7de16d5bf7b19e07a8b 100644 --- a/src/sattach/opt.c +++ b/src/sattach/opt.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -392,8 +392,8 @@ static void _parse_jobid_stepid(char *jobid_str) step = ptr + 1; } - jobid = strtol(job, &ptr, 10); - if (!xstring_is_whitespace(ptr)) { + jobid = slurm_xlate_job_id(job); + if (jobid == 0) { error("\"%s\" does not look like a jobid", job); _usage(); xfree(job); diff --git a/src/sattach/opt.h b/src/sattach/opt.h index 2bd3e0978100cbabd8636af3e4fcc71b2c39c85d..7839e7a2bf09201772f1e831a8e5aa50f15af5be 100644 --- a/src/sattach/opt.h +++ b/src/sattach/opt.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c index 3e2904d94043d7973cdb8d1c3376246dce7d7b7f..bb37919031d67d22bc27c737833805a44137bf5a 100644 --- a/src/sattach/sattach.c +++ b/src/sattach/sattach.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sattach/sattach.wrapper.c b/src/sattach/sattach.wrapper.c index c49cf6ef396e97c021cc72dc7d6f43dd26a6399a..8bd315058d4fb7735f02bd4673a9511707805590 100644 --- a/src/sattach/sattach.wrapper.c +++ b/src/sattach/sattach.wrapper.c @@ -8,7 +8,7 @@ * Type "<ctrl-a>" to specify arguments for sattach * Type "g" to start the program * - * Information for other debuggers may be submitted to slurm-dev@lists.llnl.gov + * Information for other debuggers may be submitted to slurm-dev@schedmd.com */ extern int sattach(int argc, char **argv); diff --git a/src/sbatch/Makefile.in b/src/sbatch/Makefile.in index 58e17d5b1eda6a38b300de75f701a15c53690c48..7a81892e3976fc9da6233dbff7456489509bceef 100644 --- a/src/sbatch/Makefile.in +++ b/src/sbatch/Makefile.in @@ -59,6 +59,7 @@ subdir = src/sbatch DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ 
$(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -158,6 +162,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -178,6 +184,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -187,6 +196,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -194,6 +205,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -228,6 +248,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -255,6 +278,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git 
a/src/sbatch/mult_cluster.c b/src/sbatch/mult_cluster.c index 6f4f3eb46c30d9303d25f6b8ef06220ffd6c665d..c3fd1c0caf0f5e9e448b2f3b9fcc08b03114c5a9 100644 --- a/src/sbatch/mult_cluster.c +++ b/src/sbatch/mult_cluster.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -68,9 +68,9 @@ static int _sort_local_cluster(local_cluster_rec_t* rec_a, else if (rec_a->preempt_cnt > rec_b->preempt_cnt) return 1; - if(!strcmp(local_cluster_name, rec_a->cluster_rec->name)) + if (!strcmp(local_cluster_name, rec_a->cluster_rec->name)) return -1; - else if(!strcmp(local_cluster_name, rec_b->cluster_rec->name)) + else if (!strcmp(local_cluster_name, rec_b->cluster_rec->name)) return 1; return 0; @@ -174,8 +174,6 @@ extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req) } ret_list = list_create(_destroy_local_cluster_rec); - if (ret_list == NULL) - fatal("list_create malloc failure"); itr = list_iterator_create(opt.clusters); while ((working_cluster_rec = list_next(itr))) { if ((local_cluster = _job_will_run(req))) diff --git a/src/sbatch/mult_cluster.h b/src/sbatch/mult_cluster.h index 6ff21ce41beafc0b8e9a1c5ca301a1975a8ffa97..b6d765a647fbdc032f86368e8f36461e5ca121e5 100644 --- a/src/sbatch/mult_cluster.h +++ b/src/sbatch/mult_cluster.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 287c60bf2164851bb9ab122ff08658de69db2c79..37675e8f392afb487c66fc69e4e420b20c11e737 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -82,6 +82,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_resource_info.h" #include "src/common/slurm_rlimits_info.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -115,6 +116,8 @@ #define OPT_EXPORT 0x17 #define OPT_CLUSTERS 0x18 #define OPT_TIME_VAL 0x19 +#define OPT_ARRAY_INX 0x20 +#define OPT_PROFILE 0x21 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_PROPAGATE 0x100 @@ -170,6 +173,8 @@ #define LONG_OPT_EXPORT 0x151 #define LONG_OPT_REQ_SWITCH 0x152 #define LONG_OPT_EXPORT_FILE 0x153 +#define LONG_OPT_PROFILE 0x154 +#define LONG_OPT_IGNORE_PBS 0x155 /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -192,7 +197,8 @@ static void _opt_default(void); static void _opt_batch_script(const char *file, const void *body, int size); /* set options from pbs batch script */ -static void _opt_pbs_batch_script(const char *file, const void *body, int size); +static void _opt_pbs_batch_script(const char *file, const void *body, int size, + int argc, char **argv); /* set options based upon env vars */ static void _opt_env(void); @@ -252,12 +258,12 @@ static bool _valid_node_list(char **node_list_pptr) procs to use then we need exactly this many since we are saying, lay it out this way! 
Same for max and min nodes. Other than that just read in as many in the hostfile */ - if(opt.ntasks_set) + if (opt.ntasks_set) count = opt.ntasks; - else if(opt.nodes_set) { - if(opt.max_nodes) + else if (opt.nodes_set) { + if (opt.max_nodes) count = opt.max_nodes; - else if(opt.min_nodes) + else if (opt.min_nodes) count = opt.min_nodes; } @@ -352,7 +358,7 @@ static void _opt_default() for (i=0; i<HIGHEST_DIMENSIONS; i++) { opt.conn_type[i] = (uint16_t) NO_VAL; - opt.geometry[i] = (uint16_t) NO_VAL; + opt.geometry[i] = 0; } opt.reboot = false; opt.no_rotate = false; @@ -360,6 +366,7 @@ static void _opt_default() opt.euid = (uid_t) -1; opt.egid = (gid_t) -1; + opt.profile = ACCT_GATHER_PROFILE_NOT_SET; opt.propagate = NULL; /* propagate specific rlimits */ opt.ifname = xstrdup("/dev/null"); @@ -370,59 +377,18 @@ static void _opt_default() opt.export_file = NULL; opt.get_user_env_time = -1; opt.get_user_env_mode = -1; - opt.acctg_freq = -1; + opt.acctg_freq = NULL; opt.reservation = NULL; - opt.wckey = NULL; opt.req_switch = -1; + opt.umask = -1; opt.wait4switch = -1; + opt.wckey = NULL; opt.ckpt_interval = 0; opt.ckpt_interval_str = NULL; opt.ckpt_dir = xstrdup(opt.cwd); } -static void _set_distribution(task_dist_states_t distribution, - char **dist, char **lllp_dist) -{ - if (((int)distribution >= 0) - && (distribution != SLURM_DIST_UNKNOWN)) { - switch(distribution) { - case SLURM_DIST_CYCLIC: - *dist = "cyclic"; - break; - case SLURM_DIST_BLOCK: - *dist = "block"; - break; - case SLURM_DIST_PLANE: - *dist = "plane"; - *lllp_dist = "plane"; - break; - case SLURM_DIST_ARBITRARY: - *dist = "arbitrary"; - break; - case SLURM_DIST_CYCLIC_CYCLIC: - *dist = "cyclic"; - *lllp_dist = "cyclic"; - break; - case SLURM_DIST_CYCLIC_BLOCK: - *dist = "cyclic"; - *lllp_dist = "block"; - break; - case SLURM_DIST_BLOCK_CYCLIC: - *dist = "block"; - *lllp_dist = "cyclic"; - break; - case SLURM_DIST_BLOCK_BLOCK: - *dist = "block"; - *lllp_dist = "block"; - break; - default: - 
error("unknown dist, type %d", distribution); - break; - } - } -} - /*---[ env var processing ]-----------------------------------------------*/ /* @@ -444,7 +410,8 @@ struct env_vars { env_vars_t env_vars[] = { {"SBATCH_ACCOUNT", OPT_STRING, &opt.account, NULL }, - {"SBATCH_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, + {"SBATCH_ARRAY_INX", OPT_STRING, &opt.array_inx, NULL }, + {"SBATCH_ACCTG_FREQ", OPT_STRING, &opt.acctg_freq, NULL }, {"SBATCH_BLRTS_IMAGE", OPT_STRING, &opt.blrtsimage, NULL }, {"SBATCH_CHECKPOINT", OPT_STRING, &opt.ckpt_interval_str, NULL }, {"SBATCH_CHECKPOINT_DIR",OPT_STRING, &opt.ckpt_dir, NULL }, @@ -470,6 +437,7 @@ env_vars_t env_vars[] = { {"SBATCH_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SBATCH_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SBATCH_PARTITION", OPT_STRING, &opt.partition, NULL }, + {"SBATCH_PROFILE", OPT_PROFILE, NULL, NULL }, {"SBATCH_QOS", OPT_STRING, &opt.qos, NULL }, {"SBATCH_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL }, {"SBATCH_REQUEUE", OPT_REQUEUE, NULL, NULL }, @@ -547,6 +515,10 @@ _process_env_var(env_vars_t *e, const char *val) } break; + case OPT_ARRAY_INX: + xfree(opt.array_inx); + opt.array_inx = xstrdup(val); + case OPT_DEBUG: if (val != NULL) { opt.verbose = (int) strtol(val, &end, 10); @@ -649,6 +621,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_TIME_VAL: opt.wait4switch = time_str2secs(val); break; + case OPT_PROFILE: + opt.profile = acct_gather_profile_from_string((char *)val); + break; default: /* do nothing */ break; @@ -660,6 +635,7 @@ _process_env_var(env_vars_t *e, const char *val) static struct option long_options[] = { {"account", required_argument, 0, 'A'}, + {"array", required_argument, 0, 'a'}, {"batch", no_argument, 0, 'b'}, /* batch option is only here for moab tansition @@ -736,6 +712,7 @@ static struct option long_options[] = { {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, 
{"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, + {"profile", required_argument, 0, LONG_OPT_PROFILE}, {"qos", required_argument, 0, LONG_OPT_QOS}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, @@ -752,11 +729,12 @@ static struct option long_options[] = { {"wckey", required_argument, 0, LONG_OPT_WCKEY}, {"wrap", required_argument, 0, LONG_OPT_WRAP}, {"switches", required_argument, 0, LONG_OPT_REQ_SWITCH}, + {"ignore-pbs", no_argument, 0, LONG_OPT_IGNORE_PBS}, {NULL, 0, 0, 0} }; static char *opt_string = - "+bA:B:c:C:d:D:e:F:g:hHi:IJ:kL:m:M:n:N:o:Op:P:QRst:uU:vVw:x:"; + "+ba:A:B:c:C:d:D:e:F:g:hHi:IJ:kL:m:M:n:N:o:Op:P:QRst:uU:vVw:x:"; char *pos_delimit; @@ -793,8 +771,8 @@ char *process_options_first_pass(int argc, char **argv) opt.progname = xbasename(argv[0]); optind = 0; - while((opt_char = getopt_long(argc, argv, opt_string, - optz, &option_index)) != -1) { + while ((opt_char = getopt_long(argc, argv, opt_string, + optz, &option_index)) != -1) { switch (opt_char) { case '?': fprintf(stderr, "Try \"sbatch --help\" for more " @@ -875,7 +853,7 @@ int process_options_second_pass(int argc, char *argv[], const char *file, _opt_batch_script(file, script_body, script_size); /* set options from pbs batch script */ - _opt_pbs_batch_script(file, script_body, script_size); + _opt_pbs_batch_script(file, script_body, script_size, argc, argv); /* set options from env vars */ _opt_env(); @@ -1072,7 +1050,8 @@ static void _opt_batch_script(const char * file, const void *body, int size) * Build an argv-style array of options from the script "body", * then pass the array to _set_options for() further parsing. 
*/ -static void _opt_pbs_batch_script(const char *file, const void *body, int size) +static void _opt_pbs_batch_script(const char *file, const void *body, int size, + int cmd_argc, char **cmd_argv) { char *magic_word = "#PBS"; int magic_word_len; @@ -1086,13 +1065,20 @@ static void _opt_pbs_batch_script(const char *file, const void *body, int size) int lineno = 0; int i; + if (getenv("SBATCH_IGNORE_PBS")) + return; + for (i = 0; i < cmd_argc; i++) { + if (!strcmp(cmd_argv[i], "--ignore-pbs")) + return; + } + magic_word_len = strlen(magic_word); /* getopt_long skips over the first argument, so fill it in */ argc = 1; argv = xmalloc(sizeof(char *)); argv[0] = "sbatch"; - while((line = _next_line(body, size, &state)) != NULL) { + while ((line = _next_line(body, size, &state)) != NULL) { lineno++; if (strncmp(line, magic_word, magic_word_len) != 0) { xfree(line); @@ -1132,13 +1118,17 @@ static void _set_options(int argc, char **argv) } optind = 0; - while((opt_char = getopt_long(argc, argv, opt_string, - optz, &option_index)) != -1) { + while ((opt_char = getopt_long(argc, argv, opt_string, + optz, &option_index)) != -1) { switch (opt_char) { case '?': error("Try \"sbatch --help\" for more information"); exit(error_exit); break; + case 'a': + xfree(opt.array_inx); + opt.array_inx = xstrdup(optarg); + break; case 'A': case 'U': /* backwards compatibility */ xfree(opt.account); @@ -1471,6 +1461,9 @@ static void _set_options(int argc, char **argv) case LONG_OPT_REQUEUE: opt.requeue = 1; break; + case LONG_OPT_PROFILE: + opt.profile = acct_gather_profile_from_string(optarg); + break; case LONG_OPT_COMMENT: xfree(opt.comment); opt.comment = xstrdup(optarg); @@ -1581,7 +1574,8 @@ static void _set_options(int argc, char **argv) } break; case LONG_OPT_ACCTG_FREQ: - opt.acctg_freq = _get_int(optarg, "acctg-freq"); + xfree(opt.acctg_freq); + opt.acctg_freq = xstrdup(optarg); break; case LONG_OPT_PROPAGATE: xfree(opt.propagate); @@ -1651,6 +1645,10 @@ static void _set_options(int 
argc, char **argv) } opt.req_switch = _get_int(optarg, "switches"); break; + case LONG_OPT_IGNORE_PBS: + /* Ignore here, needed to process earlier, + * when the batch script was read. */ + break; default: if (spank_process_option (opt_char, optarg) < 0) { error("Unrecognized command line parameter %c", @@ -1690,8 +1688,8 @@ static void _proc_get_user_env(char *optarg) static void _set_pbs_options(int argc, char **argv) { int opt_char, option_index = 0; - - char *pbs_opt_string = "+a:A:c:C:e:hIj:k:l:m:M:N:o:p:q:r:S:u:v:VWz"; + char *sep; + char *pbs_opt_string = "+a:A:c:C:e:hIj:k:l:m:M:N:o:p:q:r:S:u:v:VW:z"; struct option pbs_long_options[] = { {"start_time", required_argument, 0, 'a'}, @@ -1715,15 +1713,14 @@ static void _set_pbs_options(int argc, char **argv) {"running_user", required_argument, 0, 'u'}, {"variable_list", required_argument, 0, 'v'}, {"all_env", no_argument, 0, 'V'}, - {"attributes", no_argument, 0, 'W'}, + {"attributes", required_argument, 0, 'W'}, {"no_std", no_argument, 0, 'z'}, {NULL, 0, 0, 0} }; - optind = 0; - while((opt_char = getopt_long(argc, argv, pbs_opt_string, - pbs_long_options, &option_index)) + while ((opt_char = getopt_long(argc, argv, pbs_opt_string, + pbs_long_options, &option_index)) != -1) { switch (opt_char) { case 'a': @@ -1736,8 +1733,6 @@ static void _set_pbs_options(int argc, char **argv) case 'c': break; case 'C': - xfree(opt.cwd); - opt.cwd = xstrdup(optarg); break; case 'e': xfree(opt.efname); @@ -1811,12 +1806,25 @@ static void _set_pbs_options(int argc, char **argv) case 'u': break; case 'v': + if (opt.export_env) + sep = ","; + xstrfmtcat(opt.export_env, "%s%s", sep, optarg); break; case 'V': break; case 'W': - xfree(opt.constraints); - opt.constraints = xstrdup(optarg); + if (!strncasecmp(optarg, "umask=", 6)) { + opt.umask = strtol(optarg+6, NULL, 0); + if ((opt.umask < 0) || (opt.umask > 0777)) { + error("Invalid umask ignored"); + opt.umask = -1; + } + } else if (!strncasecmp(optarg, "depend=", 7)) { + 
xfree(opt.dependency); + opt.dependency = xstrdup(optarg+7); + } else { + verbose("Ignored PBS attributes: %s", optarg); + } break; case 'z': break; @@ -1846,7 +1854,7 @@ static char *_get_pbs_node_name(char *node_options, int *i) value = xmalloc((*i)-start+1); memcpy(value, node_options+start, (*i)-start); - if(node_options[*i]) + if (node_options[*i]) (*i)++; return value; @@ -1858,7 +1866,7 @@ static void _get_next_pbs_node_part(char *node_options, int *i) && node_options[*i] != '+' && node_options[*i] != ':') (*i)++; - if(node_options[*i]) + if (node_options[*i]) (*i)++; } @@ -1871,14 +1879,14 @@ static void _parse_pbs_nodes_opts(char *node_opts) hostlist_t hl = hostlist_create(NULL); while(node_opts[i]) { - if(!strncmp(node_opts+i, "ppn=", 4)) { + if (!strncmp(node_opts+i, "ppn=", 4)) { i+=4; ppn += strtol(node_opts+i, NULL, 10); _get_next_pbs_node_part(node_opts, &i); - } else if(isdigit(node_opts[i])) { + } else if (isdigit(node_opts[i])) { node_cnt += strtol(node_opts+i, NULL, 10); _get_next_pbs_node_part(node_opts, &i); - } else if(isalpha(node_opts[i])) { + } else if (isalpha(node_opts[i])) { temp = _get_pbs_node_name(node_opts, &i); hostlist_push(hl, temp); xfree(temp); @@ -1887,20 +1895,20 @@ static void _parse_pbs_nodes_opts(char *node_opts) } - if(!node_cnt) + if (!node_cnt) node_cnt = 1; else { opt.nodes_set = true; opt.min_nodes = opt.max_nodes = node_cnt; } - if(ppn) { + if (ppn) { ppn *= node_cnt; opt.ntasks_set = true; opt.ntasks = ppn; } - if(hostlist_count(hl) > 0) { + if (hostlist_count(hl) > 0) { xfree(opt.nodelist); opt.nodelist = hostlist_ranged_string_xmalloc(hl); #ifdef HAVE_BG @@ -1918,21 +1926,21 @@ static void _get_next_pbs_option(char *pbs_options, int *i) { while(pbs_options[*i] && pbs_options[*i] != ',') (*i)++; - if(pbs_options[*i]) + if (pbs_options[*i]) (*i)++; } -static char *_get_pbs_option_value(char *pbs_options, int *i) +static char *_get_pbs_option_value(char *pbs_options, int *i, char sep) { int start = (*i); char *value = 
NULL; - while(pbs_options[*i] && pbs_options[*i] != ',') + while (pbs_options[*i] && pbs_options[*i] != sep) (*i)++; value = xmalloc((*i)-start+1); memcpy(value, pbs_options+start, (*i)-start); - if(pbs_options[*i]) + if (pbs_options[*i]) (*i)++; return value; @@ -1941,15 +1949,22 @@ static char *_get_pbs_option_value(char *pbs_options, int *i) static void _parse_pbs_resource_list(char *rl) { int i = 0; + int gpus = 0; char *temp = NULL; - - while(rl[i]) { - if(!strncmp(rl+i, "arch=", 5)) { + int pbs_pro_flag = 0; /* Bits: select:1 ncpus:2 mpiprocs:4 */ + + while (rl[i]) { + if (!strncasecmp(rl+i, "accelerator=", 12)) { + i += 12; + if (!strncasecmp(rl+i, "true", 4) && (gpus < 1)) + gpus = 1; + /* Also see "naccelerators=" below */ + } else if (!strncmp(rl+i, "arch=", 5)) { i+=5; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "cput=", 5)) { + } else if (!strncmp(rl+i, "cput=", 5)) { i+=5; - temp = _get_pbs_option_value(rl, &i); + temp = _get_pbs_option_value(rl, &i, ','); if (!temp) { error("No value given for cput"); exit(error_exit); @@ -1957,12 +1972,12 @@ static void _parse_pbs_resource_list(char *rl) xfree(opt.time_limit_str); opt.time_limit_str = xstrdup(temp); xfree(temp); - } else if(!strncmp(rl+i, "file=", 5)) { + } else if (!strncmp(rl+i, "file=", 5)) { int end = 0; i+=5; - temp = _get_pbs_option_value(rl, &i); - if(!temp) { + temp = _get_pbs_option_value(rl, &i, ','); + if (!temp) { error("No value given for file"); exit(error_exit); } @@ -1980,15 +1995,15 @@ static void _parse_pbs_resource_list(char *rl) exit(error_exit); } xfree(temp); - } else if(!strncmp(rl+i, "host=", 5)) { + } else if (!strncmp(rl+i, "host=", 5)) { i+=5; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "mem=", 4)) { + } else if (!strncmp(rl+i, "mem=", 4)) { int end = 0; i+=4; - temp = _get_pbs_option_value(rl, &i); - if(!temp) { + temp = _get_pbs_option_value(rl, &i, ','); + if (!temp) { error("No value given for mem"); exit(error_exit); } @@ -2007,6 +2022,14 @@ 
static void _parse_pbs_resource_list(char *rl) } xfree(temp); + } else if (!strncasecmp(rl+i, "mpiprocs=", 9)) { + i += 9; + temp = _get_pbs_option_value(rl, &i, ':'); + if (temp) { + pbs_pro_flag |= 4; + opt.ntasks_per_node = _get_int(temp, "mpiprocs"); + xfree(temp); + } #ifdef HAVE_CRAY /* * NB: no "mppmem" here since it specifies per-PE memory units, @@ -2015,7 +2038,7 @@ static void _parse_pbs_resource_list(char *rl) } else if (!strncmp(rl + i, "mppdepth=", 9)) { /* Cray: number of CPUs (threads) per processing element */ i += 9; - temp = _get_pbs_option_value(rl, &i); + temp = _get_pbs_option_value(rl, &i, ','); if (temp) { opt.cpus_per_task = _get_int(temp, "mppdepth"); opt.cpus_set = true; @@ -2024,7 +2047,7 @@ static void _parse_pbs_resource_list(char *rl) } else if (!strncmp(rl + i, "mppnodes=", 9)) { /* Cray `nodes' variant: hostlist without prefix */ i += 9; - temp = _get_pbs_option_value(rl, &i); + temp = _get_pbs_option_value(rl, &i, ','); if (!temp) { error("No value given for mppnodes"); exit(error_exit); @@ -2034,23 +2057,38 @@ static void _parse_pbs_resource_list(char *rl) } else if (!strncmp(rl + i, "mppnppn=", 8)) { /* Cray: number of processing elements per node */ i += 8; - temp = _get_pbs_option_value(rl, &i); + temp = _get_pbs_option_value(rl, &i, ','); if (temp) opt.ntasks_per_node = _get_int(temp, "mppnppn"); xfree(temp); } else if (!strncmp(rl + i, "mppwidth=", 9)) { /* Cray: task width (number of processing elements) */ i += 9; - temp = _get_pbs_option_value(rl, &i); + temp = _get_pbs_option_value(rl, &i, ','); if (temp) { opt.ntasks = _get_int(temp, "mppwidth"); opt.ntasks_set = true; } xfree(temp); #endif /* HAVE_CRAY */ - } else if(!strncmp(rl+i, "nice=", 5)) { + } else if (!strncasecmp(rl+i, "naccelerators=", 14)) { + i += 14; + temp = _get_pbs_option_value(rl, &i, ','); + if (temp) { + gpus = _get_int(temp, "naccelerators"); + xfree(temp); + } + } else if (!strncasecmp(rl+i, "ncpus=", 6)) { + i += 6; + temp = 
_get_pbs_option_value(rl, &i, ':'); + if (temp) { + pbs_pro_flag |= 2; + opt.mincpus = _get_int(temp, "ncpus"); + xfree(temp); + } + } else if (!strncmp(rl+i, "nice=", 5)) { i+=5; - temp = _get_pbs_option_value(rl, &i); + temp = _get_pbs_option_value(rl, &i, ','); if (temp) opt.nice = strtol(temp, NULL, 10); else @@ -2070,47 +2108,65 @@ static void _parse_pbs_resource_list(char *rl) } } xfree(temp); - } else if(!strncmp(rl+i, "nodes=", 6)) { + } else if (!strncmp(rl+i, "nodes=", 6)) { i+=6; - temp = _get_pbs_option_value(rl, &i); - if(!temp) { + temp = _get_pbs_option_value(rl, &i, ','); + if (!temp) { error("No value given for nodes"); exit(error_exit); } _parse_pbs_nodes_opts(temp); xfree(temp); - } else if(!strncmp(rl+i, "opsys=", 6)) { + } else if (!strncmp(rl+i, "opsys=", 6)) { i+=6; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "other=", 6)) { + } else if (!strncmp(rl+i, "other=", 6)) { i+=6; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "pcput=", 6)) { + } else if (!strncmp(rl+i, "pcput=", 6)) { i+=6; - temp = _get_pbs_option_value(rl, &i); - if(!temp) { + temp = _get_pbs_option_value(rl, &i, ','); + if (!temp) { error("No value given for pcput"); exit(error_exit); } xfree(opt.time_limit_str); opt.time_limit_str = xstrdup(temp); xfree(temp); - } else if(!strncmp(rl+i, "pmem=", 5)) { + } else if (!strncmp(rl+i, "pmem=", 5)) { i+=5; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "pvmem=", 6)) { + } else if (!strncmp(rl+i, "proc=", 5)) { + i += 5; + if (opt.constraints) + xstrcat(opt.constraints, ","); + temp = _get_pbs_option_value(rl, &i, ','); + xstrcat(opt.constraints, temp); + xfree(temp); + _get_next_pbs_option(rl, &i); + } else if (!strncmp(rl+i, "pvmem=", 6)) { i+=6; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "software=", 9)) { + } else if (!strncasecmp(rl+i, "select=", 7)) { + i += 7; + temp = _get_pbs_option_value(rl, &i, ':'); + if (temp) { + pbs_pro_flag |= 1; + opt.min_nodes = _get_int(temp, "select"); 
+ opt.max_nodes = opt.min_nodes; + opt.nodes_set = true; + xfree(temp); + } + } else if (!strncmp(rl+i, "software=", 9)) { i+=9; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "vmem=", 5)) { + } else if (!strncmp(rl+i, "vmem=", 5)) { i+=5; _get_next_pbs_option(rl, &i); - } else if(!strncmp(rl+i, "walltime=", 9)) { + } else if (!strncmp(rl+i, "walltime=", 9)) { i+=9; - temp = _get_pbs_option_value(rl, &i); - if(!temp) { + temp = _get_pbs_option_value(rl, &i, ','); + if (!temp) { error("No value given for walltime"); exit(error_exit); } @@ -2120,6 +2176,21 @@ static void _parse_pbs_resource_list(char *rl) } else i++; } + + if ((pbs_pro_flag == 7) && (opt.mincpus > opt.ntasks_per_node)) { + /* This logic will allocate the proper CPU count on each + * node if the CPU count per node is evenly divisible by + * the task count on each node. Slurm can't handle something + * like cpus_per_node=10 and ntasks_per_node=8 */ + opt.cpus_per_task = opt.mincpus / opt.ntasks_per_node; + opt.cpus_set = true; + } + if (gpus > 0) { + char *sep = ""; + if (opt.gres) + sep = ","; + xstrfmtcat(opt.gres, "%sgpu:%d", sep, gpus); + } } /* @@ -2148,7 +2219,7 @@ static bool _opt_verify(void) opt.overcommit, 0); if ((opt.ntasks_per_node > 0) && (!opt.ntasks_set) && - (opt.max_nodes == 0)) { + ((opt.max_nodes == 0) || (opt.min_nodes == opt.max_nodes))) { opt.ntasks = opt.min_nodes * opt.ntasks_per_node; opt.ntasks_set = 1; } @@ -2231,7 +2302,7 @@ static bool _opt_verify(void) if ((opt.min_nodes <= 0) || ((opt.ntasks/opt.plane_size) < opt.min_nodes)) { if (((opt.min_nodes-1)*opt.plane_size) >= opt.ntasks) { -#if(0) +#if (0) info("Too few processes ((n/plane_size) %d < N %d) " "and ((N-1)*(plane_size) %d >= n %d)) ", opt.ntasks/opt.plane_size, opt.min_nodes, @@ -2249,7 +2320,7 @@ static bool _opt_verify(void) error("Can't set SLURM_CPUS_PER_TASK env variable"); } - _set_distribution(opt.distribution, &dist, &lllp_dist); + set_distribution(opt.distribution, &dist, &lllp_dist); if (dist && 
setenvf(NULL, "SLURM_DISTRIBUTION", "%s", dist)) { error("Can't set SLURM_DISTRIBUTION env variable"); @@ -2328,12 +2399,12 @@ static bool _opt_verify(void) } /* else if (opt.ntasks_set && !opt.nodes_set) */ - if(!opt.nodelist) { - if((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) { + if (!opt.nodelist) { + if ((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) { /* make sure the file being read in has a / in it to make sure it is a file in the valid_node_list function */ - if(!strstr(opt.nodelist, "/")) { + if (!strstr(opt.nodelist, "/")) { char *add_slash = xstrdup("./"); xstrcat(add_slash, opt.nodelist); xfree(opt.nodelist); @@ -2356,14 +2427,14 @@ static bool _opt_verify(void) /* set up the proc and node counts based on the arbitrary list of nodes */ - if((opt.distribution == SLURM_DIST_ARBITRARY) + if ((opt.distribution == SLURM_DIST_ARBITRARY) && (!opt.nodes_set || !opt.ntasks_set)) { hostlist_t hl = hostlist_create(opt.nodelist); - if(!opt.ntasks_set) { + if (!opt.ntasks_set) { opt.ntasks_set = 1; opt.ntasks = hostlist_count(hl); } - if(!opt.nodes_set) { + if (!opt.nodes_set) { opt.nodes_set = 1; hostlist_uniq(hl); opt.min_nodes = opt.max_nodes = hostlist_count(hl); @@ -2425,8 +2496,13 @@ static bool _opt_verify(void) if (opt.dependency) setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency); - if (opt.acctg_freq >= 0) - setenvf(NULL, "SLURM_ACCTG_FREQ", "%d", opt.acctg_freq); + if (opt.profile) + setenvfs("SLURM_PROFILE=%s", + acct_gather_profile_to_string(opt.profile)); + + + if (opt.acctg_freq) + setenvf(NULL, "SLURM_ACCTG_FREQ", "%s", opt.acctg_freq); #ifdef HAVE_AIX if (opt.network == NULL) @@ -2674,7 +2750,6 @@ static void _fullpath(char **filename, const char *cwd) static void _opt_list(void) { - int i; char *str; info("defined options for program `%s'", opt.progname); @@ -2699,12 +2774,14 @@ static void _opt_list(void) opt.jobid_set ? "(set)" : "(default)"); info("partition : %s", opt.partition == NULL ? 
"default" : opt.partition); + info("profile : `%s'", + acct_gather_profile_to_string(opt.profile)); info("job name : `%s'", opt.job_name); info("reservation : `%s'", opt.reservation); info("wckey : `%s'", opt.wckey); info("distribution : %s", format_task_dist_states(opt.distribution)); - if(opt.distribution == SLURM_DIST_PLANE) + if (opt.distribution == SLURM_DIST_PLANE) info("plane size : %u", opt.plane_size); info("verbose : %d", opt.verbose); info("immediate : %s", tf_(opt.immediate)); @@ -2728,10 +2805,10 @@ static void _opt_list(void) str = print_constraints(); info("constraints : %s", str); xfree(str); - for (i = 0; i < HIGHEST_DIMENSIONS; i++) { - if (opt.conn_type[i] == (uint16_t) NO_VAL) - break; - info("conn_type[%d] : %u", i, opt.conn_type[i]); + if (opt.conn_type[0] != (uint16_t) NO_VAL) { + str = conn_type_string_full(opt.conn_type); + info("conn_type : %s", str); + xfree(str); } str = print_geometry(opt.geometry); info("geometry : %s", str); @@ -2763,6 +2840,8 @@ static void _opt_list(void) slurm_make_time_str(&opt.begin, time_str, sizeof(time_str)); info("begin : %s", time_str); } + info("array : %s", + opt.array_inx == NULL ? "N/A" : opt.array_inx); info("mail_type : %s", print_mail_type(opt.mail_type)); info("mail_user : %s", opt.mail_user); info("sockets-per-node : %d", opt.sockets_per_node); @@ -2819,6 +2898,7 @@ static void _usage(void) " [--network=type] [--mem-per-cpu=MB] [--qos=qos] [--gres=list]\n" " [--cpu_bind=...] [--mem_bind=...] [--reservation=name]\n" " [--switches=max-switches{@max-time-to-wait}]\n" +" [--array=index_values] [--profile=...] [--ignore-pbs]\n" " [--export[=names]] [--export-file=file|fd] executable [args...]\n"); } @@ -2830,6 +2910,7 @@ static void _help(void) "Usage: sbatch [OPTIONS...] 
executable [args...]\n" "\n" "Parallel run options:\n" +" -a, --array=indexes job array index values\n" " -A, --account=name charge job to specified account\n" " --begin=time defer job until HH:MM MM/DD/YY\n" " -c, --cpus-per-task=ncpus number of cpus required per task\n" @@ -2865,6 +2946,9 @@ static void _help(void) " -o, --output=out file for batch script's standard output\n" " -O, --overcommit overcommit resources\n" " -p, --partition=partition partition requested\n" +" --profile=value enable acct_gather_profile for detailed data\n" +" value is all or none or any combination of\n" +" energy, lustre, network or task\n" " --propagate[=rlimits] propagate all [or specific list of] rlimits\n" " --qos=qos quality of service\n" " -Q, --quiet quiet mode (suppress informational messages)\n" @@ -2877,6 +2961,7 @@ static void _help(void) " --wrap[=command string] wrap commmand string in a sh script and submit\n" " --switches=max-switches{@max-time-to-wait}\n" " Optimum switches and max time to wait for optimum\n" +" --ignore-pbs Ignore #PBS options in the batch script\n" "\n" "Constraint options:\n" " --contiguous demand a contiguous range of nodes\n" diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index f61dd1adb55d2a94888bfd1e12f4e7ebbe379b4f..bbe33a3c4cd731aceaaf44383e5461ad04bccbc0 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -96,6 +96,7 @@ typedef struct sbatch_options { int time_min; /* --min-time (int minutes) */ char *time_min_str; /* --min-time (string) */ char *partition; /* --partition=n, -p n */ + uint32_t profile; /* --profile=[all | none} */ enum task_dist_states distribution; /* --distribution=, -m dist */ uint32_t plane_size; /* lllp distribution -> plane_size for @@ -119,7 +120,8 @@ typedef struct sbatch_options { bool no_kill; /* --no-kill, -k */ int requeue; /* --requeue and --no-requeue */ uint8_t open_mode; /* --open-mode */ - int acctg_freq; /* --acctg-freq=secs */ + char *acctg_freq; /* --acctg-freq=<type1>=<freq1>,*/ + /* <type2>=<freq2>,... */ bool overcommit; /* --overcommit -O */ uint16_t shared; /* --share, -s */ char *licenses; /* --licenses, -L */ @@ -154,6 +156,7 @@ typedef struct sbatch_options { char *ramdiskimage; /* --ramdisk-image RamDiskImage for block */ /*********************/ + char *array_inx; /* -a, --array */ time_t begin; /* --begin */ uint16_t mail_type; /* --mail-type */ char *mail_user; /* --mail-user */ @@ -174,6 +177,7 @@ typedef struct sbatch_options { char **spank_job_env; /* SPANK controlled environment for job * Prolog and Epilog */ int spank_job_env_size; /* size of spank_job_env */ + int umask; /* job umask for PBS */ } opt_t; extern opt_t opt; diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 2b2a9278f5402948da74c9e8a187ea4d7f3977de..cbe7a3c069b1464af88c8f73fd387fe007c015ce 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -201,6 +201,52 @@ int main(int argc, char *argv[]) return 0; } +static char *_find_quote_token(char *tmp, char *sep, char **last) +{ + char *start, *quote_single = 0, *quote_double = 0; + int i; + + xassert(last); + if (*last) + start = *last; + else + start = tmp; + if (start[0] == '\0') + return NULL; + for (i = 0; ; i++) { + if (start[i] == '\'') { + if (quote_single) + quote_single--; + else + quote_single++; + } else if (start[i] == '\"') { + if (quote_double) + quote_double--; + else + quote_double++; + } else if (((start[i] == sep[0]) || (start[i] == '\0')) && + (quote_single == 0) && (quote_double == 0)) { + if (((start[0] == '\'') && (start[i-1] == '\'')) || + ((start[0] == '\"') && (start[i-1] == '\"'))) { + start++; + i -= 2; + } + if (start[i] == '\0') + *last = &start[i]; + else + *last = &start[i] + 1; + start[i] = '\0'; + return start; + } else if (start[i] == '\0') { + error("Improperly formed environment variable (%s)", + start); + *last = &start[i]; + return start; + } + + } +} + /* Propagate select user environment variables to the job */ static void _env_merge_filter(job_desc_msg_t *desc) { @@ -209,7 +255,7 @@ static void _env_merge_filter(job_desc_msg_t *desc) char *save_env[2] = { NULL, NULL }, *tmp, *tok, *last = NULL; tmp = xstrdup(opt.export_env); - tok = strtok_r(tmp, ",", &last); + tok = _find_quote_token(tmp, ",", &last); while (tok) { if (strchr(tok, '=')) { save_env[0] = tok; @@ -227,7 +273,7 @@ static void _env_merge_filter(job_desc_msg_t *desc) break; } } - tok = strtok_r(NULL, ",", &last); + tok = _find_quote_token(NULL, ",", &last); } xfree(tmp); @@ -287,6 +333,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->req_nodes = opt.nodelist; desc->exc_nodes = opt.exc_nodes; desc->partition = opt.partition; + desc->profile = opt.profile; if (opt.licenses) desc->licenses = xstrdup(opt.licenses); if (opt.nodes_set) { @@ -302,6 +349,8 @@ static 
int _fill_job_desc_from_opts(job_desc_msg_t *desc) if (opt.dependency) desc->dependency = xstrdup(opt.dependency); + if (opt.array_inx) + desc->array_inx = xstrdup(opt.array_inx); if (opt.cpu_bind) desc->cpu_bind = opt.cpu_bind; if (opt.cpu_bind_type) @@ -443,8 +492,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->requeue = opt.requeue; if (opt.open_mode) desc->open_mode = opt.open_mode; - if (opt.acctg_freq >= 0) - desc->acctg_freq = opt.acctg_freq; + if (opt.acctg_freq) + desc->acctg_freq = xstrdup(opt.acctg_freq); desc->ckpt_dir = opt.ckpt_dir; desc->ckpt_interval = (uint16_t)opt.ckpt_interval; @@ -489,24 +538,21 @@ static void _set_spank_env(void) } } -/* Set SLURM_SUBMIT_DIR environment variable with current state */ +/* Set SLURM_SUBMIT_DIR and SLURM_SUBMIT_HOST environment variables within + * current state */ static void _set_submit_dir_env(void) { - char buf[MAXPATHLEN + 1]; + char buf[MAXPATHLEN + 1], host[256]; - if (getenv("SLURM_SUBMIT_DIR")) /* use this value */ - return; - - if ((getcwd(buf, MAXPATHLEN)) == NULL) { + if ((getcwd(buf, MAXPATHLEN)) == NULL) error("getcwd failed: %m"); - exit(error_exit); - } - - if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", buf) < 0) { + else if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", buf) < 0) error("unable to set SLURM_SUBMIT_DIR in environment"); - return; - } - debug("propagating SUBMIT_DIR=%s", buf); + + if ((gethostname(host, sizeof(host)))) + error("gethostname_short failed: %m"); + else if (setenvf(NULL, "SLURM_SUBMIT_HOST", "%s", host) < 0) + error("unable to set SLURM_SUBMIT_HOST in environment"); } /* Set SLURM_UMASK environment variable with current state */ @@ -518,8 +564,12 @@ static int _set_umask_env(void) if (getenv("SLURM_UMASK")) /* use this value */ return SLURM_SUCCESS; - mask = (int)umask(0); - umask(mask); + if (opt.umask >= 0) { + mask = opt.umask; + } else { + mask = (int)umask(0); + umask(mask); + } sprintf(mask_char, "0%d%d%d", ((mask>>6)&07), ((mask>>3)&07), mask&07); diff 
--git a/src/sbcast/Makefile.in b/src/sbcast/Makefile.in index 146aca41bf4927807b618230a94bdaff9af19374..3efe97607d92681b014915544940a8043b00f58d 100644 --- a/src/sbcast/Makefile.in +++ b/src/sbcast/Makefile.in @@ -62,6 +62,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -79,6 +80,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -87,11 +89,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ 
GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sbcast/agent.c b/src/sbcast/agent.c index 9c3864c0e47f27ee90887f726e5e2fe2da9a70ed..fc939c14bef6cddca04cdba40dbfec7f2a7440bd 100644 --- a/src/sbcast/agent.c +++ b/src/sbcast/agent.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -148,7 +148,7 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, while (i < sbcast_cred->node_cnt) { int j = 0; name = hostlist_shift(hl); - if(!name) { + if (!name) { debug3("no more nodes to send to"); break; } @@ -157,7 +157,7 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, i++; for(j = 0; j < span[threads_used]; j++) { name = hostlist_shift(hl); - if(!name) + if (!name) break; hostlist_push(new_hl, name); free(name); diff --git a/src/sbcast/opts.c b/src/sbcast/opts.c index b2f14e9eaa15e088edbfecb43455c1cf758b228f..ad94ecbb6537aa29846317b3d97e2d2ddea5349c 100644 --- a/src/sbcast/opts.c +++ b/src/sbcast/opts.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sbcast/sbcast.c b/src/sbcast/sbcast.c index b1290422693d1e22f5ddec01d6f6ede4e3a18fa3..0aff71d95e6b24e5dd8855f2c4d8ce1cd22592a7 100644 --- a/src/sbcast/sbcast.c +++ b/src/sbcast/sbcast.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sbcast/sbcast.h b/src/sbcast/sbcast.h index d7caa596693006c17710cc36e05f2faed62b88e7..111f656b052754d37d9a3c5fc4bcacd92b6efc23 100644 --- a/src/sbcast/sbcast.h +++ b/src/sbcast/sbcast.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/scancel/Makefile.in b/src/scancel/Makefile.in index 1435c7731b6fcc96d464458c909ad988a32630b4..66dfcc32f8cd3ecf11e8dfabadff2701e6e54990 100644 --- a/src/scancel/Makefile.in +++ b/src/scancel/Makefile.in @@ -61,6 +61,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -78,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -86,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -159,6 +163,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -179,6 +185,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ 
+FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -188,6 +197,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -195,6 +206,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -229,6 +249,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -256,6 +279,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/scancel/opt.c b/src/scancel/opt.c index c9efe6937936c610c81123ec3f9d19d8643a7a25..08baf9888db1f9f9abb176122abbfea074d4278e 100644 --- a/src/scancel/opt.c +++ b/src/scancel/opt.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -439,27 +439,69 @@ static void _opt_args(int argc, char **argv) static void _xlate_job_step_ids(char **rest) { - int i; - long tmp_l; + int buf_size, buf_offset, i; + long job_id, tmp_l; char *next_str; opt.job_cnt = 0; - if (rest != NULL) { - while (rest[opt.job_cnt] != NULL) - opt.job_cnt++; - } - - opt.job_id = xmalloc(opt.job_cnt * sizeof(uint32_t)); - opt.step_id = xmalloc(opt.job_cnt * sizeof(uint32_t)); + buf_size = 0xffff; + buf_offset = 0; + opt.array_id = xmalloc(buf_size * sizeof(uint16_t)); + opt.job_id = xmalloc(buf_size * sizeof(uint32_t)); + opt.step_id = xmalloc(buf_size * sizeof(uint32_t)); - for (i=0; i<opt.job_cnt; i++) { - tmp_l = strtol(rest[i], &next_str, 10); - if (tmp_l <= 0) { + for (i = 0; rest[i] && (buf_offset < buf_size); i++) { + job_id = strtol(rest[i], &next_str, 10); + if (job_id <= 0) { error ("Invalid job_id %s", rest[i]); exit (1); } - opt.job_id[i] = tmp_l; + opt.job_id[buf_offset] = job_id; + + if ((next_str[0] == '_') && (next_str[1] == '[')) { + hostlist_t hl; + char save_char, *next_elem; + char *end_char = strchr(next_str + 2, ']'); + if (!end_char || (end_char[1] != '\0')) { + error ("Invalid job id %s", rest[i]); + exit (1); + } + save_char = end_char[1]; + end_char[1] = '\0'; + hl = hostlist_create(next_str + 1); + if (!hl) { + error ("Invalid job id %s", rest[i]); + exit (1); + } + while ((next_elem = hostlist_shift(hl))) { + tmp_l = strtol(next_elem, &next_str, 10); + if (tmp_l < 0) { + error ("Invalid job id %s", rest[i]); + exit (1); + } + opt.job_id[buf_offset] = job_id; + opt.array_id[buf_offset] = tmp_l; + opt.step_id[buf_offset] = SLURM_BATCH_SCRIPT; + free(next_elem); + if (++buf_offset >= buf_size) + break; + } + hostlist_destroy(hl); + end_char[1] = save_char; + /* No step ID support for job array range */ + break; + } else if (next_str[0] == '_') { + tmp_l = strtol(&next_str[1], 
&next_str, 10); + if (tmp_l < 0) { + error ("Invalid job id %s", rest[i]); + exit (1); + } + opt.array_id[buf_offset] = tmp_l; + } else { + opt.array_id[buf_offset] = (uint16_t) NO_VAL; + } + if (next_str[0] == '.') { tmp_l = strtol(&next_str[1], &next_str, 10); @@ -467,15 +509,17 @@ _xlate_job_step_ids(char **rest) error ("Invalid job id %s", rest[i]); exit (1); } - opt.step_id[i] = tmp_l; + opt.step_id[buf_offset] = tmp_l; } else - opt.step_id[i] = SLURM_BATCH_SCRIPT; + opt.step_id[buf_offset] = SLURM_BATCH_SCRIPT; + buf_offset++; if (next_str[0] != '\0') { error ("Invalid job ID %s", rest[i]); exit (1); } } + opt.job_cnt = buf_offset; } @@ -544,12 +588,12 @@ static void _usage(void) printf("Usage: scancel [-A account] [--batch] [--interactive] [-n job_name]\n"); printf(" [-p partition] [-Q] [-q qos] [-R reservation][-s signal | integer]\n"); printf(" [-t PENDING | RUNNING | SUSPENDED] [--usage] [-u user_name]\n"); - printf(" [-V] [-v] [-w hosts...] [--wckey=wckey] [job_id[.step_id]]\n"); + printf(" [-V] [-v] [-w hosts...] [--wckey=wckey] [job_id[_array_id][.step_id]]\n"); } static void _help(void) { - printf("Usage: scancel [OPTIONS] [job_id[.step_id]]\n"); + printf("Usage: scancel [OPTIONS] [job_id[_array_id][.step_id]]\n"); printf(" -A, --account=account act only on jobs charging this account\n"); printf(" -b, --batch signal batch shell for specified job\n"); /* printf(" --ctld send request directly to slurmctld\n"); */ diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c index 00f58d9ccbd64d486d7b60adf1f3cd3d88f069a3..906adafdef2a3aac82d7de2b9e8bddba6738697a 100644 --- a/src/scancel/scancel.c +++ b/src/scancel/scancel.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -61,6 +61,7 @@ #include "src/common/list.h" #include "src/common/log.h" +#include "src/common/slurm_protocol_defs.h" #include "src/common/xstring.h" #include "src/common/xmalloc.h" #include "src/common/hostlist.h" @@ -70,12 +71,12 @@ #define MAX_THREADS 20 -static void _cancel_jobs (void); +static void _cancel_jobs (int filter_cnt); static void *_cancel_job_id (void *cancel_info); static void *_cancel_step_id (void *cancel_info); static int _confirmation (int i, uint32_t step_id); -static void _filter_job_records (void); +static int _filter_job_records (void); static void _load_job_records (void); static int _multi_cluster(List clusters); static int _proc_cluster(void); @@ -85,6 +86,9 @@ static job_info_msg_t * job_buffer_ptr = NULL; typedef struct job_cancel_info { uint32_t job_id; + uint32_t array_job_id; + uint16_t array_task_id; + bool array_flag; uint32_t step_id; uint16_t sig; int *num_active_threads; @@ -126,8 +130,6 @@ _multi_cluster(List clusters) int rc = 0, rc2; itr = list_iterator_create(clusters); - if (!itr) - fatal("list_iterator_create: malloc failure"); while ((working_cluster_rec = list_next(itr))) { rc2 = _proc_cluster(); rc = MAX(rc, rc2); @@ -141,6 +143,7 @@ _multi_cluster(List clusters) static int _proc_cluster(void) { + int filter_cnt = 0; int rc; _load_job_records(); @@ -155,9 +158,9 @@ _proc_cluster(void) (opt.state != JOB_END) || (opt.user_name) || (opt.wckey)) { - _filter_job_records(); + filter_cnt = _filter_job_records(); } - _cancel_jobs (); + _cancel_jobs(filter_cnt); slurm_free_job_info_msg(job_buffer_ptr); return rc; @@ -177,6 +180,27 @@ _load_job_records (void) } } +static bool +_match_job(int opt_inx, int job_inx) +{ + job_info_t *job_ptr = job_buffer_ptr->job_array; + + job_ptr += job_inx; + if (opt.array_id[opt_inx] == (uint16_t) NO_VAL) { + if ((opt.step_id[opt_inx] != SLURM_BATCH_SCRIPT) && + (!IS_JOB_RUNNING(job_ptr))) + return false; + + if 
((opt.job_id[opt_inx] == job_ptr->job_id) || + (opt.job_id[opt_inx] == job_ptr->array_job_id)) + return true; + } else { + if ((opt.array_id[opt_inx] == job_ptr->array_task_id) && + (opt.job_id[opt_inx] == job_ptr->array_job_id)) + return true; + } + return false; +} static int _verify_job_ids (void) @@ -189,11 +213,12 @@ _verify_job_ids (void) for (j = 0; j < opt.job_cnt; j++ ) { for (i = 0; i < job_buffer_ptr->record_count; i++) { - if (job_ptr[i].job_id == opt.job_id[j]) + if (_match_job(j, i)) break; } if (((job_ptr[i].job_state >= JOB_COMPLETE) || (i >= job_buffer_ptr->record_count)) && + (job_ptr[i].array_task_id == (uint16_t) NO_VAL) && (opt.verbose >= 0)) { if (opt.step_id[j] == SLURM_BATCH_SCRIPT) error("Kill job error on job id %u: %s", @@ -222,10 +247,12 @@ static int _strcmp(char *s1, char *s2) return 0; /* both NULL */ } -/* _filter_job_records - filtering job information per user specification */ -static void +/* _filter_job_records - filtering job information per user specification + * RET Count of job's filtered out OTHER than for job ID value */ +static int _filter_job_records (void) { + int filter_cnt = 0; int i, j; job_info_t *job_ptr = NULL; uint16_t job_base_state; @@ -240,48 +267,56 @@ _filter_job_records (void) (job_base_state != JOB_RUNNING) && (job_base_state != JOB_SUSPENDED)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if (opt.account != NULL && _strcmp(job_ptr[i].account, opt.account)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if (opt.job_name != NULL && _strcmp(job_ptr[i].name, opt.job_name)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if ((opt.partition != NULL) && _strcmp(job_ptr[i].partition,opt.partition)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if ((opt.qos != NULL) && _strcmp(job_ptr[i].qos, opt.qos)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if ((opt.reservation != NULL) && _strcmp(job_ptr[i].resv_name, opt.reservation)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if 
((opt.state != JOB_END) && (job_ptr[i].job_state != opt.state)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } if ((opt.user_name != NULL) && (job_ptr[i].user_id != opt.user_id)) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } @@ -300,6 +335,7 @@ _filter_job_records (void) hostset_t hs = hostset_create(job_ptr[i].nodes); if (!hostset_intersects(hs, opt.nodelist)) { job_ptr[i].job_id = 0; + filter_cnt++; hostset_destroy(hs); continue; } else { @@ -321,14 +357,16 @@ _filter_job_records (void) if (strcmp(job_key, opt.wckey) != 0) { job_ptr[i].job_id = 0; + filter_cnt++; continue; } } if (opt.job_cnt == 0) continue; + for (j = 0; j < opt.job_cnt; j++) { - if (job_ptr[i].job_id == opt.job_id[j]) + if (_match_job(j, i)) break; } if (j >= opt.job_cnt) { /* not found */ @@ -336,10 +374,12 @@ _filter_job_records (void) continue; } } + + return filter_cnt; } static void -_cancel_jobs_by_state(uint16_t job_state) +_cancel_jobs_by_state(uint16_t job_state, int filter_cnt) { int i, j, err; job_cancel_info_t *cancel_info; @@ -360,8 +400,9 @@ _cancel_jobs_by_state(uint16_t job_state) * included a step id */ if (opt.job_cnt) { for (j = 0; j < opt.job_cnt; j++ ) { - if (job_ptr[i].job_id != opt.job_id[j]) + if (!_match_job(j, i)) continue; + if (opt.interactive && (_confirmation(i, opt.step_id[j]) == 0)) continue; @@ -369,7 +410,6 @@ _cancel_jobs_by_state(uint16_t job_state) cancel_info = (job_cancel_info_t *) xmalloc(sizeof(job_cancel_info_t)); - cancel_info->job_id = job_ptr[i].job_id; cancel_info->sig = opt.signal; cancel_info->num_active_threads = &num_active_threads; @@ -378,6 +418,24 @@ _cancel_jobs_by_state(uint16_t job_state) cancel_info->num_active_threads_cond = &num_active_threads_cond; + if ((!opt.interactive) && (filter_cnt == 0) && + (opt.array_id[j] == (uint16_t) NO_VAL) && + (opt.job_id[j] == job_ptr[i].array_job_id)&& + (opt.step_id[j] == SLURM_BATCH_SCRIPT)) { + opt.job_id[j] = NO_VAL; /* !match_job */ + cancel_info->array_flag = true; + 
cancel_info->job_id = + job_ptr[i].array_job_id; + } else { + cancel_info->array_flag = false; + cancel_info->job_id = + job_ptr[i].job_id; + cancel_info->array_job_id = + job_ptr[i].array_job_id; + cancel_info->array_task_id = + job_ptr[i].array_task_id; + } + pthread_mutex_lock(&num_active_threads_lock); num_active_threads++; while (num_active_threads > MAX_THREADS) { @@ -421,6 +479,10 @@ _cancel_jobs_by_state(uint16_t job_state) cancel_info->num_active_threads_cond = &num_active_threads_cond; + cancel_info->array_job_id = 0; + cancel_info->array_task_id = (uint16_t) NO_VAL; + cancel_info->array_flag = false; + pthread_mutex_lock( &num_active_threads_lock ); num_active_threads++; while (num_active_threads > MAX_THREADS) { @@ -440,7 +502,7 @@ _cancel_jobs_by_state(uint16_t job_state) /* _cancel_jobs - filter then cancel jobs or job steps per request */ static void -_cancel_jobs (void) +_cancel_jobs (int filter_cnt) { slurm_attr_init(&attr); if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) @@ -451,8 +513,8 @@ _cancel_jobs (void) if (pthread_cond_init(&num_active_threads_cond, NULL)) error("pthread_cond_init error %m"); - _cancel_jobs_by_state(JOB_PENDING); - _cancel_jobs_by_state(JOB_END); + _cancel_jobs_by_state(JOB_PENDING, filter_cnt); + _cancel_jobs_by_state(JOB_END, filter_cnt); /* Wait for any spawned threads that have not finished */ pthread_mutex_lock( &num_active_threads_lock ); @@ -476,6 +538,8 @@ _cancel_job_id (void *ci) bool msg_to_ctld = opt.ctld; job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci; uint32_t job_id = cancel_info->job_id; + uint32_t array_job_id = cancel_info->array_job_id; + uint16_t array_task_id = cancel_info->array_task_id; uint16_t sig = cancel_info->sig; if (sig == (uint16_t)-1) { @@ -484,21 +548,32 @@ _cancel_job_id (void *ci) } for (i=0; i<MAX_CANCEL_RETRY; i++) { - if (!sig_set) - verbose("Terminating job %u", job_id); - else - verbose("Signal %u to job %u", sig, job_id); + if (!sig_set) { + if 
(array_job_id) { + verbose("Terminating job %u_%u", + array_job_id, array_task_id); + } else + verbose("Terminating job %u", job_id); + } else { + if (array_job_id) { + verbose("Signal %u to job %u_%u", + sig, array_job_id, array_task_id); + } else + verbose("Signal %u to job %u", sig, job_id); + } if ((sig == SIGKILL) || (!sig_set) || msg_to_ctld || opt.clusters) { - error_code = slurm_kill_job (job_id, sig, - (uint16_t)opt.batch); + uint16_t flags = 0; + if (opt.batch) + flags |= KILL_JOB_BATCH; + if (cancel_info->array_flag) + flags |= KILL_JOB_ARRAY; + error_code = slurm_kill_job (job_id, sig, flags); } else { if (opt.batch) { - error_code = slurm_signal_job_step( - job_id, - SLURM_BATCH_SCRIPT, - sig); + error_code = slurm_signal_job_step(job_id, + SLURM_BATCH_SCRIPT, sig); } else { error_code = slurm_signal_job (job_id, sig); } @@ -542,6 +617,8 @@ _cancel_step_id (void *ci) job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci; uint32_t job_id = cancel_info->job_id; uint32_t step_id = cancel_info->step_id; + uint32_t array_job_id = cancel_info->array_job_id; + uint16_t array_task_id = cancel_info->array_task_id; uint16_t sig = cancel_info->sig; bool sig_set = true; @@ -551,11 +628,23 @@ _cancel_step_id (void *ci) } for (i=0; i<MAX_CANCEL_RETRY; i++) { - if (sig == SIGKILL) - verbose("Terminating step %u.%u", job_id, step_id); - else { - verbose("Signal %u to step %u.%u", - sig, job_id, step_id); + if (sig == SIGKILL) { + if (array_job_id) { + verbose("Terminating step %u_%u.%u", + array_job_id, array_task_id, step_id); + } else { + verbose("Terminating step %u.%u", + job_id, step_id); + } + } else { + if (array_job_id) { + verbose("Signal %u to step %u_%u.%u", + sig, array_job_id, array_task_id, + step_id); + } else { + verbose("Signal %u to step %u.%u", + sig, job_id, step_id); + } } if ((!sig_set) || opt.ctld) @@ -596,18 +685,27 @@ _cancel_step_id (void *ci) static int _confirmation (int i, uint32_t step_id) { - char in_line[128]; + char job_id_str[64], 
in_line[128]; job_info_t *job_ptr = NULL; job_ptr = job_buffer_ptr->job_array ; while (1) { + if (job_ptr[i].array_task_id == (uint16_t) NO_VAL) { + snprintf(job_id_str, sizeof(job_id_str), "%u", + job_ptr[i].job_id); + } else { + snprintf(job_id_str, sizeof(job_id_str), "%u_%u", + job_ptr[i].array_job_id, + job_ptr[i].array_task_id); + } + if (step_id == SLURM_BATCH_SCRIPT) { - printf ("Cancel job_id=%u name=%s partition=%s [y/n]? ", - job_ptr[i].job_id, job_ptr[i].name, + printf ("Cancel job_id=%s name=%s partition=%s [y/n]? ", + job_id_str, job_ptr[i].name, job_ptr[i].partition); } else { - printf ("Cancel step_id=%u.%u name=%s partition=%s [y/n]? ", - job_ptr[i].job_id, step_id, job_ptr[i].name, + printf ("Cancel step_id=%s.%u name=%s partition=%s [y/n]? ", + job_id_str, step_id, job_ptr[i].name, job_ptr[i].partition); } if (fgets(in_line, sizeof(in_line), stdin) == NULL) diff --git a/src/scancel/scancel.h b/src/scancel/scancel.h index c192aa250283b442c7baf3198003de33ce0275c1..817e3c48f5befbe93ddba165fc8b580f5933a4a9 100644 --- a/src/scancel/scancel.h +++ b/src/scancel/scancel.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -63,6 +63,7 @@ typedef struct scancel_options { uint16_t job_cnt; /* count of job_id's specified */ uint32_t *job_id; /* list of job_id's */ + uint16_t *array_id; /* list of job array IDs */ uint32_t *step_id; /* list of job step id's */ char *wckey; /* --wckey */ char *nodelist; /* --nodelist, -w */ diff --git a/src/scontrol/Makefile.in b/src/scontrol/Makefile.in index 2127d1e7b14ed212d7eac19341e111e0608a65b0..268a701d9a4a95087f46b3dd8529dcd4e4a3bf61 100644 --- a/src/scontrol/Makefile.in +++ b/src/scontrol/Makefile.in @@ -59,6 +59,7 @@ subdir = src/scontrol DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ 
+CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 +281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/scontrol/create_res.c b/src/scontrol/create_res.c index 78007f517fe59b0a08f7e5aa8c563be8c28a7877..36cda0519b3c5fca18c6de36ff319ded4b3b933f 100644 --- a/src/scontrol/create_res.c +++ b/src/scontrol/create_res.c @@ -7,7 +7,7 @@ * 
CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -295,8 +295,30 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, strncasecmp(tag, "CoreCount", MAX(taglen,5)) == 0 || strncasecmp(tag, "CPUCnt", MAX(taglen,5)) == 0 || strncasecmp(tag, "CPUCount", MAX(taglen,5)) == 0) { - char *endptr = NULL; - resv_msg_ptr->core_cnt = strtol(val, &endptr, 10); + + char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL; + int node_inx = 0; + + core_cnt = xstrdup(val); + tok = strtok_r(core_cnt, ",", &ptrptr); + while (tok) { + xrealloc(resv_msg_ptr->core_cnt, + sizeof(uint32_t) * (node_inx + 2)); + resv_msg_ptr->core_cnt[node_inx] = + strtol(tok, &endptr, 10); + if ((endptr == NULL) || + (endptr[0] != '\0') || + (tok[0] == '\0')) { + exit_code = 1; + error("Invalid core count %s. %s", + argv[i], msg); + xfree(core_cnt); + return -1; + } + node_inx++; + tok = strtok_r(NULL, ",", &ptrptr); + } + xfree(core_cnt); } else if (strncasecmp(tag, "Nodes", MAX(taglen, 5)) == 0) { resv_msg_ptr->node_list = val; diff --git a/src/scontrol/info_block.c b/src/scontrol/info_block.c index 8a15627c1e174df697e2d6327c895cfededb0b71..40a38a1e8f85d366669cd9ce7d8c35bd7bbc6370 100644 --- a/src/scontrol/info_block.c +++ b/src/scontrol/info_block.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/scontrol/info_job.c b/src/scontrol/info_job.c index 35c414e8d52aa69557b6cc41e4b5c033471fba0d..7aded4865611a8d1345977a7a157f109882f3659 100644 --- a/src/scontrol/info_job.c +++ b/src/scontrol/info_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -47,8 +47,7 @@ #define POLL_SLEEP 3 /* retry interval in seconds */ static bool _in_node_bit_list(int inx, int *node_list_array); -static int _scontrol_load_jobs(job_info_msg_t ** job_buffer_pptr, - uint32_t job_id); + /* * Determine if a node index is in a node list pair array. * RET - true if specified index is in the array @@ -73,8 +72,8 @@ _in_node_bit_list(int inx, int *node_list_array) } /* Load current job table information into *job_buffer_pptr */ -static int -_scontrol_load_jobs(job_info_msg_t ** job_buffer_pptr, uint32_t job_id) +extern int +scontrol_load_job(job_info_msg_t ** job_buffer_pptr, uint32_t job_id) { int error_code; static uint16_t last_show_flags = 0xffff; @@ -175,7 +174,7 @@ scontrol_print_completing (void) node_info_msg_t *node_info_msg; uint16_t show_flags = 0; - error_code = _scontrol_load_jobs (&job_info_msg, 0); + error_code = scontrol_load_job (&job_info_msg, 0); if (error_code) { exit_code = 1; if (quiet_flag != 1) @@ -251,11 +250,11 @@ scontrol_get_job_state(uint32_t job_id) int error_code = SLURM_SUCCESS, i; job_info_t *job_ptr = NULL; - error_code = _scontrol_load_jobs(&job_buffer_ptr, job_id); + error_code = scontrol_load_job(&job_buffer_ptr, job_id); if (error_code) { exit_code = 1; if (quiet_flag == -1) - slurm_perror ("slurm_load_jobs error"); + slurm_perror ("slurm_load_job error"); return 
(uint16_t) NO_VAL; } if (quiet_flag == -1) { @@ -285,13 +284,18 @@ scontrol_print_job (char * job_id_str) { int error_code = SLURM_SUCCESS, i, print_cnt = 0; uint32_t job_id = 0; + uint16_t array_id = (uint16_t) NO_VAL; job_info_msg_t * job_buffer_ptr = NULL; job_info_t *job_ptr = NULL; + char *end_ptr = NULL; - if (job_id_str) - job_id = (uint32_t) strtol (job_id_str, (char **)NULL, 10); + if (job_id_str) { + job_id = (uint32_t) strtol (job_id_str, &end_ptr, 10); + if (end_ptr[0] == '_') + array_id = strtol( end_ptr + 1, &end_ptr, 10 ); + } - error_code = _scontrol_load_jobs(&job_buffer_ptr, job_id); + error_code = scontrol_load_job(&job_buffer_ptr, job_id); if (error_code) { exit_code = 1; if (quiet_flag != 1) @@ -307,16 +311,26 @@ scontrol_print_job (char * job_id_str) } job_ptr = job_buffer_ptr->job_array ; - for (i = 0; i < job_buffer_ptr->record_count; i++) { + for (i = 0, job_ptr = job_buffer_ptr->job_array; + i < job_buffer_ptr->record_count; i++, job_ptr++) { + if ((array_id != (uint16_t) NO_VAL) && + (array_id != job_ptr->array_task_id)) + continue; + slurm_print_job_info(stdout, job_ptr, one_liner); print_cnt++; - slurm_print_job_info (stdout, & job_ptr[i], one_liner ) ; } if (print_cnt == 0) { if (job_id_str) { exit_code = 1; - if (quiet_flag != 1) - printf ("Job %u not found\n", job_id); + if (quiet_flag != 1) { + if (array_id == (uint16_t) NO_VAL) { + printf("Job %u not found\n", job_id); + } else { + printf("Job %u_%u not found\n", + job_id, array_id); + } + } } else if (quiet_flag != 1) printf ("No jobs in the system\n"); } @@ -330,27 +344,30 @@ scontrol_print_job (char * job_id_str) extern void scontrol_print_step (char *job_step_id_str) { - int error_code, i; + int error_code, i, print_cnt = 0; uint32_t job_id = NO_VAL, step_id = NO_VAL; + uint16_t array_id = (uint16_t) NO_VAL; char *next_str; job_step_info_response_msg_t *job_step_info_ptr; job_step_info_t * job_step_ptr; - static uint32_t last_job_id = 0, last_step_id = 0; + static uint32_t 
last_job_id = 0, last_array_id, last_step_id = 0; static job_step_info_response_msg_t *old_job_step_info_ptr = NULL; static uint16_t last_show_flags = 0xffff; uint16_t show_flags = 0; if (job_step_id_str) { job_id = (uint32_t) strtol (job_step_id_str, &next_str, 10); + if (next_str[0] == '_') + array_id = (uint16_t) strtol(next_str+1, &next_str, 10); if (next_str[0] == '.') - step_id = (uint32_t) strtol (&next_str[1], NULL, 10); + step_id = (uint32_t) strtol (next_str+1, NULL, 10); } if (all_flag) show_flags |= SHOW_ALL; - if ((old_job_step_info_ptr) && - (last_job_id == job_id) && (last_step_id == step_id)) { + if ((old_job_step_info_ptr) && (last_job_id == job_id) && + (last_array_id == array_id) && (last_step_id == step_id)) { if (last_show_flags != show_flags) old_job_step_info_ptr->last_update = (time_t) 0; error_code = slurm_get_job_steps ( @@ -366,8 +383,7 @@ scontrol_print_step (char *job_step_id_str) if (quiet_flag == -1) printf ("slurm_get_job_steps no change in data\n"); } - } - else { + } else { if (old_job_step_info_ptr) { slurm_free_job_step_info_response_msg ( old_job_step_info_ptr); @@ -399,17 +415,27 @@ scontrol_print_step (char *job_step_id_str) } job_step_ptr = job_step_info_ptr->job_steps ; - for (i = 0; i < job_step_info_ptr->job_step_count; i++) { - slurm_print_job_step_info (stdout, & job_step_ptr[i], - one_liner ) ; + for (i = 0, job_step_ptr = job_step_info_ptr->job_steps; + i < job_step_info_ptr->job_step_count; i++, job_step_ptr++) { + if ((array_id != (uint16_t) NO_VAL) && + (array_id != job_step_ptr->array_task_id)) + continue; + slurm_print_job_step_info(stdout, job_step_ptr, one_liner); + print_cnt++; } - if (job_step_info_ptr->job_step_count == 0) { + if (print_cnt == 0) { if (job_step_id_str) { exit_code = 1; - if (quiet_flag != 1) - printf ("Job step %u.%u not found\n", - job_id, step_id); + if (quiet_flag != 1) { + if (array_id == (uint16_t) NO_VAL) { + printf ("Job step %u.%u not found\n", + job_id, step_id); + } else { + printf 
("Job step %u_%u.%u not found\n", + job_id, array_id, step_id); + } + } } else if (quiet_flag != 1) printf ("No job steps in the system\n"); } @@ -510,20 +536,32 @@ _list_pids_one_step(const char *node_name, uint32_t jobid, uint32_t stepid) stepd_task_info(fd, &task_info, &tcount); for (i = 0; i < (int)tcount; i++) { if (!task_info[i].exited) { - printf("%-8d %-8u %-6u %-7d %-8d\n", - task_info[i].pid, - jobid, - stepid, - task_info[i].id, - task_info[i].gtid); + if (stepid == NO_VAL) + printf("%-8d %-8u %-6s %-7d %-8d\n", + task_info[i].pid, + jobid, + "batch", + task_info[i].id, + task_info[i].gtid); + else + printf("%-8d %-8u %-6u %-7d %-8d\n", + task_info[i].pid, + jobid, + stepid, + task_info[i].id, + task_info[i].gtid); } } stepd_list_pids(fd, &pids, &count); for (i = 0; i < count; i++) { if (!_in_task_array((pid_t)pids[i], task_info, tcount)) { - printf("%-8d %-8u %-6u %-7s %-8s\n", - pids[i], jobid, stepid, "-", "-"); + if (stepid == NO_VAL) + printf("%-8d %-8u %-6s %-7s %-8s\n", + pids[i], jobid, "batch", "-", "-"); + else + printf("%-8d %-8u %-6u %-7s %-8s\n", + pids[i], jobid, stepid, "-", "-"); } } @@ -764,7 +802,7 @@ static int _blocks_dealloc(void) return -1; } for (i=0; i<new_bg_ptr->record_count; i++) { - if(new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { + if (new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { rc = 1; break; } @@ -883,7 +921,7 @@ extern int scontrol_job_ready(char *job_id_str) return SLURM_ERROR; } - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { resource_allocation_response_msg_t *alloc; rc = slurm_allocation_lookup_lite(job_id, &alloc); if (rc == SLURM_SUCCESS) { diff --git a/src/scontrol/info_node.c b/src/scontrol/info_node.c index d36a0e8727db78deba7dab37558e2c585c2003b4..076234054977b731eacb82765940d0e0cd3356c7 100644 --- a/src/scontrol/info_node.c +++ b/src/scontrol/info_node.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/scontrol/info_part.c b/src/scontrol/info_part.c index 0e8b8b3f83d19dc5fc9a3159a798244cf37c9704..cc72903c723a6248ddae9a59d0cf8bf2cffd9098 100644 --- a/src/scontrol/info_part.c +++ b/src/scontrol/info_part.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/scontrol/info_res.c b/src/scontrol/info_res.c index 8555abec0fdf8acfa09065bb9143672ec1606c4c..f7ac66e27a9be961eae26949af2f113d9c7fa951 100644 --- a/src/scontrol/info_res.c +++ b/src/scontrol/info_res.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 9ee19f2e29b0923e13d675c6f6711a407391b541..26a00ec2632ee16a277e632c4037ec404b7a02df 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -127,8 +127,13 @@ main (int argc, char *argv[]) working_cluster_rec = list_peek(clusters); } - while((opt_char = getopt_long(argc, argv, "adhM:oQvV", - long_options, &option_index)) != -1) { + while (1) { + if ((optind < argc) && + !strncasecmp(argv[optind], "setdebugflags", 8)) + break; /* avoid parsing "-<flagname>" as option */ + if ((opt_char = getopt_long(argc, argv, "adhM:oQvV", + long_options, &option_index)) == -1) + break; switch (opt_char) { case (int)'?': fprintf(stderr, "Try \"scontrol --help\" for " @@ -249,17 +254,20 @@ static char *_getline(const char *prompt) printf("%s", prompt); - /* Set "line" here to avoid a warning. We throw it away later. */ + /* Set "line" here to avoid a warning, discard later */ line = fgets(buf, 4096, stdin); - + if (line == NULL) + return NULL; len = strlen(buf); - if (len == 0) + if ((len == 0) || (len >= 4096)) return NULL; if (buf[len-1] == '\n') buf[len-1] = '\0'; else len++; - line = malloc (len * sizeof(char)); + line = malloc(len * sizeof(char)); + if (!line) + return NULL; return strncpy(line, buf, len); } #endif @@ -602,7 +610,7 @@ _process_command (int argc, char *argv[]) if (quiet_flag == -1) fprintf(stderr, "no input"); return 0; - } else if(tag) + } else if (tag) tag_len = strlen(tag); else { if (quiet_flag == -1) @@ -668,17 +676,17 @@ _process_command (int argc, char *argv[]) old_res_info_ptr = NULL; slurm_free_ctl_conf(old_slurm_ctl_conf_ptr); old_slurm_ctl_conf_ptr = NULL; - /* if(old_block_info_ptr) */ + /* if (old_block_info_ptr) */ /* old_block_info_ptr->last_update = 0; */ - /* if(old_job_info_ptr) */ + /* if (old_job_info_ptr) */ /* old_job_info_ptr->last_update = 0; */ - /* if(old_node_info_ptr) */ + /* if (old_node_info_ptr) */ /* old_node_info_ptr->last_update = 0; */ - /* if(old_part_info_ptr) */ + /* if (old_part_info_ptr) */ /* old_part_info_ptr->last_update = 0; */ - /* if(old_res_info_ptr) */ + /* if 
(old_res_info_ptr) */ /* old_res_info_ptr->last_update = 0; */ - /* if(old_slurm_ctl_conf_ptr) */ + /* if (old_slurm_ctl_conf_ptr) */ /* old_slurm_ctl_conf_ptr->last_update = 0; */ } else if (strncasecmp (tag, "create", MAX(tag_len, 2)) == 0) { @@ -1302,7 +1310,7 @@ _delete_it (int argc, char *argv[]) slurm_perror(errmsg); } } else if (strncasecmp (tag, "BlockName", MAX(tag_len, 3)) == 0) { - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { update_block_msg_t block_msg; slurm_init_update_block_msg ( &block_msg ); block_msg.bg_block_id = val; @@ -1511,7 +1519,7 @@ _update_it (int argc, char *argv[]) exit_code = 1; fprintf(stderr, "No valid entity in update command\n"); fprintf(stderr, "Input line must include \"NodeName\", "); - if(cluster_flags & CLUSTER_FLAG_BG) { + if (cluster_flags & CLUSTER_FLAG_BG) { fprintf(stderr, "\"BlockName\", \"SubMPName\" " "(i.e. bgl000[0-3]),"); } @@ -1539,7 +1547,7 @@ _update_bluegene_block (int argc, char *argv[]) int i, update_cnt = 0; update_block_msg_t block_msg; - if(!(cluster_flags & CLUSTER_FLAG_BG)) { + if (!(cluster_flags & CLUSTER_FLAG_BG)) { exit_code = 1; fprintf(stderr, "This only works on a bluegene system.\n"); return 0; @@ -1596,7 +1604,7 @@ _update_bluegene_block (int argc, char *argv[]) } } - if(!block_msg.bg_block_id) { + if (!block_msg.bg_block_id) { error("You didn't supply a block name."); return 0; } else if (block_msg.state == (uint16_t)NO_VAL) { @@ -1626,7 +1634,7 @@ _update_bluegene_subbp (int argc, char *argv[]) int i, update_cnt = 0; update_block_msg_t block_msg; - if(!(cluster_flags & CLUSTER_FLAG_BG)) { + if (!(cluster_flags & CLUSTER_FLAG_BG)) { exit_code = 1; fprintf(stderr, "This only works on a bluegene system.\n"); return 0; @@ -1675,7 +1683,7 @@ _update_bluegene_subbp (int argc, char *argv[]) } } - if(!block_msg.mp_str) { + if (!block_msg.mp_str) { error("You didn't supply an ionode list."); return 0; } else if (block_msg.state == (uint16_t)NO_VAL) { @@ -1699,11 
+1707,14 @@ _update_bluegene_subbp (int argc, char *argv[]) */ static int _update_slurmctld_debug(char *val) { - char *endptr; + char *endptr = NULL; int error_code = SLURM_SUCCESS; - uint32_t level = (uint32_t)strtoul(val, &endptr, 10); + uint32_t level; + + if (val) + level = (uint32_t)strtoul(val, &endptr, 10); - if (*endptr != '\0' || level > 9) { + if ((val == NULL) || (*endptr != '\0') || (level > 9)) { error_code = 1; if (quiet_flag != 1) fprintf(stderr, "invalid debug level: %s\n", diff --git a/src/scontrol/scontrol.h b/src/scontrol/scontrol.h index 97034deac1bd9bfdefa86920b82730746d2da232..b7dc1d8fad4addd684aaee0c5b2e86a219cefe6b 100644 --- a/src/scontrol/scontrol.h +++ b/src/scontrol/scontrol.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -126,6 +126,8 @@ extern void scontrol_list_pids(const char *jobid_str, const char *node_name); extern int scontrol_load_front_end(front_end_info_msg_t ** front_end_buffer_pptr); +extern int scontrol_load_job(job_info_msg_t ** job_buffer_pptr, + uint32_t job_id); extern int scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr); extern int scontrol_load_nodes (node_info_msg_t ** node_buffer_pptr, uint16_t show_flags); diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index a09aedfc4d927b171ce574a7fb16ddeb455c4c4a..9780aa2be98d31bf90e13b0aba1e2ae612ca4f4c 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -234,37 +234,39 @@ static uint32_t _get_job_time(uint32_t job_id) extern int scontrol_hold(char *op, char *job_id_str) { - int rc = SLURM_SUCCESS; + int i, rc = SLURM_SUCCESS; char *next_str; job_desc_msg_t job_msg; - uint16_t job_state; - - slurm_init_job_desc_msg (&job_msg); - - /* set current user, needed e.g., for AllowGroups checks */ - job_msg.user_id = getuid(); + uint32_t job_id; + uint16_t array_id; + job_info_msg_t *resp; + slurm_job_info_t *job_ptr; if (job_id_str) { - job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10); - if ((job_msg.job_id == 0) || (next_str[0] != '\0')) { + job_id = (uint32_t) strtol(job_id_str, &next_str, 10); + if (next_str[0] == '_') + array_id = (uint16_t) strtol(next_str+1, &next_str, 10); + else + array_id = (uint16_t) NO_VAL; + if ((job_id == 0) || (next_str[0] != '\0')) { fprintf(stderr, "Invalid job id specified\n"); - exit_code = 1; - return 0; + return 1; } } else { fprintf(stderr, "Invalid job id specified\n"); - exit_code = 1; - return 0; + return 1; } - job_state = scontrol_get_job_state(job_msg.job_id); - if (job_state == (uint16_t) NO_VAL) - return SLURM_ERROR; - if ((job_state & JOB_STATE_BASE) != JOB_PENDING) { - slurm_seterrno(ESLURM_JOB_NOT_PENDING); - return ESLURM_JOB_NOT_PENDING; + if (scontrol_load_job(&resp, job_id)) { + if (quiet_flag == -1) + slurm_perror ("slurm_load_job error"); + return 1; } + slurm_init_job_desc_msg (&job_msg); + job_msg.job_id = job_id; + /* set current user, needed e.g., for AllowGroups checks */ + job_msg.user_id = getuid(); if ((strncasecmp(op, "holdu", 5) == 0) || (strncasecmp(op, "uhold", 5) == 0)) { job_msg.priority = 0; @@ -274,9 +276,24 @@ scontrol_hold(char *op, char *job_id_str) job_msg.alloc_sid = 0; } else job_msg.priority = INFINITE; + for (i = 0, job_ptr = resp->job_array; i < resp->record_count; + i++, job_ptr++) { + if ((array_id != (uint16_t) NO_VAL) && + (job_ptr->array_task_id != 
array_id)) + continue; - if (slurm_update_job(&job_msg)) - return slurm_get_errno(); + if (!IS_JOB_PENDING(job_ptr)) { + if ((array_id == (uint16_t) NO_VAL) && + (job_ptr->array_task_id != (uint16_t) NO_VAL)) + continue; + slurm_seterrno(ESLURM_JOB_NOT_PENDING); + return ESLURM_JOB_NOT_PENDING; + } + + job_msg.job_id = job_ptr->job_id; + if (slurm_update_job(&job_msg)) + rc = slurm_get_errno(); + } return rc; } @@ -387,7 +404,8 @@ scontrol_update_job (int argc, char *argv[]) } if (strncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) { - if (parse_uint32(val, &job_msg.job_id)) { + job_msg.job_id = slurm_xlate_job_id(val); + if (job_msg.job_id == 0) { error ("Invalid JobId value: %s", val); exit_code = 1; return 0; @@ -608,6 +626,10 @@ scontrol_update_job (int argc, char *argv[]) job_msg.wckey = val; update_cnt++; } + else if (strncasecmp(tag, "StdOut", MAX(taglen, 6)) == 0) { + job_msg.std_out = val; + update_cnt++; + } else if (strncasecmp(tag, "Switches", MAX(taglen, 5)) == 0) { char *sep_char; job_msg.req_switch = @@ -737,7 +759,7 @@ scontrol_update_job (int argc, char *argv[]) } else if (strncasecmp(tag, "Conn-Type", MAX(taglen, 2)) == 0) { verify_conn_type(val, job_msg.conn_type); - if(job_msg.conn_type[0] != (uint16_t)NO_VAL) + if (job_msg.conn_type[0] != (uint16_t)NO_VAL) update_cnt++; } else if (strncasecmp(tag, "Licenses", MAX(taglen, 1)) == 0) { diff --git a/src/scontrol/update_node.c b/src/scontrol/update_node.c index cfc16e6bc29b57a82520007eca5c78aca2802d85..e3ee8b9822309f4d9ae0d154501a03975f0162d5 100644 --- a/src/scontrol/update_node.c +++ b/src/scontrol/update_node.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/scontrol/update_part.c b/src/scontrol/update_part.c index a631ae19a577eef3fba7e78c9f4c60f6118df2f1..388f7364d5de6203fd8529821a102984d081783e 100644 --- a/src/scontrol/update_part.c +++ b/src/scontrol/update_part.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -98,6 +98,19 @@ scontrol_parse_part_options (int argc, char *argv[], int *update_cnt_ptr, part_msg_ptr->default_time = default_time; (*update_cnt_ptr)++; } + else if (strncasecmp(tag, "MaxCPUsPerNode", MAX(taglen, 4)) + == 0) { + if ((strcasecmp(val,"UNLIMITED") == 0) || + (strcasecmp(val,"INFINITE") == 0)) { + part_msg_ptr->max_cpus_per_node = + (uint32_t) INFINITE; + } else if (parse_uint32(val, &part_msg_ptr-> + max_cpus_per_node)) { + error("Invalid MaxCPUsPerNode value: %s", val); + return -1; + } + (*update_cnt_ptr)++; + } else if (strncasecmp(tag, "MaxNodes", MAX(taglen, 4)) == 0) { if ((strcasecmp(val,"UNLIMITED") == 0) || (strcasecmp(val,"INFINITE") == 0)) diff --git a/src/scontrol/update_step.c b/src/scontrol/update_step.c index 82c8f884a091726cc2f4864b887a50a8c5782bf5..43fe3b2fa89416a300cc258e7fbf83398b839f37 100644 --- a/src/scontrol/update_step.c +++ b/src/scontrol/update_step.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -39,6 +39,10 @@ #include "scontrol.h" #include "src/common/proc_args.h" +#include "src/common/slurm_jobacct_gather.h" + +#define MAX_RECORD_FIELDS 100 +#define BUFFER_SIZE 4096 /* Return the current time limit of the specified job/step_id or NO_VAL if the * information is not available */ @@ -66,6 +70,96 @@ static uint32_t _get_step_time(uint32_t job_id, uint32_t step_id) return time_limit; } +static int _parse_comp_file( + char *file, step_update_request_msg_t *step_msg) +{ + int i; + FILE *fd = fopen(file, "r"); + char line[BUFFER_SIZE]; + char *fptr; + int version; + char *update[MAX_RECORD_FIELDS+1]; /* End list with null entry and, + possibly, more data than we + expected */ + + enum { UPDATE_STEP_VERSION, + UPDATE_STEP_EXTRA, + UPDATE_STEP_INBLOCKS, + UPDATE_STEP_OUTBLOCKS, + UPDATE_STEP_EXITCODE, + UPDATE_STEP_CPU_ALLOC, + UPDATE_STEP_START, + UPDATE_STEP_END, + UPDATE_STEP_USER_SEC, + UPDATE_STEP_SYS_SEC, + UPDATE_STEP_MAX_RSS, + UPDATE_STEP_UID, + UPDATE_STEP_STEPNAME, + UPDATE_STEP_VER1_LENGTH + }; + + if (fd == NULL) { + perror(file); + return SLURM_ERROR; + } + + if (!fgets(line, BUFFER_SIZE, fd)) { + fprintf(stderr, "Empty step update completion file\n"); + return SLURM_ERROR; + } + + fptr = line; /* break the record into NULL-terminated strings */ + for (i = 0; i < MAX_RECORD_FIELDS; i++) { + update[i] = fptr; + fptr = strstr(fptr, " "); + if (fptr == NULL) { + fptr = strstr(update[i], "\n"); + if (fptr) + *fptr = 0; + break; + } else + *fptr++ = 0; + } + + if (i < MAX_RECORD_FIELDS) + i++; + update[i] = 0; + + version = atoi(update[UPDATE_STEP_VERSION]); + switch (version) { + case 1: + if (i != UPDATE_STEP_VER1_LENGTH) { + fprintf(stderr, + "Bad step update completion file length\n"); + return SLURM_ERROR; + } + step_msg->jobacct = jobacctinfo_create(NULL); + step_msg->exit_code = atoi(update[UPDATE_STEP_EXITCODE]); + step_msg->start_time = atoi(update[UPDATE_STEP_START]); 
+ step_msg->end_time = atoi(update[UPDATE_STEP_END]); + step_msg->jobacct->user_cpu_sec = + atoi(update[UPDATE_STEP_USER_SEC]); + step_msg->jobacct->sys_cpu_sec = + atoi(update[UPDATE_STEP_SYS_SEC]); + step_msg->jobacct->min_cpu = + step_msg->jobacct->user_cpu_sec + + step_msg->jobacct->sys_cpu_sec; + step_msg->jobacct->max_rss = atoi(update[UPDATE_STEP_MAX_RSS]); + step_msg->name = + xstrdup(xbasename(update[UPDATE_STEP_STEPNAME])); + break; + default: + fprintf(stderr, "Unsupported step update " + "completion file version: %d\n", + version); + return SLURM_ERROR; + break; + } + + + return SLURM_SUCCESS; +} + /* * scontrol_update_step - update the slurm step configuration per the supplied * arguments @@ -109,8 +203,7 @@ extern int scontrol_update_step (int argc, char *argv[]) fprintf (stderr, "Request aborted\n"); return 0; } /* else apply to all steps of this job_id */ - } - else if (strncasecmp(tag, "TimeLimit", MAX(taglen, 2)) == 0) { + } else if (strncasecmp(tag, "TimeLimit", MAX(taglen, 2)) == 0) { bool incr, decr; uint32_t step_current_time, time_limit; @@ -147,8 +240,16 @@ extern int scontrol_update_step (int argc, char *argv[]) } step_msg.time_limit = time_limit; update_cnt++; - } - else { + } else if (strncasecmp(tag, "CompFile", MAX(taglen, 2)) == 0) { + if (_parse_comp_file(val, &step_msg)) { + exit_code = 1; + fprintf(stderr, + "Bad completion file (%s) given\n" + "Request aborted\n", val); + return 0; + } + update_cnt++; + } else { exit_code = 1; fprintf (stderr, "Update of this parameter is not " "supported: %s\n", argv[i]); diff --git a/src/sdiag/Makefile.in b/src/sdiag/Makefile.in index 0881c085adbefe22f982aa0c26ee0bba55843b1f..444fbacce1efbbb0dd179d7e4f26a406a56009ff 100644 --- a/src/sdiag/Makefile.in +++ b/src/sdiag/Makefile.in @@ -60,6 +60,7 @@ subdir = src/sdiag DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + 
$(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -157,6 +161,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -177,6 +183,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -186,6 +195,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -193,6 +204,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ 
+HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -227,6 +247,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -254,6 +277,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sdiag/opts.c b/src/sdiag/opts.c index 035d536f6b5e0648aa71823971ef80f7d89f5956..9d0e84cdec1a79e51edae0e133329f48ae280a38 100644 --- a/src/sdiag/opts.c +++ b/src/sdiag/opts.c @@ -5,7 +5,7 @@ * Written by Alejandro Lucero <alucero@bsc.es> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sdiag/sdiag.c b/src/sdiag/sdiag.c index 8939da651c76e7c63b6fbb55cace9d18a0984b5e..6595b8da58b6a4ea13cef5e056e5f86d4a8c79d0 100644 --- a/src/sdiag/sdiag.c +++ b/src/sdiag/sdiag.c @@ -5,7 +5,7 @@ * Written by Alejandro Lucero <alucero@bsc.es> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sinfo/Makefile.in b/src/sinfo/Makefile.in index 444a2fd1f248162617e7dca6909984b0750cb67e..0eca09d4474295618d223233fc50d6664a88ac3c 100644 --- a/src/sinfo/Makefile.in +++ b/src/sinfo/Makefile.in @@ -62,6 +62,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -79,6 +80,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -87,11 +89,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ 
GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 +281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sinfo/opts.c b/src/sinfo/opts.c index eae7f85ac67f228e743ae001a3505a74e56e31c1..d3141b1da1171c27583303de3c147632d543c814 100644 --- a/src/sinfo/opts.c +++ b/src/sinfo/opts.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -188,7 +188,7 @@ extern void parse_command_line(int argc, char *argv[]) break; case (int) 'n': xfree(params.nodes); - params.nodes= xstrdup(optarg); + params.nodes = xstrdup(optarg); /* * confirm valid nodelist entry */ @@ -198,6 +198,12 @@ extern void parse_command_line(int argc, char *argv[]) optarg); exit(1); } + if (hostlist_count(host_list) == 1) { + params.node_name_single = true; + xfree(params.nodes); + params.nodes = hostlist_deranged_string_xmalloc(host_list); + } else + params.node_name_single = false; hostlist_destroy(host_list); break; case (int) 'N': @@ -508,7 +514,13 @@ _parse_format( char* format ) format_add_nodes_ai( params.format_list, field_size, right_justify, - suffix ); + suffix ); + } else if (field[0] == 'B') { + params.match_flags.max_cpus_per_node_flag = true; + format_add_max_cpus_per_node( params.format_list, + field_size, + right_justify, + suffix ); } else if (field[0] == 'c') { params.match_flags.cpus_flag = true; format_add_cpus( params.format_list, diff --git a/src/sinfo/print.c b/src/sinfo/print.c index a8c0da53418bbe774ed3a0217bed0dcde126e66a..94bde71863c399d0e20b301a009a2a0965bd6617 100644 --- a/src/sinfo/print.c +++ b/src/sinfo/print.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -438,7 +438,7 @@ int _print_cpus_aiot(sinfo_data_t * sinfo_data, int width, char tmpo[8]; char tmpt[8]; if (sinfo_data) { - if(params.cluster_flags & CLUSTER_FLAG_BG) { + if (params.cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)sinfo_data->cpus_alloc, tmpa, sizeof(tmpa), UNIT_NONE); convert_num_unit((float)sinfo_data->cpus_idle, @@ -687,7 +687,7 @@ int _print_node_list(sinfo_data_t * sinfo_data, int width, xfree(tmp); } else { char *title = "NODELIST"; - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) title = "MIDPLANELIST"; _print_str(title, width, right_justify, false); @@ -726,7 +726,7 @@ int _print_nodes_t(sinfo_data_t * sinfo_data, int width, char id[FORMAT_STRING_SIZE]; char tmp[8]; if (sinfo_data) { - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)sinfo_data->nodes_total, tmp, sizeof(tmp), UNIT_NONE); else @@ -749,7 +749,7 @@ int _print_nodes_ai(sinfo_data_t * sinfo_data, int width, char tmpa[8]; char tmpi[8]; if (sinfo_data) { - if(params.cluster_flags & CLUSTER_FLAG_BG) { + if (params.cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)sinfo_data->nodes_alloc, tmpa, sizeof(tmpa), UNIT_NONE); convert_num_unit((float)sinfo_data->nodes_idle, @@ -780,7 +780,7 @@ int _print_nodes_aiot(sinfo_data_t * sinfo_data, int width, char tmpo[8]; char tmpt[8]; if (sinfo_data) { - if(params.cluster_flags & CLUSTER_FLAG_BG) { + if (params.cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)sinfo_data->nodes_alloc, tmpa, sizeof(tmpa), UNIT_NONE); convert_num_unit((float)sinfo_data->nodes_idle, @@ -1135,7 +1135,7 @@ int _print_default_time(sinfo_data_t * sinfo_data, int width, _print_str("infinite", width, right_justify, true); else _print_secs((sinfo_data->part_info->default_time * 60L), - width, right_justify, true); + width, right_justify, true); } else 
_print_str("DEFAULTTIME", width, right_justify, true); @@ -1145,14 +1145,14 @@ int _print_default_time(sinfo_data_t * sinfo_data, int width, } int _print_weight(sinfo_data_t * sinfo_data, int width, - bool right_justify, char *suffix) + bool right_justify, char *suffix) { char id[FORMAT_STRING_SIZE]; if (sinfo_data) { _build_min_max_32_string(id, FORMAT_STRING_SIZE, - sinfo_data->min_weight, - sinfo_data->max_weight, - false, false); + sinfo_data->min_weight, + sinfo_data->max_weight, + false, false); _print_str(id, width, right_justify, true); } else _print_str("WEIGHT", width, right_justify, true); @@ -1188,5 +1188,22 @@ int _print_cpu_load(sinfo_data_t * sinfo_data, int width, if (suffix) printf("%s", suffix); return SLURM_SUCCESS; +} + +int _print_max_cpus_per_node(sinfo_data_t * sinfo_data, int width, + bool right_justify, char *suffix) +{ + char tmp_line[32]; + if (sinfo_data) { + if (sinfo_data->part_info->max_cpus_per_node == INFINITE) + sprintf(tmp_line, "UNLIMITED"); + else + sprintf(tmp_line, "%u", sinfo_data->max_cpus_per_node); + _print_str(tmp_line, width, right_justify, true); + + } else { + _print_str("MAX_CPUS_PER_NODE", width, right_justify, true); + } + return SLURM_SUCCESS; } diff --git a/src/sinfo/print.h b/src/sinfo/print.h index c11c2d065b16a9375fe4e5078d3098a0617ff734..206910d1f15fa9fc113fb8a238e235bcd0ef6f9e 100644 --- a/src/sinfo/print.h +++ b/src/sinfo/print.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -146,6 +146,8 @@ void print_sinfo_reservation(reserve_info_msg_t *resv_ptr); format_add_function(list,wid,right,suffix,_print_com_invalid) #define format_add_cpu_load(list,wid,right,suffix) \ format_add_function(list,wid,right,suffix,_print_cpu_load) +#define format_add_max_cpus_per_node(list,wid,right,suffix) \ + format_add_function(list,wid,right,suffix,_print_max_cpus_per_node) /***************************************************************************** * Print Field Functions @@ -227,5 +229,7 @@ int _print_com_invalid(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix); int _print_cpu_load(sinfo_data_t * node_ptr, int width, bool right_justify, char *suffix); +int _print_max_cpus_per_node(sinfo_data_t * sinfo_data, int width, + bool right_justify, char *suffix); #endif diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c index 43e3f2025c46a5ff29d625795f8bc0cfe30436c2..1cc38c68626309cf7d546154b88b7c69d2a5b442 100644 --- a/src/sinfo/sinfo.c +++ b/src/sinfo/sinfo.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -276,14 +276,23 @@ _query_server(partition_info_msg_t ** part_pptr, if (old_node_ptr) { if (clear_old) old_node_ptr->last_update = 0; - error_code = slurm_load_node(old_node_ptr->last_update, - &new_node_ptr, show_flags); + if (params.node_name_single) { + error_code = slurm_load_node_single(&new_node_ptr, + params.nodes, + show_flags); + } else { + error_code = slurm_load_node(old_node_ptr->last_update, + &new_node_ptr, show_flags); + } if (error_code == SLURM_SUCCESS) slurm_free_node_info_msg(old_node_ptr); else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) { error_code = SLURM_SUCCESS; new_node_ptr = old_node_ptr; } + } else if (params.node_name_single) { + error_code = slurm_load_node_single(&new_node_ptr, params.nodes, + show_flags); } else { error_code = slurm_load_node((time_t) NULL, &new_node_ptr, show_flags); @@ -428,40 +437,46 @@ static int _build_sinfo_data(List sinfo_list, } continue; } - + j2 = 0; - while(part_ptr->node_inx[j2] >= 0) { + while (part_ptr->node_inx[j2] >= 0) { int i2 = 0; uint16_t subgrp_size = 0; - for(i2 = part_ptr->node_inx[j2]; - i2 <= part_ptr->node_inx[j2+1]; - i2++) { + for (i2 = part_ptr->node_inx[j2]; + i2 <= part_ptr->node_inx[j2+1]; + i2++) { + if (i2 >= node_msg->record_count) { + /* This can happen if info for single + * node name is loaded */ + break; + } node_ptr = &(node_msg->node_array[i2]); - if (node_ptr->name == NULL || + if ((node_ptr->name == NULL) || (params.filtering && _filter_out(node_ptr))) continue; - if(select_g_select_nodeinfo_get( + if (select_g_select_nodeinfo_get( node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBGRP_SIZE, 0, &subgrp_size) == SLURM_SUCCESS - && subgrp_size) + && subgrp_size) { _handle_subgrps(sinfo_list, (uint16_t) j, part_ptr, node_ptr, node_msg-> node_scaling); - else + } else { _insert_node_ptr(sinfo_list, (uint16_t) j, part_ptr, node_ptr, node_msg-> node_scaling); + } } j2 += 2; } @@ -536,7 +551,7 @@ static 
bool _filter_out(node_info_t *node_ptr) SELECT_NODEDATA_SUBCNT, NODE_STATE_ERROR, &cpus); - if(cpus) { + if (cpus) { match = true; break; } @@ -546,7 +561,7 @@ static bool _filter_out(node_info_t *node_ptr) SELECT_NODEDATA_SUBCNT, NODE_STATE_ALLOCATED, &cpus); - if(cpus) { + if (cpus) { match = true; break; } @@ -661,6 +676,7 @@ static bool _match_node_data(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr) (node_ptr->cpu_load != sinfo_ptr->min_cpu_load)) return false; + return true; } @@ -718,6 +734,11 @@ static bool _match_part_data(sinfo_data_t *sinfo_ptr, (part_ptr->priority != sinfo_ptr->part_info->priority)) return false; + if (params.match_flags.max_cpus_per_node_flag && + (part_ptr->max_cpus_per_node != + sinfo_ptr->part_info->max_cpus_per_node)) + return false; + return true; } @@ -756,6 +777,8 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, sinfo_ptr->max_weight = node_ptr->weight; sinfo_ptr->min_cpu_load = node_ptr->cpu_load; sinfo_ptr->max_cpu_load = node_ptr->cpu_load; + sinfo_ptr->max_cpus_per_node = sinfo_ptr->part_info-> + max_cpus_per_node; } else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) { /* we already have this node in this record, * just return, don't duplicate */ @@ -802,9 +825,11 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, sinfo_ptr->max_cpu_load = node_ptr->cpu_load; } - hostlist_push(sinfo_ptr->nodes, node_ptr->name); - hostlist_push(sinfo_ptr->node_addr, node_ptr->node_addr); - hostlist_push(sinfo_ptr->hostnames, node_ptr->node_hostname); + hostlist_push(sinfo_ptr->nodes, node_ptr->name); + if (params.match_flags.node_addr_flag) + hostlist_push(sinfo_ptr->node_addr, node_ptr->node_addr); + if (params.match_flags.hostnames_flag) + hostlist_push(sinfo_ptr->hostnames, node_ptr->node_hostname); total_cpus = node_ptr->cpus; total_nodes = node_scaling; @@ -943,21 +968,21 @@ static int _handle_subgrps(List sinfo_list, uint16_t part_num, iterator = 
list_iterator_create(params.state_list); for(i=0; i<state_cnt; i++) { - if(iterator) { + if (iterator) { node_info_t tmp_node, *tmp_node_ptr = &tmp_node; while ((node_state = list_next(iterator))) { tmp_node_ptr->node_state = *node_state; - if((((state[i] == NODE_STATE_ALLOCATED) + if ((((state[i] == NODE_STATE_ALLOCATED) && IS_NODE_DRAINING(tmp_node_ptr)) || (*node_state == NODE_STATE_DRAIN)) || (*node_state == state[i])) break; } list_iterator_reset(iterator); - if(!node_state) + if (!node_state) continue; } - if(select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, + if (select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBCNT, state[i], &size) == SLURM_SUCCESS @@ -971,22 +996,22 @@ static int _handle_subgrps(List sinfo_list, uint16_t part_num, } /* now handle the idle */ - if(iterator) { + if (iterator) { while ((node_state = list_next(iterator))) { node_info_t tmp_node, *tmp_node_ptr = &tmp_node; tmp_node_ptr->node_state = *node_state; - if(((*node_state == NODE_STATE_DRAIN) + if (((*node_state == NODE_STATE_DRAIN) || IS_NODE_DRAINED(tmp_node_ptr)) || (*node_state == NODE_STATE_IDLE)) break; } list_iterator_destroy(iterator); - if(!node_state) + if (!node_state) return SLURM_SUCCESS; } node_ptr->node_state &= NODE_STATE_FLAGS; node_ptr->node_state |= NODE_STATE_IDLE; - if((int)node_scaling > 0) + if ((int)node_scaling > 0) _insert_node_ptr(sinfo_list, part_num, part_ptr, node_ptr, node_scaling); diff --git a/src/sinfo/sinfo.h b/src/sinfo/sinfo.h index 05ec6be9feffbe7e2cfd615fa4e649e2fd651015..014c926e87fff06d4cace27af8a1661a2368df6d 100644 --- a/src/sinfo/sinfo.h +++ b/src/sinfo/sinfo.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -101,6 +101,8 @@ typedef struct { uint32_t min_cpu_load; uint32_t max_cpu_load; + uint32_t max_cpus_per_node; + char *features; char *gres; char *reason; @@ -148,6 +150,7 @@ struct sinfo_match_flags { bool reason_timestamp_flag; bool reason_user_flag; bool cpu_load_flag; + bool max_cpus_per_node_flag; }; /* Input parameters */ @@ -163,6 +166,7 @@ struct sinfo_parameters { bool no_header; bool node_field_flag; bool node_flag; + bool node_name_single; bool part_field_flag; bool reservation_flag; bool responding_nodes; diff --git a/src/sinfo/sort.c b/src/sinfo/sort.c index 096576f935036219a7928d5990420fc3ff852a7b..76c88939a559e305511bee1720877cc6e71c2f82 100644 --- a/src/sinfo/sort.c +++ b/src/sinfo/sort.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am index db271915bd27e99fdf6c6894174f08e1dce61237..7d759dc65798ff6ae21ca38419107f7f0bd523cb 100644 --- a/src/slurmctld/Makefile.am +++ b/src/slurmctld/Makefile.am @@ -37,6 +37,8 @@ slurmctld_SOURCES = \ partition_mgr.c \ ping_nodes.c \ ping_nodes.h \ + slurmctld_plugstack.c \ + slurmctld_plugstack.h \ port_mgr.c \ port_mgr.h \ power_save.c \ diff --git a/src/slurmctld/Makefile.in b/src/slurmctld/Makefile.in index d4117ce285c2e4c75f43eedb68982a5f5c61a39d..405e01560cf1046ad36a07e3fe5a9d2abfb02f49 100644 --- a/src/slurmctld/Makefile.in +++ b/src/slurmctld/Makefile.in @@ -60,6 +60,7 @@ subdir = src/slurmctld DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -110,11 
+114,12 @@ am_slurmctld_OBJECTS = acct_policy.$(OBJEXT) agent.$(OBJEXT) \ job_scheduler.$(OBJEXT) job_submit.$(OBJEXT) \ licenses.$(OBJEXT) locks.$(OBJEXT) node_mgr.$(OBJEXT) \ node_scheduler.$(OBJEXT) partition_mgr.$(OBJEXT) \ - ping_nodes.$(OBJEXT) port_mgr.$(OBJEXT) power_save.$(OBJEXT) \ - preempt.$(OBJEXT) proc_req.$(OBJEXT) read_config.$(OBJEXT) \ - reservation.$(OBJEXT) sched_plugin.$(OBJEXT) \ - srun_comm.$(OBJEXT) state_save.$(OBJEXT) statistics.$(OBJEXT) \ - step_mgr.$(OBJEXT) trigger_mgr.$(OBJEXT) + ping_nodes.$(OBJEXT) slurmctld_plugstack.$(OBJEXT) \ + port_mgr.$(OBJEXT) power_save.$(OBJEXT) preempt.$(OBJEXT) \ + proc_req.$(OBJEXT) read_config.$(OBJEXT) reservation.$(OBJEXT) \ + sched_plugin.$(OBJEXT) srun_comm.$(OBJEXT) \ + state_save.$(OBJEXT) statistics.$(OBJEXT) step_mgr.$(OBJEXT) \ + trigger_mgr.$(OBJEXT) slurmctld_OBJECTS = $(am_slurmctld_OBJECTS) am__DEPENDENCIES_1 = slurmctld_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ @@ -167,6 +172,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +194,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +206,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +215,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ 
+HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +258,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +288,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -387,6 +414,8 @@ slurmctld_SOURCES = \ partition_mgr.c \ ping_nodes.c \ ping_nodes.h \ + slurmctld_plugstack.c \ + slurmctld_plugstack.h \ port_mgr.c \ port_mgr.h \ power_save.c \ @@ -530,6 +559,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/read_config.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reservation.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sched_plugin.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurmctld_plugstack.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srun_comm.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/state_save.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/statistics.Po@am__quote@ diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c index 24516713cf3972b4a0a364a7502a905153533593..976d85e15b0894eff84cd465a6d1ba32a0a6f835 100644 --- a/src/slurmctld/acct_policy.c +++ b/src/slurmctld/acct_policy.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -48,6 +48,7 @@ #include "src/slurmctld/slurmctld.h" #include "src/slurmctld/acct_policy.h" #include "src/common/node_select.h" +#include "src/common/slurm_priority.h" #define _DEBUG 0 @@ -77,21 +78,6 @@ static slurmdb_used_limits_t *_get_used_limits_for_user( return used_limits; } -static uint64_t _get_unused_cpu_run_secs(struct job_record *job_ptr) -{ - uint64_t unused_cpu_run_secs = 0; - uint64_t time_limit_secs = (uint64_t)job_ptr->time_limit * 60; - - /* No unused cpu_run_secs if job ran past its time limit */ - if (job_ptr->end_time >= job_ptr->start_time + time_limit_secs) { - return 0; - } - - unused_cpu_run_secs = job_ptr->total_cpus * - (job_ptr->start_time + time_limit_secs - job_ptr->end_time); - return unused_cpu_run_secs; -} - static bool _valid_job_assoc(struct job_record *job_ptr) { slurmdb_association_rec_t assoc_rec, *assoc_ptr; @@ -129,7 +115,6 @@ static void _adjust_limit_usage(int type, struct job_record *job_ptr) slurmdb_association_rec_t *assoc_ptr = NULL; assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; - uint64_t unused_cpu_run_secs = 0; uint64_t used_cpu_run_secs = 0; uint32_t job_memory = 0; uint32_t node_cnt; @@ -151,7 +136,7 @@ static void _adjust_limit_usage(int type, struct job_record *job_ptr) #endif if (type == ACCT_POLICY_JOB_FINI) - unused_cpu_run_secs = _get_unused_cpu_run_secs(job_ptr); + priority_g_job_end(job_ptr); else if (type == ACCT_POLICY_JOB_BEGIN) used_cpu_run_secs = (uint64_t)job_ptr->total_cpus * (uint64_t)job_ptr->time_limit * 60; @@ -197,14 +182,14 @@ static void _adjust_limit_usage(int type, struct job_record *job_ptr) used_limits->submit_jobs++; break; case ACCT_POLICY_REM_SUBMIT: - if(qos_ptr->usage->grp_used_submit_jobs) + if (qos_ptr->usage->grp_used_submit_jobs) qos_ptr->usage->grp_used_submit_jobs--; else 
debug2("acct_policy_remove_job_submit: " "grp_submit_jobs underflow for qos %s", qos_ptr->name); - if(used_limits->submit_jobs) + if (used_limits->submit_jobs) used_limits->submit_jobs--; else debug2("acct_policy_remove_job_submit: " @@ -240,7 +225,7 @@ static void _adjust_limit_usage(int type, struct job_record *job_ptr) } qos_ptr->usage->grp_used_mem -= job_memory; - if((int32_t)qos_ptr->usage->grp_used_mem < 0) { + if ((int32_t)qos_ptr->usage->grp_used_mem < 0) { qos_ptr->usage->grp_used_mem = 0; debug2("acct_policy_job_fini: grp_used_mem " "underflow for qos %s", qos_ptr->name); @@ -253,18 +238,6 @@ static void _adjust_limit_usage(int type, struct job_record *job_ptr) "underflow for qos %s", qos_ptr->name); } - /* If the job finished early remove the extra - time now. */ - if (unused_cpu_run_secs > - qos_ptr->usage->grp_used_cpu_run_secs) { - qos_ptr->usage->grp_used_cpu_run_secs = 0; - debug2("acct_policy_job_fini: " - "grp_used_cpu_run_secs " - "underflow for qos %s", qos_ptr->name); - } else - qos_ptr->usage->grp_used_cpu_run_secs -= - unused_cpu_run_secs; - used_limits->cpus -= job_ptr->total_cpus; if ((int32_t)used_limits->cpus < 0) { used_limits->cpus = 0; @@ -357,27 +330,6 @@ static void _adjust_limit_usage(int type, struct job_record *job_ptr) assoc_ptr->acct); } - /* If the job finished early remove the extra - time now. */ - if (unused_cpu_run_secs > - assoc_ptr->usage->grp_used_cpu_run_secs) { - assoc_ptr->usage->grp_used_cpu_run_secs = 0; - debug2("acct_policy_job_fini: " - "grp_used_cpu_run_secs " - "underflow for account %s", - assoc_ptr->acct); - } else { - assoc_ptr->usage->grp_used_cpu_run_secs -= - unused_cpu_run_secs; - debug4("acct_policy_job_fini: job %u. 
" - "Removed %"PRIu64" unused seconds " - "from assoc %s " - "grp_used_cpu_run_secs = %"PRIu64"", - job_ptr->job_id, unused_cpu_run_secs, - assoc_ptr->acct, - assoc_ptr->usage->grp_used_cpu_run_secs); - } - break; default: error("acct_policy: association unknown type %d", type); @@ -436,14 +388,13 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, { uint32_t time_limit; slurmdb_association_rec_t *assoc_ptr = assoc_in; - int parent = 0; + int parent = 0, job_cnt = 1; char *user_name = NULL; bool rc = true; uint32_t qos_max_cpus_limit = INFINITE; uint32_t qos_max_nodes_limit = INFINITE; + uint32_t qos_time_limit = INFINITE; uint32_t job_memory = 0; - uint64_t cpu_time_limit; - uint64_t job_cpu_time_limit; bool admin_set_memory_limit = false; assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; @@ -481,6 +432,9 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, } } + if (job_desc->array_bitmap) + job_cnt = bit_set_count(job_desc->array_bitmap); + assoc_mgr_lock(&locks); if (qos_ptr) { @@ -624,8 +578,8 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, } if ((qos_ptr->grp_submit_jobs != INFINITE) && - (qos_ptr->usage->grp_used_submit_jobs - >= qos_ptr->grp_submit_jobs)) { + ((qos_ptr->usage->grp_used_submit_jobs + job_cnt) + > qos_ptr->grp_submit_jobs)) { debug2("job submit for user %s(%u): " "group max submit job limit exceeded %u " "for qos '%s'", @@ -647,23 +601,14 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, * if you can end up in PENDING QOSJobLimit, you need * to validate it if DenyOnLimit is set */ - if (strict_checking && (qos_ptr->max_cpu_mins_pj != INFINITE) - && (job_desc->time_limit != NO_VAL) - && (job_desc->min_cpus != NO_VAL)) { - cpu_time_limit = qos_ptr->max_cpu_mins_pj; - job_cpu_time_limit = (uint64_t)job_desc->time_limit - * (uint64_t)job_desc->min_cpus; - if (job_cpu_time_limit > cpu_time_limit) { - if (reason) - *reason = WAIT_QOS_JOB_LIMIT; - debug2("job submit for user 
%s(%u): " - "cpu time limit %"PRIu64" exceeds " - "qos max per-job %"PRIu64"", - user_name, job_desc->user_id, - job_cpu_time_limit, cpu_time_limit); - rc = false; - goto end_it; - } + if (((job_desc->min_cpus != NO_VAL) || + (job_desc->min_nodes != NO_VAL)) && + (qos_ptr->max_cpu_mins_pj != INFINITE)) { + uint32_t cpu_cnt = job_desc->min_nodes; + if ((job_desc->min_nodes == NO_VAL) || + (job_desc->min_cpus > job_desc->min_nodes)) + cpu_cnt = job_desc->min_cpus; + qos_time_limit = qos_ptr->max_cpu_mins_pj / cpu_cnt; } if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT) @@ -754,8 +699,9 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, job_desc->user_id); if ((!used_limits && qos_ptr->max_submit_jobs_pu == 0) || - (used_limits && (used_limits->submit_jobs - >= qos_ptr->max_submit_jobs_pu))) { + (used_limits && + ((used_limits->submit_jobs + job_cnt) > + qos_ptr->max_submit_jobs_pu))) { debug2("job submit for user %s(%u): " "qos max submit job limit exceeded %u", user_name, @@ -770,33 +716,36 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, || (qos_ptr->max_wall_pj == INFINITE) || (update_call && (job_desc->time_limit == NO_VAL))) { /* no need to check/set */ - } else { - time_limit = qos_ptr->max_wall_pj; + } else if (qos_time_limit > qos_ptr->max_wall_pj) { + qos_time_limit = qos_ptr->max_wall_pj; + } + + if (qos_time_limit != INFINITE) { if (job_desc->time_limit == NO_VAL) { if (part_ptr->max_time == INFINITE) - job_desc->time_limit = time_limit; - else + job_desc->time_limit = qos_time_limit; + else { job_desc->time_limit = - MIN(time_limit, + MIN(qos_time_limit, part_ptr->max_time); + } acct_policy_limit_set->time = 1; } else if (acct_policy_limit_set->time && - job_desc->time_limit > time_limit) { - job_desc->time_limit = time_limit; + job_desc->time_limit > qos_time_limit) { + job_desc->time_limit = qos_time_limit; } else if (strict_checking - && job_desc->time_limit > time_limit) { + && job_desc->time_limit > qos_time_limit) { if 
(reason) *reason = WAIT_QOS_JOB_LIMIT; debug2("job submit for user %s(%u): " "time limit %u exceeds qos max %u", user_name, job_desc->user_id, - job_desc->time_limit, time_limit); + job_desc->time_limit, qos_time_limit); rc = false; goto end_it; } } - } while (assoc_ptr) { @@ -897,8 +846,8 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, if ((!qos_ptr || (qos_ptr && qos_ptr->grp_submit_jobs == INFINITE)) && (assoc_ptr->grp_submit_jobs != INFINITE) && - (assoc_ptr->usage->used_submit_jobs - >= assoc_ptr->grp_submit_jobs)) { + ((assoc_ptr->usage->used_submit_jobs + job_cnt) + > assoc_ptr->grp_submit_jobs)) { debug2("job submit for user %s(%u): " "group max submit job limit exceeded %u " "for account '%s'", @@ -1006,8 +955,8 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, if ((!qos_ptr || (qos_ptr && qos_ptr->max_submit_jobs_pu == INFINITE)) && (assoc_ptr->max_submit_jobs != INFINITE) && - (assoc_ptr->usage->used_submit_jobs - >= assoc_ptr->max_submit_jobs)) { + ((assoc_ptr->usage->used_submit_jobs + job_cnt) + > assoc_ptr->max_submit_jobs)) { debug2("job submit for user %s(%u): " "account max submit job limit exceeded %u", user_name, @@ -1815,7 +1764,7 @@ extern bool acct_policy_job_runnable(struct job_record *job_ptr) * parents since we have pre-propogated them, so just * continue with the next parent */ - if(parent) { + if (parent) { assoc_ptr = assoc_ptr->usage->parent_assoc_ptr; continue; } @@ -2072,8 +2021,12 @@ extern bool acct_policy_job_time_out(struct job_record *job_ptr) READ_LOCK, NO_LOCK, NO_LOCK }; time_t now; - /* now see if we are enforcing limits */ - if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)) + /* Now see if we are enforcing limits. If Safe is set then + * return false as well since we are being safe if the limit + * was changed after the job was already deemed safe to start. 
+ */ + if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS) + || (accounting_enforce & ACCOUNTING_ENFORCE_SAFE)) return false; assoc_mgr_lock(&locks); @@ -2195,7 +2148,7 @@ extern bool acct_policy_job_time_out(struct job_record *job_ptr) assoc = assoc->usage->parent_assoc_ptr; /* these limits don't apply to the root assoc */ - if(assoc == assoc_mgr_root_assoc) + if (assoc == assoc_mgr_root_assoc) break; } job_failed: diff --git a/src/slurmctld/acct_policy.h b/src/slurmctld/acct_policy.h index 0f4f3ae24588d115408338ac45fed0f525a90747..a428f2a6655b8ebcf2de973eaad2a29db5991bdb 100644 --- a/src/slurmctld/acct_policy.h +++ b/src/slurmctld/acct_policy.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index fc38313cd7caa17e069d5ac6c66963e087be4f52..906a16f611c0321d4c2a5bc37ebc40f6bf64fb50 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -429,12 +429,12 @@ static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr) thread_ptr[thr_count].state = DSH_NEW; thread_ptr[thr_count].addr = agent_arg_ptr->addr; name = hostlist_shift(agent_arg_ptr->hostlist); - if(!name) { + if (!name) { debug3("no more nodes to send to"); break; } hl = hostlist_create(name); - if(thread_ptr[thr_count].addr && span[thr_count]) { + if (thread_ptr[thr_count].addr && span[thr_count]) { debug("warning: you will only be sending this to %s", name); span[thr_count] = 0; @@ -874,7 +874,7 @@ static void *_thread_per_group_rpc(void *args) msg.address = *thread_ptr->addr; } else { //info("no address given"); - if(slurm_conf_get_addr(thread_ptr->nodelist, + if (slurm_conf_get_addr(thread_ptr->nodelist, &msg.address) == SLURM_ERROR) { error("_thread_per_group_rpc: " "can't find address for host %s, " @@ -1006,7 +1006,7 @@ static void *_thread_per_group_rpc(void *args) } if (srun_agent) thread_state = DSH_FAILED; - else if(ret_data_info->type == RESPONSE_FORWARD_FAILED) + else if (ret_data_info->type == RESPONSE_FORWARD_FAILED) /* check if a forward failed */ thread_state = DSH_NO_RESP; else { /* some will fail that don't mean anything went @@ -1058,7 +1058,7 @@ static int _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr, debug("got the name %s to resend out of %d", ret_data_info->node_name, count); - if(agent_arg_ptr) { + if (agent_arg_ptr) { hostlist_push(agent_arg_ptr->hostlist, ret_data_info->node_name); @@ -1098,7 +1098,7 @@ static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count) j = 0; for (i = 0; i < agent_info_ptr->thread_count; i++) { - if(!thread_ptr[i].ret_list) { + if (!thread_ptr[i].ret_list) { if (thread_ptr[i].state != DSH_NO_RESP) continue; @@ -1110,7 +1110,7 @@ static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count) if ((++j) == count) break; } else { - if(_setup_requeue(agent_arg_ptr, 
&thread_ptr[i], + if (_setup_requeue(agent_arg_ptr, &thread_ptr[i], count, &j)) break; } @@ -1174,10 +1174,10 @@ extern int agent_retry (int min_wait, bool mail_too) slurm_mutex_lock(&retry_mutex); if (retry_list) { static time_t last_msg_time = (time_t) 0; - uint32_t msg_type[5], i = 0; + uint32_t msg_type[5] = {0, 0, 0, 0, 0}, i = 0; list_size = list_count(retry_list); - if ((list_size > MAX_AGENT_CNT) - && (difftime(now, last_msg_time) > 300)) { + if ((list_size > MAX_AGENT_CNT) && + (difftime(now, last_msg_time) > 300)) { /* Note sizable backlog of work */ info("WARNING: agent retry_list size is %d", list_size); @@ -1459,9 +1459,9 @@ static void _mail_proc(mail_info_t *mi) (void) close(1); (void) close(2); fd = open("/dev/null", O_RDWR); // 0 - if(dup(fd) == -1) // 1 + if (dup(fd) == -1) // 1 error("Couldn't do a dup for 1: %m"); - if(dup(fd) == -1) // 2 + if (dup(fd) == -1) // 2 error("Couldn't do a dup for 2 %m"); execle(slurmctld_conf.mail_prog, "mail", "-s", mi->message, mi->user_name, @@ -1530,11 +1530,10 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type) else mi->user_name = xstrdup(job_ptr->mail_user); - mi->message = xmalloc(256); _set_job_time(job_ptr, mail_type, job_time, sizeof(job_time)); - sprintf(mi->message, "SLURM Job_id=%u Name=%.24s %s%s", - job_ptr->job_id, job_ptr->name, - _mail_type_str(mail_type), job_time); + mi->message = xstrdup_printf("SLURM Job_id=%u Name=%s %s%s", + job_ptr->job_id, job_ptr->name, + _mail_type_str(mail_type), job_time); debug("email msg to %s: %s", mi->user_name, mi->message); @@ -1544,8 +1543,7 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type) if (!mail_list) fatal("list_create failed"); } - if (!list_enqueue(mail_list, (void *) mi)) - fatal("list_enqueue failed"); + (void) list_enqueue(mail_list, (void *) mi); slurm_mutex_unlock(&mail_mutex); return; } diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h index 
d41aba744f8ba916859477f45d3499dc253ce990..9807d5ff7c458df1f47f5ea04c11b57b54b02e01 100644 --- a/src/slurmctld/agent.h +++ b/src/slurmctld/agent.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index ab6ebbc2bad0ebdff27619bdfab830ae9d9b08e4..7825d18f08428e460008a078f9145c18f3d4ba2d 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -60,6 +60,7 @@ #include "src/common/macros.h" #include "src/common/node_select.h" #include "src/common/slurm_auth.h" +#include "src/common/slurm_accounting_storage.h" #include "src/common/switch.h" #include "src/common/xsignal.h" #include "src/common/xstring.h" @@ -105,8 +106,9 @@ static int backup_sigarray[] = { /* run_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */ -void run_backup(void) +void run_backup(slurm_trigger_callbacks_t *callbacks) { + int i; uint32_t trigger_type; time_t last_ping = 0; pthread_attr_t thread_attr_sig, thread_attr_rpc; @@ -149,7 +151,10 @@ void run_backup(void) trigger_type = TRIGGER_TYPE_BU_CTLD_RES_OP; _trigger_slurmctld_event(trigger_type); - sleep(5); /* Give the primary slurmctld set-up time */ + for (i = 0; ((i < 5) && (slurmctld_config.shutdown_time == 0)); i++) { + sleep(1); /* Give the 
primary slurmctld set-up time */ + } + /* repeatedly ping ControlMachine */ while (slurmctld_config.shutdown_time == 0) { sleep(1); @@ -212,6 +217,15 @@ void run_backup(void) pthread_join(slurmctld_config.thread_id_sig, NULL); pthread_join(slurmctld_config.thread_id_rpc, NULL); + if (!acct_db_conn) { + /* Make sure we get a connection right away to avoid + race condition on this happening too late. + */ + acct_db_conn = acct_storage_g_get_connection( + callbacks, 0, false, + slurmctld_cluster_name); + } + /* clear old state and read new state */ lock_slurmctld(config_write_lock); job_fini(); diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index f350c1ff4f625901efb39c56878fd5794276000f..985871489eae6840f245d9d2efa0354118b29ac4 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -73,9 +73,11 @@ #include "src/common/pack.h" #include "src/common/proc_args.h" #include "src/common/read_config.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_auth.h" +#include "src/common/slurm_ext_sensors.h" #include "src/common/slurm_jobcomp.h" #include "src/common/slurm_topology.h" #include "src/common/slurm_priority.h" @@ -100,6 +102,7 @@ #include "src/slurmctld/read_config.h" #include "src/slurmctld/reservation.h" #include "src/slurmctld/slurmctld.h" +#include "src/slurmctld/slurmctld_plugstack.h" #include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/srun_comm.h" #include "src/slurmctld/state_save.h" @@ -236,10 +239,17 @@ int main(int argc, char *argv[]) slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; assoc_init_args_t assoc_init_arg; - pthread_t assoc_cache_thread; + pthread_t assoc_cache_thread = (pthread_t) 0; slurm_trigger_callbacks_t callbacks; char *dir_name; + /* + * Make sure we have no extra open files which + * would be propagated to spawned tasks. 
+ */ + for (i=3; i<256; i++) + (void) close(i); + /* * Establish initial configuration */ @@ -255,6 +265,9 @@ int main(int argc, char *argv[]) _update_nice(); _kill_old_slurmctld(); + for (i=0; i<3; i++) + fd_set_close_on_exec(i); + if (daemonize) { slurmctld_config.daemonize = 1; if (daemon(1, 1)) @@ -325,7 +338,7 @@ int main(int argc, char *argv[]) association_based_accounting = slurm_get_is_association_based_accounting(); accounting_enforce = slurmctld_conf.accounting_storage_enforce; - if(!strcasecmp(slurmctld_conf.accounting_storage_type, + if (!strcasecmp(slurmctld_conf.accounting_storage_type, "accounting_storage/slurmdbd")) { with_slurmdbd = 1; /* we need job_list not to be NULL */ @@ -405,8 +418,8 @@ int main(int argc, char *argv[]) slurm_attr_destroy(&thread_attr); } - info("slurmctld version %s started on cluster %s", - SLURM_VERSION_STRING, slurmctld_cluster_name); + info("%s version %s started on cluster %s", + slurm_prog_name, SLURM_VERSION_STRING, slurmctld_cluster_name); if ((error_code = gethostname_short(node_name, MAX_SLURM_NAME))) fatal("getnodename error %s", slurm_strerror(error_code)); @@ -443,6 +456,8 @@ int main(int argc, char *argv[]) fatal( "failed to initialize jobacct_gather plugin"); if (job_submit_plugin_init() != SLURM_SUCCESS ) fatal( "failed to initialize job_submit plugin"); + if (ext_sensors_init() != SLURM_SUCCESS ) + fatal( "failed to initialize ext_sensors plugin"); while (1) { /* initialization for each primary<->backup switch */ @@ -455,7 +470,7 @@ int main(int argc, char *argv[]) slurmctld_conf.backup_controller) == 0)) { slurm_sched_fini(); /* make sure shutdown */ primary = 0; - run_backup(); + run_backup(&callbacks); } else if (_valid_controller()) { (void) _shutdown_backup_controller(SHUTDOWN_WAIT); trigger_primary_ctld_res_ctrl(); @@ -516,10 +531,10 @@ int main(int argc, char *argv[]) if (slurm_priority_init() != SLURM_SUCCESS) fatal("failed to initialize priority plugin"); - if (slurm_sched_init() != SLURM_SUCCESS) 
fatal("failed to initialize scheduling plugin"); - + if (slurmctld_plugstack_init()) + fatal("failed to initialize slurmctld_plugstack"); /* * create attached thread to process RPCs @@ -575,10 +590,14 @@ int main(int argc, char *argv[]) switch_save(dir_name); xfree(dir_name); slurm_priority_fini(); + slurmctld_plugstack_fini(); shutdown_state_save(); pthread_join(slurmctld_config.thread_id_sig, NULL); pthread_join(slurmctld_config.thread_id_rpc, NULL); pthread_join(slurmctld_config.thread_id_save, NULL); + slurmctld_config.thread_id_sig = (pthread_t) 0; + slurmctld_config.thread_id_rpc = (pthread_t) 0; + slurmctld_config.thread_id_save = (pthread_t) 0; if (running_cache) { /* break out and end the association cache @@ -654,6 +673,7 @@ int main(int argc, char *argv[]) /* Some plugins are needed to purge job/node data structures, * unplug after other data structures are purged */ + ext_sensors_fini(); gres_plugin_fini(); job_submit_plugin_fini(); slurm_preempt_fini(); @@ -785,7 +805,7 @@ static int _reconfigure_slurm(void) assoc_mgr_set_missing_uids(); start_power_mgr(&slurmctld_config.thread_id_power); trigger_reconfig(); - priority_g_reconfig(); /* notify priority plugin too */ + priority_g_reconfig(true); /* notify priority plugin too */ schedule(0); /* has its own locks */ save_all_state(); @@ -901,6 +921,8 @@ static void *_slurmctld_rpc_mgr(void *no_data) /* initialize ports for RPCs */ lock_slurmctld(config_read_lock); nports = slurmctld_conf.slurmctld_port_count; + if (nports == 0) + fatal("slurmctld port count is zero"); sockfd = xmalloc(sizeof(slurm_fd_t) * nports); for (i=0; i<nports; i++) { sockfd[i] = slurm_init_msg_engine_addrname_port( @@ -908,6 +930,7 @@ static void *_slurmctld_rpc_mgr(void *no_data) slurmctld_conf.slurmctld_port+i); if (sockfd[i] == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_addrname_port error %m"); + fd_set_close_on_exec(sockfd[i]); slurm_get_stream_addr(sockfd[i], &srv_addr); slurm_get_ip_str(&srv_addr, &port, ip, 
sizeof(ip)); debug2("slurmctld listening on %s:%d", ip, ntohs(port)); @@ -959,6 +982,7 @@ static void *_slurmctld_rpc_mgr(void *no_data) _free_server_thread(); continue; } + fd_set_close_on_exec(newsockfd); conn_arg = xmalloc(sizeof(connection_arg_t)); conn_arg->newsockfd = newsockfd; if (slurmctld_config.shutdown_time) @@ -1088,36 +1112,16 @@ static void _free_server_thread(void) static int _accounting_cluster_ready() { - struct node_record *node_ptr; - int i; int rc = SLURM_ERROR; time_t event_time = time(NULL); - uint32_t cpus = 0; bitstr_t *total_node_bitmap = NULL; char *cluster_nodes = NULL; slurmctld_lock_t node_read_lock = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; lock_slurmctld(node_read_lock); - node_ptr = node_record_table_ptr; - for (i = 0; i < node_record_count; i++, node_ptr++) { - if (node_ptr->name == '\0') - continue; -#ifdef SLURM_NODE_ACCT_REGISTER - if (slurmctld_conf.fast_schedule) - cpus += node_ptr->config_ptr->cpus; - else - cpus += node_ptr->cpus; -#else - cpus += node_ptr->config_ptr->cpus; -#endif - } - /* Since cluster_cpus is used else where we need to keep a - local var here to avoid race conditions on cluster_cpus - not being correct. - */ - cluster_cpus = cpus; + set_cluster_cpus(); /* Now get the names of all the nodes on the cluster at this time and send it also. @@ -1132,7 +1136,7 @@ static int _accounting_cluster_ready() cluster_nodes, cluster_cpus, event_time); xfree(cluster_nodes); - if(rc == ACCOUNTING_FIRST_REG) { + if (rc == ACCOUNTING_FIRST_REG) { /* see if we are running directly to a database * instead of a slurmdbd. 
*/ @@ -1163,7 +1167,7 @@ static int _accounting_mark_all_nodes_down(char *reason) } xfree(state_file); - if((rc = acct_storage_g_flush_jobs_on_cluster(acct_db_conn, + if ((rc = acct_storage_g_flush_jobs_on_cluster(acct_db_conn, event_time)) == SLURM_ERROR) return rc; @@ -1172,7 +1176,7 @@ static int _accounting_mark_all_nodes_down(char *reason) for (i = 0; i < node_record_count; i++, node_ptr++) { if (node_ptr->name == '\0') continue; - if((rc = clusteracct_storage_g_node_down( + if ((rc = clusteracct_storage_g_node_down( acct_db_conn, node_ptr, event_time, reason, slurm_get_slurm_user_id())) @@ -1323,6 +1327,7 @@ static void *_slurmctld_background(void *no_data) static time_t last_group_time; static time_t last_health_check_time; static time_t last_acct_gather_node_time; + static time_t last_ext_sensors_time; static time_t last_no_resp_msg_time; static time_t last_ping_node_time; static time_t last_ping_srun_time; @@ -1336,6 +1341,7 @@ static void *_slurmctld_background(void *no_data) static time_t last_uid_update; static time_t last_reboot_msg_time; static bool ping_msg_sent = false; + static bool run_job_scheduler = false; time_t now; int no_resp_msg_interval, ping_interval, purge_job_interval; int group_time, group_force; @@ -1370,8 +1376,8 @@ static void *_slurmctld_background(void *no_data) last_purge_job_time = last_trigger = last_health_check_time = now; last_timelimit_time = last_assert_primary_time = now; last_no_resp_msg_time = last_resv_time = last_ctld_bu_ping = now; - last_uid_update = last_reboot_msg_time = last_acct_gather_node_time - = now; + last_uid_update = last_reboot_msg_time = now; + last_acct_gather_node_time = last_ext_sensors_time = now; if ((slurmctld_conf.min_job_age > 0) && (slurmctld_conf.min_job_age < PURGE_JOB_INTERVAL)) { @@ -1434,7 +1440,8 @@ static void *_slurmctld_background(void *no_data) now = time(NULL); last_resv_time = now; lock_slurmctld(node_write_lock); - set_node_maint_mode(false); + if (set_node_maint_mode(false) > 0) + 
run_job_scheduler = true; unlock_slurmctld(node_write_lock); } @@ -1479,6 +1486,17 @@ static void *_slurmctld_background(void *no_data) unlock_slurmctld(node_write_lock); } + if (slurmctld_conf.ext_sensors_freq && + (difftime(now, last_ext_sensors_time) >= + slurmctld_conf.ext_sensors_freq) && + is_ping_done()) { + now = time(NULL); + last_ext_sensors_time = now; + lock_slurmctld(node_write_lock); + ext_sensors_g_update_component_data(); + unlock_slurmctld(node_write_lock); + } + if (((difftime(now, last_ping_node_time) >= ping_interval) || ping_nodes_now) && is_ping_done()) { now = time(NULL); @@ -1545,9 +1563,11 @@ static void *_slurmctld_background(void *no_data) unlock_slurmctld(job_write_lock); } - if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) { + if ((difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) || + run_job_scheduler) { now = time(NULL); last_sched_time = now; + run_job_scheduler = false; if (schedule(INFINITE)) last_checkpoint_time = 0; /* force state save */ set_job_elig_time(); @@ -1673,6 +1693,35 @@ extern void send_all_to_accounting(time_t event_time) send_resvs_to_accounting(); } +/* A slurmctld lock needs to at least have a node read lock set before + * this is called */ +extern void set_cluster_cpus(void) +{ + uint32_t cpus = 0; + struct node_record *node_ptr; + int i; + + node_ptr = node_record_table_ptr; + for (i = 0; i < node_record_count; i++, node_ptr++) { + if (node_ptr->name == '\0') + continue; +#ifdef SLURM_NODE_ACCT_REGISTER + if (slurmctld_conf.fast_schedule) + cpus += node_ptr->config_ptr->cpus; + else + cpus += node_ptr->cpus; +#else + cpus += node_ptr->config_ptr->cpus; +#endif + } + + /* Since cluster_cpus is used else where we need to keep a + local var here to avoid race conditions on cluster_cpus + not being correct. 
+ */ + cluster_cpus = cpus; +} + /* * _report_locks_set - report any slurmctld locks left set * RET count of locks currently set @@ -1792,12 +1841,12 @@ static void _parse_commandline(int argc, char *argv[]) break; case 'r': recover = 1; - if(!bg_recover_override) + if (!bg_recover_override) bg_recover = 1; break; case 'R': recover = 2; - if(!bg_recover_override) + if (!bg_recover_override) bg_recover = 1; break; case 'v': @@ -1968,7 +2017,7 @@ void update_logging(void) if (slurmctld_conf.slurmctld_logfile) { rc = chown(slurmctld_conf.slurmctld_logfile, slurm_user_id, slurm_user_gid); - if (rc) { + if (rc && daemonize) { error("chown(%s, %d, %d): %m", slurmctld_conf.slurmctld_logfile, (int) slurm_user_id, (int) slurm_user_gid); @@ -1977,7 +2026,7 @@ void update_logging(void) if (slurmctld_conf.sched_logfile) { rc = chown(slurmctld_conf.sched_logfile, slurm_user_id, slurm_user_gid); - if (rc) { + if (rc && daemonize) { error("chown(%s, %d, %d): %m", slurmctld_conf.sched_logfile, (int) slurm_user_id, (int) slurm_user_gid); @@ -2029,10 +2078,9 @@ static void _init_pidfile(void) error("SlurmctldPid == SlurmdPid, use different names"); /* Don't close the fd returned here since we need to keep the - fd open to maintain the write lock. - */ - create_pidfile(slurmctld_conf.slurmctld_pidfile, - slurmctld_conf.slurm_user_id); + * fd open to maintain the write lock */ + (void) create_pidfile(slurmctld_conf.slurmctld_pidfile, + slurmctld_conf.slurm_user_id); } /* @@ -2080,13 +2128,13 @@ static void *_assoc_cache_mgr(void *no_data) /* This is here to see if we are exiting. If we get NO_VAL then just return since we are closing down. 
*/ - if(running_cache == (uint16_t)NO_VAL) { + if (running_cache == (uint16_t)NO_VAL) { slurm_mutex_unlock(&assoc_cache_mutex); return NULL; } lock_slurmctld(job_write_lock); assoc_mgr_refresh_lists(acct_db_conn, NULL); - if(running_cache) + if (running_cache) unlock_slurmctld(job_write_lock); slurm_mutex_unlock(&assoc_cache_mutex); } @@ -2121,10 +2169,10 @@ static void *_assoc_cache_mgr(void *no_data) (size_t)job_ptr->assoc_ptr, job_ptr->assoc_id, job_ptr->job_id); } - if(job_ptr->qos_id) { + if (job_ptr->qos_id) { memset(&qos_rec, 0, sizeof(slurmdb_qos_rec_t)); qos_rec.id = job_ptr->qos_id; - if((assoc_mgr_fill_in_qos( + if ((assoc_mgr_fill_in_qos( acct_db_conn, &qos_rec, accounting_enforce, (slurmdb_qos_rec_t **)&job_ptr->qos_ptr)) diff --git a/src/slurmctld/front_end.c b/src/slurmctld/front_end.c index 104322e7aa9ea40bf21324615d939eb45767b0a3..394263a708c2c8e9da93c7fa9f935f30ba6daed8 100644 --- a/src/slurmctld/front_end.c +++ b/src/slurmctld/front_end.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -48,6 +48,7 @@ #include "src/common/node_conf.h" #include "src/common/read_config.h" #include "src/common/slurm_protocol_defs.h" +#include "src/common/uid.h" #include "src/common/xstring.h" #include "src/slurmctld/front_end.h" #include "src/slurmctld/locks.h" @@ -126,7 +127,21 @@ static int _open_front_end_state_file(char **state_file) static void _pack_front_end(struct front_end_record *dump_front_end_ptr, Buf buffer, uint16_t protocol_version) { - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + packstr(dump_front_end_ptr->allow_groups, buffer); + packstr(dump_front_end_ptr->allow_users, buffer); + pack_time(dump_front_end_ptr->boot_time, buffer); + packstr(dump_front_end_ptr->deny_groups, buffer); + packstr(dump_front_end_ptr->deny_users, buffer); + packstr(dump_front_end_ptr->name, buffer); + pack16(dump_front_end_ptr->node_state, buffer); + + packstr(dump_front_end_ptr->reason, buffer); + pack_time(dump_front_end_ptr->reason_time, buffer); + pack32(dump_front_end_ptr->reason_uid, buffer); + + pack_time(dump_front_end_ptr->slurmd_start_time, buffer); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { pack_time(dump_front_end_ptr->boot_time, buffer); packstr(dump_front_end_ptr->name, buffer); pack16(dump_front_end_ptr->node_state, buffer); @@ -143,12 +158,55 @@ static void _pack_front_end(struct front_end_record *dump_front_end_ptr, } #endif +#ifdef HAVE_FRONT_END +/* Validate job's access to a specific front-end node */ +static bool _front_end_access(front_end_record_t *front_end_ptr, + struct job_record *job_ptr) +{ + int i; + + if (!job_ptr) + return true; + + if (front_end_ptr->deny_gids) { + for (i = 0; front_end_ptr->deny_gids[i]; i++) { + if (job_ptr->group_id == front_end_ptr->deny_gids[i]) + return false; + } + } + if (front_end_ptr->deny_uids) { + for (i = 0; front_end_ptr->deny_uids[i]; i++) { + if 
(job_ptr->user_id == front_end_ptr->deny_uids[i]) + return false; + } + } + if (front_end_ptr->allow_gids || front_end_ptr->allow_uids) { + if (front_end_ptr->allow_gids) { + for (i = 0; front_end_ptr->allow_gids[i]; i++) { + if (job_ptr->group_id == + front_end_ptr->allow_gids[i]) + return true; + } + } + if (front_end_ptr->allow_uids) { + for (i = 0; front_end_ptr->allow_uids[i]; i++) { + if (job_ptr->user_id == + front_end_ptr->allow_uids[i]) + return true; + } + } + return false; + } + return true; +} +#endif + /* * assign_front_end - assign a front end node for starting a job - * IN batch_host - previously set batch_host name + * job_ptr IN - job to assign a front end node (tests access control lists) * RET pointer to the front end node to use or NULL if none found */ -extern front_end_record_t *assign_front_end(char *batch_host) +extern front_end_record_t *assign_front_end(struct job_record *job_ptr) { #ifdef HAVE_FRONT_END static int last_assigned = -1; @@ -159,14 +217,18 @@ extern front_end_record_t *assign_front_end(char *batch_host) for (i = 0; i < front_end_node_cnt; i++) { last_assigned = (last_assigned + 1) % front_end_node_cnt; front_end_ptr = front_end_nodes + last_assigned; - if (batch_host) { /* Find specific front-end node */ - if (strcmp(batch_host, front_end_ptr->name)) + if (job_ptr->batch_host) { /* Find specific front-end node */ + if (strcmp(job_ptr->batch_host, front_end_ptr->name)) continue; + if (!_front_end_access(front_end_ptr, job_ptr)) + break; } else { /* Find some usable front-end node */ if (IS_NODE_DOWN(front_end_ptr) || IS_NODE_DRAIN(front_end_ptr) || IS_NODE_NO_RESPOND(front_end_ptr)) continue; + if (!_front_end_access(front_end_ptr, job_ptr)) + continue; } state_flags = front_end_nodes[last_assigned].node_state & NODE_STATE_FLAGS; @@ -175,9 +237,9 @@ extern front_end_record_t *assign_front_end(char *batch_host) front_end_nodes[last_assigned].job_cnt_run++; return front_end_ptr; } - if (batch_host) { /* Find specific front-end 
node */ + if (job_ptr->batch_host) { /* Find specific front-end node */ error("assign_front_end: front end node %s not found", - batch_host); + job_ptr->batch_host); } else { /* Find some usable front-end node */ error("assign_front_end: no available front end nodes found"); } @@ -187,8 +249,10 @@ extern front_end_record_t *assign_front_end(char *batch_host) /* * avail_front_end - test if any front end nodes are available for starting job + * job_ptr IN - job to consider for starting (tests access control lists) or + * NULL to test if any job can start (no test of ACL) */ -extern bool avail_front_end(void) +extern bool avail_front_end(struct job_record *job_ptr) { #ifdef HAVE_FRONT_END front_end_record_t *front_end_ptr; @@ -200,6 +264,8 @@ extern bool avail_front_end(void) IS_NODE_DRAIN(front_end_ptr) || IS_NODE_NO_RESPOND(front_end_ptr)) continue; + if (!_front_end_access(front_end_ptr, job_ptr)) + continue; return true; } return false; @@ -312,12 +378,16 @@ extern void log_front_end_state(void) i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); info("FrontendName=%s FrontendAddr=%s Port=%u State=%s " - "Reason=%s JobCntRun=%u JobCntComp=%u", + "Reason=%s JobCntRun=%u JobCntComp=%u " + "AllowGroups=%s AllowUsers=%s " + "DenyGroups=%s DenyUsers=%s ", front_end_ptr->name, front_end_ptr->comm_name, front_end_ptr->port, node_state_string(front_end_ptr->node_state), front_end_ptr->reason, front_end_ptr->job_cnt_run, - front_end_ptr->job_cnt_comp); + front_end_ptr->job_cnt_comp, + front_end_ptr->allow_groups, front_end_ptr->allow_users, + front_end_ptr->deny_groups, front_end_ptr->deny_users); } #endif } @@ -334,7 +404,11 @@ extern void purge_front_end_state(void) for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); + xfree(front_end_ptr->allow_groups); + xfree(front_end_ptr->allow_users); xfree(front_end_ptr->comm_name); + 
xfree(front_end_ptr->deny_groups); + xfree(front_end_ptr->deny_users); xfree(front_end_ptr->name); xfree(front_end_ptr->reason); } @@ -343,6 +417,60 @@ extern void purge_front_end_state(void) #endif } +/* Translate comma delimited string of GIDs/group names into a zero terminated + * array of GIDs */ +gid_t *_xlate_groups(char *group_str, char *key) +{ + char *tmp_str, *token, *save_ptr = NULL; + gid_t *gids_array = NULL; + int array_size = 0; + gid_t gid; + + if (!group_str || !group_str[0]) + return gids_array; + + tmp_str = xstrdup(group_str); + token = strtok_r(tmp_str, ",", &save_ptr); + while (token) { + if (gid_from_string(token, &gid) || (gid == (gid_t) 0)) { + error("Invalid %s value (%s), ignored", key, token); + } else { + xrealloc(gids_array, sizeof(gid_t) * (array_size+2)); + gids_array[array_size++] = gid; + } + token = strtok_r(NULL, ",", &save_ptr); + } + xfree(tmp_str); + return gids_array; +} + +/* Translate comma delimited string of UIDs/user names into a zero terminated + * array of UIDs */ +uid_t *_xlate_users(char *user_str, char *key) +{ + char *tmp_str, *token, *save_ptr = NULL; + uid_t *uids_array = NULL; + int array_size = 0; + uid_t uid; + + if (!user_str || !user_str[0]) + return uids_array; + + tmp_str = xstrdup(user_str); + token = strtok_r(tmp_str, ",", &save_ptr); + while (token) { + if (uid_from_string(token, &uid) || (uid == (uid_t) 0)) { + error("Invalid %s value (%s), ignored", key, token); + } else { + xrealloc(uids_array, sizeof(uid_t) * (array_size+2)); + uids_array[array_size++] = uid; + } + token = strtok_r(NULL, ",", &save_ptr); + } + xfree(tmp_str); + return uids_array; +} + /* * restore_front_end_state - restore frontend node state * IN recover - replace job, node and/or partition data with latest @@ -368,8 +496,6 @@ extern void restore_front_end_state(int recover) return; /* No front ends in slurm.conf */ iter = list_iterator_create(front_end_list); - if (iter == NULL) - fatal("list_iterator_create: malloc failure"); 
while ((slurm_conf_fe_ptr = (slurm_conf_frontend_t *) list_next(iter))) { if (slurm_conf_fe_ptr->frontends == NULL) @@ -388,6 +514,44 @@ extern void restore_front_end_state(int recover) xstrdup(slurm_conf_fe_ptr->frontends); front_end_nodes[i].magic = FRONT_END_MAGIC; } + + xfree(front_end_nodes[i].allow_gids); + xfree(front_end_nodes[i].allow_groups); + if (slurm_conf_fe_ptr->allow_groups) { + front_end_nodes[i].allow_groups = + xstrdup(slurm_conf_fe_ptr->allow_groups); + front_end_nodes[i].allow_gids = + _xlate_groups(slurm_conf_fe_ptr->allow_groups, + "AllowGroups"); + } + xfree(front_end_nodes[i].allow_uids); + xfree(front_end_nodes[i].allow_users); + if (slurm_conf_fe_ptr->allow_users) { + front_end_nodes[i].allow_users = + xstrdup(slurm_conf_fe_ptr->allow_users); + front_end_nodes[i].allow_uids = + _xlate_users(slurm_conf_fe_ptr->allow_users, + "AllowUsers"); + } + xfree(front_end_nodes[i].deny_gids); + xfree(front_end_nodes[i].deny_groups); + if (slurm_conf_fe_ptr->deny_groups) { + front_end_nodes[i].deny_groups = + xstrdup(slurm_conf_fe_ptr->deny_groups); + front_end_nodes[i].deny_gids = + _xlate_groups(slurm_conf_fe_ptr->deny_groups, + "DenyGroups"); + } + xfree(front_end_nodes[i].deny_uids); + xfree(front_end_nodes[i].deny_users); + if (slurm_conf_fe_ptr->deny_users) { + front_end_nodes[i].deny_users = + xstrdup(slurm_conf_fe_ptr->deny_users); + front_end_nodes[i].deny_uids = + _xlate_users(slurm_conf_fe_ptr->deny_users, + "DenyUsers"); + } + xfree(front_end_nodes[i].comm_name); if (slurm_conf_fe_ptr->addresses) { front_end_nodes[i].comm_name = diff --git a/src/slurmctld/front_end.h b/src/slurmctld/front_end.h index c6f3cc270bb0a0a2adda9d04b295b15727e81517..bd28db8096f68d3db140c36cd522539357cb40d5 100644 --- a/src/slurmctld/front_end.h +++ b/src/slurmctld/front_end.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -43,15 +43,17 @@ /* * assign_front_end - assign a front end node for starting a job - * IN batch_host - previously set batch_host name + * job_ptr IN - job to assign a front end node (tests access control lists) * RET pointer to the front end node to use or NULL if none found */ -extern front_end_record_t *assign_front_end(char *batch_host); +extern front_end_record_t *assign_front_end(struct job_record *job_ptr); /* * avail_front_end - test if any front end nodes are available for starting job + * job_ptr IN - job to consider for starting (tests access control lists) or + * NULL to test if any job can start (no test of ACL) */ -extern bool avail_front_end(void); +extern bool avail_front_end(struct job_record *job_ptr); /* dump_all_front_end_state - save the state of all front_end nodes to file */ extern int dump_all_front_end_state(void); diff --git a/src/slurmctld/gang.c b/src/slurmctld/gang.c index 4c130f6db8798c2376374dd8dc61229f7294182f..006f70333a307e1da96c4ca51873569e5f97f489 100644 --- a/src/slurmctld/gang.c +++ b/src/slurmctld/gang.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -342,8 +342,6 @@ static void _build_parts(void) gs_part_list = list_create(_destroy_parts); part_iterator = list_iterator_create(part_list); - if (part_iterator == NULL) - fatal ("memory allocation failure"); while ((p_ptr = (struct part_record *) list_next(part_iterator))) { gs_part_ptr = xmalloc(sizeof(struct gs_part)); gs_part_ptr->part_name = xstrdup(p_ptr->name); @@ -422,8 +420,6 @@ static int _job_fits_in_active_row(struct job_record *job_ptr, /* gr_type == GS_NODE || gr_type == GS_CPU */ job_map = bit_copy(job_res->node_bitmap); - if (!job_map) - fatal("gang: memory allocation error"); bit_and(job_map, p_ptr->active_resmap); /* any set bits indicate contention for the same resource */ count = bit_set_count(job_map); @@ -705,8 +701,6 @@ static void _cast_shadow(struct gs_job *j_ptr, uint16_t priority) int i; part_iterator = list_iterator_create(gs_part_list); - if (part_iterator == NULL) - fatal("memory allocation failure"); while ((p_ptr = (struct gs_part *) list_next(part_iterator))) { if (p_ptr->priority >= priority) continue; @@ -747,8 +741,6 @@ static void _clear_shadow(struct gs_job *j_ptr) int i; part_iterator = list_iterator_create(gs_part_list); - if (part_iterator == NULL) - fatal("memory allocation failure"); while ((p_ptr = (struct gs_part *) list_next(part_iterator))) { if (!p_ptr->shadow) continue; @@ -881,8 +873,6 @@ static void _update_all_active_rows(void) list_sort(gs_part_list, _sort_partitions); part_iterator = list_iterator_create(gs_part_list); - if (part_iterator == NULL) - fatal("memory allocation failure"); while ((p_ptr = (struct gs_part *) list_next(part_iterator))) _update_active_row(p_ptr, 1); list_iterator_destroy(part_iterator); @@ -1054,8 +1044,6 @@ static void _scan_slurm_job_list(void) if (gs_debug_flags & DEBUG_FLAG_GANG) info("gang: _scan_slurm_job_list: job_list exists..."); job_iterator = list_iterator_create(job_list); - if (job_iterator == 
NULL) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (gs_debug_flags & DEBUG_FLAG_GANG) { info("gang: _scan_slurm_job_list: checking job %u", @@ -1368,8 +1356,6 @@ extern int gs_reconfig(void) /* scan the old part list and add existing jobs to the new list */ part_iterator = list_iterator_create(old_part_list); - if (part_iterator == NULL) - fatal ("memory allocation failure"); while ((p_ptr = (struct gs_part *) list_next(part_iterator))) { newp_ptr = (struct gs_part *) list_find_first(gs_part_list, _find_gs_part, @@ -1593,8 +1579,6 @@ static void *_timeslicer_thread(void *arg) if (gs_debug_flags & DEBUG_FLAG_GANG) info("gang: _timeslicer_thread: scanning partitions"); part_iterator = list_iterator_create(gs_part_list); - if (part_iterator == NULL) - fatal("memory allocation failure"); while ((p_ptr = (struct gs_part *) list_next(part_iterator))) { if (gs_debug_flags & DEBUG_FLAG_GANG) { info("gang: _timeslicer_thread: part %s: " diff --git a/src/slurmctld/gang.h b/src/slurmctld/gang.h index f013f6d936c3bf5c9716077f76381974d2d53cb7..579d38020c0d3ee2b3e7405d21452871a1d1207c 100644 --- a/src/slurmctld/gang.h +++ b/src/slurmctld/gang.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/groups.c b/src/slurmctld/groups.c index c3e2cc284b57043e82158b920bb45c794dd41495..77284842422e78cc75df95d842a9f43e86e459e9 100644 --- a/src/slurmctld/groups.c +++ b/src/slurmctld/groups.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -215,8 +215,6 @@ static uid_t *_get_group_cache(char *group_name) } iter = list_iterator_create(group_cache_list); - if (!iter) - fatal("list_iterator_create: malloc failure"); while ((cache_rec = (struct group_cache_rec *) list_next(iter))) { if (strcmp(group_name, cache_rec->group_name)) continue; @@ -250,8 +248,6 @@ static void _put_group_cache(char *group_name, void *group_uids, int uid_cnt) pthread_mutex_lock(&group_cache_mutex); if (!group_cache_list) { group_cache_list = list_create(_cache_del_func); - if (!group_cache_list) - fatal("list_create: malloc failure:"); } sz = sizeof(uid_t) * (uid_cnt); diff --git a/src/slurmctld/groups.h b/src/slurmctld/groups.h index 914fe525c83f50e4026f9db30b1e2cdf6bf6b191..1bcf161cf073a1df86ad61536fbd6bae15236975 100644 --- a/src/slurmctld/groups.h +++ b/src/slurmctld/groups.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index aeafb13e90e90aeb43a5ba3b4fb84c7650d64cde..3f875f0bc830917713f65f443c7a67b4aefba9d2 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -56,12 +56,15 @@ #include <strings.h> #include <sys/stat.h> #include <sys/types.h> +#include <sys/param.h> #include <unistd.h> #include "slurm/slurm_errno.h" +#include "src/common/slurm_acct_gather.h" #include "src/common/assoc_mgr.h" #include "src/common/bitstring.h" +#include "src/common/fd.h" #include "src/common/forward.h" #include "src/common/gres.h" #include "src/common/hostlist.h" @@ -89,6 +92,7 @@ #include "src/slurmctld/reservation.h" #include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/slurmctld.h" +#include "src/slurmctld/slurmctld_plugstack.h" #include "src/slurmctld/srun_comm.h" #include "src/slurmctld/state_save.h" #include "src/slurmctld/trigger_mgr.h" @@ -101,7 +105,8 @@ #define JOB_HASH_INX(_job_id) (_job_id % hash_table_size) /* Change JOB_STATE_VERSION value when changing the state save format */ -#define JOB_STATE_VERSION "VER013" +#define JOB_STATE_VERSION "VER014" +#define JOB_2_6_STATE_VERSION "VER014" /* SLURM version 2.6 */ #define JOB_2_5_STATE_VERSION "VER013" /* SLURM version 2.5 */ #define JOB_2_4_STATE_VERSION "VER012" /* SLURM version 2.4 */ #define JOB_2_3_STATE_VERSION "VER011" /* SLURM version 2.3 */ @@ -131,6 +136,7 @@ static bool wiki_sched_test = false; static void _add_job_hash(struct job_record *job_ptr); static int _checkpoint_job_record (struct job_record *job_ptr, char *image_dir); +static int _copy_job_desc_files(uint32_t job_id_src, uint32_t job_id_dest); static int _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id); static int _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, @@ -182,7 +188,7 @@ static int _reset_detail_bitmaps(struct job_record *job_ptr); static void _reset_step_bitmaps(struct job_record *job_ptr); static int _resume_job_nodes(struct job_record *job_ptr, bool indf_susp); static void _send_job_kill(struct job_record *job_ptr); -static void _set_job_id(struct job_record *job_ptr); +static int 
_set_job_id(struct job_record *job_ptr); static void _signal_batch_job(struct job_record *job_ptr, uint16_t signal); static void _signal_job(struct job_record *job_ptr, int signal); static void _suspend_job(struct job_record *job_ptr, uint16_t op, @@ -197,7 +203,6 @@ static int _write_data_array_to_file(char *file_name, char **data, uint32_t size); static void _xmit_new_end_time(struct job_record *job_ptr); - /* * create_job_record - create an empty job_record including job_details. * load its values with defaults (zeros, nulls, and magic cookie) @@ -227,18 +232,16 @@ struct job_record *create_job_record(int *error_code) detail_ptr = (struct job_details *)xmalloc(sizeof(struct job_details)); job_ptr->magic = JOB_MAGIC; + job_ptr->array_task_id = (uint16_t) NO_VAL; job_ptr->details = detail_ptr; job_ptr->prio_factors = xmalloc(sizeof(priority_factors_object_t)); job_ptr->step_list = list_create(NULL); - if (job_ptr->step_list == NULL) - fatal("memory allocation failure"); xassert (detail_ptr->magic = DETAILS_MAGIC); /* set value */ detail_ptr->submit_time = time(NULL); job_ptr->requid = -1; /* force to -1 for sacct to know this * hasn't been set yet */ - if (list_append(job_list, job_ptr) == 0) - fatal("list_append memory allocation failure"); + (void) list_append(job_list, job_ptr); return job_ptr; } @@ -261,6 +264,7 @@ void delete_job_details(struct job_record *job_entry) if (IS_JOB_FINISHED(job_entry)) _delete_job_desc_files(job_entry->job_id); + xfree(job_entry->details->acctg_freq); for (i=0; i<job_entry->details->argc; i++) xfree(job_entry->details->argv[i]); xfree(job_entry->details->argv); @@ -496,8 +500,12 @@ int dump_all_job_state(void) new_file); error_code = errno; } else { - int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; - char *data = (char *)get_buf_data(buffer); + int pos = 0, nwrite, amount, rc; + char *data; + + fd_set_close_on_exec(log_fd); + nwrite = get_buf_offset(buffer); + data = (char *)get_buf_data(buffer); high_buffer_size = 
MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); @@ -518,11 +526,11 @@ int dump_all_job_state(void) (void) unlink(new_file); else { /* file shuffle */ (void) unlink(old_file); - if(link(reg_file, old_file)) + if (link(reg_file, old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink(reg_file); - if(link(new_file, reg_file)) + if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink(new_file); @@ -626,10 +634,10 @@ extern int load_all_job_state(void) if (ver_str) { if (!strcmp(ver_str, JOB_STATE_VERSION)) { protocol_version = SLURM_PROTOCOL_VERSION; + } else if (!strcmp(ver_str, JOB_2_5_STATE_VERSION)) { + protocol_version = SLURM_2_5_PROTOCOL_VERSION; } else if (!strcmp(ver_str, JOB_2_4_STATE_VERSION)) { protocol_version = SLURM_2_4_PROTOCOL_VERSION; - } else if (!strcmp(ver_str, JOB_2_3_STATE_VERSION)) { - protocol_version = SLURM_2_3_PROTOCOL_VERSION; } } @@ -689,6 +697,7 @@ extern int load_last_job_id( void ) time_t buf_time; char *ver_str = NULL; uint32_t ver_str_len; + uint16_t protocol_version = (uint16_t)NO_VAL; /* read the file */ state_file = slurm_get_state_save_location(); @@ -729,15 +738,24 @@ extern int load_last_job_id( void ) buffer = create_buf(data, data_size); safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer); debug3("Version string in job_state header is %s", ver_str); - if ((!ver_str) || (strcmp(ver_str, JOB_STATE_VERSION) != 0)) { + if (ver_str) { + if (!strcmp(ver_str, JOB_STATE_VERSION)) { + protocol_version = SLURM_PROTOCOL_VERSION; + } else if (!strcmp(ver_str, JOB_2_5_STATE_VERSION)) { + protocol_version = SLURM_2_5_PROTOCOL_VERSION; + } else if (!strcmp(ver_str, JOB_2_4_STATE_VERSION)) { + protocol_version = SLURM_2_4_PROTOCOL_VERSION; + } + } + xfree(ver_str); + + if (protocol_version == (uint16_t)NO_VAL) { debug("*************************************************"); debug("Can not recover 
last job ID, incompatible version"); debug("*************************************************"); - xfree(ver_str); free_buf(buffer); return EFAULT; } - xfree(ver_str); safe_unpack_time(&buf_time, buffer); safe_unpack32( &job_id_sequence, buffer); @@ -767,6 +785,8 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) struct step_record *step_ptr; /* Dump basic job info */ + pack32(dump_job_ptr->array_job_id, buffer); + pack16(dump_job_ptr->array_task_id, buffer); pack32(dump_job_ptr->assoc_id, buffer); pack32(dump_job_ptr->job_id, buffer); pack32(dump_job_ptr->user_id, buffer); @@ -786,6 +806,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) pack32(dump_job_ptr->qos_id, buffer); pack32(dump_job_ptr->req_switch, buffer); pack32(dump_job_ptr->wait4switch, buffer); + pack32(dump_job_ptr->profile, buffer); pack_time(dump_job_ptr->preempt_time, buffer); pack_time(dump_job_ptr->start_time, buffer); @@ -872,6 +893,8 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) step_iterator = list_iterator_create(dump_job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next(step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; pack16((uint16_t) STEP_FLAG, buffer); dump_job_step_state(dump_job_ptr, step_ptr, buffer); } @@ -886,10 +909,12 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) uint32_t exit_code, assoc_id, db_index, name_len, time_min; uint32_t next_step_id, total_cpus, total_nodes = 0, cpu_cnt; uint32_t resv_id, spank_job_env_size = 0, qos_id, derived_ec = 0; - uint32_t req_switch = 0, wait4switch = 0; + uint32_t array_job_id = 0, req_switch = 0, wait4switch = 0; + uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET; time_t start_time, end_time, suspend_time, pre_sus_time, tot_sus_time; time_t preempt_time = 0; time_t resize_time = 0, now = time(NULL); + uint16_t array_task_id = (uint16_t) NO_VAL; uint16_t job_state, details, batch_flag, step_flag; 
uint16_t kill_on_node_fail, direct_set_prio; uint16_t alloc_resp_port, other_port, mail_type, state_reason; @@ -917,7 +942,9 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) slurmdb_qos_rec_t qos_rec; bool job_finished = false; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack32(&array_job_id, buffer); + safe_unpack16(&array_task_id, buffer); safe_unpack32(&assoc_id, buffer); safe_unpack32(&job_id, buffer); @@ -956,6 +983,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) safe_unpack32(&qos_id, buffer); safe_unpack32(&req_switch, buffer); safe_unpack32(&wait4switch, buffer); + safe_unpack32(&profile, buffer); safe_unpack_time(&preempt_time, buffer); safe_unpack_time(&start_time, buffer); @@ -1003,14 +1031,15 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) part_ptr = find_part_record (partition); if (part_ptr == NULL) { part_ptr_list = get_part_list(partition); - if (part_ptr_list) + if (part_ptr_list) { part_ptr = list_peek(part_ptr_list); - } - if (part_ptr == NULL) { - verbose("Invalid partition (%s) for job_id %u", - partition, job_id); - /* not fatal error, partition could have been removed, - * reset_job_bitmaps() will clean-up this job */ + } else { + verbose("Invalid partition (%s) for job_id %u", + partition, job_id); + /* not fatal error, partition could have been + * removed, reset_job_bitmaps() will clean-up + * this job */ + } } safe_unpackstr_xmalloc(&name, &name_len, buffer); @@ -1072,7 +1101,9 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) goto unpack_error; safe_unpack16(&step_flag, buffer); } - } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + /* NOTE: As of 12/18/12 the job state of v2.5 and v2.6 are + * the same, but the step states differ */ safe_unpack32(&assoc_id, buffer); safe_unpack32(&job_id, buffer); @@ 
-1174,6 +1205,9 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) safe_unpackstr_xmalloc(&account, &name_len, buffer); safe_unpackstr_xmalloc(&comment, &name_len, buffer); safe_unpackstr_xmalloc(&gres, &name_len, buffer); + safe_unpackstr_xmalloc(&gres_alloc, &name_len, buffer); + safe_unpackstr_xmalloc(&gres_req, &name_len, buffer); + safe_unpackstr_xmalloc(&gres_used, &name_len, buffer); safe_unpackstr_xmalloc(&network, &name_len, buffer); safe_unpackstr_xmalloc(&licenses, &name_len, buffer); safe_unpackstr_xmalloc(&mail_user, &name_len, buffer); @@ -1224,7 +1258,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) goto unpack_error; safe_unpack16(&step_flag, buffer); } - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpack32(&assoc_id, buffer); safe_unpack32(&job_id, buffer); @@ -1287,7 +1321,9 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) safe_unpack16(&limit_set_max_nodes, buffer); safe_unpack16(&limit_set_min_cpus, buffer); safe_unpack16(&limit_set_min_nodes, buffer); + safe_unpack16(&limit_set_pn_min_memory, buffer); safe_unpack16(&limit_set_time, buffer); + safe_unpack16(&limit_set_qos, buffer); safe_unpackstr_xmalloc(&state_desc, &name_len, buffer); safe_unpackstr_xmalloc(&resp_host, &name_len, buffer); @@ -1409,6 +1445,8 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) alloc_node = NULL; /* reused, nothing left to free */ job_ptr->alloc_resp_port = alloc_resp_port; job_ptr->alloc_sid = alloc_sid; + job_ptr->array_job_id = array_job_id; + job_ptr->array_task_id = array_task_id; job_ptr->assoc_id = assoc_id; job_ptr->batch_flag = batch_flag; xfree(job_ptr->batch_host); @@ -1516,6 +1554,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) job_ptr->limit_set_qos = limit_set_qos; job_ptr->req_switch = req_switch; job_ptr->wait4switch = wait4switch; + job_ptr->profile = profile; /* 
This needs to always to initialized to "true". The select plugin will deal with it every time it goes through the logic if req_switch or wait4switch are set. @@ -1563,7 +1602,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) } /* make sure we have this job completed in the * database */ - if(IS_JOB_FINISHED(job_ptr)) { + if (IS_JOB_FINISHED(job_ptr)) { jobacct_storage_g_job_complete(acct_db_conn, job_ptr); job_finished = 1; } @@ -1635,7 +1674,7 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer) pack32(detail_ptr->max_nodes, buffer); pack32(detail_ptr->num_tasks, buffer); - pack16(detail_ptr->acctg_freq, buffer); + packstr(detail_ptr->acctg_freq, buffer); pack16(detail_ptr->contiguous, buffer); pack16(detail_ptr->cpus_per_task, buffer); pack16(detail_ptr->nice, buffer); @@ -1683,8 +1722,9 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer) static int _load_job_details(struct job_record *job_ptr, Buf buffer, uint16_t protocol_version) { - char *req_nodes = NULL, *exc_nodes = NULL, *features = NULL; - char *cpu_bind, *dependency = NULL, *orig_dependency = NULL, *mem_bind; + char *acctg_freq = NULL, *req_nodes = NULL, *exc_nodes = NULL; + char *features = NULL, *cpu_bind = NULL, *dependency = NULL; + char *orig_dependency = NULL, *mem_bind; char *err = NULL, *in = NULL, *out = NULL, *work_dir = NULL; char *ckpt_dir = NULL, *restart_dir = NULL; char **argv = (char **) NULL, **env_sup = (char **) NULL; @@ -1693,7 +1733,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer, uint32_t pn_min_cpus, pn_min_memory, pn_min_tmp_disk; uint32_t num_tasks, name_len, argc = 0, env_cnt = 0; uint16_t shared, contiguous, nice, ntasks_per_node; - uint16_t acctg_freq, cpus_per_task, requeue, task_dist; + uint16_t cpus_per_task, requeue, task_dist, tmp_uint16 = 0; uint16_t cpu_bind_type, mem_bind_type, plane_size; uint8_t open_mode, overcommit, prolog_running; time_t begin_time, submit_time; @@ -1701,14 +1741,71 
@@ static int _load_job_details(struct job_record *job_ptr, Buf buffer, multi_core_data_t *mc_ptr; /* unpack the job's details from the buffer */ - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack32(&min_cpus, buffer); + safe_unpack32(&max_cpus, buffer); + safe_unpack32(&min_nodes, buffer); + safe_unpack32(&max_nodes, buffer); + safe_unpack32(&num_tasks, buffer); + + safe_unpackstr_xmalloc(&acctg_freq, &name_len, buffer); + if (acctg_freq && !strcmp(acctg_freq, "65534")) { + /* This fixes job state generated by version 2.6.0, + * in which a version 2.5 value of NO_VAL was converted + * from uint16_t to a string. */ + xfree(acctg_freq); + } + safe_unpack16(&contiguous, buffer); + safe_unpack16(&cpus_per_task, buffer); + safe_unpack16(&nice, buffer); + safe_unpack16(&ntasks_per_node, buffer); + safe_unpack16(&requeue, buffer); + safe_unpack16(&shared, buffer); + safe_unpack16(&task_dist, buffer); + + safe_unpackstr_xmalloc(&cpu_bind, &name_len, buffer); + safe_unpack16(&cpu_bind_type, buffer); + safe_unpackstr_xmalloc(&mem_bind, &name_len, buffer); + safe_unpack16(&mem_bind_type, buffer); + safe_unpack16(&plane_size, buffer); + + safe_unpack8(&open_mode, buffer); + safe_unpack8(&overcommit, buffer); + safe_unpack8(&prolog_running, buffer); + + safe_unpack32(&pn_min_cpus, buffer); + safe_unpack32(&pn_min_memory, buffer); + safe_unpack32(&pn_min_tmp_disk, buffer); + safe_unpack_time(&begin_time, buffer); + safe_unpack_time(&submit_time, buffer); + + safe_unpackstr_xmalloc(&req_nodes, &name_len, buffer); + safe_unpackstr_xmalloc(&exc_nodes, &name_len, buffer); + safe_unpackstr_xmalloc(&features, &name_len, buffer); + safe_unpackstr_xmalloc(&dependency, &name_len, buffer); + safe_unpackstr_xmalloc(&orig_dependency, &name_len, buffer); + + safe_unpackstr_xmalloc(&err, &name_len, buffer); + safe_unpackstr_xmalloc(&in, &name_len, buffer); + safe_unpackstr_xmalloc(&out, &name_len, buffer); + 
safe_unpackstr_xmalloc(&work_dir, &name_len, buffer); + safe_unpackstr_xmalloc(&ckpt_dir, &name_len, buffer); + safe_unpackstr_xmalloc(&restart_dir, &name_len, buffer); + + if (unpack_multi_core_data(&mc_ptr, buffer, protocol_version)) + goto unpack_error; + safe_unpackstr_array(&argv, &argc, buffer); + safe_unpackstr_array(&env_sup, &env_cnt, buffer); + } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { safe_unpack32(&min_cpus, buffer); safe_unpack32(&max_cpus, buffer); safe_unpack32(&min_nodes, buffer); safe_unpack32(&max_nodes, buffer); safe_unpack32(&num_tasks, buffer); - safe_unpack16(&acctg_freq, buffer); + safe_unpack16(&tmp_uint16, buffer); + if (tmp_uint16 && (tmp_uint16 != (uint16_t) NO_VAL)) + acctg_freq = xstrdup_printf("%u", tmp_uint16); safe_unpack16(&contiguous, buffer); safe_unpack16(&cpus_per_task, buffer); safe_unpack16(&nice, buffer); @@ -1774,6 +1871,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer, } /* free any left-over detail data */ + xfree(job_ptr->details->acctg_freq); for (i=0; i<job_ptr->details->argc; i++) xfree(job_ptr->details->argv[i]); xfree(job_ptr->details->argv); @@ -1844,6 +1942,7 @@ unpack_error: /* for (i=0; i<argc; i++) xfree(argv[i]); Don't trust this on unpack error */ + xfree(acctg_freq); xfree(argv); xfree(cpu_bind); xfree(dependency); @@ -1878,12 +1977,44 @@ void _add_job_hash(struct job_record *job_ptr) job_hash[inx] = job_ptr; } +/* + * find_job_array_rec - return a pointer to the job record with the given + * array_job_id/array_task_id + * IN array_job_id - requested job array's id + * IN array_task_id - requested job's task id (NO_VAL if none specified) + * RET pointer to the job's record, NULL on error + */ +extern struct job_record *find_job_array_rec(uint32_t array_job_id, + uint16_t array_task_id) +{ + ListIterator job_iterator; + struct job_record *job_ptr, *match_job_ptr = NULL; + + if (array_task_id == (uint16_t) NO_VAL) + return find_job_record(array_job_id); + + job_iterator =
list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (job_ptr->array_job_id != array_job_id) + continue; + + if (array_task_id == (uint16_t) INFINITE) { + match_job_ptr = job_ptr; + if (!IS_JOB_FINISHED(job_ptr)) + break; + } else if (job_ptr->array_task_id == array_task_id) { + match_job_ptr = job_ptr; + break; + } + } + list_iterator_destroy(job_iterator); + return match_job_ptr; +} + /* * find_job_record - return a pointer to the job record with the given job_id * IN job_id - requested job's id * RET pointer to the job's record, NULL on error - * global: job_list - global job list pointer - * job_hash - hash table into job records */ struct job_record *find_job_record(uint32_t job_id) { @@ -1916,8 +2047,6 @@ static void _rebuild_part_name_list(struct job_record *job_ptr) job_pending = true; part_iterator = list_iterator_create(job_ptr->part_ptr_list); - if (part_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { if (job_pending) { /* Reset job's one partition to a valid one */ @@ -1963,8 +2092,6 @@ extern int kill_job_by_part_name(char *part_name) bool rebuild_name_list = false; part_iterator = list_iterator_create(job_ptr-> part_ptr_list); - if (part_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((part2_ptr = (struct part_record *) list_next(part_iterator))) { if (part2_ptr != part_ptr) @@ -2105,7 +2232,8 @@ extern int kill_job_by_front_end_name(char *node_name) } else if (IS_JOB_RUNNING(job_ptr) || suspended) { job_count++; if (job_ptr->batch_flag && job_ptr->details && - (job_ptr->details->requeue > 0)) { + slurmctld_conf.job_requeue && + (job_ptr->details->requeue > 0)) { char requeue_msg[128]; srun_node_fail(job_ptr->job_id, node_name); @@ -2138,11 +2266,15 @@ extern int kill_job_by_front_end_name(char *node_name) job_completion_logger(job_ptr, true); deallocate_nodes(job_ptr, false, suspended, 
false); - job_ptr->db_index = 0; + + /* do this after the epilog complete, + * setting it here is too early */ + //job_ptr->db_index = 0; + //job_ptr->details->submit_time = now; + job_ptr->job_state = JOB_PENDING; if (job_ptr->node_cnt) job_ptr->job_state |= JOB_COMPLETING; - job_ptr->details->submit_time = now; /* restart from periodic checkpoint */ if (job_ptr->ckpt_interval && @@ -2212,8 +2344,6 @@ extern bool partition_in_use(char *part_name) return false; job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (job_ptr->part_ptr == part_ptr) { if (!IS_JOB_FINISHED(job_ptr)) { @@ -2245,8 +2375,6 @@ extern bool allocated_session_in_use(job_desc_msg_t *new_alloc) lock_slurmctld(job_read_lock); job_iter = list_iterator_create(job_list); - if (job_iter == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *)list_next(job_iter))) { if (job_ptr->batch_flag || IS_JOB_FINISHED(job_ptr)) @@ -2283,13 +2411,13 @@ extern int kill_running_job_by_node_name(char *node_name) bit_position = node_ptr - node_record_table_ptr; job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { bool suspended = false; if ((job_ptr->node_bitmap == NULL) || (!bit_test(job_ptr->node_bitmap, bit_position))) continue; /* job not on this node */ + if (nonstop_ops.node_fail) + (nonstop_ops.node_fail)(job_ptr, node_ptr); if (IS_JOB_SUSPENDED(job_ptr)) { enum job_states suspend_job_state = job_ptr->job_state; /* we can't have it as suspended when we call the @@ -2339,6 +2467,7 @@ extern int kill_running_job_by_node_name(char *node_name) excise_node_from_job(job_ptr, node_ptr); job_post_resize_acctg(job_ptr); } else if (job_ptr->batch_flag && job_ptr->details && + slurmctld_conf.job_requeue 
&& (job_ptr->details->requeue > 0)) { char requeue_msg[128]; @@ -2371,11 +2500,15 @@ extern int kill_running_job_by_node_name(char *node_name) job_completion_logger(job_ptr, true); deallocate_nodes(job_ptr, false, suspended, false); - job_ptr->db_index = 0; + + /* do this after the epilog complete, + * setting it here is too early */ + //job_ptr->db_index = 0; + //job_ptr->details->submit_time = now; + job_ptr->job_state = JOB_PENDING; if (job_ptr->node_cnt) job_ptr->job_state |= JOB_COMPLETING; - job_ptr->details->submit_time = now; /* restart from periodic checkpoint */ if (job_ptr->ckpt_interval && @@ -2432,15 +2565,8 @@ extern void excise_node_from_job(struct job_record *job_ptr, { int i, orig_pos = -1, new_pos = -1; bitstr_t *orig_bitmap; - job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - - xassert(job_resrcs_ptr); - xassert(job_resrcs_ptr->cpus); - xassert(job_resrcs_ptr->cpus_used); orig_bitmap = bit_copy(job_ptr->node_bitmap); - if (!orig_bitmap) - fatal("bit_copy memory allocation failure"); make_node_idle(node_ptr, job_ptr); /* updates bitmap */ xfree(job_ptr->nodes); job_ptr->nodes = bitmap2node_name(job_ptr->node_bitmap); @@ -2474,7 +2600,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) { long job_id, time_min; long pn_min_cpus, pn_min_memory, pn_min_tmp_disk, min_cpus; - long time_limit, priority, contiguous, acctg_freq; + long time_limit, priority, contiguous; long kill_on_node_fail, shared, immediate, wait_all_nodes; long cpus_per_task, requeue, num_tasks, overcommit; long ntasks_per_node, ntasks_per_socket, ntasks_per_core; @@ -2599,17 +2725,15 @@ void dump_job_desc(job_desc_msg_t * job_specs) job_specs->dependency, job_specs->account, job_specs->qos, job_specs->comment); - num_tasks = (job_specs->num_tasks != (uint16_t) NO_VAL) ? + num_tasks = (job_specs->num_tasks != NO_VAL) ? (long) job_specs->num_tasks : -1L; overcommit = (job_specs->overcommit != (uint8_t) NO_VAL) ? 
(long) job_specs->overcommit : -1L; - acctg_freq = (job_specs->acctg_freq != (uint16_t) NO_VAL) ? - (long) job_specs->acctg_freq : -1L; debug3(" mail_type=%u mail_user=%s nice=%d num_tasks=%ld " - "open_mode=%u overcommit=%ld acctg_freq=%ld", + "open_mode=%u overcommit=%ld acctg_freq=%s", job_specs->mail_type, job_specs->mail_user, (int)job_specs->nice - NICE_OFFSET, num_tasks, - job_specs->open_mode, overcommit, acctg_freq); + job_specs->open_mode, overcommit, job_specs->acctg_freq); slurm_make_time_str(&job_specs->begin_time, buf, sizeof(buf)); cpus_per_task = (job_specs->cpus_per_task != (uint16_t) NO_VAL) ? @@ -2643,6 +2767,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) job_specs->cpu_bind_type, job_specs->cpu_bind, job_specs->mem_bind_type, job_specs->mem_bind, job_specs->plane_size); + debug3(" array_inx=%s", job_specs->array_inx); select_g_select_jobinfo_sprint(job_specs->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_MIXED); @@ -2666,8 +2791,6 @@ int init_job_conf(void) if (job_list == NULL) { job_count = 0; job_list = list_create(_list_delete_job); - if (job_list == NULL) - fatal ("Memory allocation failure"); } last_job_update = time(NULL); @@ -2694,6 +2817,198 @@ extern void rehash_jobs(void) } } +/* Create an exact copy of an existing job record for a job array. 
+ * Assumes the job has no resource allocation */ +struct job_record *_job_rec_copy(struct job_record *job_ptr) +{ + struct job_record *job_ptr_new = NULL, *save_job_next; + struct job_details *job_details, *details_new, *save_details; + uint32_t save_job_id; + priority_factors_object_t *save_prio_factors; + List save_step_list; + int error_code = SLURM_SUCCESS; + int i; + + job_ptr_new = create_job_record(&error_code); + if (!job_ptr_new) /* MaxJobCount checked when job array submitted */ + fatal("job array create_job_record error"); + if (error_code != SLURM_SUCCESS) + return job_ptr_new; + + /* Set job-specific ID and hash table */ + if (_set_job_id(job_ptr_new)) + fatal("job array create_job_record error"); + _add_job_hash(job_ptr_new); + + /* Copy most of original job data. + * This could be done in parallel, but performance was worse. */ + save_job_id = job_ptr_new->job_id; + save_job_next = job_ptr_new->job_next; + save_details = job_ptr_new->details; + save_prio_factors = job_ptr_new->prio_factors; + save_step_list = job_ptr_new->step_list; + memcpy(job_ptr_new, job_ptr, sizeof(struct job_record)); + job_ptr_new->job_id = save_job_id; + job_ptr_new->job_next = save_job_next; + job_ptr_new->details = save_details; + job_ptr_new->prio_factors = save_prio_factors; + job_ptr_new->step_list = save_step_list; + + job_ptr_new->account = xstrdup(job_ptr->account); + job_ptr_new->alias_list = xstrdup(job_ptr->alias_list); + job_ptr_new->alloc_node = xstrdup(job_ptr->alloc_node); + job_ptr_new->batch_host = xstrdup(job_ptr->batch_host); + if (job_ptr->check_job) { + job_ptr_new->check_job = + checkpoint_copy_jobinfo(job_ptr->check_job); + } + job_ptr_new->comment = xstrdup(job_ptr->comment); + /* struct job_details *details; *** NOTE: Copied below */ + job_ptr_new->gres = xstrdup(job_ptr->gres); + if (job_ptr->gres_list) { + job_ptr_new->gres_list = + gres_plugin_job_state_dup(job_ptr->gres_list); + } + job_ptr_new->gres_alloc = xstrdup(job_ptr->gres_alloc); +
job_ptr_new->gres_req = xstrdup(job_ptr->gres_req); + job_ptr_new->gres_used = xstrdup(job_ptr->gres_used); + job_ptr_new->licenses = xstrdup(job_ptr->licenses); + job_ptr_new->license_list = license_job_copy(job_ptr->license_list); + job_ptr_new->mail_user = xstrdup(job_ptr->mail_user); + job_ptr_new->name = xstrdup(job_ptr->name); + job_ptr_new->network = xstrdup(job_ptr->network); + job_ptr_new->nodes = xstrdup(job_ptr->nodes); + job_ptr_new->licenses = xstrdup(job_ptr->licenses); + if (job_ptr->node_cnt && job_ptr->node_addr) { + i = sizeof(slurm_addr_t) * job_ptr->node_cnt; + job_ptr_new->node_addr = xmalloc(i); + memcpy(job_ptr_new->node_addr, job_ptr->node_addr, i); + } + if (job_ptr->node_bitmap) + job_ptr_new->node_bitmap = bit_copy(job_ptr->node_bitmap); + if (job_ptr->node_bitmap_cg) + job_ptr_new->node_bitmap_cg = bit_copy(job_ptr->node_bitmap_cg); + job_ptr_new->nodes_completing = xstrdup(job_ptr->nodes_completing); + job_ptr_new->partition = xstrdup(job_ptr->partition); + job_ptr_new->part_ptr_list = part_list_copy(job_ptr->part_ptr_list); + /* On jobs that are held the priority_array isn't set up yet, + so check to see if it exists before copying. 
+ */ + if (job_ptr->part_ptr_list && job_ptr->priority_array) { + i = list_count(job_ptr->part_ptr_list) * sizeof(uint32_t); + job_ptr_new->priority_array = xmalloc(i); + memcpy(job_ptr_new->priority_array, job_ptr->priority_array, i); + } + job_ptr_new->resv_name = xstrdup(job_ptr->resv_name); + job_ptr_new->resp_host = xstrdup(job_ptr->resp_host); + if (job_ptr->select_jobinfo) { + job_ptr_new->select_jobinfo = + select_g_select_jobinfo_copy(job_ptr->select_jobinfo); + } + if (job_ptr->spank_job_env_size) { + job_ptr_new->spank_job_env = + xmalloc(sizeof(char *) * + (job_ptr->spank_job_env_size + 1)); + for (i = 0; i < job_ptr->spank_job_env_size; i++) { + job_ptr_new->spank_job_env[i] = + xstrdup(job_ptr->spank_job_env[i]); + } + } + job_ptr_new->state_desc = xstrdup(job_ptr->state_desc); + job_ptr_new->wckey = xstrdup(job_ptr->wckey); + + job_details = job_ptr->details; + details_new = job_ptr_new->details; + memcpy(details_new, job_details, sizeof(struct job_details)); + details_new->acctg_freq = xstrdup(job_details->acctg_freq); + if (job_details->argc) { + details_new->argv = + xmalloc(sizeof(char *) * (job_details->argc + 1)); + for (i = 0; i < job_details->argc; i++) { + details_new->argv[i] = xstrdup(job_details->argv[i]); + } + } + details_new->ckpt_dir = xstrdup(job_details->ckpt_dir); + details_new->cpu_bind = xstrdup(job_details->cpu_bind); + details_new->depend_list = depended_list_copy(job_details->depend_list); + details_new->dependency = xstrdup(job_details->dependency); + details_new->orig_dependency = xstrdup(job_details->orig_dependency); + if (job_details->env_cnt) { + details_new->env_sup = + xmalloc(sizeof(char *) * (job_details->env_cnt + 1)); + for (i = 0; i < job_details->env_cnt; i++) { + details_new->env_sup[i] = + xstrdup(job_details->env_sup[i]); + } + } + if (job_details->exc_node_bitmap) { + details_new->exc_node_bitmap = + bit_copy(job_details->exc_node_bitmap); + } + details_new->exc_nodes = xstrdup(job_details->exc_nodes); + 
details_new->feature_list = + feature_list_copy(job_details->feature_list); + details_new->features = xstrdup(job_details->features); + if (job_details->mc_ptr) { + i = sizeof(multi_core_data_t); + details_new->mc_ptr = xmalloc(i); + memcpy(details_new->mc_ptr, job_details->mc_ptr, i); + } + details_new->mem_bind = xstrdup(job_details->mem_bind); + if (job_details->req_node_bitmap) { + details_new->req_node_bitmap = + bit_copy(job_details->req_node_bitmap); + } + if (job_details->req_node_layout && job_details->req_node_bitmap) { + i = bit_set_count(job_details->req_node_bitmap) * + sizeof(uint16_t); + details_new->req_node_layout = xmalloc(i); + memcpy(details_new->req_node_layout, + job_details->req_node_layout, i); + } + details_new->req_nodes = xstrdup(job_details->req_nodes); + details_new->restart_dir = xstrdup(job_details->restart_dir); + details_new->std_err = xstrdup(job_details->std_err); + details_new->std_in = xstrdup(job_details->std_in); + details_new->std_out = xstrdup(job_details->std_out); + details_new->work_dir = xstrdup(job_details->work_dir); + _copy_job_desc_files(job_ptr->job_id, job_ptr_new->job_id); + + return job_ptr_new; +} + +/* Convert a single job record into an array of job records. 
+ * Job record validation is complete, so we only need to duplicate the record + * and update job and array ID values */ +static void _create_job_array(struct job_record *job_ptr, + job_desc_msg_t *job_specs) +{ + struct job_record *job_ptr_new; + int i, i_first, i_last; + + if (!job_specs->array_bitmap) + return; + i_first = bit_ffs(job_specs->array_bitmap); + if (i_first == -1) { + error("_create_job_array: job %u array_bitmap is empty", + job_ptr->job_id); + return; + } + job_ptr->array_job_id = job_ptr->job_id; + job_ptr->array_task_id = i_first; + + i_last = bit_fls(job_specs->array_bitmap); + for (i = (i_first + 1); i <= i_last; i++) { + if (!bit_test(job_specs->array_bitmap, i)) + continue; + job_ptr_new = _job_rec_copy(job_ptr); + if (!job_ptr_new) + break; + job_ptr_new->array_job_id = job_ptr->job_id; + job_ptr_new->array_task_id = i; + } +} + /* * Wrapper for select_nodes() function that will test all valid partitions * for a new job @@ -2717,7 +3032,7 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only, job_ptr->part_ptr = part_ptr; debug2("Try job %u on next partition %s", job_ptr->job_id, part_ptr->name); - if (job_limits_check(&job_ptr) != WAIT_NO_REASON) + if (job_limits_check(&job_ptr, false) != WAIT_NO_REASON) continue; rc = select_nodes(job_ptr, test_only, select_node_bitmap); @@ -2727,7 +3042,7 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only, } list_iterator_destroy(iter); } else { - if (job_limits_check(&job_ptr) != WAIT_NO_REASON) + if (job_limits_check(&job_ptr, false) != WAIT_NO_REASON) test_only = true; rc = select_nodes(job_ptr, test_only, select_node_bitmap); } @@ -2782,7 +3097,10 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, return error_code; } xassert(job_ptr); - independent = job_independent(job_ptr, will_run); + if (job_specs->array_bitmap) + independent = false; + else + independent = job_independent(job_ptr, will_run); /* priority needs to be calculated 
after this since we set a * begin time in job_independent and that lets us know if the * job is eligible. @@ -2790,7 +3108,8 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, if (job_ptr->priority == NO_VAL) set_job_prio(job_ptr); - if (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS) + if (independent && + (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS)) independent = false; /* Avoid resource fragmentation if important */ @@ -2853,7 +3172,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, test_only = will_run || (allocate == 0); no_alloc = test_only || too_fragmented || - (!top_prio) || (!independent) || !avail_front_end(); + (!top_prio) || (!independent) || !avail_front_end(job_ptr); error_code = _select_nodes_parts(job_ptr, no_alloc, NULL); if (!test_only) { last_job_update = now; @@ -2879,6 +3198,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, job_ptr->start_time = job_ptr->end_time = now; job_completion_logger(job_ptr, false); } else { /* job remains queued */ + _create_job_array(job_ptr, job_specs); if ((error_code == ESLURM_NODES_BUSY) || (error_code == ESLURM_ACCOUNTING_POLICY)) { error_code = SLURM_SUCCESS; @@ -2906,6 +3226,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, jobacct_storage_g_job_start(acct_db_conn, job_ptr); if (!will_run) { + _create_job_array(job_ptr, job_specs); debug2("sched: JobId=%u allocated resources: NodeList=%s", job_ptr->job_id, job_ptr->nodes); rebuild_job_part_list(job_ptr); @@ -2976,12 +3297,12 @@ extern int job_fail(uint32_t job_id) * job_signal - signal the specified job * IN job_id - id of the job to be signaled * IN signal - signal to send, SIGKILL == cancel the job - * IN batch_flag - signal batch shell only if set + * IN flags - see KILL_JOB_* flags in slurm.h * IN uid - uid of requesting user * IN preempt - true if job being preempted * RET 0 on success, otherwise ESLURM error code */ -extern int job_signal(uint32_t 
job_id, uint16_t signal, uint16_t batch_flag, +extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags, uid_t uid, bool preempt) { struct job_record *job_ptr; @@ -3003,6 +3324,27 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, } job_ptr = find_job_record(job_id); + if ((flags & KILL_JOB_ARRAY) && /* signal entire job array */ + ((job_ptr == NULL) || + (job_ptr->array_task_id != (uint16_t) NO_VAL))) { + int rc = SLURM_SUCCESS, rc1; + ListIterator job_iter; + + flags &= (~KILL_JOB_ARRAY); + job_iter = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iter))) { + if ((job_ptr->array_job_id != job_id) || + (job_ptr->array_task_id == (uint16_t)NO_VAL)) + continue; + if (IS_JOB_FINISHED(job_ptr)) + continue; + rc1 = job_signal(job_ptr->job_id, signal, flags, + uid, preempt); + rc = MAX(rc, rc1); + } + list_iterator_destroy(job_iter); + return rc; + } if (job_ptr == NULL) { info("job_signal: invalid job id %u", job_id); return ESLURM_INVALID_JOB_ID; @@ -3082,7 +3424,7 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, build_cg_bitmap(job_ptr); job_completion_logger(job_ptr, false); deallocate_nodes(job_ptr, false, false, preempt); - } else if (batch_flag) { + } else if (flags & KILL_JOB_BATCH) { if (job_ptr->batch_flag) _signal_batch_job(job_ptr, signal); else @@ -3211,12 +3553,10 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, job_ptr->end_time = now; job_ptr->job_state = JOB_NODE_FAIL; job_completion_logger(job_ptr, true); - job_ptr->db_index = 0; - /* Since this could happen on a launch we need to make - * sure the submit isn't the same as the last submit so - * put now + 1 so we get different records in the - * database */ - job_ptr->details->submit_time = now + 1; + /* do this after the epilog complete, setting it here + * is too early */ + //job_ptr->db_index = 0; + //job_ptr->details->submit_time = now + 1; job_ptr->batch_flag++; /* 
only one retry */ job_ptr->restart_cnt++; @@ -3247,8 +3587,12 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, job_ptr->exit_code = job_return_code; job_ptr->state_reason = FAIL_EXIT_CODE; xfree(job_ptr->state_desc); - } else if (job_comp_flag && /* job was running */ - (job_ptr->end_time < now)) { /* over time limit */ + } else if (job_comp_flag + && ((job_ptr->end_time + + slurmctld_conf.over_time_limit * 60) < now)) { + /* Test if the job has finished before its allowed + * over time has expired. + */ job_ptr->job_state = JOB_TIMEOUT | job_comp_flag; job_ptr->exit_code = MAX(job_ptr->exit_code, 1); job_ptr->state_reason = FAIL_TIMEOUT; @@ -3256,6 +3600,8 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, } else { job_ptr->job_state = JOB_COMPLETE | job_comp_flag; job_ptr->exit_code = job_return_code; + if (nonstop_ops.job_fini) + (nonstop_ops.job_fini)(job_ptr); } if (suspended) { @@ -3268,11 +3614,13 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, } last_job_update = now; + job_ptr->time_last_active = now; /* Timer for resending kill RPC */ if (job_comp_flag) { /* job was running */ build_cg_bitmap(job_ptr); deallocate_nodes(job_ptr, false, suspended, false); } - info("sched: job_complete for JobId=%u successful", job_id); + info("sched: job_complete for JobId=%u successful, exit code=%u", + job_id, job_return_code); return SLURM_SUCCESS; } @@ -3547,6 +3895,13 @@ static int _valid_job_part(job_desc_msg_t * job_desc, } #endif + if ((job_desc->time_limit == NO_VAL) && + (part_ptr->default_time == 0)) { + info("_valid_job_part: job's default time is 0"); + rc = ESLURM_INVALID_TIME_LIMIT; + goto fini; + } + if ((job_desc->time_limit == NO_VAL) && (part_ptr->default_time != NO_VAL)) job_desc->time_limit = part_ptr->default_time; @@ -3589,9 +3944,11 @@ fini: FREE_NULL_LIST(part_ptr_list); /* * job_limits_check - check the limits specified for the job. * IN job_ptr - pointer to job table entry. 
+ * IN check_min_time - if true test job's minimum time limit, + * otherwise test maximum time limit * RET WAIT_NO_REASON on success, fail status otherwise. */ -extern int job_limits_check(struct job_record **job_pptr) +extern int job_limits_check(struct job_record **job_pptr, bool check_min_time) { struct job_details *detail_ptr; enum job_state_reason fail_reason; @@ -3601,6 +3958,7 @@ extern int job_limits_check(struct job_record **job_pptr) slurmdb_association_rec_t *assoc_ptr; uint32_t job_min_nodes, job_max_nodes; uint32_t part_min_nodes, part_max_nodes; + uint32_t time_check; #ifdef HAVE_BG static uint16_t cpus_per_node = 0; if (!cpus_per_node) @@ -3612,6 +3970,10 @@ extern int job_limits_check(struct job_record **job_pptr) part_ptr = job_ptr->part_ptr; qos_ptr = job_ptr->qos_ptr; assoc_ptr = job_ptr->assoc_ptr; + if (!detail_ptr) { /* To prevent CLANG error */ + fatal("job %u has NULL details_ptr", job_ptr->job_id); + return WAIT_NO_REASON; + } #ifdef HAVE_BG job_min_nodes = detail_ptr->min_cpus / cpus_per_node; @@ -3627,6 +3989,10 @@ extern int job_limits_check(struct job_record **job_pptr) fail_reason = WAIT_NO_REASON; + if (check_min_time && job_ptr->time_min) + time_check = job_ptr->time_min; + else + time_check = job_ptr->time_limit; if ((job_min_nodes > part_max_nodes) && (!qos_ptr || (qos_ptr && !(qos_ptr->flags & QOS_FLAG_PART_MAX_NODE)))) { @@ -3652,13 +4018,12 @@ extern int job_limits_check(struct job_record **job_pptr) debug2("Job %u requested inactive partition %s", job_ptr->job_id, part_ptr->name); fail_reason = WAIT_PART_INACTIVE; - } else if ((((job_ptr->time_limit != NO_VAL) && - (job_ptr->time_limit > part_ptr->max_time)) || - ((job_ptr->time_min != NO_VAL) && - (job_ptr->time_min > part_ptr->max_time))) && - (!qos_ptr || (qos_ptr && !(qos_ptr->flags & - QOS_FLAG_PART_TIME_LIMIT)))) { - debug2("Job %u exceeds partition time limit", job_ptr->job_id); + } else if ((time_check != NO_VAL) && + (time_check > part_ptr->max_time) && + (!qos_ptr || 
(qos_ptr && !(qos_ptr->flags & + QOS_FLAG_PART_TIME_LIMIT)))) { + info("Job %u exceeds partition time limit (%u > %u)", + job_ptr->job_id, time_check, part_ptr->max_time); fail_reason = WAIT_PART_TIME_LIMIT; } else if (qos_ptr && assoc_ptr && (qos_ptr->flags & QOS_FLAG_ENFORCE_USAGE_THRES) && @@ -3704,6 +4069,7 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, struct job_record **job_pptr, uid_t submit_uid) { static int launch_type_poe = -1; + static uint32_t acct_freq_task = NO_VAL; int error_code = SLURM_SUCCESS, i, qos_error; struct part_record *part_ptr = NULL; List part_ptr_list = NULL; @@ -3717,6 +4083,7 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, static uint32_t node_scaling = 1; static uint32_t cpus_per_mp = 1; acct_policy_limit_set_t acct_policy_limit_set; + int acctg_freq; #ifdef HAVE_BG uint16_t geo[SYSTEM_DIMENSIONS]; @@ -3753,6 +4120,25 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, if (error_code != SLURM_SUCCESS) return error_code; + /* Validate a job's accounting frequency, if specified */ + if (acct_freq_task == NO_VAL) { + char *acct_freq = slurm_get_jobacct_gather_freq(); + int i = acct_gather_parse_freq(PROFILE_TASK, acct_freq); + xfree(acct_freq); + if (i != -1) + acct_freq_task = i; + else + acct_freq_task = (uint16_t) NO_VAL; + } + acctg_freq = acct_gather_parse_freq(PROFILE_TASK, job_desc->acctg_freq); + if ((acctg_freq != -1) && + ((acctg_freq == 0) || (acctg_freq > acct_freq_task))) { + error("Invalid accounting frequency (%d > %u)", + acctg_freq, acct_freq_task); + error_code = ESLURMD_INVALID_ACCT_FREQ; + goto cleanup_fail; + } + /* insure that selected nodes are in this partition */ if (job_desc->req_nodes) { error_code = node_name2bitmap(job_desc->req_nodes, false, @@ -3893,8 +4279,6 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, bitstr_t *tmp_bitmap = NULL; bitoff_t first_set; tmp_bitmap = 
bit_copy(exc_bitmap); - if (tmp_bitmap == NULL) - fatal("bit_copy malloc failure"); bit_and(tmp_bitmap, req_bitmap); first_set = bit_ffs(tmp_bitmap); FREE_NULL_BITMAP(tmp_bitmap); @@ -4002,6 +4386,7 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, job_ptr = *job_pptr; job_ptr->part_ptr = part_ptr; job_ptr->part_ptr_list = part_ptr_list; + part_ptr_list = NULL; if ((error_code = checkpoint_alloc_jobinfo(&(job_ptr->check_job)))) { error("Failed to allocate checkpoint info for job"); @@ -4131,6 +4516,91 @@ static int _test_strlen(char *test_str, char *str_name, int max_str_len) return SLURM_SUCCESS; } +static bool _valid_array_inx(job_desc_msg_t *job_desc) +{ + slurm_ctl_conf_t *conf; + char *array_str = NULL, *end_ptr = NULL, *sep; + int array_id, max_array_size, step = 1; + bool valid = true; + hostset_t hs; + + FREE_NULL_BITMAP(job_desc->array_bitmap); + if (!job_desc->array_inx || !job_desc->array_inx[0]) + return true; + if (!job_desc->script || !job_desc->script[0]) + return false; + + conf = slurm_conf_lock(); + max_array_size = conf->max_array_sz; + slurm_conf_unlock(); + if (max_array_size == 0) { + verbose("Job arrays disabled, MaxArraySize=0"); + return false; + } + + /* We have a job array request */ + job_desc->immediate = 0; /* Disable immediate option */ + sep = strchr(job_desc->array_inx, (int) ':'); + if (sep) { + step = strtol(sep+1, &end_ptr, 10); + if ((sep[1] == '\0') || (end_ptr[0] != '\0') || + (step <= 0) || (step >= max_array_size)) + return false; + sep[0] = '\0'; + xstrfmtcat(array_str, "[%s]", job_desc->array_inx); + sep[0] = ':'; + } else { + xstrfmtcat(array_str, "[%s]", job_desc->array_inx); + } + hs = hostset_create(array_str); + xfree(array_str); + if (!hs) { + verbose("Invalid job array string (%s)", array_str); + return false; + } + array_str = hostset_shift(hs); + if (!array_str) { + hostset_destroy(hs); + verbose("Invalid job array string (%s)", array_str); + return false; + } + + 
job_desc->array_bitmap = bit_alloc(max_array_size); + while (array_str) { + array_id = strtol(array_str, &end_ptr, 10); + if ((array_str[0] == '\0') || (end_ptr[0] != '\0') || + (array_id < 0) || (array_id >= max_array_size)) { + valid = false; + verbose("Invalid job array element value (%d)", + array_id); + } + free(array_str); + if (!valid) + break; + bit_set(job_desc->array_bitmap, array_id); + array_str = hostset_shift(hs); + } + hostset_destroy(hs); + if (valid && (bit_set_count(job_desc->array_bitmap) == 0)) { + valid = false; + verbose("Job array has no elements"); + } + + if (valid && (step > 1)) { + int i, j = 0; + i = bit_ffs(job_desc->array_bitmap); + for ( ; i < max_array_size; i++) { + if (!bit_test(job_desc->array_bitmap, i)) + continue; + if (j % step != 0) + bit_clear(job_desc->array_bitmap, i); + j++; + } + } + + return valid; +} + /* Perform some size checks on strings we store to prevent * malicious user filling slurmctld's memory * RET 0 or error code */ @@ -4138,6 +4608,7 @@ extern int validate_job_create_req(job_desc_msg_t * job_desc) { if (_test_strlen(job_desc->account, "account", 1024) || _test_strlen(job_desc->alloc_node, "alloc_node", 1024) || + _test_strlen(job_desc->array_inx, "array_inx", 1024 * 4) || _test_strlen(job_desc->blrtsimage, "blrtsimage", 1024) || _test_strlen(job_desc->ckpt_dir, "ckpt_dir", 1024) || _test_strlen(job_desc->comment, "comment", 1024) || @@ -4159,13 +4630,26 @@ extern int validate_job_create_req(job_desc_msg_t * job_desc) _test_strlen(job_desc->req_nodes, "req_nodes", 1024*64) || _test_strlen(job_desc->reservation, "reservation", 1024) || _test_strlen(job_desc->script, "script", 1024 * 1024 * 4) || - _test_strlen(job_desc->std_err, "std_err", 1024) || - _test_strlen(job_desc->std_in, "std_in", 1024) || - _test_strlen(job_desc->std_out, "std_out", 1024) || + _test_strlen(job_desc->std_err, "std_err", MAXPATHLEN) || + _test_strlen(job_desc->std_in, "std_in", MAXPATHLEN) || + _test_strlen(job_desc->std_out, 
"std_out", MAXPATHLEN) || _test_strlen(job_desc->wckey, "wckey", 1024) || - _test_strlen(job_desc->work_dir, "work_dir", 1024)) + _test_strlen(job_desc->work_dir, "work_dir", MAXPATHLEN)) return ESLURM_PATHNAME_TOO_LONG; + if (!_valid_array_inx(job_desc)) + return ESLURM_INVALID_ARRAY; + + if (job_desc->array_bitmap) { + int i = bit_set_count(job_desc->array_bitmap); + if ((job_count + i) >= slurmctld_conf.max_job_cnt) { + error("create_job_record: job_count exceeds limit " + "(%d + %d >= %u)", + job_count, i, slurmctld_conf.max_job_cnt); + return EAGAIN; + } + } + /* Make sure anything that may be put in the database will be * lower case */ xstrtolower(job_desc->account); @@ -4180,7 +4664,7 @@ static int _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id) { int error_code = 0; - char *dir_name, job_dir[20], *file_name; + char *dir_name, job_dir[32], *file_name; DEF_TIMERS; START_TIMER; @@ -4217,6 +4701,55 @@ _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id) return error_code; } +/* _copy_job_desc_files - create copies of a job script and environment files */ +static int +_copy_job_desc_files(uint32_t job_id_src, uint32_t job_id_dest) +{ + int error_code = 0; + char *dir_name_src, *dir_name_dest, job_dir[32]; + char *file_name_src, *file_name_dest; + + /* Create state_save_location directory */ + dir_name_src = slurm_get_state_save_location(); + dir_name_dest = xstrdup(dir_name_src); + + /* Create job_id_dest specific directory */ + sprintf(job_dir, "/job.%u", job_id_dest); + xstrcat(dir_name_dest, job_dir); + if (mkdir(dir_name_dest, 0700)) { + error("mkdir(%s) error %m", dir_name_dest); + xfree(dir_name_src); + xfree(dir_name_dest); + return ESLURM_WRITING_TO_FILE; + } + + /* Identify job_id_src specific directory */ + sprintf(job_dir, "/job.%u", job_id_src); + xstrcat(dir_name_src, job_dir); + + file_name_src = xstrdup(dir_name_src); + file_name_dest = xstrdup(dir_name_dest); + xstrcat(file_name_src, "/environment"); + 
xstrcat(file_name_dest, "/environment"); + error_code = link(file_name_src, file_name_dest); + xfree(file_name_src); + xfree(file_name_dest); + + if (error_code == 0) { + file_name_src = xstrdup(dir_name_src); + file_name_dest = xstrdup(dir_name_dest); + xstrcat(file_name_src, "/script"); + xstrcat(file_name_dest, "/script"); + error_code = link(file_name_src, file_name_dest); + xfree(file_name_src); + xfree(file_name_dest); + } + + xfree(dir_name_src); + xfree(dir_name_dest); + return error_code; +} + /* * Create file with specified name and write the supplied data array to it * IN file_name - file to create and write to @@ -4619,11 +5152,16 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, return error_code; job_ptr->partition = xstrdup(job_desc->partition); + if (job_desc->profile != ACCT_GATHER_PROFILE_NOT_SET) + job_ptr->profile = job_desc->profile; - if (job_desc->job_id != NO_VAL) /* already confirmed unique */ + if (job_desc->job_id != NO_VAL) { /* already confirmed unique */ job_ptr->job_id = job_desc->job_id; - else - _set_job_id(job_ptr); + } else { + error_code = _set_job_id(job_ptr); + if (error_code) + return error_code; + } if (job_desc->name) job_ptr->name = xstrdup(job_desc->name); @@ -4689,7 +5227,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, detail_ptr->argv = job_desc->argv; job_desc->argv = (char **) NULL; /* nothing left to free */ job_desc->argc = 0; /* nothing left to free */ - detail_ptr->acctg_freq = job_desc->acctg_freq; + detail_ptr->acctg_freq = xstrdup(job_desc->acctg_freq); detail_ptr->nice = job_desc->nice; detail_ptr->open_mode = job_desc->open_mode; detail_ptr->min_cpus = job_desc->min_cpus; @@ -4802,7 +5340,7 @@ static bool _valid_pn_min_mem(job_desc_msg_t * job_desc_msg, { uint32_t job_mem_limit = job_desc_msg->pn_min_memory; uint32_t sys_mem_limit; - uint16_t cpus_per_node, ratio; + uint16_t cpus_per_node; if (part_ptr && part_ptr->max_mem_per_cpu) sys_mem_limit = part_ptr->max_mem_per_cpu; @@ -4817,14 
+5355,6 @@ static bool _valid_pn_min_mem(job_desc_msg_t * job_desc_msg, sys_mem_limit &= (~MEM_PER_CPU); if (job_mem_limit <= sys_mem_limit) return true; - ratio = (job_mem_limit + sys_mem_limit - 1) / sys_mem_limit; - if (job_desc_msg->cpus_per_task == (uint16_t) NO_VAL) { - job_desc_msg->cpus_per_task = ratio; - job_desc_msg->pn_min_memory = job_mem_limit + ratio-1; - job_desc_msg->pn_min_memory /= ratio; - job_desc_msg->pn_min_memory |= MEM_PER_CPU; - return true; - } return false; } @@ -4916,6 +5446,7 @@ void job_time_limit(void) if (slurmctld_conf.inactive_limit && (job_ptr->batch_flag == 0) && (job_ptr->time_last_active <= old) && + (job_ptr->other_port) && (job_ptr->part_ptr) && (!(job_ptr->part_ptr->flags & PART_FLAG_ROOT_ONLY))) { /* job inactive, kill it */ @@ -4977,7 +5508,6 @@ void job_time_limit(void) if (job_ptr->end_time <= (now + PERIODIC_TIMEOUT * 2)) srun_timeout (job_ptr); } - list_iterator_destroy(job_iterator); fini_job_resv_check(); } @@ -5165,6 +5695,10 @@ static void _list_delete_job(void *job_entry) fatal("job hash error"); *job_pptr = job_ptr->job_next; +/* + * NOTE: Anything you free here also needs to be allocated memory copied + * when a job array is created in _job_rec_copy() above + */ delete_job_details(job_ptr); xfree(job_ptr->account); xfree(job_ptr->alias_list); @@ -5189,6 +5723,7 @@ static void _list_delete_job(void *job_entry) xfree(job_ptr->nodes_completing); xfree(job_ptr->partition); FREE_NULL_LIST(job_ptr->part_ptr_list); + xfree(job_ptr->priority_array); slurm_destroy_priority_factors_object(job_ptr->prio_factors); xfree(job_ptr->resp_host); xfree(job_ptr->resv_name); @@ -5265,7 +5800,7 @@ static int _list_find_job_old(void *job_entry, void *key) the slurmdbd lets put it on the list to be handled later when it comes back up since we won't get another chance. 
*/ - if(with_slurmdbd && !job_ptr->db_index) + if (with_slurmdbd && !job_ptr->db_index) jobacct_storage_g_job_start(acct_db_conn, job_ptr); return 1; /* Purge the job */ } @@ -5278,13 +5813,14 @@ static int _list_find_job_old(void *job_entry, void *key) * OUT buffer_size - set to size of the buffer in bytes * IN show_flags - job filtering options * IN uid - uid of user making request (for partition filtering) + * IN filter_uid - pack only jobs belonging to this user if not NO_VAL * global: job_list - global list of job records * NOTE: the buffer at *buffer_ptr must be xfreed by the caller * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c * whenever the data format changes */ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, - uint16_t show_flags, uid_t uid, + uint16_t show_flags, uid_t uid, uint32_t filter_uid, uint16_t protocol_version) { ListIterator job_iterator; @@ -5327,6 +5863,9 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, (! IS_JOB_COMPLETING(job_ptr)) && IS_JOB_FINISHED(job_ptr)) continue; /* job ready for purging, don't dump */ + if ((filter_uid != NO_VAL) && (filter_uid != job_ptr->user_id)) + continue; + pack_job(job_ptr, show_flags, buffer, protocol_version, uid); jobs_packed++; } @@ -5361,15 +5900,24 @@ extern int pack_one_job(char **buffer_ptr, int *buffer_size, { ListIterator job_iterator; struct job_record *job_ptr; - uint32_t jobs_packed = 0; + uint32_t jobs_packed = 0, tmp_offset; Buf buffer; buffer_ptr[0] = NULL; *buffer_size = 0; + buffer = init_buf(BUF_SIZE); + + /* write message body header : size and time */ + /* put in a place holder job record count of 0 for now */ + pack32(jobs_packed, buffer); + pack_time(time(NULL), buffer); + job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { - if (job_ptr->job_id != job_id) + if ((job_ptr->job_id != job_id) && + ((job_ptr->array_task_id == (uint16_t) NO_VAL) || + (job_ptr->array_job_id 
!= job_id))) continue; if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && @@ -5378,20 +5926,25 @@ extern int pack_one_job(char **buffer_ptr, int *buffer_size, job_ptr->account)) break; + pack_job(job_ptr, show_flags, buffer, protocol_version, uid); jobs_packed++; - break; } list_iterator_destroy(job_iterator); - if (jobs_packed == 0) + + if (jobs_packed == 0) { + free_buf(buffer); return ESLURM_INVALID_JOB_ID; + } - buffer = init_buf(BUF_SIZE); + /* put the real record count in the message body header */ + tmp_offset = get_buf_offset(buffer); + set_buf_offset(buffer, 0); pack32(jobs_packed, buffer); - pack_time(time(NULL), buffer); - pack_job(job_ptr, show_flags, buffer, protocol_version, uid); + set_buf_offset(buffer, tmp_offset); *buffer_size = get_buf_offset(buffer); buffer_ptr[0] = xfer_buf_data(buffer); + return SLURM_SUCCESS; } @@ -5415,7 +5968,135 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + pack32(dump_job_ptr->array_job_id, buffer); + pack16(dump_job_ptr->array_task_id, buffer); + pack32(dump_job_ptr->assoc_id, buffer); + pack32(dump_job_ptr->job_id, buffer); + pack32(dump_job_ptr->user_id, buffer); + pack32(dump_job_ptr->group_id, buffer); + pack32(dump_job_ptr->profile, buffer); + + pack16(dump_job_ptr->job_state, buffer); + pack16(dump_job_ptr->batch_flag, buffer); + pack16(dump_job_ptr->state_reason, buffer); + pack16(dump_job_ptr->restart_cnt, buffer); + pack16(show_flags, buffer); + + pack32(dump_job_ptr->alloc_sid, buffer); + if ((dump_job_ptr->time_limit == NO_VAL) + && dump_job_ptr->part_ptr) + pack32(dump_job_ptr->part_ptr->max_time, buffer); + else + pack32(dump_job_ptr->time_limit, buffer); + pack32(dump_job_ptr->time_min, buffer); + + if (dump_job_ptr->details) { + pack16(dump_job_ptr->details->nice, buffer); + 
pack_time(dump_job_ptr->details->submit_time, buffer); + /* Earliest possible begin time */ + begin_time = dump_job_ptr->details->begin_time; + } else { + pack16(0, buffer); + pack_time((time_t) 0, buffer); + } + + pack_time(begin_time, buffer); + /* Actual or expected start time */ + if ((dump_job_ptr->start_time) || (begin_time <= time(NULL))) + pack_time(dump_job_ptr->start_time, buffer); + else /* earliest start time in the future */ + pack_time(begin_time, buffer); + + pack_time(dump_job_ptr->end_time, buffer); + pack_time(dump_job_ptr->suspend_time, buffer); + pack_time(dump_job_ptr->pre_sus_time, buffer); + pack_time(dump_job_ptr->resize_time, buffer); + pack_time(dump_job_ptr->preempt_time, buffer); + pack32(dump_job_ptr->priority, buffer); + + /* Only send the allocated nodelist since we are only sending + * the number of cpus and nodes that are currently allocated. */ + if (!IS_JOB_COMPLETING(dump_job_ptr)) + packstr(dump_job_ptr->nodes, buffer); + else { + nodelist = + bitmap2node_name(dump_job_ptr->node_bitmap_cg); + packstr(nodelist, buffer); + xfree(nodelist); + } + + if (!IS_JOB_PENDING(dump_job_ptr) && dump_job_ptr->part_ptr) + packstr(dump_job_ptr->part_ptr->name, buffer); + else + packstr(dump_job_ptr->partition, buffer); + packstr(dump_job_ptr->account, buffer); + packstr(dump_job_ptr->network, buffer); + packstr(dump_job_ptr->comment, buffer); + packstr(dump_job_ptr->gres, buffer); + packstr(dump_job_ptr->batch_host, buffer); + if (!IS_JOB_COMPLETED(dump_job_ptr) && + (show_flags & SHOW_DETAIL2) && + ((dump_job_ptr->user_id == (uint32_t) uid) || + validate_slurm_user(uid))) { + char *batch_script = get_job_script(dump_job_ptr); + packstr(batch_script, buffer); + xfree(batch_script); + } else { + packnull(buffer); + } + + assoc_mgr_lock(&locks); + if (assoc_mgr_qos_list) { + packstr(slurmdb_qos_str(assoc_mgr_qos_list, + dump_job_ptr->qos_id), buffer); + } else + packnull(buffer); + assoc_mgr_unlock(&locks); + + packstr(dump_job_ptr->licenses, 
buffer); + packstr(dump_job_ptr->state_desc, buffer); + packstr(dump_job_ptr->resv_name, buffer); + + pack32(dump_job_ptr->exit_code, buffer); + pack32(dump_job_ptr->derived_ec, buffer); + + if (show_flags & SHOW_DETAIL) { + pack_job_resources(dump_job_ptr->job_resrcs, buffer, + protocol_version); + } else { + uint32_t empty = NO_VAL; + pack32(empty, buffer); + } + + packstr(dump_job_ptr->name, buffer); + packstr(dump_job_ptr->wckey, buffer); + pack32(dump_job_ptr->req_switch, buffer); + pack32(dump_job_ptr->wait4switch, buffer); + + packstr(dump_job_ptr->alloc_node, buffer); + if (!IS_JOB_COMPLETING(dump_job_ptr)) + pack_bit_fmt(dump_job_ptr->node_bitmap, buffer); + else + pack_bit_fmt(dump_job_ptr->node_bitmap_cg, buffer); + + select_g_select_jobinfo_pack(dump_job_ptr->select_jobinfo, + buffer, protocol_version); + + detail_ptr = dump_job_ptr->details; + /* A few details are always dumped here */ + _pack_default_job_details(dump_job_ptr, buffer, + protocol_version); + + /* other job details are only dumped until the job starts + * running (at which time they become meaningless) */ + if (detail_ptr) + _pack_pending_job_details(detail_ptr, buffer, + protocol_version); + else + _pack_pending_job_details(NULL, buffer, + protocol_version); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { pack32(dump_job_ptr->assoc_id, buffer); pack32(dump_job_ptr->job_id, buffer); pack32(dump_job_ptr->user_id, buffer); @@ -5546,17 +6227,43 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, } } +static int _find_node_max_cpu_cnt(void) +{ + int i, max_cpu_cnt = 1; + struct node_record *node_ptr = node_record_table_ptr; + + for (i = 0; i < node_record_count; i++, node_ptr++) { +#ifndef HAVE_BG + if (slurmctld_conf.fast_schedule) { + /* Only data from config_record used for scheduling */ + max_cpu_cnt = MAX(max_cpu_cnt, + node_ptr->config_ptr->cpus); + } else { +#endif + /* Individual node data used for scheduling */ + max_cpu_cnt = 
MAX(max_cpu_cnt, node_ptr->cpus); +#ifndef HAVE_BG + } +#endif + } + return max_cpu_cnt; +} + /* pack default job details for "get_job_info" RPC */ static void _pack_default_job_details(struct job_record *job_ptr, Buf buffer, uint16_t protocol_version) { + static int max_cpu_cnt = -1; int i; struct job_details *detail_ptr = job_ptr->details; char *cmd_line = NULL; char *tmp = NULL; uint32_t len = 0; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (max_cpu_cnt == -1) + max_cpu_cnt = _find_node_max_cpu_cnt(); + + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { if (detail_ptr) { packstr(detail_ptr->features, buffer); packstr(detail_ptr->work_dir, buffer); @@ -5593,7 +6300,86 @@ static void _pack_default_job_details(struct job_record *job_ptr, pack32((uint32_t) 0, buffer); } else { pack32(detail_ptr->min_cpus, buffer); - if(detail_ptr->max_cpus != NO_VAL) + if (detail_ptr->max_cpus != NO_VAL) + pack32(detail_ptr->max_cpus, buffer); + else + pack32((uint32_t) 0, buffer); + + } + if (IS_JOB_COMPLETING(job_ptr) && job_ptr->node_cnt) { + pack32(job_ptr->node_cnt, buffer); + pack32((uint32_t) 0, buffer); + } else if (job_ptr->total_nodes) { + pack32(job_ptr->total_nodes, buffer); + pack32((uint32_t) 0, buffer); + } else { + /* Use task count to help estimate min_nodes */ + uint32_t min_nodes; + min_nodes = detail_ptr->num_tasks + + max_cpu_cnt - 1; + min_nodes /= max_cpu_cnt; + min_nodes = MAX(min_nodes, + detail_ptr->min_nodes); + pack32(min_nodes, buffer); + pack32(detail_ptr->max_nodes, buffer); + } + pack16(detail_ptr->requeue, buffer); + pack16(detail_ptr->ntasks_per_node, buffer); + } else { + packnull(buffer); + packnull(buffer); + packnull(buffer); + packnull(buffer); + + if (job_ptr->total_cpus) + pack32(job_ptr->total_cpus, buffer); + else + pack32(job_ptr->cpu_cnt, buffer); + pack32((uint32_t) 0, buffer); + + pack32(job_ptr->node_cnt, buffer); + pack32((uint32_t) 0, buffer); + pack16((uint16_t) 0, buffer); + pack16((uint16_t) 0, buffer); + } + 
} else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (detail_ptr) { + packstr(detail_ptr->features, buffer); + packstr(detail_ptr->work_dir, buffer); + packstr(detail_ptr->dependency, buffer); + + if (detail_ptr->argv) { + /* Determine size needed for a string + * containing all arguments */ + for (i=0; detail_ptr->argv[i]; i++) { + len += strlen(detail_ptr->argv[i]); + } + len += i; + + cmd_line = xmalloc(len*sizeof(char)); + tmp = cmd_line; + for (i=0; detail_ptr->argv[i]; i++) { + if (i != 0) { + *tmp = ' '; + tmp++; + } + strcpy(tmp,detail_ptr->argv[i]); + tmp += strlen(detail_ptr->argv[i]); + } + packstr(cmd_line, buffer); + xfree(cmd_line); + } else + packnull(buffer); + + if (IS_JOB_COMPLETING(job_ptr) && job_ptr->cpu_cnt) { + pack32(job_ptr->cpu_cnt, buffer); + pack32((uint32_t) 0, buffer); + } else if (job_ptr->total_cpus) { + pack32(job_ptr->total_cpus, buffer); + pack32((uint32_t) 0, buffer); + } else { + pack32(detail_ptr->min_cpus, buffer); + if (detail_ptr->max_cpus != NO_VAL) pack32(detail_ptr->max_cpus, buffer); else pack32((uint32_t) 0, buffer); @@ -5902,6 +6688,8 @@ static void _reset_step_bitmaps(struct job_record *job_ptr) step_iterator = list_iterator_create (job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; FREE_NULL_BITMAP(step_ptr->step_node_bitmap); if (step_ptr->step_layout && step_ptr->step_layout->node_list && @@ -5950,7 +6738,7 @@ extern uint32_t get_next_job_id(void) * _set_job_id - set a default job_id, insure that it is unique * IN job_ptr - pointer to the job_record */ -static void _set_job_id(struct job_record *job_ptr) +static int _set_job_id(struct job_record *job_ptr) { int i; uint32_t new_id; @@ -5959,9 +6747,6 @@ static void _set_job_id(struct job_record *job_ptr) xassert(job_ptr); xassert (job_ptr->magic == JOB_MAGIC); - if ((job_ptr->partition == NULL) - || (strlen(job_ptr->partition) == 0)) - fatal("_set_job_id: 
partition not set"); /* Insure no conflict in job id if we roll over 32 bits */ for (i = 0; i < 1000; i++) { @@ -5970,12 +6755,14 @@ static void _set_job_id(struct job_record *job_ptr) new_id = job_id_sequence; if (find_job_record(new_id) == NULL) { job_ptr->job_id = new_id; - return; + return SLURM_SUCCESS; } } - fatal("We have exhausted our supply of valid job id values." + error("We have exhausted our supply of valid job id values. " "FirstJobId=%u MaxJobId=%u", slurmctld_conf.first_job_id, slurmctld_conf.max_job_id); + job_ptr->job_id = NO_VAL; + return EAGAIN; } @@ -5991,7 +6778,7 @@ extern void set_job_prio(struct job_record *job_ptr) return; job_ptr->priority = slurm_sched_initial_priority(lowest_prio, job_ptr); - if ((job_ptr->priority <= 1) || + if ((job_ptr->priority == 0) || (job_ptr->direct_set_prio) || (job_ptr->details && (job_ptr->details->nice != NICE_OFFSET))) return; @@ -6464,6 +7251,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) xfree(job_ptr->partition); job_ptr->partition = xstrdup(job_specs->partition); job_ptr->part_ptr = tmp_part_ptr; + xfree(job_ptr->priority_array); /* Rebuilt in plugin */ FREE_NULL_LIST(job_ptr->part_ptr_list); job_ptr->part_ptr_list = part_ptr_list; part_ptr_list = NULL; /* nothing to free */ @@ -7267,7 +8055,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) goto fini; if (job_specs->name) { - if (!IS_JOB_PENDING(job_ptr)) { + if (IS_JOB_FINISHED(job_ptr)) { error_code = ESLURM_DISABLED; goto fini; } else { @@ -7280,6 +8068,18 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } + if (job_specs->std_out) { + if (!IS_JOB_PENDING(job_ptr)) + error_code = ESLURM_DISABLED; + else if (detail_ptr) { + xfree(detail_ptr->std_out); + detail_ptr->std_out = job_specs->std_out; + job_specs->std_out = NULL; + } + } + if (error_code != SLURM_SUCCESS) + goto fini; + if (job_specs->wckey) { if (!IS_JOB_PENDING(job_ptr)) error_code = ESLURM_DISABLED; @@ -7448,9 +8248,6 @@ int update_job(job_desc_msg_t * 
job_specs, uid_t uid) detail_ptr->begin_time = job_specs->begin_time; update_accounting = true; - if ((job_ptr->priority == 1) && - (detail_ptr->begin_time <= now)) - set_job_prio(job_ptr); slurm_make_time_str(&detail_ptr->begin_time, time_str, sizeof(time_str)); info("sched: update_job: setting begin to %s for " @@ -7508,12 +8305,13 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (error_code != SLURM_SUCCESS) goto fini; - fail_reason = job_limits_check(&job_ptr); + fail_reason = job_limits_check(&job_ptr, false); if (fail_reason != WAIT_NO_REASON) { if (fail_reason == WAIT_QOS_THRES) error_code = ESLURM_QOS_THRES; else if ((fail_reason == WAIT_PART_TIME_LIMIT) || (fail_reason == WAIT_PART_NODE_LIMIT) || + (fail_reason == WAIT_PART_DOWN) || (fail_reason == WAIT_HELD)) error_code = SLURM_SUCCESS; else @@ -7534,7 +8332,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) error_code = ESLURM_DISABLED; else { char *conn_type_char = conn_type_string_full(conn_type); - if((conn_type[0] >= SELECT_SMALL) + if ((conn_type[0] >= SELECT_SMALL) && (detail_ptr->min_cpus >= cpus_per_mp)) { info("update_job: could not change " "conn_type to '%s' because cpu " @@ -7544,7 +8342,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) detail_ptr->min_cpus, job_ptr->job_id); error_code = ESLURM_INVALID_NODE_COUNT; - } else if(((conn_type[0] == SELECT_TORUS) + } else if (((conn_type[0] == SELECT_TORUS) || (conn_type[0] == SELECT_MESH)) && (detail_ptr->min_cpus < cpus_per_mp)) { info("update_job: could not change " @@ -7574,7 +8372,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) /* check to make sure we didn't mess up with the proc count */ select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, &conn_type); - if(detail_ptr && + if (detail_ptr && (((conn_type[0] >= SELECT_SMALL) && (detail_ptr->min_cpus >= cpus_per_mp)) || (((conn_type[0] == SELECT_TORUS)|| (conn_type[0] == SELECT_MESH)) @@ -7730,7 +8528,7 @@ fini: * based upon job 
submit order), recalculate the job priority, since * many factors of an update may affect priority considerations. * If job has a hold then do nothing */ - if ((error_code == SLURM_SUCCESS) && (job_ptr->priority > 1) && + if ((error_code == SLURM_SUCCESS) && (job_ptr->priority != 0) && strcmp(slurmctld_conf.priority_type, "priority/basic")) set_job_prio(job_ptr); @@ -7763,8 +8561,6 @@ static void _send_job_kill(struct job_record *job_ptr) agent_args->msg_type = REQUEST_TERMINATE_JOB; agent_args->retry = 0; /* re_kill_job() resends as needed */ agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); kill_job = xmalloc(sizeof(kill_job_msg_t)); last_node_update = time(NULL); kill_job->job_id = job_ptr->job_id; @@ -7787,9 +8583,11 @@ static void _send_job_kill(struct job_record *job_ptr) agent_args->node_count++; } #else + if (!job_ptr->node_bitmap_cg) + build_cg_bitmap(job_ptr); for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; i++, node_ptr++) { - if (!bit_test(job_ptr->node_bitmap, i)) + if (!bit_test(job_ptr->node_bitmap_cg, i)) continue; hostlist_push(agent_args->hostlist, node_ptr->name); agent_args->node_count++; @@ -7819,7 +8617,7 @@ extern void job_pre_resize_acctg(struct job_record *job_ptr) /* if we don't have a db_index go a start this one up since if running with the slurmDBD the job may not have started yet. 
*/ - if(!job_ptr->db_index) + if (!job_ptr->db_index) jobacct_storage_g_job_start(acct_db_conn, job_ptr); job_ptr->job_state |= JOB_RESIZING; @@ -8033,9 +8831,13 @@ static void _purge_missing_jobs(int node_inx, time_t now) (job_ptr->time_last_active < startup_time) && (job_ptr->start_time < startup_time) && (node_inx == bit_ffs(job_ptr->node_bitmap))) { - info("Batch JobId=%u missing from node 0, killing it", + bool requeue = false; + if (slurmctld_conf.job_requeue && + (job_ptr->start_time < node_ptr->boot_time)) + requeue = true; + info("Batch JobId=%u missing from node 0", job_ptr->job_id); - job_complete(job_ptr->job_id, 0, false, true, NO_VAL); + job_complete(job_ptr->job_id, 0, requeue, true, NO_VAL); } else { _notify_srun_missing_step(job_ptr, node_inx, now, node_boot_time); @@ -8054,6 +8856,8 @@ static void _notify_srun_missing_step(struct job_record *job_ptr, int node_inx, xassert(job_ptr); step_iterator = list_iterator_create (job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; if (!bit_test(step_ptr->step_node_bitmap, node_inx)) continue; if (step_ptr->time_last_active >= now) { @@ -8347,8 +9151,6 @@ _xmit_new_end_time(struct job_record *job_ptr) agent_args->msg_type = REQUEST_UPDATE_JOB_TIME; agent_args->retry = 1; agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); job_time_msg_ptr = xmalloc(sizeof(job_time_msg_t)); job_time_msg_ptr->job_id = job_ptr->job_id; job_time_msg_ptr->expiration_time = job_ptr->end_time; @@ -8492,6 +9294,7 @@ extern bool job_epilog_complete(uint32_t job_id, char *node_name, xfree(job_ptr->nodes_completing); if (!IS_JOB_COMPLETING(job_ptr)) { /* COMPLETED */ if (IS_JOB_PENDING(job_ptr) && (job_ptr->batch_flag)) { + time_t now = time(NULL); info("requeue batch job %u", job_ptr->job_id); /* Clear everything so this appears to be a new job * and then restart it in 
accounting. */ @@ -8525,6 +9328,19 @@ extern bool job_epilog_complete(uint32_t job_id, char *node_name, jobacct_storage_g_job_start( acct_db_conn, job_ptr); } + + /* Reset this after the batch step has + * finished or the batch step information will + * be attributed to the next run of the job. */ + job_ptr->db_index = 0; + + /* Since this could happen on a launch we need to make + * sure the submit isn't the same as the last submit so + * put now + 1 so we get different records in the + * database */ + if (now == job_ptr->details->submit_time) + now++; + job_ptr->details->submit_time = now; } return true; } else @@ -8626,15 +9442,16 @@ extern bool job_independent(struct job_record *job_ptr, int will_run) time_t now = time(NULL); int depend_rc; + if ((job_ptr->state_reason == WAIT_HELD) || + (job_ptr->state_reason == WAIT_HELD_USER)) + return false; + /* Test dependencies first so we can cancel jobs before dependent * job records get purged (e.g. afterok, afternotok) */ depend_rc = test_job_dependency(job_ptr); if (depend_rc == 1) { - if ((job_ptr->state_reason != WAIT_HELD) && - (job_ptr->state_reason != WAIT_HELD_USER)) { - job_ptr->state_reason = WAIT_DEPENDENCY; - xfree(job_ptr->state_desc); - } + job_ptr->state_reason = WAIT_DEPENDENCY; + xfree(job_ptr->state_desc); return false; } else if (depend_rc == 2) { time_t now = time(NULL); @@ -8772,8 +9589,6 @@ static void _signal_job(struct job_record *job_ptr, int signal) agent_args->msg_type = REQUEST_SIGNAL_JOB; agent_args->retry = 1; agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); signal_job_msg = xmalloc(sizeof(kill_tasks_msg_t)); signal_job_msg->job_id = job_ptr->job_id; signal_job_msg->signal = signal; @@ -8810,9 +9625,9 @@ static void *_switch_suspend_info(struct job_record *job_ptr) void *switch_suspend_info = NULL; step_iterator = list_iterator_create (job_ptr->step_list); - if (!step_iterator) - fatal("list_iterator_create: malloc 
failure"); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; interconnect_suspend_info_get(step_ptr->switch_job, &switch_suspend_info); } @@ -8843,8 +9658,6 @@ static void _suspend_job(struct job_record *job_ptr, uint16_t op, * quickly induce huge backlog * of agent.c RPCs */ agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); sus_ptr = xmalloc(sizeof(suspend_int_msg_t)); sus_ptr->job_id = job_ptr->job_id; sus_ptr->op = op; @@ -8988,9 +9801,9 @@ static int _job_suspend_switch_test(struct job_record *job_ptr) struct step_record *step_ptr; step_iterator = list_iterator_create(job_ptr->step_list); - if (!step_iterator) - fatal("list_iterator_create: malloc failure"); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; rc = interconnect_suspend_test(step_ptr->switch_job); if (rc != SLURM_SUCCESS) break; @@ -9221,15 +10034,19 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd_t conn_fd, job_completion_logger(job_ptr, true); deallocate_nodes(job_ptr, false, suspended, preempt); xfree(job_ptr->details->req_node_layout); - job_ptr->db_index = 0; + + /* do this after the epilog complete, setting it here is too early */ + //job_ptr->db_index = 0; + //job_ptr->details->submit_time = now; + job_ptr->job_state = JOB_PENDING; if (job_ptr->node_cnt) job_ptr->job_state |= JOB_COMPLETING; - job_ptr->details->submit_time = now; job_ptr->pre_sus_time = (time_t) 0; job_ptr->suspend_time = (time_t) 0; job_ptr->tot_sus_time = (time_t) 0; + job_ptr->restart_cnt++; /* Since the job completion logger removes the submit we need * to add it again. 
*/ @@ -9285,8 +10102,13 @@ extern void update_job_nodes_completing(void) (job_ptr->node_bitmap == NULL)) continue; xfree(job_ptr->nodes_completing); - job_ptr->nodes_completing = - bitmap2node_name(job_ptr->node_bitmap); + if (job_ptr->node_bitmap_cg) { + job_ptr->nodes_completing = + bitmap2node_name(job_ptr->node_bitmap_cg); + } else { + job_ptr->nodes_completing = + bitmap2node_name(job_ptr->node_bitmap); + } } list_iterator_destroy(job_iterator); } @@ -9330,13 +10152,13 @@ extern int job_hold_by_assoc_id(uint32_t assoc_id) job_ptr->assoc_ptr = ((slurmdb_association_rec_t *) job_ptr->assoc_ptr)->usage->parent_assoc_ptr; - if(job_ptr->assoc_ptr) + if (job_ptr->assoc_ptr) job_ptr->assoc_id = ((slurmdb_association_rec_t *) job_ptr->assoc_ptr)->id; } - if(IS_JOB_FINISHED(job_ptr)) + if (IS_JOB_FINISHED(job_ptr)) continue; info("Association deleted, holding job %u", @@ -9375,20 +10197,20 @@ extern int job_hold_by_qos_id(uint32_t qos_id) continue; /* move up to the parent that should still exist */ - if(job_ptr->qos_ptr) { + if (job_ptr->qos_ptr) { /* Force a start so the association doesn't get lost. Since there could be some delay in the start of the job when running with the slurmdbd. 
*/ - if(!job_ptr->db_index) { + if (!job_ptr->db_index) { jobacct_storage_g_job_start(acct_db_conn, job_ptr); } job_ptr->qos_ptr = NULL; } - if(IS_JOB_FINISHED(job_ptr)) + if (IS_JOB_FINISHED(job_ptr)) continue; info("QOS deleted, holding job %u", job_ptr->job_id); @@ -9500,7 +10322,7 @@ extern int update_job_wckey(char *module, struct job_record *job_ptr, info("%s: invalid wckey %s for job_id %u", module, new_wckey, job_ptr->job_id); return ESLURM_INVALID_WCKEY; - } else if(association_based_accounting + } else if (association_based_accounting && !wckey_ptr && !(accounting_enforce & ACCOUNTING_ENFORCE_WCKEYS)) { /* if not enforcing associations we want to look for @@ -9510,7 +10332,7 @@ extern int update_job_wckey(char *module, struct job_record *job_ptr, wckey_rec.name = NULL; assoc_mgr_fill_in_wckey(acct_db_conn, &wckey_rec, accounting_enforce, &wckey_ptr); - if(!wckey_ptr) { + if (!wckey_ptr) { debug("%s: we didn't have a wckey record for wckey " "'%s' and user '%u', and we can't seem to find " "a default one either. Setting it anyway. 
" @@ -9670,6 +10492,8 @@ extern int job_checkpoint(checkpoint_msg_t *ckpt_ptr, uid_t uid, while ((step_ptr = (struct step_record *) list_next (step_iterator))) { char *image_dir = NULL; + if (step_ptr->state != JOB_RUNNING) + continue; if (ckpt_ptr->image_dir) { image_dir = xstrdup(ckpt_ptr->image_dir); } else { @@ -9775,11 +10599,11 @@ static int _checkpoint_job_record (struct job_record *job_ptr, char *image_dir) (void) unlink(new_file); else { /* file shuffle */ (void) unlink(old_file); - if(link(ckpt_file, old_file)) + if (link(ckpt_file, old_file)) debug4("unable to create link for %s -> %s: %m", ckpt_file, old_file); (void) unlink(ckpt_file); - if(link(new_file, ckpt_file)) + if (link(new_file, ckpt_file)) debug4("unable to create link for %s -> %s: %m", new_file, ckpt_file); (void) unlink(new_file); @@ -9843,7 +10667,7 @@ _copy_job_record_to_job_desc(struct job_record *job_ptr) job_desc = xmalloc(sizeof(job_desc_msg_t)); job_desc->account = xstrdup(job_ptr->account); - job_desc->acctg_freq = details->acctg_freq; + job_desc->acctg_freq = xstrdup(details->acctg_freq); job_desc->alloc_node = xstrdup(job_ptr->alloc_node); /* Since the allocating salloc or srun is not expected to exist * when this checkpointed job is restarted, do not save these: @@ -10183,6 +11007,4 @@ extern void build_cg_bitmap(struct job_record *job_ptr) job_ptr->node_bitmap_cg = bit_alloc(node_record_count); job_ptr->job_state &= (~JOB_COMPLETING); } - if (job_ptr->node_bitmap_cg == NULL) - fatal("bit_copy: memory allocation failure"); } diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 1694a896eae1df12ae79ff1d9cb7a697a09965f2..b6ef2f5612687974610dde88eb66f232b721e904 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -62,6 +62,7 @@ #include "src/common/macros.h" #include "src/common/node_select.h" #include "src/common/slurm_accounting_storage.h" +#include "src/common/slurm_acct_gather.h" #include "src/common/timers.h" #include "src/common/uid.h" #include "src/common/xassert.h" @@ -83,15 +84,22 @@ #define _DEBUG 0 #define MAX_RETRIES 10 +typedef struct epilog_arg { + char *epilog_slurmctld; + uint32_t job_id; + char **my_env; +} epilog_arg_t; + static char ** _build_env(struct job_record *job_ptr); static void _depend_list_del(void *dep_ptr); static void _feature_list_delete(void *x); static void _job_queue_append(List job_queue, struct job_record *job_ptr, - struct part_record *part_ptr); + struct part_record *part_ptr, uint32_t priority); static void _job_queue_rec_del(void *x); static bool _job_runnable_test1(struct job_record *job_ptr, bool clear_start); -static bool _job_runnable_test2(struct job_record *job_ptr); +static bool _job_runnable_test2(struct job_record *job_ptr, + bool check_min_time); static void * _run_epilog(void *arg); static void * _run_prolog(void *arg); static bool _scan_depend(List dependency_list, uint32_t job_id); @@ -117,11 +125,7 @@ static List _build_user_job_list(uint32_t user_id, char* job_name) struct job_record *job_ptr = NULL; job_queue = list_create(NULL); - if (job_queue == NULL) - fatal("list_create memory allocation failure"); job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { xassert (job_ptr->magic == JOB_MAGIC); if (job_ptr->user_id != user_id) @@ -137,13 +141,15 @@ static List _build_user_job_list(uint32_t user_id, char* job_name) } static void _job_queue_append(List job_queue, struct job_record *job_ptr, - struct 
part_record *part_ptr) + struct part_record *part_ptr, uint32_t prio) { job_queue_rec_t *job_queue_rec; job_queue_rec = xmalloc(sizeof(job_queue_rec_t)); + job_queue_rec->job_id = job_ptr->job_id; job_queue_rec->job_ptr = job_ptr; job_queue_rec->part_ptr = part_ptr; + job_queue_rec->priority = prio; list_append(job_queue, job_queue_rec); } @@ -197,15 +203,20 @@ static bool _job_runnable_test1(struct job_record *job_ptr, bool clear_start) return true; } -/* Job and partition tests for ability to run now */ -static bool _job_runnable_test2(struct job_record *job_ptr) +/* + * Job and partition tests for ability to run now + * IN job_ptr - job to test + * IN check_min_time - If set, test job's minimum time limit + * otherwise test maximum time limit + */ +static bool _job_runnable_test2(struct job_record *job_ptr, bool check_min_time) { int reason; - reason = job_limits_check(&job_ptr); - if ((reason != WAIT_NO_REASON) && - (reason != job_ptr->state_reason) && - (part_policy_job_runnable_state(job_ptr))) { + reason = job_limits_check(&job_ptr, check_min_time); + if ((reason != job_ptr->state_reason) && + ((reason != WAIT_NO_REASON) || + (!part_policy_job_runnable_state(job_ptr)))) { job_ptr->state_reason = reason; xfree(job_ptr->state_desc); } @@ -217,10 +228,11 @@ static bool _job_runnable_test2(struct job_record *job_ptr) /* * build_job_queue - build (non-priority ordered) list of pending jobs * IN clear_start - if set then clear the start_time for pending jobs + * IN backfill - true if running backfill scheduler, enforce min time limit * RET the job queue * NOTE: the caller must call list_destroy() on RET value to free memory */ -extern List build_job_queue(bool clear_start) +extern List build_job_queue(bool clear_start, bool backfill) { List job_queue; ListIterator job_iterator, part_iterator; @@ -229,33 +241,40 @@ extern List build_job_queue(bool clear_start) int reason; job_queue = list_create(_job_queue_rec_del); - if (job_queue == NULL) - fatal("list_create 
memory allocation failure"); job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create memory allocation failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (!_job_runnable_test1(job_ptr, clear_start)) continue; if (job_ptr->part_ptr_list) { - part_iterator = list_iterator_create(job_ptr-> - part_ptr_list); - if (part_iterator == NULL) - fatal("list_iterator_create malloc failure"); + int inx = -1; + part_iterator = list_iterator_create( + job_ptr->part_ptr_list); while ((part_ptr = (struct part_record *) - list_next(part_iterator))) { + list_next(part_iterator))) { job_ptr->part_ptr = part_ptr; - reason = job_limits_check(&job_ptr); + reason = job_limits_check(&job_ptr, backfill); if ((reason != WAIT_NO_REASON) && (reason != job_ptr->state_reason) && (!part_policy_job_runnable_state(job_ptr))){ job_ptr->state_reason = reason; xfree(job_ptr->state_desc); } + /* priority_array index matches part_ptr_list + * position: increment inx*/ + inx++; if (reason != WAIT_NO_REASON) continue; - _job_queue_append(job_queue, job_ptr, part_ptr); + if (job_ptr->priority_array) { + _job_queue_append(job_queue, job_ptr, + part_ptr, + job_ptr-> + priority_array[inx]); + } else { + _job_queue_append(job_queue, job_ptr, + part_ptr, + job_ptr->priority); + } } list_iterator_destroy(part_iterator); } else { @@ -272,10 +291,10 @@ extern List build_job_queue(bool clear_start) "part %s", job_ptr->job_id, job_ptr->partition); } - if (!_job_runnable_test2(job_ptr)) + if (!_job_runnable_test2(job_ptr, backfill)) continue; _job_queue_append(job_queue, job_ptr, - job_ptr->part_ptr); + job_ptr->part_ptr, job_ptr->priority); } } list_iterator_destroy(job_iterator); @@ -389,14 +408,13 @@ static bool _failed_partition(struct part_record *part_ptr, return false; } -static void do_diag_stats(struct timeval tv1, struct timeval tv2) +static void _do_diag_stats(long delta_t) { - if (slurm_diff_tv(&tv1,&tv2) > 
slurmctld_diag_stats.schedule_cycle_max) - slurmctld_diag_stats.schedule_cycle_max = slurm_diff_tv(&tv1, - &tv2); + if (delta_t > slurmctld_diag_stats.schedule_cycle_max) + slurmctld_diag_stats.schedule_cycle_max = delta_t; - slurmctld_diag_stats.schedule_cycle_sum += slurm_diff_tv(&tv1, &tv2); - slurmctld_diag_stats.schedule_cycle_last = slurm_diff_tv(&tv1, &tv2); + slurmctld_diag_stats.schedule_cycle_sum += delta_t; + slurmctld_diag_stats.schedule_cycle_last = delta_t; slurmctld_diag_stats.schedule_cycle_counter++; } @@ -446,13 +464,7 @@ extern bool replace_batch_job(slurm_msg_t * msg, void *fini_job) unlock_slurmctld(job_write_lock); goto send_reply; } - if (!avail_front_end()) { - unlock_slurmctld(job_write_lock); - goto send_reply; - } job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create memory allocation failure"); while (1) { if (job_ptr && part_iterator) goto next_part; @@ -461,10 +473,9 @@ extern bool replace_batch_job(slurm_msg_t * msg, void *fini_job) if (!job_ptr) break; - if (job_ptr == fini_job_ptr) - continue; - - if (job_ptr->priority == 0) + if ((job_ptr == fini_job_ptr) || + (job_ptr->priority == 0) || + !avail_front_end(job_ptr)) continue; if (!IS_JOB_PENDING(job_ptr)) { @@ -491,8 +502,6 @@ extern bool replace_batch_job(slurm_msg_t * msg, void *fini_job) if (job_ptr->part_ptr_list) { part_iterator = list_iterator_create(job_ptr-> part_ptr_list); - if (!part_iterator) - fatal("list_iterator_create: malloc failure"); next_part: part_ptr = (struct part_record *) list_next(part_iterator); if (part_ptr) { @@ -503,7 +512,7 @@ next_part: part_ptr = (struct part_record *) continue; } } - if (job_limits_check(&job_ptr) != WAIT_NO_REASON) + if (job_limits_check(&job_ptr, false) != WAIT_NO_REASON) continue; /* Test for valid account, QOS and required nodes on each pass */ @@ -514,7 +523,7 @@ next_part: part_ptr = (struct part_record *) if (job_ptr->part_ptr) assoc_rec.partition = job_ptr->part_ptr->name; 
assoc_rec.uid = job_ptr->user_id; - + if (!assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, accounting_enforce, (slurmdb_association_rec_t **) @@ -639,6 +648,30 @@ send_reply: return pending_jobs; } +/* Return true of all partitions have the same priority, otherwise false. */ +static bool _all_partition_priorities_same(void) +{ + struct part_record *part_ptr; + ListIterator iter; + bool part_priority_set = false; + uint32_t part_priority = 0; + bool result = true; + + iter = list_iterator_create(part_list); + while ((part_ptr = (struct part_record *) list_next(iter))) { + if (!part_priority_set) { + part_priority = part_ptr->priority; + part_priority_set = true; + } else if (part_priority != part_ptr->priority) { + result = false; + break; + } + } + list_iterator_destroy(iter); + + return result; +} + /* * schedule - attempt to schedule all pending jobs * pending jobs for each partition will be scheduled in priority @@ -690,8 +723,11 @@ extern int schedule(uint32_t job_limit) backfill_sched = true; #endif if ((strcmp(sched_type, "sched/builtin") == 0) && - (strcmp(prio_type, "priority/basic") == 0)) + (strcmp(prio_type, "priority/basic") == 0) && + _all_partition_priorities_same()) fifo_sched = true; + else + fifo_sched = false; /* Disable avoiding of fragmentation with sched/wiki */ if ((strcmp(sched_type, "sched/wiki") == 0) || (strcmp(sched_type, "sched/wiki2") == 0)) @@ -723,7 +759,7 @@ extern int schedule(uint32_t job_limit) lock_slurmctld(job_write_lock); START_TIMER; - if (!avail_front_end()) { + if (!avail_front_end(NULL)) { ListIterator job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { @@ -781,10 +817,8 @@ extern int schedule(uint32_t job_limit) if (fifo_sched) { slurmctld_diag_stats.schedule_queue_len = list_count(job_list); job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create memory allocation failure"); } else { - job_queue = 
build_job_queue(false); + job_queue = build_job_queue(false, false); slurmctld_diag_stats.schedule_queue_len = list_count(job_queue); } while (1) { @@ -795,18 +829,20 @@ extern int schedule(uint32_t job_limit) job_ptr = (struct job_record *) list_next(job_iterator); if (!job_ptr) break; + if (!avail_front_end(job_ptr)) { + job_ptr->state_reason = WAIT_FRONT_END; + continue; + } if (!_job_runnable_test1(job_ptr, false)) continue; if (job_ptr->part_ptr_list) { part_iterator = list_iterator_create( job_ptr->part_ptr_list); - if (!part_iterator) - fatal("list_iterator_create: malloc failure"); next_part: part_ptr = (struct part_record *) list_next(part_iterator); if (part_ptr) { job_ptr->part_ptr = part_ptr; - if (job_limits_check(&job_ptr) != + if (job_limits_check(&job_ptr, false) != WAIT_NO_REASON) continue; } else { @@ -815,7 +851,7 @@ next_part: part_ptr = (struct part_record *) continue; } } else { - if (!_job_runnable_test2(job_ptr)) + if (!_job_runnable_test2(job_ptr, false)) continue; } } else { @@ -826,6 +862,10 @@ next_part: part_ptr = (struct part_record *) job_ptr = job_queue_rec->job_ptr; part_ptr = job_queue_rec->part_ptr; xfree(job_queue_rec); + if (!avail_front_end(job_ptr)) { + job_ptr->state_reason = WAIT_FRONT_END; + continue; + } if (!IS_JOB_PENDING(job_ptr)) continue; /* started in other partition */ job_ptr->part_ptr = part_ptr; @@ -842,6 +882,20 @@ next_part: part_ptr = (struct part_record *) slurmctld_diag_stats.schedule_cycle_depth++; + if ((job_ptr->resv_name == NULL) && + _failed_partition(job_ptr->part_ptr, failed_parts, + failed_part_cnt)) { + if (job_ptr->state_reason == WAIT_NO_REASON) { + job_ptr->state_reason = WAIT_PRIORITY; + xfree(job_ptr->state_desc); + } + debug3("sched: JobId=%u. State=PENDING. " + "Reason=Priority. Priority=%u. 
Partition=%s.", + job_ptr->job_id, job_ptr->priority, + job_ptr->partition); + continue; + } + /* Test for valid account, QOS and required nodes on each pass */ if (job_ptr->state_reason == FAIL_ACCOUNT) { slurmdb_association_rec_t assoc_rec; @@ -850,7 +904,7 @@ next_part: part_ptr = (struct part_record *) if (job_ptr->part_ptr) assoc_rec.partition = job_ptr->part_ptr->name; assoc_rec.uid = job_ptr->user_id; - + if (!assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, accounting_enforce, (slurmdb_association_rec_t **) @@ -890,22 +944,6 @@ next_part: part_ptr = (struct part_record *) continue; } - if ((job_ptr->resv_name == NULL) && - _failed_partition(job_ptr->part_ptr, failed_parts, - failed_part_cnt)) { - if (job_ptr->state_reason == WAIT_NO_REASON) { - job_ptr->state_reason = WAIT_PRIORITY; - xfree(job_ptr->state_desc); - } - debug3("sched: JobId=%u. State=%s. Reason=%s. " - "Priority=%u. Partition=%s.", - job_ptr->job_id, - job_state_string(job_ptr->job_state), - job_reason_string(job_ptr->state_reason), - job_ptr->priority, - job_ptr->partition); - continue; - } i = bit_overlap(avail_node_bitmap, job_ptr->part_ptr->node_bitmap); if ((job_ptr->details && @@ -1073,7 +1111,7 @@ next_part: part_ptr = (struct part_record *) unlock_slurmctld(job_write_lock); END_TIMER2("schedule"); - do_diag_stats(tv1, tv2); + _do_diag_stats(DELTA_TIMER); return job_cnt; } @@ -1096,6 +1134,7 @@ extern int sort_job_queue2(void *x, void *y) bool has_resv1, has_resv2; static time_t config_update = 0; static bool preemption_enabled = true; + uint32_t p1, p2; /* The following block of code is designed to minimize run time in * typical configurations for this frequently executed function. 
*/ @@ -1117,9 +1156,23 @@ extern int sort_job_queue2(void *x, void *y) if (!has_resv1 && has_resv2) return 1; - if (job_rec1->job_ptr->priority < job_rec2->job_ptr->priority) + if (job_rec1->job_ptr->part_ptr_list && + job_rec1->job_ptr->priority_array) + p1 = job_rec1->priority; + else + p1 = job_rec1->job_ptr->priority; + + + if (job_rec2->job_ptr->part_ptr_list && + job_rec2->job_ptr->priority_array) + p2 = job_rec2->priority; + else + p2 = job_rec2->job_ptr->priority; + + + if (p1 < p2) return 1; - if (job_rec1->job_ptr->priority > job_rec2->job_ptr->priority) + if (p1 > p2) return -1; return 0; } @@ -1134,6 +1187,8 @@ extern batch_job_launch_msg_t *build_launch_job_msg(struct job_record *job_ptr) xmalloc(sizeof(batch_job_launch_msg_t)); launch_msg_ptr->job_id = job_ptr->job_id; launch_msg_ptr->step_id = NO_VAL; + launch_msg_ptr->array_job_id = job_ptr->array_job_id; + launch_msg_ptr->array_task_id = job_ptr->array_task_id; launch_msg_ptr->uid = job_ptr->user_id; launch_msg_ptr->gid = job_ptr->group_id; launch_msg_ptr->ntasks = job_ptr->details->num_tasks; @@ -1141,7 +1196,7 @@ extern batch_job_launch_msg_t *build_launch_job_msg(struct job_record *job_ptr) launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); launch_msg_ptr->overcommit = job_ptr->details->overcommit; launch_msg_ptr->open_mode = job_ptr->details->open_mode; - launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq; + launch_msg_ptr->acctg_freq = xstrdup(job_ptr->details->acctg_freq); launch_msg_ptr->cpus_per_task = job_ptr->details->cpus_per_task; launch_msg_ptr->pn_min_memory = job_ptr->details->pn_min_memory; launch_msg_ptr->restart_cnt = job_ptr->restart_cnt; @@ -1274,12 +1329,37 @@ static void _depend_list_del(void *dep_ptr) xfree(dep_ptr); } +/* + * Copy a job's dependency list + * IN depend_list_src - a job's depend_lst + * RET copy of depend_list_src, must bee freed by caller + */ +extern List depended_list_copy(List depend_list_src) +{ + struct depend_spec *dep_src, *dep_dest; + 
ListIterator iter; + List depend_list_dest = NULL; + + if (!depend_list_src) + return depend_list_dest; + + depend_list_dest = list_create(_depend_list_del); + iter = list_iterator_create(depend_list_src); + while ((dep_src = (struct depend_spec *) list_next(iter))) { + dep_dest = xmalloc(sizeof(struct depend_spec)); + memcpy(dep_dest, dep_src, sizeof(struct depend_spec)); + list_append(depend_list_dest, dep_dest); + } + list_iterator_destroy(iter); + return depend_list_dest; +} + /* Print a job's dependency information based upon job_ptr->depend_list */ extern void print_job_dependency(struct job_record *job_ptr) { ListIterator depend_iter; struct depend_spec *dep_ptr; - char *dep_str; + char *array_task_id, *dep_str; info("Dependency information for job %u", job_ptr->job_id); if ((job_ptr->details == NULL) || @@ -1287,8 +1367,6 @@ extern void print_job_dependency(struct job_record *job_ptr) return; depend_iter = list_iterator_create(job_ptr->details->depend_list); - if (!depend_iter) - fatal("list_iterator_create memory allocation failure"); while ((dep_ptr = list_next(depend_iter))) { if (dep_ptr->depend_type == SLURM_DEPEND_SINGLETON) { info(" singleton"); @@ -1306,7 +1384,11 @@ extern void print_job_dependency(struct job_record *job_ptr) dep_str = "expand"; else dep_str = "unknown"; - info(" %s:%u", dep_str, dep_ptr->job_id); + if (dep_ptr->array_task_id == (uint16_t) INFINITE) + array_task_id = "_*"; + else + array_task_id = ""; + info(" %s:%u%s", dep_str, dep_ptr->job_id, array_task_id); } list_iterator_destroy(depend_iter); } @@ -1325,7 +1407,7 @@ extern int test_job_dependency(struct job_record *job_ptr) List job_queue = NULL; bool run_now; int count = 0; - struct job_record *qjob_ptr; + struct job_record *qjob_ptr, *djob_ptr; if ((job_ptr->details == NULL) || (job_ptr->details->depend_list == NULL)) @@ -1333,11 +1415,15 @@ extern int test_job_dependency(struct job_record *job_ptr) count = list_count(job_ptr->details->depend_list); depend_iter = 
list_iterator_create(job_ptr->details->depend_list); - if (!depend_iter) - fatal("list_iterator_create memory allocation failure"); while ((dep_ptr = list_next(depend_iter))) { bool clear_dep = false; count--; + if (dep_ptr->array_task_id == (uint16_t) INFINITE) { + /* Advance to latest element of this job array */ + dep_ptr->job_ptr = find_job_array_rec(dep_ptr->job_id, + dep_ptr->array_task_id); + } + djob_ptr = dep_ptr->job_ptr; if ((dep_ptr->depend_type == SLURM_DEPEND_SINGLETON) && job_ptr->name) { /* get user jobs with the same user and name */ @@ -1345,8 +1431,6 @@ extern int test_job_dependency(struct job_record *job_ptr) job_ptr->name); run_now = true; job_iterator = list_iterator_create(job_queue); - if (job_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((qjob_ptr = (struct job_record *) list_next(job_iterator))) { /* already running/suspended job or previously @@ -1366,8 +1450,10 @@ extern int test_job_dependency(struct job_record *job_ptr) list_delete_item(depend_iter); else depends = true; - } else if ((dep_ptr->job_ptr->magic != JOB_MAGIC) || - (dep_ptr->job_ptr->job_id != dep_ptr->job_id)) { + } else if ((djob_ptr == NULL) || + (djob_ptr->magic != JOB_MAGIC) || + ((djob_ptr->job_id != dep_ptr->job_id) && + (djob_ptr->array_job_id != dep_ptr->job_id))){ /* job is gone, dependency lifted */ clear_dep = true; } else if (dep_ptr->depend_type == SLURM_DEPEND_AFTER) { @@ -1419,8 +1505,18 @@ extern int test_job_dependency(struct job_record *job_ptr) } else failure = true; if (clear_dep) { - char *rmv_dep = xstrdup_printf( - ":%u", dep_ptr->job_ptr->job_id); + char *rmv_dep; + if (dep_ptr->array_task_id == (uint16_t) INFINITE) { + rmv_dep = xstrdup_printf(":%u_*", + dep_ptr->job_id); + } else if (dep_ptr->array_task_id != (uint16_t)NO_VAL) { + rmv_dep = xstrdup_printf(":%u_%u", + dep_ptr->job_id, + dep_ptr->array_task_id); + } else { + rmv_dep = xstrdup_printf(":%u", + dep_ptr->job_id); + } 
xstrsubstitute(job_ptr->details->dependency, rmv_dep, ""); xfree(rmv_dep); @@ -1451,7 +1547,8 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) int rc = SLURM_SUCCESS; uint16_t depend_type = 0; uint32_t job_id = 0; - char *tok = new_depend, *sep_ptr, *sep_ptr2; + uint16_t array_task_id; + char *tok = new_depend, *sep_ptr, *sep_ptr2 = NULL; List new_depend_list = NULL; struct depend_spec *dep_ptr; struct job_record *dep_job_ptr; @@ -1475,8 +1572,6 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) } new_depend_list = list_create(_depend_list_del); - if (new_depend_list == NULL) - fatal("list_create: malloc failure"); /* validate new dependency string */ while (rc == SLURM_SUCCESS) { @@ -1488,10 +1583,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) dep_ptr->depend_type = depend_type; /* dep_ptr->job_id = 0; set by xmalloc */ /* dep_ptr->job_ptr = NULL; set by xmalloc */ - if (!list_append(new_depend_list, dep_ptr)) { - fatal("list_append memory allocation " - "failure for singleton"); - } + (void) list_append(new_depend_list, dep_ptr); if ( *(tok + 9 ) == ',' ) { tok += 10; continue; @@ -1503,24 +1595,54 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) sep_ptr = strchr(tok, ':'); if ((sep_ptr == NULL) && (job_id == 0)) { job_id = strtol(tok, &sep_ptr, 10); + if ((sep_ptr != NULL) && (sep_ptr[0] == '_')) { + if (sep_ptr[1] == '*') { + array_task_id = (uint16_t) INFINITE; + sep_ptr++; + } else { + array_task_id = strtol(sep_ptr+1, + &sep_ptr, 10); + } + } else + array_task_id = (uint16_t) NO_VAL; if ((sep_ptr == NULL) || (sep_ptr[0] != '\0') || (job_id == 0) || (job_id == job_ptr->job_id)) { rc = ESLURM_DEPENDENCY; break; } /* old format, just a single job_id */ - dep_job_ptr = find_job_record(job_id); + if (array_task_id == (uint16_t) NO_VAL) { + dep_job_ptr = find_job_record(job_id); + if (dep_job_ptr && + (dep_job_ptr->array_job_id == 
job_id) && + (dep_job_ptr->array_task_id != + (uint16_t) NO_VAL)) { + array_task_id = (uint16_t) INFINITE; + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u_*", job_id); + } else { + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u", job_id); + } + } else { + dep_job_ptr = find_job_array_rec(job_id, + array_task_id); + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u_%u", job_id, + array_task_id); + } if (!dep_job_ptr) /* assume already done */ break; - snprintf(dep_buf, sizeof(dep_buf), - "afterany:%u", job_id); new_depend = dep_buf; dep_ptr = xmalloc(sizeof(struct depend_spec)); + dep_ptr->array_task_id = array_task_id; dep_ptr->depend_type = SLURM_DEPEND_AFTER_ANY; - dep_ptr->job_id = job_id; + if (array_task_id == (uint16_t) NO_VAL) + dep_ptr->job_id = dep_job_ptr->job_id; + else + dep_ptr->job_id = dep_job_ptr->array_job_id; dep_ptr->job_ptr = dep_job_ptr; - if (!list_append(new_depend_list, dep_ptr)) - fatal("list_append memory allocation failure"); + (void) list_append(new_depend_list, dep_ptr); break; } else if (sep_ptr == NULL) { rc = ESLURM_DEPENDENCY; @@ -1548,6 +1670,16 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) sep_ptr++; /* skip over ":" */ while (rc == SLURM_SUCCESS) { job_id = strtol(sep_ptr, &sep_ptr2, 10); + if ((sep_ptr2 != NULL) && (sep_ptr2[0] == '_')) { + if (sep_ptr2[1] == '*') { + array_task_id = (uint16_t) INFINITE; + sep_ptr++; + } else { + array_task_id = strtol(sep_ptr2+1, + &sep_ptr2, 10); + } + } else + array_task_id = (uint16_t) NO_VAL; if ((sep_ptr2 == NULL) || (job_id == 0) || (job_id == job_ptr->job_id) || ((sep_ptr2[0] != '\0') && (sep_ptr2[0] != ',') && @@ -1555,7 +1687,17 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) rc = ESLURM_DEPENDENCY; break; } - dep_job_ptr = find_job_record(job_id); + if (array_task_id == (uint16_t) NO_VAL) { + dep_job_ptr = find_job_record(job_id); + if (dep_job_ptr && + (dep_job_ptr->array_job_id == job_id) && + 
(dep_job_ptr->array_task_id != + (uint16_t) NO_VAL)) { + array_task_id = (uint16_t) INFINITE; + } + } else + dep_job_ptr = find_job_array_rec(job_id, + array_task_id); if ((depend_type == SLURM_DEPEND_EXPAND) && ((expand_cnt++ > 0) || (dep_job_ptr == NULL) || (!IS_JOB_RUNNING(dep_job_ptr)) || @@ -1581,19 +1723,17 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) } if (dep_job_ptr) { /* job still active */ dep_ptr = xmalloc(sizeof(struct depend_spec)); + dep_ptr->array_task_id = array_task_id; dep_ptr->depend_type = depend_type; - dep_ptr->job_id = job_id; + dep_ptr->job_id = dep_job_ptr->job_id; dep_ptr->job_ptr = dep_job_ptr; - if (!list_append(new_depend_list, dep_ptr)) { - fatal("list_append memory allocation " - "failure"); - } + (void) list_append(new_depend_list, dep_ptr); } if (sep_ptr2[0] != ':') break; sep_ptr = sep_ptr2 + 1; /* skip over ":" */ } - if (sep_ptr2[0] == ',') + if (sep_ptr2 && (sep_ptr2[0] == ',')) tok = sep_ptr2 + 1; else break; @@ -1661,8 +1801,6 @@ static bool _scan_depend(List dependency_list, uint32_t job_id) xassert(job_id); iter = list_iterator_create(dependency_list); - if (iter == NULL) - fatal("list_iterator_create malloc failure"); while (!rc && (dep_ptr = (struct depend_spec *) list_next(iter))) { if (dep_ptr->job_id == 0) /* Singleton */ continue; @@ -1671,7 +1809,8 @@ static bool _scan_depend(List dependency_list, uint32_t job_id) else if ((dep_ptr->job_id != dep_ptr->job_ptr->job_id) || (dep_ptr->job_ptr->magic != JOB_MAGIC)) continue; /* purged job, ptr not yet cleared */ - else if (dep_ptr->job_ptr->details && + else if (!IS_JOB_FINISHED(dep_ptr->job_ptr) && + dep_ptr->job_ptr->details && dep_ptr->job_ptr->details->depend_list) { rc = _scan_depend(dep_ptr->job_ptr->details-> depend_list, job_id); @@ -1706,14 +1845,12 @@ static void _delayed_job_start_time(struct job_record *job_ptr) return; part_node_cnt = job_ptr->part_ptr->total_nodes; part_cpu_cnt = job_ptr->part_ptr->total_cpus; - if 
(part_node_cnt > part_cpu_cnt) - part_cpus_per_node = part_node_cnt / part_cpu_cnt; + if (part_cpu_cnt > part_node_cnt) + part_cpus_per_node = part_cpu_cnt / part_node_cnt; else part_cpus_per_node = 1; job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create memory allocation failure"); while ((job_q_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_PENDING(job_q_ptr) || !job_q_ptr->details || (job_q_ptr->part_ptr != job_ptr->part_ptr) || @@ -1730,7 +1867,7 @@ static void _delayed_job_start_time(struct job_record *job_ptr) job_size_cpus = job_q_ptr->details->min_nodes; job_size_cpus = MAX(job_size_cpus, (job_size_nodes * part_cpus_per_node)); - if (job_ptr->time_limit == NO_VAL) + if (job_q_ptr->time_limit == NO_VAL) job_time = job_q_ptr->part_ptr->max_time; else job_time = job_q_ptr->time_limit; @@ -1791,8 +1928,6 @@ extern int job_start_data(job_desc_msg_t *job_desc_msg, if (job_ptr->details->exc_node_bitmap) { bitstr_t *exc_node_mask = NULL; exc_node_mask = bit_copy(job_ptr->details->exc_node_bitmap); - if (exc_node_mask == NULL) - fatal("bit_copy malloc failure"); bit_not(exc_node_mask); bit_and(avail_bitmap, exc_node_mask); FREE_NULL_BITMAP(exc_node_mask); @@ -1874,8 +2009,6 @@ extern int job_start_data(job_desc_msg_t *job_desc_msg, uint32_t *preemptee_jid; struct job_record *tmp_job_ptr; resp_data->preemptee_job_id=list_create(_pre_list_del); - if (resp_data->preemptee_job_id == NULL) - fatal("list_create: malloc failure"); preemptee_iterator = list_iterator_create( preemptee_job_list); while ((tmp_job_ptr = (struct job_record *) @@ -1911,6 +2044,7 @@ extern int epilog_slurmctld(struct job_record *job_ptr) int rc; pthread_t thread_id_epilog; pthread_attr_t thread_attr_epilog; + epilog_arg_t *epilog_arg; if ((slurmctld_conf.epilog_slurmctld == NULL) || (slurmctld_conf.epilog_slurmctld[0] == '\0')) @@ -1921,13 +2055,18 @@ extern int epilog_slurmctld(struct job_record *job_ptr) return errno; } + 
epilog_arg = xmalloc(sizeof(epilog_arg_t)); + epilog_arg->job_id = job_ptr->job_id; + epilog_arg->epilog_slurmctld = xstrdup(slurmctld_conf.epilog_slurmctld); + epilog_arg->my_env = _build_env(job_ptr); + slurm_attr_init(&thread_attr_epilog); pthread_attr_setdetachstate(&thread_attr_epilog, PTHREAD_CREATE_DETACHED); while (1) { rc = pthread_create(&thread_id_epilog, &thread_attr_epilog, - _run_epilog, (void *) job_ptr); + _run_epilog, (void *) epilog_arg); if (rc == 0) { slurm_attr_destroy(&thread_attr_epilog); return SLURM_SUCCESS; @@ -2005,21 +2144,13 @@ static char **_build_env(struct job_record *job_ptr) static void *_run_epilog(void *arg) { - struct job_record *job_ptr = (struct job_record *) arg; - uint32_t job_id; + epilog_arg_t *epilog_arg = (epilog_arg_t *) arg; pid_t cpid; int i, status, wait_rc; - char *argv[2], **my_env; - /* Locks: Read config, job */ - slurmctld_lock_t config_read_lock = { - READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; + char *argv[2]; - lock_slurmctld(config_read_lock); - argv[0] = xstrdup(slurmctld_conf.epilog_slurmctld); + argv[0] = epilog_arg->epilog_slurmctld; argv[1] = NULL; - my_env = _build_env(job_ptr); - job_id = job_ptr->job_id; - unlock_slurmctld(config_read_lock); if ((cpid = fork()) < 0) { error("epilog_slurmctld fork error: %m"); @@ -2031,7 +2162,7 @@ static void *_run_epilog(void *arg) #else setpgrp(); #endif - execve(argv[0], argv, my_env); + execve(argv[0], argv, epilog_arg->my_env); exit(127); } @@ -2049,14 +2180,18 @@ static void *_run_epilog(void *arg) } if (status != 0) { error("epilog_slurmctld job %u epilog exit status %u:%u", - job_id, WEXITSTATUS(status), WTERMSIG(status)); - } else - debug2("epilog_slurmctld job %u epilog completed", job_id); + epilog_arg->job_id, WEXITSTATUS(status), + WTERMSIG(status)); + } else { + debug2("epilog_slurmctld job %u epilog completed", + epilog_arg->job_id); + } - fini: xfree(argv[0]); - for (i=0; my_env[i]; i++) - xfree(my_env[i]); - xfree(my_env); + fini: 
xfree(epilog_arg->epilog_slurmctld); + for (i=0; epilog_arg->my_env[i]; i++) + xfree(epilog_arg->my_env[i]); + xfree(epilog_arg->my_env); + xfree(epilog_arg); return NULL; } @@ -2222,6 +2357,32 @@ static void *_run_prolog(void *arg) return NULL; } +/* + * Copy a job's feature list + * IN feature_list_src - a job's depend_lst + * RET copy of depend_list_src, must be freed by caller + */ +extern List feature_list_copy(List feature_list_src) +{ + struct feature_record *feat_src, *feat_dest; + ListIterator iter; + List feature_list_dest = NULL; + + if (!feature_list_src) + return feature_list_dest; + + feature_list_dest = list_create(_feature_list_delete); + iter = list_iterator_create(feature_list_src); + while ((feat_src = (struct feature_record *) list_next(iter))) { + feat_dest = xmalloc(sizeof(struct feature_record)); + memcpy(feat_dest, feat_src, sizeof(struct feature_record)); + feat_dest->name = xstrdup(feat_src->name); + list_append(feature_list_dest, feat_dest); + } + list_iterator_destroy(iter); + return feature_list_dest; +} + /* * build_feature_list - Translate a job's feature string into a feature_list * IN details->features @@ -2404,8 +2565,6 @@ static int _valid_node_feature(char *feature) /* Clear these nodes from the feature_list record, * then restore as needed */ feature_iter = list_iterator_create(feature_list); - if (feature_iter == NULL) - fatal("list_inerator_create malloc failure"); while ((feature_ptr = (struct features_record *) list_next(feature_iter))) { if (strcmp(feature_ptr->name, feature)) @@ -2432,8 +2591,6 @@ extern void rebuild_job_part_list(struct job_record *job_ptr) job_ptr->partition = xstrdup(job_ptr->part_ptr->name); part_iterator = list_iterator_create(job_ptr->part_ptr_list); - if (part_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { if (part_ptr == job_ptr->part_ptr) continue; diff --git a/src/slurmctld/job_scheduler.h 
b/src/slurmctld/job_scheduler.h index 6d2396a8bdb92afbdcfaf0f0a8a68cd0e573df09..37d654a6a362c3d73c88c6f1c08bad8ef129ec71 100644 --- a/src/slurmctld/job_scheduler.h +++ b/src/slurmctld/job_scheduler.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -45,8 +45,10 @@ #include "src/slurmctld/slurmctld.h" typedef struct job_queue_rec { + uint32_t job_id; struct job_record *job_ptr; struct part_record *part_ptr; + uint32_t priority; } job_queue_rec_t; /* @@ -60,10 +62,11 @@ extern int build_feature_list(struct job_record *job_ptr); /* * build_job_queue - build (non-priority ordered) list of pending jobs * IN clear_start - if set then clear the start_time for pending jobs + * IN backfill - true if running backfill scheduler, enforce min time limit * RET the job queue * NOTE: the caller must call list_destroy() on RET value to free memory */ -extern List build_job_queue(bool clear_start); +extern List build_job_queue(bool clear_start, bool backfill); /* Given a scheduled job, return a pointer to it batch_job_launch_msg_t data */ extern batch_job_launch_msg_t *build_launch_job_msg( diff --git a/src/slurmctld/job_submit.c b/src/slurmctld/job_submit.c index 4a6d45251478d76b04fd262726d5d8dc61aab8d3..f2963f9d757bfdba5aee94fe22eb31e44ae5c9aa 100644 --- a/src/slurmctld/job_submit.c +++ b/src/slurmctld/job_submit.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -231,13 +231,18 @@ extern int job_submit_plugin_reconfig(void) extern int job_submit_plugin_submit(struct job_descriptor *job_desc, uint32_t submit_uid) { + DEF_TIMERS; int i, rc; + START_TIMER; rc = job_submit_plugin_init(); slurm_mutex_lock(&g_context_lock); for (i=0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) rc = (*(ops[i].submit))(job_desc, submit_uid); slurm_mutex_unlock(&g_context_lock); + END_TIMER; + debug("job_submit_plugin_submit: %s", TIME_STR); + return rc; } @@ -250,12 +255,17 @@ extern int job_submit_plugin_modify(struct job_descriptor *job_desc, struct job_record *job_ptr, uint32_t submit_uid) { + DEF_TIMERS; int i, rc; + START_TIMER; rc = job_submit_plugin_init(); slurm_mutex_lock(&g_context_lock); for (i=0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) rc = (*(ops[i].modify))(job_desc, job_ptr, submit_uid); slurm_mutex_unlock(&g_context_lock); + END_TIMER; + debug("job_submit_plugin_modify: %s", TIME_STR); + return rc; } diff --git a/src/slurmctld/job_submit.h b/src/slurmctld/job_submit.h index c05c606e83e146a9388bd5924976af97e55faaa0..ef1bc172759e776d335780cb094554ae576ac682 100644 --- a/src/slurmctld/job_submit.h +++ b/src/slurmctld/job_submit.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/licenses.c b/src/slurmctld/licenses.c index 6223e0fb66bbb2fd145ff2a34b77c33c78cdbb1d..4b7be506a048da404f47f464390f985568b8a372 100644 --- a/src/slurmctld/licenses.c +++ b/src/slurmctld/licenses.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -66,8 +66,6 @@ static inline void _licenses_print(char *header, List licenses, int job_id) return; iter = list_iterator_create(licenses); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { if (job_id == 0) { info("licenses: %s=%s total=%u used=%u", @@ -176,8 +174,6 @@ static char * _build_license_string(List license_list) return licenses; iter = list_iterator_create(license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { if (licenses) sep = ","; @@ -202,8 +198,6 @@ extern char *get_licenses_used(void) slurm_mutex_lock(&license_mutex); if (license_list) { iter = list_iterator_create(license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { if (licenses_used) xstrcat(licenses_used, ","); @@ -258,8 +252,6 @@ extern int license_update(char *licenses) } iter = list_iterator_create(license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { match = list_find_first(new_list, _license_find_rec, license_entry->name); @@ -314,22 +306,22 @@ extern List license_validate(char *licenses, bool *valid) slurm_mutex_lock(&license_mutex); _licenses_print("request_license", job_license_list, 0); iter = list_iterator_create(job_license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { if (license_list) { match = list_find_first(license_list, - _license_find_rec, license_entry->name); + _license_find_rec, + 
license_entry->name); } else match = NULL; if (!match) { - debug("could not find license %s for job", + debug("License name requested (%s) does not exist", license_entry->name); *valid = false; break; } else if (license_entry->total > match->total) { - debug("job wants more %s licenses than configured", - match->name); + debug("Licenses count requested higher than configured " + "(%s: %u > %u)", + match->name, license_entry->total, match->total); *valid = false; break; } @@ -377,8 +369,6 @@ extern int license_job_test(struct job_record *job_ptr, time_t when) slurm_mutex_lock(&license_mutex); iter = list_iterator_create(job_ptr->license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { match = list_find_first(license_list, _license_find_rec, license_entry->name); @@ -412,6 +402,32 @@ extern int license_job_test(struct job_record *job_ptr, time_t when) return rc; } +/* + * license_job_copy - create a copy of a job's license list + * IN license_list_src - job license list to be copied + * RET a copy of the original job license list + */ +extern List license_job_copy(List license_list_src) +{ + licenses_t *license_entry_src, *license_entry_dest; + ListIterator iter; + List license_list_dest = NULL; + + if (!license_list_src) + return license_list_dest; + + license_list_dest = list_create(license_free_rec); + iter = list_iterator_create(license_list_src); + while ((license_entry_src = (licenses_t *) list_next(iter))) { + license_entry_dest = xmalloc(sizeof(licenses_t)); + license_entry_dest->name = xstrdup(license_entry_src->name); + license_entry_dest->total = license_entry_src->total; + list_push(license_list_dest, license_entry_dest); + } + list_iterator_destroy(iter); + return license_list_dest; +} + /* * license_job_get - Get the licenses required for a job * IN job_ptr - job identification @@ -428,8 +444,6 @@ extern int license_job_get(struct job_record *job_ptr) 
slurm_mutex_lock(&license_mutex); iter = list_iterator_create(job_ptr->license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { match = list_find_first(license_list, _license_find_rec, license_entry->name); @@ -464,8 +478,6 @@ extern int license_job_return(struct job_record *job_ptr) slurm_mutex_lock(&license_mutex); iter = list_iterator_create(job_ptr->license_list); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { match = list_find_first(license_list, _license_find_rec, license_entry->name); @@ -481,7 +493,7 @@ extern int license_job_return(struct job_record *job_ptr) license_entry->used = 0; } else { /* This can happen after a reconfiguration */ - error("job returning unknown license %s", + error("job returning unknown license name %s", license_entry->name); } } @@ -505,8 +517,6 @@ extern bool license_list_overlap(List list_1, List list_2) return false; iter = list_iterator_create(list_1); - if (iter == NULL) - fatal("malloc failure from list_iterator_create"); while ((license_entry = (licenses_t *) list_next(iter))) { if (list_find_first(list_2, _license_find_rec, license_entry->name)) { diff --git a/src/slurmctld/licenses.h b/src/slurmctld/licenses.h index 1ecdc0c028a71a4418cd6e25b28df611bc51c060..7f534a36ee3686fe601aba6ad584240344d973c9 100644 --- a/src/slurmctld/licenses.h +++ b/src/slurmctld/licenses.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -66,6 +66,13 @@ extern void license_free(void); /* Free a license_t record (for use by list_destroy) */ extern void license_free_rec(void *x); +/* + * license_job_copy - create a copy of a job's license list + * IN license_list_src - job license list to be copied + * RET a copy of the original job license list + */ +extern List license_job_copy(List license_list_src); + /* * license_job_get - Get the licenses required for a job * IN job_ptr - job identification diff --git a/src/slurmctld/locks.c b/src/slurmctld/locks.c index a54e356e2c50cab1151236ad396b3c20f8f9fa93..6057473fc205d8f08899e20f49ffe64fccc32eed 100644 --- a/src/slurmctld/locks.c +++ b/src/slurmctld/locks.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -185,16 +185,31 @@ extern void unlock_slurmctld(slurmctld_lock_t lock_levels) _wr_wrunlock(CONFIG_LOCK); } -/* _wr_rdlock - Issue a read lock on the specified data type */ +/* _wr_rdlock - Issue a read lock on the specified data type + * Wait until there are no write locks AND + * no pending write locks (write_wait_lock == 0) + * + * NOTE: Always favoring write locks can result in starvation for + * read locks. To prevent this, read locks were permitted to be satisified + * after 10 consecutive write locks. This prevented starvation, but + * deadlock has been observed with some values for the count. 
*/ static bool _wr_rdlock(lock_datatype_t datatype, bool wait_lock) { bool success = true; slurm_mutex_lock(&locks_mutex); while (1) { - if ((slurmctld_locks.entity[write_wait_lock(datatype)] == 0) && - (slurmctld_locks.entity[write_lock(datatype)] == 0)) { +#if 1 + if ((slurmctld_locks.entity[write_lock(datatype)] == 0) && + (slurmctld_locks.entity[write_wait_lock(datatype)] == 0)) { +#else + /* SEE NOTE ABOVE */ + if ((slurmctld_locks.entity[write_lock(datatype)] == 0) && + ((slurmctld_locks.entity[write_wait_lock(datatype)] == 0) || + (slurmctld_locks.entity[write_cnt_lock(datatype)] > 10))) { +#endif slurmctld_locks.entity[read_lock(datatype)]++; + slurmctld_locks.entity[write_cnt_lock(datatype)] = 0; break; } else if (!wait_lock) { success = false; @@ -231,6 +246,7 @@ static bool _wr_wrlock(lock_datatype_t datatype, bool wait_lock) (slurmctld_locks.entity[write_lock(datatype)] == 0)) { slurmctld_locks.entity[write_lock(datatype)]++; slurmctld_locks.entity[write_wait_lock(datatype)]--; + slurmctld_locks.entity[write_cnt_lock(datatype)]++; break; } else if (!wait_lock) { slurmctld_locks.entity[write_wait_lock(datatype)]--; diff --git a/src/slurmctld/locks.h b/src/slurmctld/locks.h index 0d8f38eab9979c510449e4156fa596e6e811192e..93ce6683a039b8511c2776585b8b08911a8b1f30 100644 --- a/src/slurmctld/locks.h +++ b/src/slurmctld/locks.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -103,10 +103,13 @@ typedef struct { } slurmctld_lock_t; /* Interval lock structure - * we actually use three semaphores for each data type, see macros below - * (lock_datatype_t * 3 + 0) = read_lock - * (lock_datatype_t * 3 + 1) = write_lock - * (lock_datatype_t * 3 + 2) = write_wait_lock + * we actually use the count for each data type, see macros below + * (lock_datatype_t * 4 + 0) = read_lock read locks in use + * (lock_datatype_t * 4 + 1) = write_lock write locks in use + * (lock_datatype_t * 4 + 2) = write_wait_lock write locks pending + * (lock_datatype_t * 4 + 3) = write_cnt_lock write lock count + * NOTE: If changing the number of functions (array size), then also change + * the size of "entity" in src/common/assoc_mgr.h */ typedef enum { CONFIG_LOCK, @@ -116,12 +119,13 @@ typedef enum { ENTITY_COUNT } lock_datatype_t; -#define read_lock(data_type) (data_type * 3 + 0) -#define write_lock(data_type) (data_type * 3 + 1) -#define write_wait_lock(data_type) (data_type * 3 + 2) +#define read_lock(data_type) (data_type * 4 + 0) +#define write_lock(data_type) (data_type * 4 + 1) +#define write_wait_lock(data_type) (data_type * 4 + 2) +#define write_cnt_lock(data_type) (data_type * 4 + 3) typedef struct { - int entity[ENTITY_COUNT * 3]; + int entity[ENTITY_COUNT * 4]; } slurmctld_lock_flags_t; diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 2ea01e0abc9f338948d74a939f0614111380403c..5ca96adfe2b7232d3b0ef7e49d0ef147eff521b3 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -66,6 +66,7 @@ #include "src/common/read_config.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_ext_sensors.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/front_end.h" #include "src/slurmctld/locks.h" @@ -74,6 +75,7 @@ #include "src/slurmctld/reservation.h" #include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/slurmctld.h" +#include "src/slurmctld/slurmctld_plugstack.h" #include "src/slurmctld/state_save.h" #include "src/common/timers.h" #include "src/slurmctld/trigger_mgr.h" @@ -188,11 +190,11 @@ int dump_all_node_state ( void ) (void) unlink (new_file); else { /* file shuffle */ (void) unlink (old_file); - if(link(reg_file, old_file)) + if (link(reg_file, old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink (reg_file); - if(link(new_file, reg_file)) + if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink (new_file); @@ -686,7 +688,7 @@ extern void pack_all_node (char **buffer_ptr, int *buffer_size, buffer = init_buf (BUF_SIZE*16); nodes_packed = 0; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { /* write header: count and time */ pack32(nodes_packed, buffer); select_g_alter_node_cnt(SELECT_GET_NODE_SCALING, @@ -744,6 +746,86 @@ extern void pack_all_node (char **buffer_ptr, int *buffer_size, buffer_ptr[0] = xfer_buf_data (buffer); } +/* + * pack_one_node - dump all configuration and node information for one node + * in machine independent form (for network transmission) + * OUT buffer_ptr - pointer to the stored data + * OUT buffer_size - set to size of the buffer in bytes + * IN show_flags - node filtering options + * IN uid - uid of user making request (for partition filtering) + * IN node_name - name of node for which 
information is desired, + * use first node if name is NULL + * IN protocol_version - slurm protocol version of client + * global: node_record_table_ptr - pointer to global node table + * NOTE: the caller must xfree the buffer at *buffer_ptr + * NOTE: change slurm_load_node() in api/node_info.c when data format changes + * NOTE: READ lock_slurmctld config before entry + */ +extern void pack_one_node (char **buffer_ptr, int *buffer_size, + uint16_t show_flags, uid_t uid, char *node_name, + uint16_t protocol_version) +{ + uint32_t nodes_packed, tmp_offset, node_scaling; + Buf buffer; + time_t now = time(NULL); + struct node_record *node_ptr; + bool hidden; + + buffer_ptr[0] = NULL; + *buffer_size = 0; + + buffer = init_buf (BUF_SIZE); + nodes_packed = 0; + + if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + /* write header: count and time */ + pack32(nodes_packed, buffer); + select_g_alter_node_cnt(SELECT_GET_NODE_SCALING, + &node_scaling); + pack32(node_scaling, buffer); + + pack_time(now, buffer); + + /* write node records */ + part_filter_set(uid); + if (node_name) + node_ptr = find_node_record(node_name); + else + node_ptr = node_record_table_ptr; + if (node_ptr) { + hidden = false; + if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && + (_node_is_hidden(node_ptr))) + hidden = true; + else if (IS_NODE_FUTURE(node_ptr) && + !IS_NODE_MAINT(node_ptr)) /* reboot req sent */ + hidden = true; + else if (IS_NODE_CLOUD(node_ptr) && + IS_NODE_POWER_SAVE(node_ptr)) + hidden = true; + else if ((node_ptr->name == NULL) || + (node_ptr->name[0] == '\0')) + hidden = true; + + if (!hidden) { + _pack_node(node_ptr, buffer, protocol_version); + nodes_packed++; + } + } + part_filter_clear(); + } else { + error("select_g_select_jobinfo_pack: protocol_version " + "%hu not supported", protocol_version); + } + + tmp_offset = get_buf_offset (buffer); + set_buf_offset (buffer, 0); + pack32 (nodes_packed, buffer); + set_buf_offset (buffer, tmp_offset); + + *buffer_size = 
get_buf_offset (buffer); + buffer_ptr[0] = xfer_buf_data (buffer); +} /* * _pack_node - dump all configuration information about a specific node in @@ -758,7 +840,61 @@ extern void pack_all_node (char **buffer_ptr, int *buffer_size, static void _pack_node (struct node_record *dump_node_ptr, Buf buffer, uint16_t protocol_version) { - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + packstr (dump_node_ptr->name, buffer); + packstr (dump_node_ptr->node_hostname, buffer); + packstr (dump_node_ptr->comm_name, buffer); + pack16 (dump_node_ptr->node_state, buffer); + /* On a bluegene system always use the regular node + * infomation not what is in the config_ptr. + */ + #ifndef HAVE_BG + if (slurmctld_conf.fast_schedule) { + /* Only data from config_record used for scheduling */ + pack16(dump_node_ptr->config_ptr->cpus, buffer); + pack16(dump_node_ptr->config_ptr->boards, buffer); + pack16(dump_node_ptr->config_ptr->sockets, buffer); + pack16(dump_node_ptr->config_ptr->cores, buffer); + pack16(dump_node_ptr->config_ptr->threads, buffer); + pack32(dump_node_ptr->config_ptr->real_memory, buffer); + pack32(dump_node_ptr->config_ptr->tmp_disk, buffer); + } else { + #endif + /* Individual node data used for scheduling */ + pack16(dump_node_ptr->cpus, buffer); + pack16(dump_node_ptr->boards, buffer); + pack16(dump_node_ptr->sockets, buffer); + pack16(dump_node_ptr->cores, buffer); + pack16(dump_node_ptr->threads, buffer); + pack32(dump_node_ptr->real_memory, buffer); + pack32(dump_node_ptr->tmp_disk, buffer); + #ifndef HAVE_BG + } + #endif + pack32(dump_node_ptr->cpu_load, buffer); + pack32(dump_node_ptr->config_ptr->weight, buffer); + pack32(dump_node_ptr->reason_uid, buffer); + + pack_time(dump_node_ptr->boot_time, buffer); + pack_time(dump_node_ptr->reason_time, buffer); + pack_time(dump_node_ptr->slurmd_start_time, buffer); + + select_g_select_nodeinfo_pack(dump_node_ptr->select_nodeinfo, + buffer, 
protocol_version); + + packstr(dump_node_ptr->arch, buffer); + packstr(dump_node_ptr->features, buffer); + if (dump_node_ptr->gres) + packstr(dump_node_ptr->gres, buffer); + else + packstr(dump_node_ptr->config_ptr->gres, buffer); + packstr(dump_node_ptr->os, buffer); + packstr(dump_node_ptr->reason, buffer); + acct_gather_energy_pack(dump_node_ptr->energy, buffer, + protocol_version); + ext_sensors_data_pack(dump_node_ptr->ext_sensors, buffer, + protocol_version); + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { packstr (dump_node_ptr->name, buffer); packstr (dump_node_ptr->node_hostname, buffer); packstr (dump_node_ptr->comm_name, buffer); @@ -810,7 +946,7 @@ static void _pack_node (struct node_record *dump_node_ptr, Buf buffer, packstr(dump_node_ptr->reason, buffer); acct_gather_energy_pack(dump_node_ptr->energy, buffer, protocol_version); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { packstr (dump_node_ptr->name, buffer); packstr (dump_node_ptr->node_hostname, buffer); packstr (dump_node_ptr->comm_name, buffer); @@ -1123,9 +1259,9 @@ int update_node ( update_node_msg_t * update_node_msg ) bit_clear (idle_node_bitmap, node_inx); } else if ((state_val == NODE_STATE_DRAIN) || (state_val == NODE_STATE_FAIL)) { + uint16_t new_state = state_val; bit_clear (avail_node_bitmap, node_inx); - state_val = node_ptr->node_state |= - NODE_STATE_DRAIN; + state_val = node_ptr->node_state |= state_val; if ((node_ptr->run_job_cnt == 0) && (node_ptr->comp_job_cnt == 0)) { trigger_node_drained(node_ptr); @@ -1134,6 +1270,9 @@ int update_node ( update_node_msg_t * update_node_msg ) node_ptr, now, NULL, node_ptr->reason_uid); } + if ((new_state == NODE_STATE_FAIL) && + (nonstop_ops.node_fail)) + (nonstop_ops.node_fail)(NULL, node_ptr); } else if (state_val == NODE_STATE_POWER_SAVE) { if (IS_NODE_POWER_SAVE(node_ptr)) { verbose("node %s already powered down", @@ -1326,8 +1465,6 @@ static int 
_update_node_weight(char *node_names, uint32_t weight) * update it (if all nodes updated) or split it into * a new entry */ config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { if (config_ptr == first_new) @@ -1397,8 +1534,6 @@ static int _update_node_features(char *node_names, char *features) * update it (if all nodes updated) or split it into * a new entry */ config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { if (config_ptr == first_new) @@ -1474,8 +1609,6 @@ static int _update_node_gres(char *node_names, char *gres) * update it (if all nodes updated) or split it into * a new entry */ config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { if (config_ptr == first_new) @@ -1720,7 +1853,7 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) if (gres_plugin_node_config_unpack(reg_msg->gres_info, node_ptr->name) != SLURM_SUCCESS) { error_code = SLURM_ERROR; - reason_down = "Could not unpack gres data"; + xstrcat(reason_down, "Could not unpack gres data"); } else if (gres_plugin_node_config_validate( node_ptr->name, config_ptr->gres, &node_ptr->gres, &node_ptr->gres_list, @@ -1748,13 +1881,17 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) "(%d < %d)", reg_msg->node_name, threads1, threads2); error_code = EINVAL; - reason_down = "Low socket*core*thread count"; + if (reason_down) + xstrcat(reason_down, ", "); + xstrcat(reason_down, "Low socket*core*thread count"); } else if ((slurmctld_conf.fast_schedule == 0) && ((cr_flag == 1) || gang_flag) && 
(cores1 < cores2)) { error("Node %s has low socket*core count (%d < %d)", reg_msg->node_name, cores1, cores2); error_code = EINVAL; - reason_down = "Low socket*core count"; + if (reason_down) + xstrcat(reason_down, ", "); + xstrcat(reason_down, "Low socket*core count"); } else if ((slurmctld_conf.fast_schedule == 0) && ((cr_flag == 1) || gang_flag) && ((sockets1 > sockets2) || (cores1 > cores2) || @@ -1774,7 +1911,9 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) reg_msg->node_name, reg_msg->cpus, config_ptr->cpus); error_code = EINVAL; - reason_down = "Low CPUs"; + if (reason_down) + xstrcat(reason_down, ", "); + xstrcat(reason_down, "Low CPUs"); } else if ((slurmctld_conf.fast_schedule == 0) && ((cr_flag == 1) || gang_flag) && (reg_msg->cpus > config_ptr->cpus)) { @@ -1808,7 +1947,9 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) reg_msg->node_name, reg_msg->real_memory, config_ptr->real_memory); error_code = EINVAL; - reason_down = "Low RealMemory"; + if (reason_down) + xstrcat(reason_down, ", "); + xstrcat(reason_down, "Low RealMemory"); } node_ptr->real_memory = reg_msg->real_memory; @@ -1818,7 +1959,9 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) reg_msg->node_name, reg_msg->tmp_disk, config_ptr->tmp_disk); error_code = EINVAL; - reason_down = "Low TmpDisk"; + if (reason_down) + xstrcat(reason_down, ", "); + xstrcat(reason_down, "Low TmpDisk"); } node_ptr->tmp_disk = reg_msg->tmp_disk; @@ -1830,7 +1973,10 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) node_ptr->os = reg_msg->os; reg_msg->os = NULL; /* Nothing left to free */ - node_ptr->cpu_load = reg_msg->cpu_load; + if (node_ptr->cpu_load != reg_msg->cpu_load) { + node_ptr->cpu_load = reg_msg->cpu_load; + last_node_update = now; + } if (IS_NODE_NO_RESPOND(node_ptr)) { node_ptr->node_state &= (~NODE_STATE_NO_RESPOND); @@ -1957,9 +2103,11 @@ extern int 
validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) _sync_bitmaps(node_ptr, reg_msg->job_count); } + xfree(reason_down); if (reg_msg->energy) memcpy(node_ptr->energy, reg_msg->energy, sizeof(acct_gather_energy_t)); + node_ptr->last_response = now; return error_code; @@ -1997,7 +2145,7 @@ static front_end_record_t * _front_end_reg( front_end_ptr->slurmd_start_time = reg_msg->slurmd_start_time; state_base = front_end_ptr->node_state & JOB_STATE_BASE; state_flags = front_end_ptr->node_state & JOB_STATE_FLAGS; - if ((state_base == NODE_STATE_DOWN) && + if ((state_base == NODE_STATE_DOWN) && (front_end_ptr->reason) && (!strncmp(front_end_ptr->reason, "Not responding", 14))) { error("front end node %s returned to service", reg_msg->node_name); @@ -2027,7 +2175,7 @@ static front_end_record_t * _front_end_reg( extern int validate_nodes_via_front_end( slurm_node_registration_status_msg_t *reg_msg) { - int error_code = 0, i, j; + int error_code = 0, i, j, rc; bool update_node_state = false; struct job_record *job_ptr; struct config_record *config_ptr; @@ -2035,7 +2183,7 @@ extern int validate_nodes_via_front_end( time_t now = time(NULL); ListIterator job_iterator; hostlist_t reg_hostlist = NULL; - char *host_str = NULL; + char *host_str = NULL, *reason_down = NULL; uint16_t node_flags; front_end_record_t *front_end_ptr; @@ -2157,13 +2305,22 @@ extern int validate_nodes_via_front_end( config_ptr = node_ptr->config_ptr; node_ptr->last_response = now; - (void) gres_plugin_node_config_validate(node_ptr->name, - config_ptr->gres, - &node_ptr->gres, - &node_ptr->gres_list, - slurmctld_conf. - fast_schedule, - NULL); + rc = gres_plugin_node_config_validate(node_ptr->name, + config_ptr->gres, + &node_ptr->gres, + &node_ptr->gres_list, + slurmctld_conf. 
+ fast_schedule, + &reason_down); + if (rc) { + if (!IS_NODE_DOWN(node_ptr)) { + error("Setting node %s state to DOWN", + node_ptr->name); + } + set_node_down(node_ptr->name, reason_down); + last_node_update = now; + } + xfree(reason_down); gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name); if (reg_msg->up_time) { @@ -2633,8 +2790,6 @@ void msg_to_slurmd (slurm_msg_type_t msg_type) kill_agent_args->msg_type = msg_type; kill_agent_args->retry = 0; kill_agent_args->hostlist = hostlist_create(""); - if (kill_agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); if (msg_type == REQUEST_SHUTDOWN) { shutdown_req = xmalloc(sizeof(shutdown_msg_t)); shutdown_req->options = 0; @@ -2801,7 +2956,7 @@ void make_node_idle(struct node_record *node_ptr, bitstr_t *node_bitmap = NULL; if (job_ptr) { /* Specific job completed */ - if(job_ptr->node_bitmap_cg) + if (job_ptr->node_bitmap_cg) node_bitmap = job_ptr->node_bitmap_cg; else node_bitmap = job_ptr->node_bitmap; @@ -2920,7 +3075,7 @@ extern int send_nodes_to_accounting(time_t event_time) /* send nodes not in not 'up' state */ node_ptr = node_record_table_ptr; for (i = 0; i < node_record_count; i++, node_ptr++) { - if(node_ptr->reason) + if (node_ptr->reason) reason = node_ptr->reason; else reason = "First Registration"; @@ -2930,14 +3085,14 @@ extern int send_nodes_to_accounting(time_t event_time) /* At this point, the node appears to be up, but on some systems we need to make sure there aren't some part of a node in an error state. 
*/ - if(node_ptr->select_nodeinfo) { + if (node_ptr->select_nodeinfo) { uint16_t err_cpus = 0; select_g_select_nodeinfo_get( node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBCNT, NODE_STATE_ERROR, &err_cpus); - if(err_cpus) { + if (err_cpus) { struct node_record send_node; struct config_record config_rec; int cpus_per_node = 1; @@ -2952,7 +3107,7 @@ extern int send_nodes_to_accounting(time_t event_time) SELECT_GET_NODE_SCALING, &node_scaling); - if(node_scaling) + if (node_scaling) cpus_per_node = node_ptr->cpus / node_scaling; err_cpus *= cpus_per_node; diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index dec0a15982c03ff98973d4736bd77e640322fc2f..c160a769627a69df99e3ad19d41a948bb5e84a87 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -86,6 +86,7 @@ #include "src/slurmctld/reservation.h" #include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/slurmctld.h" +#include "src/slurmctld/slurmctld_plugstack.h" #define MAX_FEATURES 32 /* max exclusive features "[fs1|fs2]"=2 */ #define MAX_RETRIES 10 @@ -129,6 +130,19 @@ static bitstr_t *_valid_features(struct job_details *detail_ptr, static int _fill_in_gres_fields(struct job_record *job_ptr); +/* + * _get_ntasks_per_core - Retrieve the value of ntasks_per_core from + * the given job_details record. If it wasn't set, return 0xffff. + * Intended for use with the adjust_cpus_nppcu function. 
+ */ +static uint16_t _get_ntasks_per_core(struct job_details *details) { + + if (details->mc_ptr) + return details->mc_ptr->ntasks_per_core; + else + return 0xffff; +} + /* * _build_gres_alloc_string - Fill in the gres_alloc string field for a * given job_record @@ -397,8 +411,6 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, agent_args->msg_type = REQUEST_TERMINATE_JOB; agent_args->retry = 0; /* re_kill_job() resends as needed */ agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); kill_job = xmalloc(sizeof(kill_job_msg_t)); last_node_update = time(NULL); kill_job->job_id = job_ptr->job_id; @@ -467,9 +479,11 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, agent_args->node_count++; } #else + if (!job_ptr->node_bitmap_cg) + build_cg_bitmap(job_ptr); for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; i++, node_ptr++) { - if (!bit_test(job_ptr->node_bitmap, i)) + if (!bit_test(job_ptr->node_bitmap_cg, i)) continue; if (IS_NODE_DOWN(node_ptr)) { /* Issue the KILL RPC, but don't verify response */ @@ -942,8 +956,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, if ((job_ptr->details->min_nodes == 0) && (job_ptr->details->max_nodes == 0)) { avail_bitmap = bit_alloc(node_record_count); - if (!avail_bitmap) - fatal("bit_alloc: malloc failure"); pick_code = select_g_job_test(job_ptr, avail_bitmap, 0, 0, 0, @@ -1068,8 +1080,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, } else { avail_bitmap = bit_copy(node_set_ptr[i]. 
my_bitmap); - if (avail_bitmap == NULL) - fatal("bit_copy malloc failure"); } } if (!bit_super_set(job_ptr->details->req_node_bitmap, @@ -1081,8 +1091,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, FREE_NULL_BITMAP(avail_bitmap); avail_bitmap = bit_copy(job_ptr->details-> req_node_bitmap); - if (avail_bitmap == NULL) - fatal("bit_copy malloc failure"); } for (i = 0; i < node_set_size; i++) { int count1 = 0, count2 = 0; @@ -1096,8 +1104,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, } else { total_bitmap = bit_copy( node_set_ptr[i].my_bitmap); - if (total_bitmap == NULL) - fatal("bit_copy malloc failure"); } bit_and(node_set_ptr[i].my_bitmap, avail_node_bitmap); @@ -1140,8 +1146,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, } else { avail_bitmap = bit_copy(node_set_ptr[i]. my_bitmap); - if (avail_bitmap == NULL) - fatal("bit_copy malloc failure"); } avail_nodes = bit_set_count(avail_bitmap); tried_sched = false; /* need to test these nodes */ @@ -1256,8 +1260,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, if (!runable_avail && (avail_nodes >= min_nodes)) { FREE_NULL_BITMAP(avail_bitmap); avail_bitmap = bit_copy(total_bitmap); - if (avail_bitmap == NULL) - fatal("bit_copy malloc failure"); bit_and(avail_bitmap, avail_node_bitmap); pick_code = select_g_job_test(job_ptr, avail_bitmap, @@ -1319,20 +1321,22 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, return error_code; } -static void _preempt_jobs(List preemptee_job_list, int *error_code) +static void _preempt_jobs(List preemptee_job_list, bool kill_pending, + int *error_code) { ListIterator iter; struct job_record *job_ptr; uint16_t mode; int job_cnt = 0, rc = SLURM_SUCCESS; + checkpoint_msg_t ckpt_msg; iter = list_iterator_create(preemptee_job_list); - if (!iter) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *) list_next(iter))) { mode = 
slurm_job_preempt_mode(job_ptr); if (mode == PREEMPT_MODE_CANCEL) { job_cnt++; + if (!kill_pending) + continue; if (slurm_job_check_grace(job_ptr) == SLURM_SUCCESS) continue; rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true); @@ -1341,7 +1345,9 @@ static void _preempt_jobs(List preemptee_job_list, int *error_code) job_ptr->job_id); } } else if (mode == PREEMPT_MODE_CHECKPOINT) { - checkpoint_msg_t ckpt_msg; + job_cnt++; + if (!kill_pending) + continue; memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t)); ckpt_msg.op = CHECK_REQUEUE; ckpt_msg.job_id = job_ptr->job_id; @@ -1358,15 +1364,16 @@ static void _preempt_jobs(List preemptee_job_list, int *error_code) info("preempted job %u has been checkpointed", job_ptr->job_id); } - job_cnt++; } else if (mode == PREEMPT_MODE_REQUEUE) { + job_cnt++; + if (!kill_pending) + continue; rc = job_requeue(0, job_ptr->job_id, -1, (uint16_t)NO_VAL, true); if (rc == SLURM_SUCCESS) { info("preempted job %u has been requeued", job_ptr->job_id); } - job_cnt++; } else if ((mode == PREEMPT_MODE_SUSPEND) && (slurm_get_preempt_mode() & PREEMPT_MODE_GANG)) { debug("preempted job %u suspended by gang scheduler", @@ -1529,19 +1536,19 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, if (!test_only && preemptee_job_list && (error_code == SLURM_SUCCESS)){ struct job_details *detail_ptr = job_ptr->details; time_t now = time(NULL); + bool kill_pending = true; if ((detail_ptr->preempt_start_time != 0) && (detail_ptr->preempt_start_time > (now - slurmctld_conf.kill_wait - slurmctld_conf.msg_timeout))) { /* Job preemption may still be in progress, - * do not preempt any more jobs yet */ - error_code = ESLURM_NODES_BUSY; - } else { - _preempt_jobs(preemptee_job_list, &error_code); - if ((error_code == ESLURM_NODES_BUSY) && - (detail_ptr->preempt_start_time == 0)) { - detail_ptr->preempt_start_time = now; - } + * do not cancel or requeue any more jobs yet */ + kill_pending = false; + } + _preempt_jobs(preemptee_job_list, 
kill_pending, &error_code); + if ((error_code == ESLURM_NODES_BUSY) && + (detail_ptr->preempt_start_time == 0)) { + detail_ptr->preempt_start_time = now; } } if (error_code) { @@ -1638,6 +1645,9 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, configuring = IS_JOB_CONFIGURING(job_ptr); job_ptr->job_state = JOB_RUNNING; + if (nonstop_ops.job_begin) + (nonstop_ops.job_begin)(job_ptr); + if (configuring || bit_overlap(job_ptr->node_bitmap, power_node_bitmap)) job_ptr->job_state |= JOB_CONFIGURING; @@ -1651,6 +1661,16 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, slurmctld_diag_stats.jobs_started++; acct_policy_job_begin(job_ptr); + /* Update the job_record's gres and gres_alloc fields with + * strings representing the amount of each GRES type requested + * and allocated. */ + _fill_in_gres_fields(job_ptr); + if (slurm_get_debug_flags() & DEBUG_FLAG_GRES) + debug("(%s:%d) job id: %u -- job_record->gres: (%s), " + "job_record->gres_alloc: (%s)", + THIS_FILE, __LINE__, job_ptr->job_id, + job_ptr->gres, job_ptr->gres_alloc); + /* If ran with slurmdbd this is handled out of band in the * job if happening right away. If the job has already * become eligible and registered in the db then the start @@ -1677,16 +1697,6 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, xfree(node_set_ptr); } - /* Update the job_record's gres and gres_alloc fields with - * strings representing the amount of each GRES type requested - * and allocated. 
*/ - _fill_in_gres_fields(job_ptr); - if (slurm_get_debug_flags() & DEBUG_FLAG_GRES) - debug("(%s:%d) job id: %u -- job_record->gres: (%s), " - "job_record->gres_alloc: (%s)", - THIS_FILE, __LINE__, job_ptr->job_id, - job_ptr->gres, job_ptr->gres_alloc); - return error_code; } @@ -1718,13 +1728,13 @@ static int _fill_in_gres_fields(struct job_record *job_ptr) "was requested", THIS_FILE, __LINE__, job_ptr->job_id); - xfree(job_ptr->gres_req); - xstrcat(job_ptr->gres_req, ""); - } else if ( job_ptr->node_cnt > 0 ) { + if (job_ptr->gres_req == NULL) + xstrcat(job_ptr->gres_req, ""); + + } else if (job_ptr->node_cnt > 0 + && job_ptr->gres_req == NULL) { /* job_ptr->gres_req is rebuilt/replaced here */ tmp_str = xstrdup(req_config); - xfree(job_ptr->gres_req); - job_ptr->gres_req = xstrdup(""); tok = strtok_r(tmp_str, ",", &last); while (tok) { @@ -1835,11 +1845,7 @@ static bool _valid_feature_counts(struct job_details *detail_ptr, return rc; feature_bitmap = bit_copy(node_bitmap); - if (feature_bitmap == NULL) - fatal("bit_copy malloc error"); job_feat_iter = list_iterator_create(detail_ptr->feature_list); - if (job_feat_iter == NULL) - fatal("list_iterator_create malloc error"); while ((job_feat_ptr = (struct feature_record *) list_next(job_feat_iter))) { feat_ptr = list_find_first(feature_list, list_find_feature, @@ -1868,8 +1874,6 @@ static bool _valid_feature_counts(struct job_details *detail_ptr, if (have_count) { job_feat_iter = list_iterator_create(detail_ptr-> feature_list); - if (job_feat_iter == NULL) - fatal("list_iterator_create malloc error"); while ((job_feat_ptr = (struct feature_record *) list_next(job_feat_iter))) { if (job_feat_ptr->count == 0) @@ -1882,8 +1886,6 @@ static bool _valid_feature_counts(struct job_details *detail_ptr, break; } tmp_bitmap = bit_copy(feature_bitmap); - if (tmp_bitmap == NULL) - fatal("bit_copy malloc error"); bit_and(tmp_bitmap, feat_ptr->node_bitmap); if (bit_set_count(tmp_bitmap) < job_feat_ptr->count) rc = false; @@ 
-2002,7 +2004,7 @@ static int _build_node_list(struct job_record *job_ptr, struct node_set **node_set_pptr, int *node_set_size) { - int i, node_set_inx, power_cnt, rc; + int adj_cpus, i, node_set_inx, power_cnt, rc; struct node_set *node_set_ptr; struct config_record *config_ptr; struct part_record *part_ptr = job_ptr->part_ptr; @@ -2061,14 +2063,10 @@ static int _build_node_list(struct job_record *job_ptr, } else { usable_node_mask = bit_copy(detail_ptr->exc_node_bitmap); - if (usable_node_mask == NULL) - fatal("bit_copy malloc failure"); bit_not(usable_node_mask); } } else if (usable_node_mask == NULL) { usable_node_mask = bit_alloc(node_record_count); - if (usable_node_mask == NULL) - fatal("bit_alloc malloc failure"); bit_nset(usable_node_mask, 0, (node_record_count - 1)); } @@ -2080,14 +2078,14 @@ static int _build_node_list(struct job_record *job_ptr, } config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal("list_iterator_create malloc failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { - config_filter = 0; - if ((detail_ptr->pn_min_cpus > config_ptr->cpus ) || + adj_cpus = adjust_cpus_nppcu(_get_ntasks_per_core(detail_ptr), + config_ptr->threads, + config_ptr->cpus); + if ((detail_ptr->pn_min_cpus > adj_cpus) || ((detail_ptr->pn_min_memory & (~MEM_PER_CPU)) > config_ptr->real_memory) || (detail_ptr->pn_min_tmp_disk > config_ptr->tmp_disk)) @@ -2115,8 +2113,6 @@ static int _build_node_list(struct job_record *job_ptr, node_set_ptr[node_set_inx].my_bitmap = bit_copy(config_ptr->node_bitmap); - if (node_set_ptr[node_set_inx].my_bitmap == NULL) - fatal("bit_copy malloc failure"); bit_and(node_set_ptr[node_set_inx].my_bitmap, part_ptr->node_bitmap); if (usable_node_mask) { @@ -2239,7 +2235,7 @@ static int _build_node_list(struct job_record *job_ptr, static void _filter_nodes_in_set(struct node_set *node_set_ptr, struct job_details *job_con) { - int i; + int adj_cpus, i; multi_core_data_t 
*mc_ptr = job_con->mc_ptr; if (slurmctld_conf.fast_schedule) { /* test config records */ @@ -2248,9 +2244,11 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, int job_ok = 0, job_mc_ptr_ok = 0; if (bit_test(node_set_ptr->my_bitmap, i) == 0) continue; - node_con = node_record_table_ptr[i].config_ptr; - if ((job_con->pn_min_cpus <= node_con->cpus) && + adj_cpus = adjust_cpus_nppcu(_get_ntasks_per_core(job_con), + node_con->threads, + node_con->cpus); + if ((job_con->pn_min_cpus <= adj_cpus) && ((job_con->pn_min_memory & (~MEM_PER_CPU)) <= node_con->real_memory) && (job_con->pn_min_tmp_disk <= node_con->tmp_disk)) @@ -2279,9 +2277,12 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, continue; node_ptr = &node_record_table_ptr[i]; - if ((job_con->pn_min_cpus <= node_ptr->cpus) && + adj_cpus = adjust_cpus_nppcu(_get_ntasks_per_core(job_con), + node_ptr->threads, + node_ptr->cpus); + if ((job_con->pn_min_cpus <= adj_cpus) && ((job_con->pn_min_memory & (~MEM_PER_CPU)) <= - node_ptr->real_memory) && + node_ptr->real_memory) && (job_con->pn_min_tmp_disk <= node_ptr->tmp_disk)) job_ok = 1; if (mc_ptr && @@ -2323,8 +2324,6 @@ static int _nodes_in_sets(bitstr_t *req_bitmap, else { scratch_bitmap = bit_copy(node_set_ptr[i].my_bitmap); - if (scratch_bitmap == NULL) - fatal("bit_copy malloc failure"); } } @@ -2366,14 +2365,14 @@ extern void build_node_details(struct job_record *job_ptr, bool new_alloc) if (new_alloc) { /* Find available front-end node and assign it to this job */ xfree(job_ptr->batch_host); - job_ptr->front_end_ptr = assign_front_end(NULL); + job_ptr->front_end_ptr = assign_front_end(job_ptr); if (job_ptr->front_end_ptr) { job_ptr->batch_host = xstrdup(job_ptr-> front_end_ptr->name); } } else if (job_ptr->batch_host) { /* Reset pointer to this job's front-end node */ - job_ptr->front_end_ptr = assign_front_end(job_ptr->batch_host); + job_ptr->front_end_ptr = assign_front_end(job_ptr); if (!job_ptr->front_end_ptr) 
xfree(job_ptr->batch_host); } @@ -2423,16 +2422,12 @@ static bitstr_t *_valid_features(struct job_details *details_ptr, int last_op = FEATURE_OP_AND, position = 0; result_bits = bit_alloc(MAX_FEATURES); - if (result_bits == NULL) - fatal("bit_alloc malloc failure"); if (details_ptr->feature_list == NULL) { /* no constraints */ bit_set(result_bits, 0); return result_bits; } feat_iter = list_iterator_create(details_ptr->feature_list); - if (feat_iter == NULL) - fatal("list_iterator_create malloc failure"); while ((job_feat_ptr = (struct feature_record *) list_next(feat_iter))) { if ((job_feat_ptr->op_code == FEATURE_OP_XAND) || @@ -2480,14 +2475,10 @@ extern void re_kill_job(struct job_record *job_ptr) xassert(job_ptr->details); kill_hostlist = hostlist_create(""); - if (kill_hostlist == NULL) - fatal("hostlist_create: malloc failure"); agent_args = xmalloc(sizeof(agent_arg_t)); agent_args->msg_type = REQUEST_TERMINATE_JOB; agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); agent_args->retry = 0; kill_job = xmalloc(sizeof(kill_job_msg_t)); kill_job->job_id = job_ptr->job_id; diff --git a/src/slurmctld/node_scheduler.h b/src/slurmctld/node_scheduler.h index b1649092639f896207487cc7984262d5abe2dd6a..0092d5e1f6c01386badb98501443cc2577400873 100644 --- a/src/slurmctld/node_scheduler.h +++ b/src/slurmctld/node_scheduler.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 40ef2cd7a22218111f28b7f9ab2dee9d9660c2e4..dab291e88cf85de98bb1b1cfa4f62731423d62af 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -71,9 +71,9 @@ /* Change PART_STATE_VERSION value when changing the state save format */ -#define PART_STATE_VERSION "VER003" -#define PART_2_2_STATE_VERSION "VER003" /* SLURM version 2.2 */ -#define PART_2_1_STATE_VERSION "VER002" /* SLURM version 2.1 */ +#define PART_STATE_VERSION "VER004" +#define PART_2_6_STATE_VERSION "VER004" /* SLURM version 2.6 */ +#define PART_2_5_STATE_VERSION "VER003" /* SLURM version 2.5 to 2.2 */ /* Global variables */ struct part_record default_part; /* default configuration values */ @@ -117,13 +117,9 @@ static int _build_part_bitmap(struct part_record *part_ptr) if (part_ptr->node_bitmap == NULL) { part_ptr->node_bitmap = bit_alloc(node_record_count); - if (part_ptr->node_bitmap == NULL) - fatal("bit_alloc malloc failure"); old_bitmap = NULL; } else { old_bitmap = bit_copy(part_ptr->node_bitmap); - if (old_bitmap == NULL) - fatal("bit_copy malloc failure"); bit_nclear(part_ptr->node_bitmap, 0, node_record_count - 1); } @@ -225,9 +221,11 @@ struct part_record *create_part_record(void) xassert (part_ptr->magic = PART_MAGIC); /* set value */ part_ptr->name = xstrdup("DEFAULT"); part_ptr->alternate = xstrdup(default_part.alternate); + part_ptr->cr_type = default_part.cr_type; part_ptr->flags = default_part.flags; part_ptr->max_time = default_part.max_time; 
part_ptr->default_time = default_part.default_time; + part_ptr->max_cpus_per_node = default_part.max_cpus_per_node; part_ptr->max_nodes = default_part.max_nodes; part_ptr->max_nodes_orig = default_part.max_nodes; part_ptr->min_nodes = default_part.min_nodes; @@ -237,7 +235,7 @@ struct part_record *create_part_record(void) part_ptr->preempt_mode = default_part.preempt_mode; part_ptr->priority = default_part.priority; part_ptr->grace_time = default_part.grace_time; - if(part_max_priority) + if (part_max_priority) part_ptr->norm_priority = (double)default_part.priority / (double)part_max_priority; part_ptr->node_bitmap = NULL; @@ -258,8 +256,7 @@ struct part_record *create_part_record(void) else part_ptr->nodes = NULL; - if (list_append(part_list, part_ptr) == NULL) - fatal("create_part_record: unable to allocate memory"); + (void) list_append(part_list, part_ptr); return part_ptr; } @@ -313,8 +310,6 @@ int dump_all_part_state(void) /* write partition records to buffer */ lock_slurmctld(part_read_lock); part_iterator = list_iterator_create(part_list); - if (!part_iterator) - fatal("list_iterator_create malloc"); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { xassert (part_ptr->magic == PART_MAGIC); _dump_part_state(part_ptr, buffer); @@ -398,6 +393,7 @@ static void _dump_part_state(struct part_record *part_ptr, Buf buffer) pack32(part_ptr->grace_time, buffer); pack32(part_ptr->max_time, buffer); pack32(part_ptr->default_time, buffer); + pack32(part_ptr->max_cpus_per_node, buffer); pack32(part_ptr->max_nodes_orig, buffer); pack32(part_ptr->min_nodes_orig, buffer); @@ -407,6 +403,8 @@ static void _dump_part_state(struct part_record *part_ptr, Buf buffer) pack16(part_ptr->priority, buffer); pack16(part_ptr->state_up, buffer); + pack16(part_ptr->cr_type, buffer); + packstr(part_ptr->allow_groups, buffer); packstr(part_ptr->allow_alloc_nodes, buffer); packstr(part_ptr->alternate, buffer); @@ -455,10 +453,10 @@ int load_all_part_state(void) char 
*part_name = NULL, *allow_groups = NULL, *nodes = NULL; char *state_file, *data = NULL; uint32_t max_time, default_time, max_nodes, min_nodes; - uint32_t grace_time = 0; + uint32_t max_cpus_per_node = INFINITE, grace_time = 0; time_t time; uint16_t flags; - uint16_t max_share, preempt_mode, priority, state_up; + uint16_t max_share, preempt_mode, priority, state_up, cr_type; struct part_record *part_ptr; uint32_t data_size = 0, name_len; int data_allocated, data_read = 0, error_code = 0, part_cnt = 0; @@ -505,9 +503,11 @@ int load_all_part_state(void) safe_unpackstr_xmalloc( &ver_str, &name_len, buffer); debug3("Version string in part_state header is %s", ver_str); - if(ver_str) { + if (ver_str) { if (!strcmp(ver_str, PART_STATE_VERSION)) { protocol_version = SLURM_PROTOCOL_VERSION; + } else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) { + protocol_version = SLURM_2_5_PROTOCOL_VERSION; } } @@ -523,7 +523,42 @@ int load_all_part_state(void) safe_unpack_time(&time, buffer); while (remaining_buf(buffer) > 0) { - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpackstr_xmalloc(&part_name, &name_len, buffer); + safe_unpack32(&grace_time, buffer); + safe_unpack32(&max_time, buffer); + safe_unpack32(&default_time, buffer); + safe_unpack32(&max_cpus_per_node, buffer); + safe_unpack32(&max_nodes, buffer); + safe_unpack32(&min_nodes, buffer); + + safe_unpack16(&flags, buffer); + safe_unpack16(&max_share, buffer); + safe_unpack16(&preempt_mode, buffer); + safe_unpack16(&priority, buffer); + + if (priority > part_max_priority) + part_max_priority = priority; + + safe_unpack16(&state_up, buffer); + safe_unpack16(&cr_type, buffer); + + safe_unpackstr_xmalloc(&allow_groups, + &name_len, buffer); + safe_unpackstr_xmalloc(&allow_alloc_nodes, + &name_len, buffer); + safe_unpackstr_xmalloc(&alternate, &name_len, buffer); + safe_unpackstr_xmalloc(&nodes, &name_len, buffer); + if ((flags & PART_FLAG_DEFAULT_CLR) || + 
(flags & PART_FLAG_HIDDEN_CLR) || + (flags & PART_FLAG_NO_ROOT_CLR) || + (flags & PART_FLAG_ROOT_ONLY_CLR) || + (flags & PART_FLAG_REQ_RESV_CLR)) { + error("Invalid data for partition %s: flags=%u", + part_name, flags); + error_code = EINVAL; + } + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&part_name, &name_len, buffer); safe_unpack32(&grace_time, buffer); safe_unpack32(&max_time, buffer); @@ -538,6 +573,7 @@ int load_all_part_state(void) if (priority > part_max_priority) part_max_priority = priority; + cr_type = 0; /* Default value */ safe_unpack16(&state_up, buffer); safe_unpackstr_xmalloc(&allow_groups, @@ -570,7 +606,8 @@ int load_all_part_state(void) error("No more partition data will be processed from " "the checkpoint file"); xfree(allow_groups); - xfree(allow_groups); + xfree(allow_alloc_nodes); + xfree(alternate); xfree(part_name); xfree(nodes); error_code = EINVAL; @@ -597,6 +634,7 @@ int load_all_part_state(void) } part_ptr->max_time = max_time; part_ptr->default_time = default_time; + part_ptr->max_cpus_per_node = max_cpus_per_node; part_ptr->max_nodes = max_nodes; part_ptr->max_nodes_orig = max_nodes; part_ptr->min_nodes = min_nodes; @@ -607,6 +645,7 @@ int load_all_part_state(void) part_ptr->preempt_mode = preempt_mode; part_ptr->priority = priority; part_ptr->state_up = state_up; + part_ptr->cr_type = cr_type; xfree(part_ptr->allow_groups); part_ptr->allow_groups = allow_groups; xfree(part_ptr->allow_alloc_nodes); @@ -640,6 +679,30 @@ struct part_record *find_part_record(char *name) return list_find_first(part_list, &list_find_part, name); } +/* + * Create a copy of a job's part_list *partition list + * IN part_list_src - a job's part_list + * RET copy of part_list_src, must be freed by caller + */ +extern List part_list_copy(List part_list_src) +{ + struct part_record *part_ptr; + ListIterator iter; + List part_list_dest = NULL; + + if (!part_list_src) + return part_list_dest; + + part_list_dest = 
list_create(NULL); + iter = list_iterator_create(part_list_src); + while ((part_ptr = (struct part_record *) list_next(iter))) { + list_append(part_list_dest, part_ptr); + } + list_iterator_destroy(iter); + + return part_list_dest; +} + /* * get_part_list - find record for named partition(s) * IN name - partition name(s) in a comma separated list @@ -662,8 +725,6 @@ extern List get_part_list(char *name) if (part_ptr) { if (job_part_list == NULL) { job_part_list = list_create(NULL); - if (job_part_list == NULL) - fatal("list_create: malloc failure"); } list_append(job_part_list, part_ptr); } else { @@ -695,6 +756,7 @@ int init_part_conf(void) default_part.flags |= PART_FLAG_NO_ROOT; default_part.max_time = INFINITE; default_part.default_time = NO_VAL; + default_part.max_cpus_per_node = INFINITE; default_part.max_nodes = INFINITE; default_part.max_nodes_orig = INFINITE; default_part.min_nodes = 1; @@ -707,6 +769,7 @@ int init_part_conf(void) default_part.total_nodes = 0; default_part.total_cpus = 0; default_part.grace_time = 0; + default_part.cr_type = 0; xfree(default_part.nodes); xfree(default_part.allow_groups); xfree(default_part.allow_uids); @@ -719,9 +782,6 @@ int init_part_conf(void) else part_list = list_create(_list_delete_part); - if (part_list == NULL) - fatal ("memory allocation failure"); - xfree(default_part_name); default_part_loc = (struct part_record *) NULL; @@ -894,7 +954,39 @@ void pack_part(struct part_record *part_ptr, Buf buffer, { uint32_t altered; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + if (default_part_loc == part_ptr) + part_ptr->flags |= PART_FLAG_DEFAULT; + else + part_ptr->flags &= (~PART_FLAG_DEFAULT); + + packstr(part_ptr->name, buffer); + pack32(part_ptr->grace_time, buffer); + pack32(part_ptr->max_time, buffer); + pack32(part_ptr->default_time, buffer); + pack32(part_ptr->max_nodes_orig, buffer); + pack32(part_ptr->min_nodes_orig, buffer); + altered = 
part_ptr->total_nodes; + select_g_alter_node_cnt(SELECT_APPLY_NODE_MAX_OFFSET, &altered); + pack32(altered, buffer); + pack32(part_ptr->total_cpus, buffer); + pack32(part_ptr->def_mem_per_cpu, buffer); + pack32(part_ptr->max_cpus_per_node, buffer); + pack32(part_ptr->max_mem_per_cpu, buffer); + + pack16(part_ptr->flags, buffer); + pack16(part_ptr->max_share, buffer); + pack16(part_ptr->preempt_mode, buffer); + pack16(part_ptr->priority, buffer); + pack16(part_ptr->state_up, buffer); + pack16(part_ptr->cr_type, buffer); + + packstr(part_ptr->allow_groups, buffer); + packstr(part_ptr->allow_alloc_nodes, buffer); + packstr(part_ptr->alternate, buffer); + packstr(part_ptr->nodes, buffer); + pack_bit_fmt(part_ptr->node_bitmap, buffer); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { if (default_part_loc == part_ptr) part_ptr->flags |= PART_FLAG_DEFAULT; else @@ -973,6 +1065,12 @@ extern int update_part (update_part_msg_t * part_desc, bool create_flag) last_part_update = time(NULL); + if (part_desc->max_cpus_per_node != NO_VAL) { + info("update_part: setting MaxCPUsPerNode to %u for partition %s", + part_desc->max_cpus_per_node, part_desc->name); + part_ptr->max_cpus_per_node = part_desc->max_cpus_per_node; + } + if (part_desc->max_time != NO_VAL) { info("update_part: setting max_time to %u for partition %s", part_desc->max_time, part_desc->name); @@ -1121,7 +1219,7 @@ extern int update_part (update_part_msg_t * part_desc, bool create_flag) * the normalized priorities of all the other * partitions. If not then just set this partition. */ - if(part_ptr->priority > part_max_priority) { + if (part_ptr->priority > part_max_priority) { ListIterator itr = list_iterator_create(part_list); struct part_record *part2 = NULL; @@ -1474,8 +1572,8 @@ extern bool misc_policy_job_runnable_state(struct job_record *job_ptr) /* * Determine of the specified job can execute right now or is currently - * blocked by a partition state or limit. 
Execute job_limits_check() to - * re-validate job state. + * blocked by a partition state or limit. These job states should match the + * reason values returned by job_limits_check(). */ extern bool part_policy_job_runnable_state(struct job_record *job_ptr) { diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c index dd361d61196e5ca2f1a91f4f72576361a0a88314..085d3e94ec8aa89993363aa20b62c61a3e760489 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -49,6 +49,7 @@ #include <string.h> #include "src/common/hostlist.h" +#include "src/common/node_select.h" #include "src/common/read_config.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/front_end.h" @@ -194,8 +195,10 @@ void ping_nodes (void) else { down_hostlist = hostlist_create(front_end_ptr->name); - if (down_hostlist == NULL) - fatal("hostlist_create: malloc error"); + if (!down_hostlist) { + fatal("invalid front_end list: %s", + front_end_ptr->name); + } } set_front_end_down(front_end_ptr, "Not responding"); front_end_ptr->not_responding = false; @@ -254,8 +257,10 @@ void ping_nodes (void) else { down_hostlist = hostlist_create(node_ptr->name); - if (down_hostlist == NULL) - fatal("hostlist_create: malloc error"); + if (!down_hostlist) { + fatal("Invalid host name: %s", + node_ptr->name); + } } set_node_down_ptr(node_ptr, "Not responding"); node_ptr->not_responding = false; /* logged below */ @@ -342,6 +347,7 @@ extern void run_health_check(void) front_end_record_t *front_end_ptr; #else struct node_record *node_ptr; + int node_states = slurmctld_conf.health_check_node_state; #endif int i; char *host_str = NULL; @@ 
-351,9 +357,6 @@ extern void run_health_check(void) check_agent_args->msg_type = REQUEST_HEALTH_CHECK; check_agent_args->retry = 0; check_agent_args->hostlist = hostlist_create(""); - if (check_agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); - #ifdef HAVE_FRONT_END for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { @@ -363,11 +366,43 @@ extern void run_health_check(void) check_agent_args->node_count++; } #else + if ((node_states != HEALTH_CHECK_NODE_ANY) && + (node_states != HEALTH_CHECK_NODE_IDLE)) { + /* Update each node's alloc_cpus count */ + select_g_select_nodeinfo_set_all(); + } + for (i=0, node_ptr=node_record_table_ptr; i<node_record_count; i++, node_ptr++) { if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) || IS_NODE_POWER_SAVE(node_ptr)) continue; + if (node_states != HEALTH_CHECK_NODE_ANY) { + uint16_t cpus_total, cpus_used = 0; + if (slurmctld_conf.fast_schedule) { + cpus_total = node_ptr->config_ptr->cpus; + } else { + cpus_total = node_ptr->cpus; + } + if (!IS_NODE_IDLE(node_ptr)) { + select_g_select_nodeinfo_get( + node_ptr->select_nodeinfo, + SELECT_NODEDATA_SUBCNT, + NODE_STATE_ALLOCATED, + &cpus_used); + } + if (cpus_used == 0) { + if (!(node_states & HEALTH_CHECK_NODE_IDLE)) + continue; + } else if (cpus_used < cpus_total) { + if (!(node_states & HEALTH_CHECK_NODE_MIXED)) + continue; + } else { + if (!(node_states & HEALTH_CHECK_NODE_ALLOC)) + continue; + } + } + hostlist_push(check_agent_args->hostlist, node_ptr->name); check_agent_args->node_count++; } @@ -403,8 +438,6 @@ extern void update_nodes_acct_gather_data(void) agent_args->msg_type = REQUEST_ACCT_GATHER_UPDATE; agent_args->retry = 0; agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); #ifdef HAVE_FRONT_END for (i = 0, front_end_ptr = front_end_nodes; diff --git a/src/slurmctld/ping_nodes.h b/src/slurmctld/ping_nodes.h index 
3fa9d62beb03aec494d81216d19f56aaaf53a024..0ece6d9221c2d58172b436fd06817cb12adaf6a9 100644 --- a/src/slurmctld/ping_nodes.h +++ b/src/slurmctld/ping_nodes.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/port_mgr.c b/src/slurmctld/port_mgr.c index 403a5085e631085bf2bf8e34212a914e4f3db9f6..3d20044c75ddc50353aa481b1f6d1bad1100a4b1 100644 --- a/src/slurmctld/port_mgr.c +++ b/src/slurmctld/port_mgr.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -91,9 +91,9 @@ static void _rebuild_port_array(struct step_record *step_ptr) tmp_char = xmalloc(i+3); sprintf(tmp_char, "[%s]", step_ptr->resv_ports); hl = hostlist_create(tmp_char); + if (!hl) + fatal("Invalid reserved ports: %s", step_ptr->resv_ports); xfree(tmp_char); - if (hl == NULL) - fatal("malloc failure: hostlist_create"); step_ptr->resv_port_array = xmalloc(sizeof(int) * step_ptr->resv_port_cnt); @@ -149,6 +149,8 @@ static void _make_all_resv(void) step_iterator = list_iterator_create(job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next(step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; _make_step_resv(step_ptr); } list_iterator_destroy(step_iterator); @@ -254,8 +256,6 @@ extern int resv_port_alloc(struct step_record *step_ptr) /* Reserve selected ports */ hl = hostlist_create(NULL); - if (hl == NULL) - fatal("malloc: hostlist_create"); for (i=0; i<port_inx; i++) { /* NOTE: We give the port a name like "[1234]" rather than * just "1234" to avoid hostlists of the form "1[234-236]" */ diff --git a/src/slurmctld/port_mgr.h b/src/slurmctld/port_mgr.h index acd44e10640110733324ffa679bafd5030aad877..10a476b09ec44be809b7b261decf42e13dcbc996 100644 --- a/src/slurmctld/port_mgr.h +++ b/src/slurmctld/port_mgr.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c index 8d08c4e57bc4de754c2831cf7cd942151ba9e699..5658df81cdda12ac7835d39757afcb6230486987 100644 --- a/src/slurmctld/power_save.c +++ b/src/slurmctld/power_save.c @@ -13,7 +13,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -620,8 +620,6 @@ static void *_init_power_save(void *arg) goto fini; suspend_node_bitmap = bit_alloc(node_record_count); - if (suspend_node_bitmap == NULL) - fatal("power_save: malloc error"); while (slurmctld_config.shutdown_time == 0) { sleep(1); diff --git a/src/slurmctld/preempt.c b/src/slurmctld/preempt.c index 50923f08b2bf4d58e6bc242b73751f1729a130a0..f58eef853ba3d768eb4b099461eb365aeea63673 100644 --- a/src/slurmctld/preempt.c +++ b/src/slurmctld/preempt.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/preempt.h b/src/slurmctld/preempt.h index d1aacb473e850c5f756fca3fd2e2b5d74a2f46cd..ca897ec60547c8c13656ae728cb5d0578f952587 100644 --- a/src/slurmctld/preempt.h +++ b/src/slurmctld/preempt.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 5c4c3fecc474e6f9d36ff23d82247e0ac7198069..0c356db7dfbdfc480bfc583e20e1e8718054a894 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -72,6 +72,7 @@ #include "src/common/slurm_topology.h" #include "src/common/switch.h" #include "src/common/xstring.h" +#include "src/common/slurm_ext_sensors.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/front_end.h" @@ -90,12 +91,17 @@ #include "src/plugins/select/bluegene/bg_enums.h" +static pthread_mutex_t throttle_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t throttle_cond = PTHREAD_COND_INITIALIZER; + static void _fill_ctld_conf(slurm_ctl_conf_t * build_ptr); static void _kill_job_on_msg_fail(uint32_t job_id); static int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, uint32_t *step_id); static int _make_step_cred(struct step_record *step_rec, slurm_cred_t **slurm_cred); +static void _throttle_fini(int *active_rpc_cnt); +static void _throttle_start(int *active_rpc_cnt); inline static void _slurm_rpc_accounting_first_reg(slurm_msg_t *msg); inline static void _slurm_rpc_accounting_register_ctld(slurm_msg_t *msg); @@ -110,8 +116,10 @@ inline static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg); inline static void _slurm_rpc_dump_conf(slurm_msg_t * msg); inline static void _slurm_rpc_dump_front_end(slurm_msg_t * msg); inline static void _slurm_rpc_dump_jobs(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_jobs_user(slurm_msg_t * msg); inline static void _slurm_rpc_dump_job_single(slurm_msg_t * msg); inline static void _slurm_rpc_dump_nodes(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_node_single(slurm_msg_t * msg); inline static void _slurm_rpc_dump_partitions(slurm_msg_t * msg); inline static void _slurm_rpc_end_time(slurm_msg_t * msg); inline static void _slurm_rpc_epilog_complete(slurm_msg_t * msg); @@ 
-192,6 +200,10 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_dump_jobs(msg); slurm_free_job_info_request_msg(msg->data); break; + case REQUEST_JOB_USER_INFO: + _slurm_rpc_dump_jobs_user(msg); + slurm_free_job_user_id_msg(msg->data); + break; case REQUEST_JOB_INFO_SINGLE: _slurm_rpc_dump_job_single(msg); slurm_free_job_id_msg(msg->data); @@ -216,6 +228,10 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_dump_nodes(msg); slurm_free_node_info_request_msg(msg->data); break; + case REQUEST_NODE_INFO_SINGLE: + _slurm_rpc_dump_node_single(msg); + slurm_free_node_info_single_msg(msg->data); + break; case REQUEST_PARTITION_INFO: _slurm_rpc_dump_partitions(msg); slurm_free_part_info_request_msg(msg->data); @@ -446,6 +462,31 @@ void slurmctld_req (slurm_msg_t * msg) } } +/* These functions prevent certain RPCs from keeping the slurmctld write locks + * constantly set, which can prevent other RPCs and system functions from being + * processed. For example, a steady stream of batch submissions can prevent + * squeue from responding or jobs from being scheduled. 
*/ +static void _throttle_start(int *active_rpc_cnt) +{ + slurm_mutex_lock(&throttle_mutex); + while (1) { + if (*active_rpc_cnt == 0) { + (*active_rpc_cnt)++; + break; + } + pthread_cond_wait(&throttle_cond, &throttle_mutex); + } + slurm_mutex_unlock(&throttle_mutex); + usleep(1); +} +static void _throttle_fini(int *active_rpc_cnt) +{ + slurm_mutex_lock(&throttle_mutex); + (*active_rpc_cnt)--; + pthread_cond_broadcast(&throttle_cond); + slurm_mutex_unlock(&throttle_mutex); +} + /* * _fill_ctld_conf - make a copy of current slurm configuration * this is done with locks set so the data can change at other times @@ -453,6 +494,7 @@ void slurmctld_req (slurm_msg_t * msg) */ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) { + char *licenses_used = get_licenses_used(); /* Do before config lock */ slurm_ctl_conf_t *conf = slurm_conf_lock(); memset(conf_ptr, 0, sizeof(slurm_ctl_conf_t)); @@ -462,6 +504,8 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf->accounting_storage_enforce; conf_ptr->accounting_storage_host = xstrdup(conf->accounting_storage_host); + conf_ptr->accounting_storage_backup_host = + xstrdup(conf->accounting_storage_backup_host); conf_ptr->accounting_storage_loc = xstrdup(conf->accounting_storage_loc); conf_ptr->accounting_storage_port = conf->accounting_storage_port; @@ -474,6 +518,12 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->acct_gather_energy_type = xstrdup(conf->acct_gather_energy_type); + conf_ptr->acct_gather_filesystem_type = + xstrdup(conf->acct_gather_filesystem_type); + conf_ptr->acct_gather_infiniband_type = + xstrdup(conf->acct_gather_infiniband_type); + conf_ptr->acct_gather_profile_type = + xstrdup(conf->acct_gather_profile_type); conf_ptr->acct_gather_node_freq = conf->acct_gather_node_freq; conf_ptr->authtype = xstrdup(conf->authtype); @@ -493,11 +543,14 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->def_mem_per_cpu = conf->def_mem_per_cpu; conf_ptr->debug_flags = conf->debug_flags; 
conf_ptr->disable_root_jobs = conf->disable_root_jobs; + conf_ptr->dynalloc_port = conf->dynalloc_port; conf_ptr->enforce_part_limits = conf->enforce_part_limits; conf_ptr->epilog = xstrdup(conf->epilog); conf_ptr->epilog_msg_time = conf->epilog_msg_time; conf_ptr->epilog_slurmctld = xstrdup(conf->epilog_slurmctld); + conf_ptr->ext_sensors_type = xstrdup(conf->ext_sensors_type); + conf_ptr->ext_sensors_freq = conf->ext_sensors_freq; conf_ptr->fast_schedule = conf->fast_schedule; conf_ptr->first_job_id = conf->first_job_id; @@ -509,9 +562,10 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->hash_val = conf->hash_val; conf_ptr->health_check_interval = conf->health_check_interval; + conf_ptr->health_check_node_state = conf->health_check_node_state; conf_ptr->health_check_program = xstrdup(conf->health_check_program); - conf_ptr->job_acct_gather_freq = conf->job_acct_gather_freq; + conf_ptr->job_acct_gather_freq = xstrdup(conf->job_acct_gather_freq); conf_ptr->job_acct_gather_type = xstrdup(conf->job_acct_gather_type); conf_ptr->job_ckpt_dir = xstrdup(conf->job_ckpt_dir); @@ -531,14 +585,16 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->get_env_timeout = conf->get_env_timeout; + conf_ptr->keep_alive_time = conf->keep_alive_time; conf_ptr->kill_wait = conf->kill_wait; conf_ptr->kill_on_bad_exit = conf->kill_on_bad_exit; conf_ptr->launch_type = xstrdup(conf->launch_type); conf_ptr->licenses = xstrdup(conf->licenses); - conf_ptr->licenses_used = get_licenses_used(); + conf_ptr->licenses_used = licenses_used; conf_ptr->mail_prog = xstrdup(conf->mail_prog); + conf_ptr->max_array_sz = conf->max_array_sz; conf_ptr->max_job_cnt = conf->max_job_cnt; conf_ptr->max_job_id = conf->max_job_id; conf_ptr->max_mem_per_cpu = conf->max_mem_per_cpu; @@ -587,7 +643,9 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->resume_program = xstrdup(conf->resume_program); conf_ptr->resume_rate = conf->resume_rate; conf_ptr->resume_timeout = 
conf->resume_timeout; + conf_ptr->resv_epilog = xstrdup(conf->resv_epilog); conf_ptr->resv_over_run = conf->resv_over_run; + conf_ptr->resv_prolog = xstrdup(conf->resv_prolog); conf_ptr->ret2service = conf->ret2service; conf_ptr->salloc_default_command = xstrdup(conf-> @@ -612,6 +670,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->slurmctld_debug = conf->slurmctld_debug; conf_ptr->slurmctld_logfile = xstrdup(conf->slurmctld_logfile); conf_ptr->slurmctld_pidfile = xstrdup(conf->slurmctld_pidfile); + conf_ptr->slurmctld_plugstack = xstrdup(conf->slurmctld_plugstack); conf_ptr->slurmctld_port = conf->slurmctld_port; conf_ptr->slurmctld_port_count = conf->slurmctld_port_count; conf_ptr->slurmctld_timeout = conf->slurmctld_timeout; @@ -748,8 +807,8 @@ static int _make_step_cred(struct step_record *step_ptr, #else cred_arg.step_hostlist = step_ptr->step_layout->node_list; #endif - if (step_ptr->mem_per_cpu) - cred_arg.step_mem_limit = step_ptr->mem_per_cpu | MEM_PER_CPU; + if (step_ptr->pn_min_memory) + cred_arg.step_mem_limit = step_ptr->pn_min_memory; cred_arg.cores_per_socket = job_resrcs_ptr->cores_per_socket; cred_arg.sockets_per_node = job_resrcs_ptr->sockets_per_node; @@ -768,7 +827,7 @@ static int _make_step_cred(struct step_record *step_ptr, * a job */ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) { - /* init */ + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS; slurm_msg_t response_msg; DEF_TIMERS; @@ -781,7 +840,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) int immediate = job_desc_msg->immediate; bool do_unlock = false; bool job_waiting = false; - struct job_record *job_ptr; + struct job_record *job_ptr = NULL; uint16_t port; /* dummy value */ slurm_addr_t resp_addr; @@ -821,6 +880,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) dump_job_desc(job_desc_msg); if (error_code == SLURM_SUCCESS) { do_unlock = true; + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); 
error_code = job_allocate(job_desc_msg, immediate, @@ -882,6 +942,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) alloc_msg.pn_min_memory = 0; } unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); slurm_msg_t_init(&response_msg); response_msg.flags = msg->flags; @@ -897,8 +958,10 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) schedule_job_save(); /* has own locks */ schedule_node_save(); /* has own locks */ } else { /* allocate error */ - if (do_unlock) + if (do_unlock) { unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); + } info("_slurm_rpc_allocate_resources: %s ", slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); @@ -972,11 +1035,12 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) pack_all_jobs(&dump, &dump_size, job_info_request_msg->show_flags, g_slurm_auth_get_uid(msg->auth_cred, NULL), - msg->protocol_version); + NO_VAL, msg->protocol_version); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_jobs"); -/* info("_slurm_rpc_dump_jobs, size=%d %s", */ -/* dump_size, TIME_STR); */ +#if 0 + info("_slurm_rpc_dump_jobs, size=%d %s", dump_size, TIME_STR); +#endif /* init response_msg structure */ slurm_msg_t_init(&response_msg); @@ -993,6 +1057,47 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) } } +/* _slurm_rpc_dump_jobs - process RPC for job state information */ +static void _slurm_rpc_dump_jobs_user(slurm_msg_t * msg) +{ + DEF_TIMERS; + char *dump; + int dump_size; + slurm_msg_t response_msg; + job_user_id_msg_t *job_info_request_msg = + (job_user_id_msg_t *) msg->data; + /* Locks: Read config job, write node (for hiding) */ + slurmctld_lock_t job_read_lock = { + READ_LOCK, READ_LOCK, NO_LOCK, WRITE_LOCK }; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + + START_TIMER; + debug3("Processing RPC: REQUEST_JOB_USER_INFO from uid=%d", uid); + lock_slurmctld(job_read_lock); + pack_all_jobs(&dump, &dump_size, + job_info_request_msg->show_flags, + 
g_slurm_auth_get_uid(msg->auth_cred, NULL), + job_info_request_msg->user_id, msg->protocol_version); + unlock_slurmctld(job_read_lock); + END_TIMER2("_slurm_rpc_dump_job_user"); +#if 0 + info("_slurm_rpc_dump_user_jobs, size=%d %s", dump_size, TIME_STR); +#endif + + /* init response_msg structure */ + slurm_msg_t_init(&response_msg); + response_msg.flags = msg->flags; + response_msg.protocol_version = msg->protocol_version; + response_msg.address = msg->address; + response_msg.msg_type = RESPONSE_JOB_INFO; + response_msg.data = dump; + response_msg.data_size = dump_size; + + /* send message */ + slurm_send_node_msg(msg->conn_fd, &response_msg); + xfree(dump); +} + /* _slurm_rpc_dump_job_single - process RPC for one job's state information */ static void _slurm_rpc_dump_job_single(slurm_msg_t * msg) { @@ -1016,7 +1121,9 @@ static void _slurm_rpc_dump_job_single(slurm_msg_t * msg) msg->protocol_version); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_job_single"); -/* info("_slurm_rpc_dump_job_single, size=%d %s",dump_size, TIME_STR); */ +#if 0 + info("_slurm_rpc_dump_job_single, size=%d %s", dump_size, TIME_STR); +#endif /* init response_msg structure */ if (rc != SLURM_SUCCESS) { @@ -1056,7 +1163,7 @@ static void _slurm_rpc_get_shares(slurm_msg_t *msg) response_msg.msg_type = RESPONSE_SHARE_INFO; response_msg.data = &resp_msg; slurm_send_node_msg(msg->conn_fd, &response_msg); - if(resp_msg.assoc_shares_list) + if (resp_msg.assoc_shares_list) list_destroy(resp_msg.assoc_shares_list); END_TIMER2("_slurm_rpc_get_share"); debug2("_slurm_rpc_get_shares %s", TIME_STR); @@ -1083,7 +1190,7 @@ static void _slurm_rpc_get_priority_factors(slurm_msg_t *msg) response_msg.msg_type = RESPONSE_PRIORITY_FACTORS; response_msg.data = &resp_msg; slurm_send_node_msg(msg->conn_fd, &response_msg); - if(resp_msg.priority_factors_list) + if (resp_msg.priority_factors_list) list_destroy(resp_msg.priority_factors_list); END_TIMER2("_slurm_rpc_get_priority_factors"); 
debug2("_slurm_rpc_get_priority_factors %s", TIME_STR); @@ -1170,7 +1277,7 @@ static void _slurm_rpc_dump_front_end(slurm_msg_t * msg) } } -/* _slurm_rpc_dump_nodes - process RPC for node state information */ +/* _slurm_rpc_dump_nodes - dump RPC for node state information */ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) { DEF_TIMERS; @@ -1205,13 +1312,13 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) debug3("_slurm_rpc_dump_nodes, no change"); slurm_send_rc_msg(msg, SLURM_NO_CHANGE_IN_DATA); } else { - pack_all_node(&dump, &dump_size, node_req_msg->show_flags, uid, msg->protocol_version); unlock_slurmctld(node_write_lock); END_TIMER2("_slurm_rpc_dump_nodes"); - debug3("_slurm_rpc_dump_nodes, size=%d %s", - dump_size, TIME_STR); +#if 0 + info("_slurm_rpc_dump_nodes, size=%d %s", dump_size, TIME_STR); +#endif /* init response_msg structure */ slurm_msg_t_init(&response_msg); @@ -1228,6 +1335,61 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) } } +/* _slurm_rpc_dump_node_single - done RPC state information for one node */ +static void _slurm_rpc_dump_node_single(slurm_msg_t * msg) +{ + DEF_TIMERS; + char *dump; + int dump_size; + slurm_msg_t response_msg; + node_info_single_msg_t *node_req_msg = + (node_info_single_msg_t *) msg->data; + /* Locks: Read config, read node */ + slurmctld_lock_t node_read_lock = { + READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + + START_TIMER; + debug3("Processing RPC: REQUEST_NODE_INFO_SINGLE from uid=%d", uid); + lock_slurmctld(node_read_lock); + + if ((slurmctld_conf.private_data & PRIVATE_DATA_NODES) && + (!validate_operator(uid))) { + unlock_slurmctld(node_read_lock); + error("Security violation, REQUEST_NODE_INFO_SINGLE RPC from " + "uid=%d", uid); + slurm_send_rc_msg(msg, ESLURM_ACCESS_DENIED); + return; + } + +#if 0 + /* This function updates each node's alloc_cpus count and too slow for + * our use here. 
Node write lock is needed if this function is used */ + select_g_select_nodeinfo_set_all(); +#endif + pack_one_node(&dump, &dump_size, node_req_msg->show_flags, + uid, node_req_msg->node_name, msg->protocol_version); + unlock_slurmctld(node_read_lock); + END_TIMER2("_slurm_rpc_dump_node_single"); +#if 0 + info("_slurm_rpc_dump_node_single, name=%s size=%d %s", + node_req_msg->node_name, dump_size, TIME_STR); +#endif + + /* init response_msg structure */ + slurm_msg_t_init(&response_msg); + response_msg.flags = msg->flags; + response_msg.protocol_version = msg->protocol_version; + response_msg.address = msg->address; + response_msg.msg_type = RESPONSE_NODE_INFO; + response_msg.data = dump; + response_msg.data_size = dump_size; + + /* send message */ + slurm_send_node_msg(msg->conn_fd, &response_msg); + xfree(dump); +} + /* _slurm_rpc_dump_partitions - process RPC for partition state information */ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) { @@ -1284,6 +1446,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) * the epilog denoting the completion of a job it its entirety */ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) { + static time_t config_update = 0; + static bool defer_sched = false; DEF_TIMERS; /* Locks: Read configuration, write job, write node */ slurmctld_lock_t job_write_lock = { @@ -1301,6 +1465,12 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) return; } + if (config_update != slurmctld_conf.last_update) { + char *sched_params = slurm_get_sched_params(); + defer_sched = (sched_params && strstr(sched_params,"defer")); + xfree(sched_params); + } + lock_slurmctld(job_write_lock); if (job_epilog_complete(epilog_msg->job_id, epilog_msg->node_name, epilog_msg->return_code)) @@ -1319,7 +1489,16 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) /* Functions below provide their own locking */ if (run_scheduler) { - (void) schedule(0); + /* + * In defer mode, avoid triggering the scheduler logic + * for 
every epilog complete message. + * As one epilog message is sent from every node of each + * job at termination, the number of simultaneous schedule + * calls can be very high for large machine or large number + * of managed jobs. + */ + if (!defer_sched) + (void) schedule(0); schedule_node_save(); schedule_job_save(); } @@ -1331,7 +1510,7 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) * an individual job step */ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) { - /* init */ + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS; DEF_TIMERS; job_step_kill_msg_t *job_step_kill_msg = @@ -1343,6 +1522,7 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) START_TIMER; debug2("Processing RPC: REQUEST_CANCEL_JOB_STEP uid=%d", uid); + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); /* do RPC call */ @@ -1350,9 +1530,10 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) /* NOTE: SLURM_BATCH_SCRIPT == NO_VAL */ error_code = job_signal(job_step_kill_msg->job_id, job_step_kill_msg->signal, - job_step_kill_msg->batch_flag, uid, + job_step_kill_msg->flags, uid, false); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_job_step_kill"); /* return result */ @@ -1383,6 +1564,7 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) job_step_kill_msg->signal, uid); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_job_step_kill"); /* return result */ @@ -1420,6 +1602,7 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) * completion of a job allocation */ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) { + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS; DEF_TIMERS; complete_job_allocation_msg_t *comp_msg = @@ -1437,6 +1620,7 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) "uid=%u, JobId=%u rc=%d", uid, comp_msg->job_id, comp_msg->job_rc); + _throttle_start(&active_rpc_cnt); 
lock_slurmctld(job_write_lock); /* do RPC call */ @@ -1444,6 +1628,7 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) error_code = job_complete(comp_msg->job_id, uid, job_requeue, false, comp_msg->job_rc); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_complete_job_allocation"); /* return result */ @@ -1464,6 +1649,7 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) * completion of a batch script */ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) { + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS, i; DEF_TIMERS; complete_batch_script_msg_t *comp_msg = @@ -1496,7 +1682,7 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) return; } - + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); job_ptr = find_job_record(comp_msg->job_id); @@ -1552,11 +1738,12 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) comp_msg->job_id, slurm_strerror(comp_msg->slurm_rc)); dump_job = job_requeue = true; #endif - /* Handle non-fatal errors here */ + /* Handle non-fatal errors here. All others drain the node. 
*/ } else if ((comp_msg->slurm_rc == SLURM_COMMUNICATIONS_SEND_ERROR) || (comp_msg->slurm_rc == ESLURM_USER_ID_MISSING) || - (comp_msg->slurm_rc == ESLURMD_UID_NOT_FOUND) || - (comp_msg->slurm_rc == ESLURMD_GID_NOT_FOUND)) { + (comp_msg->slurm_rc == ESLURMD_UID_NOT_FOUND) || + (comp_msg->slurm_rc == ESLURMD_GID_NOT_FOUND) || + (comp_msg->slurm_rc == ESLURMD_INVALID_ACCT_FREQ)) { error("Slurmd error running JobId=%u on %s=%s: %s", comp_msg->job_id, msg_title, nodes, slurm_strerror(comp_msg->slurm_rc)); @@ -1611,6 +1798,7 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) comp_msg->job_rc); error_code = MAX(error_code, i); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); #ifdef HAVE_BG if (block_desc.bg_block_id) { @@ -1653,7 +1841,7 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) * with the step_mgr */ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) { - /* init */ + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS; DEF_TIMERS; slurm_msg_t resp; @@ -1690,17 +1878,20 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) } #endif if (error_code == SLURM_SUCCESS) { - /* issue the RPC */ + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); error_code = step_create(req_step_msg, &step_rec, false); } - if (error_code == SLURM_SUCCESS) + if (error_code == SLURM_SUCCESS) { error_code = _make_step_cred(step_rec, &slurm_cred); + ext_sensors_g_get_stepstartdata(step_rec); + } END_TIMER2("_slurm_rpc_job_step_create"); /* return result */ if (error_code) { unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); if (error_code == ESLURM_PROLOG_RUNNING) { debug("_slurm_rpc_job_step_create for job %u: %s", req_step_msg->job_id, slurm_strerror(error_code)); @@ -1727,9 +1918,10 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) #endif job_step_resp.cred = slurm_cred; job_step_resp.select_jobinfo = step_rec->select_jobinfo; - job_step_resp.switch_job = 
step_rec->switch_job; + job_step_resp.switch_job = step_rec->switch_job; unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); slurm_msg_t_init(&resp); resp.flags = msg->flags; resp.protocol_version = msg->protocol_version; @@ -2275,7 +2467,7 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) info("_slurm_rpc_reconfigure_controller: completed %s", TIME_STR); slurm_send_rc_msg(msg, SLURM_SUCCESS); - priority_g_reconfig(); /* notify priority plugin too */ + priority_g_reconfig(false); /* notify priority plugin too */ schedule(0); /* has its own locks */ save_all_state(); } @@ -2398,6 +2590,7 @@ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) * represent the termination of an entire job */ static void _slurm_rpc_step_complete(slurm_msg_t *msg) { + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS, rc, rem; uint32_t step_rc; DEF_TIMERS; @@ -2417,12 +2610,14 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg) req->range_first, req->range_last, req->step_rc, uid); + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); rc = step_partial_comp(req, uid, &rem, &step_rc); if (rc || rem) { /* some error or not totally done */ /* Note: Error printed within step_partial_comp */ unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); slurm_send_rc_msg(msg, rc); if (!rc) /* partition completion */ schedule_job_save(); /* Has own locking */ @@ -2434,6 +2629,7 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg) error_code = job_complete(req->job_id, uid, job_requeue, false, step_rc); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_step_complete"); /* return result */ @@ -2451,6 +2647,7 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg) error_code = job_step_complete(req->job_id, req->job_step_id, uid, job_requeue, step_rc); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); 
END_TIMER2("_slurm_rpc_step_complete"); /* return result */ @@ -2563,6 +2760,9 @@ static void _slurm_rpc_step_update(slurm_msg_t *msg) /* _slurm_rpc_submit_batch_job - process RPC to submit a batch job */ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) { + static time_t config_update = 0; + static bool defer_sched = false; + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS; DEF_TIMERS; uint32_t step_id = 0; @@ -2574,10 +2774,17 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + int schedule_cnt = 1; START_TIMER; debug2("Processing RPC: REQUEST_SUBMIT_BATCH_JOB from uid=%d", uid); + if (config_update != slurmctld_conf.last_update) { + char *sched_params = slurm_get_sched_params(); + defer_sched = (sched_params && strstr(sched_params,"defer")); + xfree(sched_params); + } + slurm_msg_t_init(&response_msg); response_msg.flags = msg->flags; response_msg.protocol_version = msg->protocol_version; @@ -2593,10 +2800,14 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) error_code = ESLURM_INVALID_NODE_NAME; error("REQUEST_SUBMIT_BATCH_JOB lacks alloc_node from uid=%d", uid); } - if (error_code == SLURM_SUCCESS) + if (error_code == SLURM_SUCCESS) { error_code = validate_job_create_req(job_desc_msg); + if (job_desc_msg->array_bitmap) + schedule_cnt = 0; /* Do full schedule cycle */ + } dump_job_desc(job_desc_msg); if (error_code == SLURM_SUCCESS) { + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); if (job_desc_msg->job_id != SLURM_BATCH_SCRIPT) { job_ptr = find_job_record(job_desc_msg->job_id); @@ -2608,6 +2819,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) msg, ESLURM_DUPLICATE_JOB_ID); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); return; } job_ptr = NULL; /* OK to re-use job id */ @@ -2626,6 +2838,7 @@ static void 
_slurm_rpc_submit_batch_job(slurm_msg_t * msg) "uid=%d", uid); slurm_send_rc_msg(msg, ESLURM_NO_STEPS); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); return; } #endif @@ -2638,18 +2851,21 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) job_ptr->user_id); slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); return; } if (job_ptr->details && job_ptr->details->prolog_running) { slurm_send_rc_msg(msg, EAGAIN); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); return; } error_code = _launch_batch_step(job_desc_msg, uid, &step_id); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_submit_batch_job"); if (error_code != SLURM_SUCCESS) { @@ -2679,6 +2895,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) job_desc_msg->immediate, false, NULL, 0, uid, &job_ptr); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_submit_batch_job"); if (job_desc_msg->immediate && (error_code != SLURM_SUCCESS)) error_code = ESLURM_CAN_NOT_START_IMMEDIATELY; @@ -2695,9 +2912,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { - int sched_count = 1; /* Job count to attempt to schedule */ if (job_ptr->part_ptr_list) - sched_count = list_count(job_ptr->part_ptr_list); + schedule_cnt *= list_count(job_ptr->part_ptr_list); info("_slurm_rpc_submit_batch_job JobId=%u %s", job_ptr->job_id, TIME_STR); /* send job_ID */ @@ -2711,8 +2927,13 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) * to run the various prologs, boot the node, etc. * We also run schedule() even if this job could not start, * say due to a higher priority job, since the locks are - * released above and we might start some other job here. 
*/ - schedule(sched_count); /* has own locks */ + * released above and we might start some other job here. + * + * In defer mode, avoid triggering the scheduler logic + * for every submit batch job request. + */ + if (!defer_sched) + (void) schedule(schedule_cnt); /* has own locks */ schedule_job_save(); /* has own locks */ schedule_node_save(); /* has own locks */ } @@ -3629,6 +3850,9 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, agent_arg_t *agent_arg_ptr; struct node_record *node_ptr; + if (job_desc_msg->array_inx && job_desc_msg->array_inx[0]) + return ESLURM_INVALID_ARRAY; + /* * Create a job step. Note that a credential is not necessary, * since the slurmctld will be submitting this job directly to @@ -3722,12 +3946,14 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, launch_msg_ptr->std_err = xstrdup(job_desc_msg->std_err); launch_msg_ptr->std_in = xstrdup(job_desc_msg->std_in); launch_msg_ptr->std_out = xstrdup(job_desc_msg->std_out); - launch_msg_ptr->acctg_freq = job_desc_msg->acctg_freq; + launch_msg_ptr->acctg_freq = xstrdup(job_desc_msg->acctg_freq); launch_msg_ptr->open_mode = job_desc_msg->open_mode; launch_msg_ptr->work_dir = xstrdup(job_desc_msg->work_dir); launch_msg_ptr->argc = job_desc_msg->argc; launch_msg_ptr->argv = xduparray(job_desc_msg->argc, job_desc_msg->argv); + launch_msg_ptr->array_job_id = job_ptr->array_job_id; + launch_msg_ptr->array_task_id = job_ptr->array_task_id; launch_msg_ptr->spank_job_env_size = job_ptr->spank_job_env_size; launch_msg_ptr->spank_job_env = xduparray(job_ptr->spank_job_env_size, job_ptr->spank_job_env); @@ -3776,8 +4002,8 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, agent_arg_ptr->retry = 0; xassert(job_ptr->batch_host); agent_arg_ptr->hostlist = hostlist_create(job_ptr->batch_host); - if (agent_arg_ptr->hostlist == NULL) - fatal("hostlist_create: malloc failure"); + if (!agent_arg_ptr->hostlist) + fatal("Invalid batch host: %s", 
job_ptr->batch_host); agent_arg_ptr->msg_type = REQUEST_BATCH_JOB_LAUNCH; agent_arg_ptr->msg_args = (void *) launch_msg_ptr; @@ -3969,16 +4195,17 @@ inline static void _slurm_rpc_set_debug_flags(slurm_msg_t *msg) slurmctld_conf.last_update = time(NULL); /* Reset cached debug_flags values */ + log_set_debug_flags(); gs_reconfig(); gres_plugin_reconfig(NULL); - priority_g_reconfig(); + priority_g_reconfig(false); select_g_reconfigure(); (void) slurm_sched_reconfig(); (void) switch_g_reconfig(); unlock_slurmctld (config_write_lock); flag_string = debug_flags2str(debug_flags); - info("Set DebugFlags to %s", flag_string); + info("Set DebugFlags to %s", flag_string ? flag_string : "none"); xfree(flag_string); slurm_send_rc_msg(msg, SLURM_SUCCESS); } @@ -4109,7 +4336,7 @@ inline static void _slurm_rpc_accounting_update_msg(slurm_msg_t *msg) slurm_send_rc_msg(msg, EACCES); return; } - if(update_ptr->update_list && list_count(update_ptr->update_list)) + if (update_ptr->update_list && list_count(update_ptr->update_list)) rc = assoc_mgr_update(update_ptr->update_list); END_TIMER2("_slurm_rpc_accounting_update_msg"); @@ -4149,8 +4376,6 @@ inline static void _slurm_rpc_reboot_nodes(slurm_msg_t * msg) nodelist = reboot_msg->node_list; if (!nodelist || !strcasecmp(nodelist, "ALL")) { bitmap = bit_alloc(node_record_count); - if (!bitmap) - fatal("malloc failure"); bit_nset(bitmap, 0, (node_record_count - 1)); } else if (node_name2bitmap(nodelist, false, &bitmap) != 0) { FREE_NULL_BITMAP(bitmap); diff --git a/src/slurmctld/proc_req.h b/src/slurmctld/proc_req.h index 6f5f252d40a89e9ea8403e7c013cfffe92da2c25..b0e6ad56a5076561839464514f661e78a2dfc74f 100644 --- a/src/slurmctld/proc_req.h +++ b/src/slurmctld/proc_req.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 82ccd3329d185b2ec68e052af440b78ae5fce397..20081a5f6490c3d7a15d1613095a944fa101cb6b 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -111,6 +111,7 @@ static int _restore_node_state(int recover, int old_node_record_count); static int _restore_part_state(List old_part_list, char *old_def_part_name, uint16_t flags); +static void _stat_slurm_dirs(void); static int _strcmp(const char *s1, const char *s2); static int _sync_nodes_to_comp_job(void); static int _sync_nodes_to_jobs(void); @@ -122,6 +123,38 @@ static int _update_preempt(uint16_t old_enable_preempt); static void _validate_node_proc_count(void); #endif +/* Verify that Slurm directories are secure, not world writable */ +static void _stat_slurm_dirs(void) +{ + struct stat stat_buf; + char *problem_dir = NULL; + + if ((stat(slurmctld_conf.plugindir, &stat_buf) == 0) && + (stat_buf.st_mode & S_IWOTH)) { + problem_dir = "PluginDir"; + } + if ((stat(slurmctld_conf.plugstack, &stat_buf) == 0) && + (stat_buf.st_mode & S_IWOTH)) { + problem_dir = "PlugStack"; + } + if ((stat(slurmctld_conf.slurmd_spooldir, &stat_buf) == 0) && + (stat_buf.st_mode & S_IWOTH)) { + problem_dir = "SlurmdSpoolDir"; + } + if ((stat(slurmctld_conf.state_save_location, &stat_buf) == 0) && + (stat_buf.st_mode & S_IWOTH)) { + problem_dir = "StateSaveLocation"; + } + + if (problem_dir) { + error("################################################"); + error("### SEVERE SECURITY 
VULERABILTY ###"); + error("### %s DIRECTORY IS WORLD WRITABLE ###", problem_dir); + error("### CORRECT FILE PERMISSIONS ###"); + error("################################################"); + } +} + /* * _reorder_nodes_by_name - order node table in ascending order of name */ @@ -219,9 +252,6 @@ static void _build_bitmaps_pre_select(void) /* scan partition table and identify nodes in each */ part_iterator = list_iterator_create(part_list); - if (part_iterator == NULL) - fatal ("memory allocation failure"); - while ((part_ptr = (struct part_record *) list_next(part_iterator))) { FREE_NULL_BITMAP(part_ptr->node_bitmap); @@ -292,25 +322,14 @@ static int _build_bitmaps(void) power_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); share_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); up_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); - if ((avail_node_bitmap == NULL) || - (cg_node_bitmap == NULL) || - (idle_node_bitmap == NULL) || - (power_node_bitmap == NULL) || - (share_node_bitmap == NULL) || - (up_node_bitmap == NULL)) - fatal ("bit_alloc malloc failure"); /* initialize the configuration bitmaps */ config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal ("memory allocation failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { FREE_NULL_BITMAP(config_ptr->node_bitmap); config_ptr->node_bitmap = (bitstr_t *) bit_alloc(node_record_count); - if (config_ptr->node_bitmap == NULL) - fatal ("bit_alloc malloc failure"); } list_iterator_destroy(config_iterator); @@ -327,8 +346,6 @@ static int _build_bitmaps(void) (job_ptr->details->shared != 0)) continue; tmp_bits = bit_copy(job_ptr->node_bitmap); - if (tmp_bits == NULL) - fatal ("bit_copy malloc failure"); bit_not(tmp_bits); bit_and(share_node_bitmap, tmp_bits); FREE_NULL_BITMAP(tmp_bits); @@ -365,8 +382,6 @@ static int _build_bitmaps(void) } config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal 
("memory allocation failure"); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { build_config_feature_list(config_ptr); @@ -583,6 +598,7 @@ static int _build_single_partitionline_info(slurm_conf_partition_t *part) part_ptr->max_time = part->max_time; part_ptr->def_mem_per_cpu = part->def_mem_per_cpu; part_ptr->default_time = part->default_time; + part_ptr->max_cpus_per_node = part->max_cpus_per_node; part_ptr->max_share = part->max_share; part_ptr->max_mem_per_cpu = part->max_mem_per_cpu; part_ptr->max_nodes = part->max_nodes; @@ -593,6 +609,7 @@ static int _build_single_partitionline_info(slurm_conf_partition_t *part) part_ptr->priority = part->priority; part_ptr->state_up = part->state_up; part_ptr->grace_time = part->grace_time; + part_ptr->cr_type = part->cr_type; if (part->allow_groups) { xfree(part_ptr->allow_groups); @@ -681,8 +698,6 @@ static void _sync_part_prio(void) part_max_priority = 0; itr = list_iterator_create(part_list); - if (itr == NULL) - fatal("list_iterator_create malloc failure"); while ((part_ptr = list_next(itr))) { if (part_ptr->priority > part_max_priority) part_max_priority = part_ptr->priority; @@ -691,8 +706,6 @@ static void _sync_part_prio(void) if (part_max_priority) { itr = list_iterator_create(part_list); - if (itr == NULL) - fatal("list_iterator_create malloc failure"); while ((part_ptr = list_next(itr))) { part_ptr->norm_priority = (double)part_ptr->priority / (double)part_max_priority; @@ -742,7 +755,7 @@ int read_slurm_conf(int recover, bool reconfig) if (reconfig) { /* in order to re-use job state information, - * update nodes_completing string (based on node_bitmap) */ + * update nodes_completing string (based on node bitmaps) */ update_job_nodes_completing(); /* save node and partition states for reconfig RPC */ @@ -824,6 +837,7 @@ int read_slurm_conf(int recover, bool reconfig) rehash_jobs(); set_slurmd_addr(); + _stat_slurm_dirs(); if (reconfig) { /* Preserve state from memory */ if 
(old_node_table_ptr) { info("restoring original state of nodes"); @@ -909,7 +923,7 @@ int read_slurm_conf(int recover, bool reconfig) } else { load_all_resv_state(recover); if (recover >= 1) { - (void) trigger_state_restore(); + trigger_state_restore(); (void) slurm_sched_reconfig(); } } @@ -1057,7 +1071,7 @@ static int _restore_node_state(int recover, node_ptr->config_ptr->cpus); } #endif - node_ptr->boot_time = old_node_ptr->boot_time; + node_ptr->boot_time = old_node_ptr->boot_time; node_ptr->cpus = old_node_ptr->cpus; node_ptr->cores = old_node_ptr->cores; node_ptr->last_idle = old_node_ptr->last_idle; @@ -1065,7 +1079,7 @@ static int _restore_node_state(int recover, node_ptr->sockets = old_node_ptr->sockets; node_ptr->threads = old_node_ptr->threads; node_ptr->real_memory = old_node_ptr->real_memory; - node_ptr->slurmd_start_time = old_node_ptr->slurmd_start_time; + node_ptr->slurmd_start_time = old_node_ptr->slurmd_start_time; node_ptr->tmp_disk = old_node_ptr->tmp_disk; node_ptr->weight = old_node_ptr->weight; @@ -1173,8 +1187,6 @@ static int _restore_part_state(List old_part_list, char *old_def_part_name, /* For each part in list, find and update recs */ part_iterator = list_iterator_create(old_part_list); - if (!part_iterator) - fatal("list_iterator_create malloc"); while ((old_part_ptr = (struct part_record *) list_next(part_iterator))) { xassert(old_part_ptr->magic == PART_MAGIC); @@ -1188,7 +1200,7 @@ static int _restore_part_state(List old_part_list, char *old_def_part_name, part_ptr->state_up = old_part_ptr->state_up; } continue; - } + } /* Current partition found in slurm.conf, * report differences from slurm.conf configuration */ if (_strcmp(part_ptr->allow_groups, @@ -1204,8 +1216,9 @@ static int _restore_part_state(List old_part_list, char *old_def_part_name, error("Partition %s AllowNodes differs from " "slurm.conf", part_ptr->name); xfree(part_ptr->allow_alloc_nodes); - part_ptr->allow_groups = xstrdup(old_part_ptr-> - allow_alloc_nodes); + 
part_ptr->allow_alloc_nodes = + xstrdup(old_part_ptr-> + allow_alloc_nodes); } if (part_ptr->default_time != old_part_ptr->default_time) { @@ -1537,6 +1550,13 @@ static int _sync_nodes_to_comp_job(void) while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if ((job_ptr->node_bitmap) && IS_JOB_COMPLETING(job_ptr)) { update_cnt++; + /* This needs to be set up for the priority + plugin and this happens before it is + normally set up so do it now. + */ + if (!cluster_cpus) + set_cluster_cpus(); + info("Job %u in completing state", job_ptr->job_id); if (!job_ptr->node_bitmap_cg) build_cg_bitmap(job_ptr); @@ -1560,9 +1580,15 @@ static int _sync_nodes_to_active_job(struct job_record *job_ptr) uint16_t node_flags; struct node_record *node_ptr = node_record_table_ptr; - job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap); + if (job_ptr->node_bitmap_cg) /* job completing */ + job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap_cg); + else + job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap); for (i = 0; i < node_record_count; i++, node_ptr++) { - if (bit_test(job_ptr->node_bitmap, i) == 0) + if (job_ptr->node_bitmap_cg) { /* job completing */ + if (bit_test(job_ptr->node_bitmap_cg, i) == 0) + continue; + } else if (bit_test(job_ptr->node_bitmap, i) == 0) continue; node_flags = node_ptr->node_state & NODE_STATE_FLAGS; diff --git a/src/slurmctld/read_config.h b/src/slurmctld/read_config.h index 43ea8487ec27655f8d17bba9c008f421d773de67..3b5300899ce081ec428b510286dd8e5ac9c6e776 100644 --- a/src/slurmctld/read_config.h +++ b/src/slurmctld/read_config.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index e151c4a28fd9315438f16b2a3f5472279ebcc28c..c2b63aad8196f43e434b7c73cc45201792915004 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -44,6 +44,10 @@ # include <pthread.h> #endif /* WITH_PTHREADS */ +#if defined(__FreeBSD__) +#include <signal.h> +#endif + #include <fcntl.h> #include <string.h> #include <stdlib.h> @@ -86,6 +90,11 @@ #define RESV_2_5_STATE_VERSION "VER004" #define RESV_2_4_STATE_VERSION "VER003" +typedef struct resv_thread_args { + char *script; + char *resv_name; +} resv_thread_args_t; + time_t last_resv_update = (time_t) 0; List resv_list = (List) NULL; uint32_t resv_over_run; @@ -106,6 +115,8 @@ static void _del_resv_rec(void *x); static void _dump_resv_req(resv_desc_msg_t *resv_ptr, char *mode); static int _find_resv_id(void *x, void *key); static int _find_resv_name(void *x, void *key); +static void *_fork_script(void *x); +static void _free_script_arg(resv_thread_args_t *args); static void _generate_resv_id(void); static void _generate_resv_name(resv_desc_msg_t *resv_ptr); static uint32_t _get_job_duration(struct job_record *job_ptr); @@ -134,6 +145,7 @@ static void _restore_resv(slurmctld_resv_t *dest_resv, static bool _resv_overlap(time_t start_time, time_t end_time, uint16_t flags, bitstr_t *node_bitmap, slurmctld_resv_t *this_resv_ptr); +static void _run_script(char *script, slurmctld_resv_t *resv_ptr); static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, struct part_record **part_ptr, bitstr_t **resv_bitmap, 
bitstr_t **core_bitmap); @@ -177,11 +189,7 @@ static List _list_dup(List license_list) return lic_list; lic_list = list_create(license_free_rec); - if (lic_list == NULL) - fatal("list_create malloc failure"); iter = list_iterator_create(license_list); - if (!iter) - fatal("list_interator_create malloc failure"); while ((license_src = (licenses_t *) list_next(iter))) { license_dest = xmalloc(sizeof(licenses_t)); license_dest->name = xstrdup(license_src->name); @@ -632,8 +640,6 @@ static int _set_assoc_list(slurmctld_resv_t *resv_ptr) xfree(resv_ptr->assoc_list); /* clear for modify */ if (list_count(assoc_list_allow)) { ListIterator itr = list_iterator_create(assoc_list_allow); - if (!itr) - fatal("malloc: list_iterator_create"); while ((assoc_ptr = list_next(itr))) { if (resv_ptr->assoc_list) { xstrfmtcat(resv_ptr->assoc_list, "%u,", @@ -647,8 +653,6 @@ static int _set_assoc_list(slurmctld_resv_t *resv_ptr) } if (list_count(assoc_list_deny)) { ListIterator itr = list_iterator_create(assoc_list_deny); - if (!itr) - fatal("malloc: list_iterator_create"); while ((assoc_ptr = list_next(itr))) { if (resv_ptr->assoc_list) { xstrfmtcat(resv_ptr->assoc_list, "-%u,", @@ -770,9 +774,9 @@ static int _post_resv_update(slurmctld_resv_t *resv_ptr, * new start time of now. 
*/ if ((resv_ptr->start_time < now) && (resv.assocs - || resv.nodes - || (resv.flags != (uint16_t)NO_VAL) - || (resv.cpus != (uint32_t)NO_VAL))) { + || resv.nodes + || (resv.flags != (uint16_t)NO_VAL) + || (resv.cpus != (uint32_t)NO_VAL))) { resv_ptr->start_time_prev = resv_ptr->start_time; resv_ptr->start_time = now; } @@ -1291,7 +1295,7 @@ static void _pack_resv(slurmctld_resv_t *resv_ptr, Buf buffer, cnode_cnt = resv_ptr->node_cnt; if (cnodes_per_bp && !internal) cnode_cnt *= cnodes_per_bp; - pack32(cnode_cnt, buffer); + pack32(cnode_cnt, buffer); #else pack32(resv_ptr->node_cnt, buffer); #endif @@ -1331,7 +1335,7 @@ static void _pack_resv(slurmctld_resv_t *resv_ptr, Buf buffer, cnode_cnt = resv_ptr->node_cnt; if (cnodes_per_bp && !internal) cnode_cnt *= cnodes_per_bp; - pack32(cnode_cnt, buffer); + pack32(cnode_cnt, buffer); #else pack32(resv_ptr->node_cnt, buffer); #endif @@ -1456,8 +1460,6 @@ static bool _job_overlap(time_t start_time, uint16_t flags, return overlap; job_iterator = list_iterator_create(job_list); - if (!job_iterator) - fatal("malloc: list_iterator_create"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (IS_JOB_RUNNING(job_ptr) && (job_ptr->end_time > start_time) && @@ -1492,8 +1494,6 @@ static bool _resv_overlap(time_t start_time, time_t end_time, return rc; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (resv_ptr == this_resv_ptr) @@ -1502,6 +1502,8 @@ static bool _resv_overlap(time_t start_time, time_t end_time, continue; /* no specific nodes in reservation */ if (!bit_overlap(resv_ptr->node_bitmap, node_bitmap)) continue; /* no overlap */ + if (!resv_ptr->full_nodes) + continue; for (i=0; ((i<7) && (!rc)); i++) { /* look forward one week */ s_time1 = start_time; @@ -1570,7 +1572,7 @@ extern int create_resv(resv_desc_msg_t *resv_desc_ptr) List license_list = (List) NULL; char *name1, *name2, *val1, *val2; 
uint32_t total_node_cnt = NO_VAL; - bool account_not, user_not; + bool account_not = false, user_not = false; if (!resv_list) resv_list = list_create(_del_resv_rec); @@ -1675,9 +1677,6 @@ extern int create_resv(resv_desc_msg_t *resv_desc_ptr) } } - if (resv_desc_ptr->core_cnt == NO_VAL) - resv_desc_ptr->core_cnt = 0; - #ifdef HAVE_BG if (!cnodes_per_bp) { select_g_alter_node_cnt(SELECT_GET_NODE_SCALING, @@ -1749,18 +1748,37 @@ extern int create_resv(resv_desc_msg_t *resv_desc_ptr) } total_node_cnt = bit_set_count(node_bitmap); if (!(resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS) && + !resv_desc_ptr->core_cnt && _job_overlap(resv_desc_ptr->start_time, resv_desc_ptr->flags, node_bitmap)) { info("Reservation request overlaps jobs"); rc = ESLURM_NODES_BUSY; goto bad_parse; } - /* We do no allow to request cores with nodelist */ - info("Reservation CoreCnt cleared due to Nodes specification"); - resv_desc_ptr->core_cnt = 0; + /* We do allow to request cores with nodelist */ + if (resv_desc_ptr->core_cnt) { + int nodecnt = bit_set_count(node_bitmap); + int nodeinx = 0; + while (nodeinx < nodecnt) { + if (!resv_desc_ptr->core_cnt[nodeinx]) { + info("Core count for reservation node " + "list is not consistent!"); + rc = ESLURM_INVALID_NODE_NAME; + goto bad_parse; + } + debug2("Requesting %d cores for node_list %d", + resv_desc_ptr->core_cnt[nodeinx], + nodeinx); + nodeinx++; + } + rc = _select_nodes(resv_desc_ptr, &part_ptr, + &node_bitmap, &core_bitmap); + if (rc != SLURM_SUCCESS) + goto bad_parse; + } } else if (((resv_desc_ptr->node_cnt == NULL) || (resv_desc_ptr->node_cnt[0] == 0)) && - (resv_desc_ptr->core_cnt == 0) && + (!resv_desc_ptr->core_cnt) && ((resv_desc_ptr->flags & RESERVE_FLAG_LIC_ONLY) == 0)) { info("Reservation request lacks node specification"); rc = ESLURM_INVALID_NODE_NAME; @@ -1832,14 +1850,14 @@ extern int create_resv(resv_desc_msg_t *resv_desc_ptr) resv_ptr->user_not = user_not; resv_desc_ptr->users = NULL; /* Nothing left to free */ - if 
(resv_desc_ptr->core_cnt == 0) { + if (!resv_desc_ptr->core_cnt) { debug2("reservation using full nodes"); _set_cpu_cnt(resv_ptr); resv_ptr->full_nodes = 1; } else { + resv_ptr->cpu_cnt = bit_set_count(resv_ptr->core_bitmap); debug2("reservation using partial nodes: core count %u", - resv_desc_ptr->core_cnt); - resv_ptr->cpu_cnt = resv_desc_ptr->core_cnt; + resv_ptr->cpu_cnt); resv_ptr->full_nodes = 0; } @@ -2203,7 +2221,7 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) _post_resv_update(resv_ptr, resv_backup); _del_resv_rec(resv_backup); - set_node_maint_mode(true); + (void) set_node_maint_mode(true); last_resv_update = now; schedule_resv_save(); return error_code; @@ -2223,8 +2241,6 @@ static bool _is_resv_used(slurmctld_resv_t *resv_ptr) bool match = false; job_iterator = list_iterator_create(job_list); - if (!job_iterator) - fatal("malloc: list_iterator_create"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if ((!IS_JOB_FINISHED(job_ptr)) && (job_ptr->resv_id == resv_ptr->resv_id)) { @@ -2244,8 +2260,6 @@ static void _clear_job_resv(slurmctld_resv_t *resv_ptr) struct job_record *job_ptr; job_iterator = list_iterator_create(job_list); - if (!job_iterator) - fatal("malloc: list_iterator_create"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (job_ptr->resv_ptr != resv_ptr) continue; @@ -2273,8 +2287,6 @@ extern int delete_resv(reservation_name_msg_t *resv_desc_ptr) info("delete_resv: Name=%s", resv_desc_ptr->name); iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (strcmp(resv_ptr->name, resv_desc_ptr->name)) continue; @@ -2307,6 +2319,15 @@ extern int delete_resv(reservation_name_msg_t *resv_desc_ptr) return rc; } +/* Return pointer to the named reservation or NULL if not found */ +extern slurmctld_resv_t *find_resv_name(char *resv_name) +{ + slurmctld_resv_t *resv_ptr; + resv_ptr = (slurmctld_resv_t 
*) list_find_first (resv_list, + _find_resv_name, resv_name); + return resv_ptr; +} + /* Dump the reservation records to a buffer */ extern void show_resv(char **buffer_ptr, int *buffer_size, uid_t uid, uint16_t protocol_version) @@ -2335,8 +2356,6 @@ extern void show_resv(char **buffer_ptr, int *buffer_size, uid_t uid, /* write individual reservation records */ iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if ((slurmctld_conf.private_data & PRIVATE_DATA_RESERVATIONS) && !validate_operator(uid)) { @@ -2391,8 +2410,6 @@ extern int dump_all_resv_state(void) /* write reservation records to buffer */ lock_slurmctld(resv_read_lock); iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) _pack_resv(resv_ptr, buffer, true, SLURM_PROTOCOL_VERSION); list_iterator_destroy(iter); @@ -2557,8 +2574,6 @@ static void _validate_all_reservations(void) uint32_t res_num; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (!_validate_one_reservation(resv_ptr)) { error("Purging invalid reservation record %s", @@ -2579,8 +2594,6 @@ static void _validate_all_reservations(void) /* Validate all job reservation pointers */ iter = list_iterator_create(job_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((job_ptr = (struct job_record *) list_next(iter))) { if (job_ptr->resv_name == NULL) continue; @@ -2919,8 +2932,6 @@ static int _resize_resv(slurmctld_resv_t *resv_ptr, uint32_t node_cnt) static void _create_cluster_core_bitmap(bitstr_t **core_bitmap) { *core_bitmap = bit_alloc(cr_get_coremap_offset(node_record_count)); - if (*core_bitmap == NULL) - fatal("bit_alloc: malloc failure"); } /* Given a reservation create request, select appropriate nodes for use */ @@ 
-2944,13 +2955,14 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, } /* Start with all nodes in the partition */ - node_bitmap = bit_copy((*part_ptr)->node_bitmap); + if (*resv_bitmap) + node_bitmap = bit_copy(*resv_bitmap); + else + node_bitmap = bit_copy((*part_ptr)->node_bitmap); /* Don't use node already reserved */ if (!(resv_desc_ptr->flags & RESERVE_FLAG_OVERLAP)) { iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (resv_ptr->end_time <= now) _advance_resv_time(resv_ptr); @@ -2986,9 +2998,6 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, ListIterator feature_iter; bool match; - if (feature_bitmap == NULL) - fatal("bit_copy malloc failure"); - while (1) { for (i=0; ; i++) { if (token[i] == '\0') { @@ -3010,8 +3019,6 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, match = false; feature_iter = list_iterator_create(feature_list); - if (feature_iter == NULL) - fatal("list_iterator_create malloc failure"); while ((feature_ptr = (struct features_record *) list_next(feature_iter))) { if (strcmp(token, feature_ptr->name)) @@ -3170,8 +3177,6 @@ static void _check_job_compatibility(struct job_record *job_ptr, debug2("job coremap: %s", str); full_node_bitmap = bit_copy(job_res->node_bitmap); - if (full_node_bitmap == NULL) - fatal("bit_alloc: malloc failure"); debug2("Let's see core distribution for jobid: %u", job_ptr->job_id); @@ -3245,8 +3250,6 @@ static bitstr_t *_pick_idle_node_cnt(bitstr_t *avail_bitmap, save_bitmap = bit_copy(avail_bitmap); bit_or(avail_bitmap, save_bitmap); /* restore avail_bitmap */ job_iterator = list_iterator_create(job_list); - if (job_iterator == NULL) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr)) @@ -3254,7 +3257,7 @@ static bitstr_t *_pick_idle_node_cnt(bitstr_t 
*avail_bitmap, if (job_ptr->end_time < resv_desc_ptr->start_time) continue; - if (resv_desc_ptr->core_cnt == 0) { + if (!resv_desc_ptr->core_cnt) { bit_not(job_ptr->node_bitmap); bit_and(avail_bitmap, job_ptr->node_bitmap); bit_not(job_ptr->node_bitmap); @@ -3276,8 +3279,6 @@ static bitstr_t *_pick_idle_node_cnt(bitstr_t *avail_bitmap, * the unsorted job list. */ if (resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS) { job_iterator = list_iterator_create(job_list); - if (!job_iterator) - fatal("list_iterator_create: malloc failure"); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(job_ptr) && @@ -3477,8 +3478,6 @@ extern void job_time_adj_resv(struct job_record *job_ptr) int32_t resv_begin_time; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (resv_ptr->end_time <= now) _advance_resv_time(resv_ptr); @@ -3514,8 +3513,6 @@ static int _license_cnt(List license_list, char *lic_name) return lic_cnt; iter = list_iterator_create(license_list); - if (!iter) - fatal("list_interator_create malloc failure"); while ((license_ptr = list_next(iter))) { if (strcmp(license_ptr->name, lic_name) == 0) lic_cnt += license_ptr->total; @@ -3572,8 +3569,6 @@ extern int job_test_lic_resv(struct job_record *job_ptr, char *lic_name, job_start_time = when; job_end_time = when + _get_job_duration(job_ptr); iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (resv_ptr->end_time <= now) _advance_resv_time(resv_ptr); @@ -3664,8 +3659,6 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, } if (resv_ptr->flags & RESERVE_FLAG_LIC_ONLY) { *node_bitmap = bit_alloc(node_record_count); - if (*node_bitmap == NULL) - fatal("bit_alloc: malloc failure"); bit_nset(*node_bitmap, 0, (node_record_count - 1)); } else *node_bitmap = 
bit_copy(resv_ptr->node_bitmap); @@ -3673,8 +3666,6 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, /* if there are any overlapping reservations, we need to * prevent the job from using those nodes (e.g. MAINT nodes) */ iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((res2_ptr = (slurmctld_resv_t *) list_next(iter))) { if ((resv_ptr->flags & RESERVE_FLAG_MAINT) || (resv_ptr->flags & RESERVE_FLAG_OVERLAP) || @@ -3709,8 +3700,6 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, job_ptr->resv_ptr = NULL; /* should be redundant */ *node_bitmap = bit_alloc(node_record_count); - if (*node_bitmap == NULL) - fatal("bit_alloc: malloc failure"); bit_nset(*node_bitmap, 0, (node_record_count - 1)); if (list_count(resv_list) == 0) return SLURM_SUCCESS; @@ -3721,8 +3710,6 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, lic_resv_time = (time_t) 0; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (resv_ptr->end_time <= now) _advance_resv_time(resv_ptr); @@ -3757,10 +3744,10 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, bit_and(*node_bitmap, resv_ptr->node_bitmap); bit_not(resv_ptr->node_bitmap); } else { - info("job_test_resv: %s reservation uses " - "partial nodes", resv_ptr->name); + debug2("job_test_resv: %s reservation uses " + "partial nodes", resv_ptr->name); if (*exc_core_bitmap == NULL) { - *exc_core_bitmap = + *exc_core_bitmap = bit_copy(resv_ptr->core_bitmap); } else { bit_or(*exc_core_bitmap, @@ -3811,8 +3798,6 @@ extern time_t find_resv_end(time_t start_time) return end_time; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if ((start_time < resv_ptr->start_time) || (start_time > resv_ptr->end_time)) @@ -3843,8 +3828,6 @@ 
extern void begin_job_resv_check(void) resv_over_run *= 60; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { resv_ptr->job_pend_cnt = 0; resv_ptr->job_run_cnt = 0; @@ -3882,7 +3865,7 @@ extern int job_resv_check(struct job_record *job_ptr) return SLURM_SUCCESS; } -/* Advance a expired reservation's time stamps one day or one week +/* Advance a expired reservation's time stamps one day or one week * as appropriate. */ static void _advance_resv_time(slurmctld_resv_t *resv_ptr) { @@ -3911,6 +3894,91 @@ static void _advance_resv_time(slurmctld_resv_t *resv_ptr) } } +static void _free_script_arg(resv_thread_args_t *args) +{ + if (args) { + xfree(args->script); + xfree(args->resv_name); + xfree(args); + } +} + +static void *_fork_script(void *x) +{ + resv_thread_args_t *args = (resv_thread_args_t *) x; + char *argv[3], *envp[1]; + int status, wait_rc; + pid_t cpid; + + argv[0] = args->script; + argv[1] = args->resv_name; + argv[2] = NULL; + envp[0] = NULL; + if ((cpid = fork()) < 0) { + error("_fork_script fork error: %m"); + goto fini; + } + if (cpid == 0) { +#ifdef SETPGRP_TWO_ARGS + setpgrp(0, 0); +#else + setpgrp(); +#endif + execve(argv[0], argv, envp); + exit(127); + } + + while (1) { + wait_rc = waitpid(cpid, &status, 0); + if (wait_rc < 0) { + if (errno == EINTR) + continue; + error("_fork_script waitpid error: %m"); + break; + } else if (wait_rc > 0) { + killpg(cpid, SIGKILL); /* kill children too */ + break; + } + } +fini: _free_script_arg(args); + return NULL; +} + +static void _run_script(char *script, slurmctld_resv_t *resv_ptr) +{ + int rc; + resv_thread_args_t *args; + pthread_t thread_id_prolog; + pthread_attr_t thread_attr_prolog; + + if (!script || !script[0]) + return; + if (access(script, X_OK) < 0) { + error("Invalid ResvProlog or ResvEpilog(%s): %m", script); + return; + } + + slurm_attr_init(&thread_attr_prolog); + 
pthread_attr_setdetachstate(&thread_attr_prolog, + PTHREAD_CREATE_DETACHED); + args = xmalloc(sizeof(resv_thread_args_t)); + args->script = xstrdup(script); + args->resv_name = xstrdup(resv_ptr->name); + while (1) { + rc = pthread_create(&thread_id_prolog, &thread_attr_prolog, + _fork_script, (void *) args); + if (rc != 0) { + if (errno == EAGAIN) + continue; + error("pthread_create: %m"); + } + break; + } + slurm_attr_destroy(&thread_attr_prolog); + if (rc != 0) + _free_script_arg(args); +} + /* Finish scan of all jobs for valid reservations * * Purge vestigial reservation records. @@ -3927,27 +3995,31 @@ extern void fini_job_resv_check(void) return; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { - if (resv_ptr->end_time > now) { /* reservation not over */ + if (!resv_ptr->run_prolog || !resv_ptr->run_epilog) + continue; + if (resv_ptr->end_time >= now) { /* reservation not over */ _validate_node_choice(resv_ptr); continue; } _advance_resv_time(resv_ptr); - if ((resv_ptr->job_pend_cnt == 0) && - (resv_ptr->job_run_cnt == 0) && + if ((resv_ptr->job_run_cnt == 0) && (resv_ptr->maint_set_node == 0) && ((resv_ptr->flags & RESERVE_FLAG_DAILY ) == 0) && ((resv_ptr->flags & RESERVE_FLAG_WEEKLY) == 0)) { - debug("Purging vestigial reservation record %s", - resv_ptr->name); + if (resv_ptr->job_pend_cnt) { + info("Purging vestigial reservation %s " + "with %u pending jobs", + resv_ptr->name, resv_ptr->job_pend_cnt); + } else { + debug("Purging vestigial reservation %s", + resv_ptr->name); + } _clear_job_resv(resv_ptr); list_delete_item(iter); last_resv_update = now; schedule_resv_save(); } - } list_iterator_destroy(iter); } @@ -3964,8 +4036,6 @@ extern int send_resvs_to_accounting(void) return SLURM_SUCCESS; itr = list_iterator_create(resv_list); - if (!itr) - fatal("malloc: list_iterator_create"); while ((resv_ptr = list_next(itr))) { _post_resv_create(resv_ptr); } @@ 
-3974,16 +4044,21 @@ extern int send_resvs_to_accounting(void) return SLURM_SUCCESS; } - -/* Set or clear NODE_STATE_MAINT for node_state as needed */ -extern void set_node_maint_mode(bool reset_all) +/* Set or clear NODE_STATE_MAINT for node_state as needed + * IN reset_all - if true, then re-initialize all node information for all + * reservations, but do not run any prologs or epilogs or count started + * reservations + * RET count of newly started reservations + */ +extern int set_node_maint_mode(bool reset_all) { + int res_start_cnt = 0; ListIterator iter; slurmctld_resv_t *resv_ptr; time_t now = time(NULL); if (!resv_list) - return; + return res_start_cnt; if (reset_all) { int i; @@ -3995,27 +4070,39 @@ extern void set_node_maint_mode(bool reset_all) } } iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if (reset_all) resv_ptr->maint_set_node = false; - if ((resv_ptr->flags & RESERVE_FLAG_MAINT) == 0) - continue; - if ((now >= resv_ptr->start_time) && - (now < resv_ptr->end_time )) { - if (!resv_ptr->maint_set_node) { - resv_ptr->maint_set_node = true; + if (resv_ptr->flags & RESERVE_FLAG_MAINT) { + if ((now >= resv_ptr->start_time) && + (now < resv_ptr->end_time )) { + if (!resv_ptr->maint_set_node) { + resv_ptr->maint_set_node = true; + _set_nodes_maint(resv_ptr, now); + last_node_update = now; + } + } else if (resv_ptr->maint_set_node) { + resv_ptr->maint_set_node = false; _set_nodes_maint(resv_ptr, now); last_node_update = now; } - } else if (resv_ptr->maint_set_node) { - resv_ptr->maint_set_node = false; - _set_nodes_maint(resv_ptr, now); - last_node_update = now; + } + + if (reset_all) /* Defer reservation prolog/epilog */ + continue; + if ((resv_ptr->start_time <= now) && !resv_ptr->run_prolog) { + res_start_cnt++; + resv_ptr->run_prolog = true; + _run_script(slurmctld_conf.resv_prolog, resv_ptr); + } + if ((resv_ptr->end_time <= now) && 
!resv_ptr->run_epilog) { + resv_ptr->run_epilog = true; + _run_script(slurmctld_conf.resv_epilog, resv_ptr); } } list_iterator_destroy(iter); + + return res_start_cnt; } /* checks if node within node_record_table_ptr is in maint reservation */ @@ -4029,8 +4116,6 @@ extern bool is_node_in_maint_reservation(int nodenum) return false; iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if ((resv_ptr->flags & RESERVE_FLAG_MAINT) == 0) continue; @@ -4053,8 +4138,6 @@ extern void update_assocs_in_resvs(void) error("No reservation list given for updating associations"); iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = list_next(iter))) { _set_assoc_list(resv_ptr); } @@ -4069,8 +4152,6 @@ extern void update_part_nodes_in_resv(struct part_record *part_ptr) xassert(part_ptr); iter = list_iterator_create(resv_list); - if (!iter) - fatal("malloc: list_iterator_create"); while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) { if ((resv_ptr->flags & RESERVE_FLAG_PART_NODES) && (resv_ptr->partition != NULL) && diff --git a/src/slurmctld/reservation.h b/src/slurmctld/reservation.h index cfd166bb673b6acf81b3fe5d3df63cb38b7cd171..f29320eeb9494278ec58c8ca4fd8df7607a7bb42 100644 --- a/src/slurmctld/reservation.h +++ b/src/slurmctld/reservation.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -57,6 +57,9 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr); /* Delete an existing resource reservation */ extern int delete_resv(reservation_name_msg_t *resv_desc_ptr); +/* Return pointer to the named reservation or NULL if not found */ +extern slurmctld_resv_t *find_resv_name(char *resv_name); + /* Dump the reservation records to a buffer */ extern void show_resv(char **buffer_ptr, int *buffer_size, uid_t uid, uint16_t protocol_version); @@ -74,8 +77,9 @@ extern int send_resvs_to_accounting(void); /* Set or clear NODE_STATE_MAINT for node_state as needed * IN reset_all - re-initialize all node information for all reservations + * RET count of newly started reservations */ -extern void set_node_maint_mode(bool reset_all); +extern int set_node_maint_mode(bool reset_all); /* checks if node within node_record_table_ptr is in maint reservation */ extern bool is_node_in_maint_reservation(int nodenum); diff --git a/src/slurmctld/sched_plugin.c b/src/slurmctld/sched_plugin.c index 2f078e9a0100b6af14cfd015522675a22622c2b5..99b602d8e7eedd341d51252d9d01fe06f41e6c0a 100644 --- a/src/slurmctld/sched_plugin.c +++ b/src/slurmctld/sched_plugin.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/sched_plugin.h b/src/slurmctld/sched_plugin.h index 1d3e76f5100d6fb4ceda499d408ebe9d04ee1753..507fad826628cabf3c72a741deff32d214f74367 100644 --- a/src/slurmctld/sched_plugin.h +++ b/src/slurmctld/sched_plugin.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 75465489215b2a6376e40e29ca7eedd620ebc8b4..8f5fb0d3619e986c98d032a6653d46c0388ce822 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -85,6 +85,11 @@ /*****************************************************************************\ * GENERAL CONFIGURATION parameters and data structures \*****************************************************************************/ +/* Maximum index for a job array. The minimum index will always be 0. */ +#ifndef MAX_JOB_ARRAY_VALUE +#define MAX_JOB_ARRAY_VALUE 1000 +#endif + /* Maximum parallel threads to service incoming RPCs. * Since some systems schedule pthread on a First-In-Last-Out basis, * increasing this value is strongly discouraged. 
*/ @@ -249,9 +254,17 @@ extern bitstr_t *up_node_bitmap; /* bitmap of up nodes, not DOWN */ #define FRONT_END_MAGIC 0xfe9b82fe typedef struct front_end_record { + gid_t *allow_gids; /* zero terminated list of allowed groups */ + char *allow_groups; /* allowed group string */ + uid_t *allow_uids; /* zero terminated list of allowed users */ + char *allow_users; /* allowed user string */ time_t boot_time; /* Time of node boot, * computed from up_time */ char *comm_name; /* communications path name to node */ + gid_t *deny_gids; /* zero terminated list of denied groups */ + char *deny_groups; /* denied group string */ + uid_t *deny_uids; /* zero terminated list of denied users */ + char *deny_users; /* denied user string */ uint32_t job_cnt_comp; /* count of completing jobs on node */ uint16_t job_cnt_run; /* count of running jobs on node */ time_t last_response; /* Time of last communication */ @@ -294,6 +307,7 @@ struct part_record { uint16_t flags; /* see PART_FLAG_* in slurm.h */ uint32_t grace_time; /* default preempt grace time in seconds */ uint32_t magic; /* magic cookie to test data integrity */ + uint32_t max_cpus_per_node; /* maximum allocated CPUs per node */ uint32_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ uint32_t max_nodes; /* per job or INFINITE */ uint32_t max_nodes_orig;/* unscaled value (c-nodes on BlueGene) */ @@ -313,6 +327,7 @@ struct part_record { uint16_t state_up; /* See PARTITION_* states in slurm.h */ uint32_t total_nodes; /* total number of nodes in the partition */ uint32_t total_cpus; /* total number of cpus in the partition */ + uint16_t cr_type; /* Custom CR values for partition (if supported by select plugin) */ }; extern List part_list; /* list of part_record entries */ @@ -353,6 +368,8 @@ typedef struct slurmctld_resv { char *partition; /* name of partition to be used */ struct part_record *part_ptr; /* pointer to partition used */ uint32_t resv_id; /* unique reservation ID, internal use */ + bool run_epilog; /* set 
if epilog has been executed */ + bool run_prolog; /* set if prolog has been executed */ time_t start_time; /* start time of reservation */ time_t start_time_first;/* when the reservation first started */ time_t start_time_prev; /* If start time was changed this is @@ -387,7 +404,7 @@ struct feature_record { /* job_details - specification of a job's constraints, * can be purged after initiation */ struct job_details { - uint16_t acctg_freq; /* accounting polling interval */ + char *acctg_freq; /* accounting polling interval */ uint32_t argc; /* count of argv elements */ char **argv; /* arguments for a batch job script */ time_t begin_time; /* start at this time (srun --begin), @@ -467,6 +484,8 @@ struct job_record { char *alloc_node; /* local node making resource alloc */ uint16_t alloc_resp_port; /* RESPONSE_RESOURCE_ALLOCATION port */ uint32_t alloc_sid; /* local sid making resource alloc */ + uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ + uint16_t array_task_id; /* task_id of a job array */ uint32_t assoc_id; /* used for accounting plugins */ void *assoc_ptr; /* job's association record ptr, it is * void* because of interdependencies @@ -562,8 +581,10 @@ struct job_record { time_t preempt_time; /* job preemption signal time */ uint32_t priority; /* relative priority of the job, * zero == held (don't initiate) */ + uint32_t *priority_array; /* partition based priority */ priority_factors_object_t *prio_factors; /* cached value used * by sprio command */ + uint32_t profile; /* Acct_gather_profile option */ uint32_t qos_id; /* quality of service id */ void *qos_ptr; /* pointer to the quality of * service record used for @@ -627,6 +648,7 @@ struct job_record { * user/name at a time */ #define SLURM_DEPEND_EXPAND 6 /* Expand running job */ struct depend_spec { + uint16_t array_task_id; /* INFINITE for all array tasks */ uint16_t depend_type; /* SLURM_DEPEND_* type */ uint32_t job_id; /* SLURM job_id */ struct job_record *job_ptr; /* pointer to this 
job */ @@ -649,13 +671,16 @@ struct step_record { uint16_t exclusive; /* dedicated resources for the step */ uint32_t exit_code; /* highest exit code from any task */ bitstr_t *exit_node_bitmap; /* bitmap of exited nodes */ + ext_sensors_data_t *ext_sensors; /* external sensors plugin data */ char *gres; /* generic resources required */ List gres_list; /* generic resource allocation detail */ char *host; /* host for srun communications */ struct job_record* job_ptr; /* ptr to the job that owns the step */ jobacctinfo_t *jobacct; /* keep track of process info in the * step */ - uint32_t mem_per_cpu; /* MB memory per CPU, 0=no limit */ + uint32_t pn_min_memory; /* minimum real memory per node OR + * real memory per CPU | MEM_PER_CPU, + * default=0 (use job limit) */ char *name; /* name of job step */ char *network; /* step's network specification */ uint8_t no_kill; /* 1 if no kill on node failure */ @@ -668,6 +693,7 @@ struct step_record { time_t start_time; /* step allocation start time */ uint32_t time_limit; /* step allocation time limit */ dynamic_plugin_data_t *select_jobinfo;/* opaque data, BlueGene */ + uint16_t state; /* state of the step. See job_states */ uint32_t step_id; /* step number */ slurm_step_layout_t *step_layout;/* info about how tasks are laid out * in the step */ @@ -755,9 +781,11 @@ extern struct part_record *create_part_record (void); /* * job_limits_check - check the limits specified for the job. * IN job_ptr - pointer to job table entry. + * IN check_min_time - if true test job's minimum time limit, + * otherwise test maximum time limit * RET WAIT_NO_REASON on success, fail status otherwise. 
*/ -extern int job_limits_check(struct job_record **job_pptr); +extern int job_limits_check(struct job_record **job_pptr, bool check_min_time); /* * delete_job_details - delete a job's detail record and clear it's pointer @@ -790,6 +818,13 @@ extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id); */ extern void delete_step_records (struct job_record *job_ptr); +/* + * Copy a job's dependency list + * IN depend_list_src - a job's depend_lst + * RET copy of depend_list_src, must bee freed by caller + */ +extern List depended_list_copy(List depend_list_src); + /* * drain_nodes - drain one or more nodes, * no-op for nodes already drained or draining @@ -837,14 +872,29 @@ extern void dump_step_desc(job_step_create_request_msg_t *step_spec); extern void excise_node_from_job(struct job_record *job_ptr, struct node_record *node_ptr); +/* + * Copy a job's feature list + * IN feature_list_src - a job's depend_lst + * RET copy of depend_list_src, must be freed by caller + */ +extern List feature_list_copy(List feature_list_src); + +/* + * find_job_array_rec - return a pointer to the job record with the given + * array_job_id/array_task_id + * IN job_id - requested job's id + * IN array_task_id - requested job's task id (NO_VAL if none specified) + * RET pointer to the job's record, NULL on error + */ +extern struct job_record *find_job_array_rec(uint32_t array_job_id, + uint16_t array_task_id); + /* * find_job_record - return a pointer to the job record with the given job_id * IN job_id - requested job's id * RET pointer to the job's record, NULL on error - * global: job_list - global job list pointer - * job_hash - hash table into job records */ -extern struct job_record *find_job_record (uint32_t job_id); +struct job_record *find_job_record(uint32_t job_id); /* * find_first_node_record - find a record for first node in the bitmap @@ -917,7 +967,7 @@ extern int init_job_conf (void); * global: node_record_table_ptr - pointer to global node table * 
default_node_record - default values for node records * default_config_record - default values for configuration records - * hash_table - table of hash indecies + * hash_table - table of hash indexes * last_node_update - time of last node table update */ extern int init_node_conf (); @@ -1074,25 +1124,14 @@ extern int job_restart(checkpoint_msg_t *ckpt_ptr, uid_t uid, * job_signal - signal the specified job * IN job_id - id of the job to be signaled * IN signal - signal to send, SIGKILL == cancel the job - * IN batch_flag - signal batch shell only if set + * IN flags - see KILL_JOB_* flags in slurm.h * IN uid - uid of requesting user * IN preempt - true if job being preempted * RET 0 on success, otherwise ESLURM error code */ -extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, +extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags, uid_t uid, bool preempt); -/* - * job_step_cancel - cancel the specified job step - * IN job_id - id of the job to be cancelled - * IN step_id - id of the job step to be cancelled - * IN uid - user id of user issuing the RPC - * RET 0 on success, otherwise ESLURM error code - * global: job_list - pointer global job list - * last_job_update - time of last job table update - */ -extern int job_step_cancel (uint32_t job_id, uint32_t job_step_id, uid_t uid ); - /* * job_step_checkpoint - perform some checkpoint operation * IN ckpt_ptr - checkpoint request message @@ -1376,8 +1415,8 @@ extern void make_node_idle(struct node_record *node_ptr, /* * Determine of the specified job can execute right now or is currently - * blocked by a miscellaneous limit. This does not re-validate job state, - * but relies upon schedule() in src/slurmctld/job_scheduler.c to do so. + * blocked by a partition state or limit. These job states should match the + * reason values returned by job_limits_check(). 
*/ extern bool misc_policy_job_runnable_state(struct job_record *job_ptr); @@ -1409,6 +1448,7 @@ extern void node_no_resp_msg(void); * OUT buffer_size - set to size of the buffer in bytes * IN show_flags - job filtering options * IN uid - uid of user making request (for partition filtering) + * IN filter_uid - pack only jobs belonging to this user if not NO_VAL * IN protocol_version - slurm protocol version of client * global: job_list - global list of job records * NOTE: the buffer at *buffer_ptr must be xfreed by the caller @@ -1416,7 +1456,7 @@ extern void node_no_resp_msg(void); * whenever the data format changes */ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, - uint16_t show_flags, uid_t uid, + uint16_t show_flags, uid_t uid, uint32_t filter_uid, uint16_t protocol_version); /* @@ -1514,6 +1554,25 @@ extern int pack_one_job(char **buffer_ptr, int *buffer_size, uint32_t job_id, uint16_t show_flags, uid_t uid, uint16_t protocol_version); +/* + * pack_one_node - dump all configuration and node information for one node + * in machine independent form (for network transmission) + * OUT buffer_ptr - pointer to the stored data + * OUT buffer_size - set to size of the buffer in bytes + * IN show_flags - node filtering options + * IN uid - uid of user making request (for partition filtering) + * IN node_name - name of node for which information is desired, + * use first node if name is NULL + * IN protocol_version - slurm protocol version of client + * global: node_record_table_ptr - pointer to global node table + * NOTE: the caller must xfree the buffer at *buffer_ptr + * NOTE: change slurm_load_node() in api/node_info.c when data format changes + * NOTE: READ lock_slurmctld config before entry + */ +extern void pack_one_node (char **buffer_ptr, int *buffer_size, + uint16_t show_flags, uid_t uid, char *node_name, + uint16_t protocol_version); + /* part_filter_clear - Clear the partition's hidden flag based upon a user's * group access. 
This must follow a call to part_filter_set() */ extern void part_filter_clear(void); @@ -1525,6 +1584,13 @@ extern void part_filter_set(uid_t uid); /* part_fini - free all memory associated with partition records */ extern void part_fini (void); +/* + * Create a copy of a job's part_list *partition list + * IN part_list_src - a job's part_list + * RET copy of part_list_src, must be freed by caller + */ +extern List part_list_copy(List part_list_src); + /* * Determine of the specified job can execute right now or is currently * blocked by a partition state or limit. Execute job_limits_check() to @@ -1596,7 +1662,7 @@ extern void resume_job_step(struct job_record *job_ptr); /* run_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */ -extern void run_backup(void); +extern void run_backup(slurm_trigger_callbacks_t *callbacks); /* Spawn health check function for every node that is not DOWN */ extern void run_health_check(void); @@ -1607,6 +1673,10 @@ extern void save_all_state(void); /* send all info for the controller to accounting */ extern void send_all_to_accounting(time_t event_time); +/* A slurmctld lock needs to at least have a node read lock set before + * this is called */ +extern void set_cluster_cpus(void); + /* sends all jobs in eligible state to accounting. 
Only needed at * first registration */ diff --git a/src/slurmctld/slurmctld_plugstack.c b/src/slurmctld/slurmctld_plugstack.c new file mode 100644 index 0000000000000000000000000000000000000000..c28c6552b0157bdc16846f265c11653503f2d5b2 --- /dev/null +++ b/src/slurmctld/slurmctld_plugstack.c @@ -0,0 +1,182 @@ +/*****************************************************************************\ + * slurmctld_plugstack.c - driver for slurmctld plugstack plugin + ***************************************************************************** + * Copyright (C) 2012 SchedMD LLC + * Written by Morris Jette <jette@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include "config.h" +# if STDC_HEADERS +# include <string.h> +# endif +# if HAVE_SYS_TYPES_H +# include <sys/types.h> +# endif /* HAVE_SYS_TYPES_H */ +# if HAVE_UNISTD_H +# include <unistd.h> +# endif +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else /* ! HAVE_INTTYPES_H */ +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* ! HAVE_CONFIG_H */ +# include <sys/types.h> +# include <unistd.h> +# include <stdint.h> +# include <string.h> +#endif /* HAVE_CONFIG_H */ +#include <stdio.h> + +#include "slurm/slurm.h" +#include "slurm/slurm_errno.h" + +#include "src/common/macros.h" +#include "src/common/plugin.h" +#include "src/common/plugrack.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/slurmctld/slurmctld_plugstack.h" + +slurm_nonstop_ops_t nonstop_ops = { NULL, NULL, NULL }; + +typedef struct slurmctld_plugstack_ops { + /* NO FUNCTIONS */ +} slurmctld_plugstack_ops_t; + +/* + * Must be synchronized with slurmctld_plugstack_t above. + */ +static const char *syms[] = { + /* NO FUNCTIONS */ +}; + +static int g_context_cnt = -1; +static slurmctld_plugstack_ops_t *ops = NULL; +static plugin_context_t **g_context = NULL; +static char *slurmctld_plugstack_list = NULL; +static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; + +/* + * Initialize the slurmctld plugstack plugin. + * + * Returns a SLURM errno. 
+ */ +extern int slurmctld_plugstack_init(void) +{ + int rc = SLURM_SUCCESS; + char *last = NULL, *names; + char *plugin_type = "slurmctld_plugstack"; + char *type; + + if (init_run && (g_context_cnt >= 0)) + return rc; + + slurm_mutex_lock(&g_context_lock); + if (g_context_cnt >= 0) + goto fini; + + slurmctld_plugstack_list = slurm_get_slurmctld_plugstack(); + g_context_cnt = 0; + if ((slurmctld_plugstack_list == NULL) || + (slurmctld_plugstack_list[0] == '\0')) + goto fini; + + names = slurmctld_plugstack_list; + while ((type = strtok_r(names, ",", &last))) { + xrealloc(ops, (sizeof(slurmctld_plugstack_ops_t) * + (g_context_cnt + 1))); + xrealloc(g_context, + (sizeof(plugin_context_t *) * (g_context_cnt + 1))); + if (strncmp(type, "slurmctld/", 10) == 0) + type += 10; /* backward compatibility */ + type = xstrdup_printf("slurmctld/%s", type); + g_context[g_context_cnt] = plugin_context_create( + plugin_type, type, (void **)&ops[g_context_cnt], + syms, sizeof(syms)); + if (!g_context[g_context_cnt]) { + error("cannot create %s context for %s", + plugin_type, type); + rc = SLURM_ERROR; + xfree(type); + break; + } + + xfree(type); + g_context_cnt++; + names = NULL; /* for next iteration */ + } + init_run = true; + +fini: + slurm_mutex_unlock(&g_context_lock); + + if (rc != SLURM_SUCCESS) + slurmctld_plugstack_fini(); + + return rc; +} + +/* + * Terminate the slurmctld plugstack plugin. Free memory. + * + * Returns a SLURM errno. 
+ */ +extern int slurmctld_plugstack_fini(void) +{ + int i, j, rc = SLURM_SUCCESS; + + slurm_mutex_lock(&g_context_lock); + if (g_context_cnt < 0) + goto fini; + + init_run = false; + for (i=0; i<g_context_cnt; i++) { + if (g_context[i]) { + j = plugin_context_destroy(g_context[i]); + if (j != SLURM_SUCCESS) + rc = j; + } + } + xfree(ops); + xfree(g_context); + xfree(slurmctld_plugstack_list); + g_context_cnt = -1; + +fini: slurm_mutex_unlock(&g_context_lock); + return rc; +} diff --git a/src/slurmctld/slurmctld_plugstack.h b/src/slurmctld/slurmctld_plugstack.h new file mode 100644 index 0000000000000000000000000000000000000000..21090c7400e8cdf44cc490ffe289a6469c173930 --- /dev/null +++ b/src/slurmctld/slurmctld_plugstack.h @@ -0,0 +1,68 @@ +/*****************************************************************************\ + * slurmctld_plugstack.h - driver for slurmctld plugstack plugin + ***************************************************************************** + * Copyright (C) 2012 SchedMD LLC + * Written by Morris Jette <jette@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. 
If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef _SLURMCTLD_PLUGSTACK_H +#define _SLURMCTLD_PLUGSTACK_H + +#include "slurm/slurm.h" +#include "src/slurmctld/slurmctld.h" + +/*****************************************************************************\ + * Plugin slurmctld/nonstop callback functions +\*****************************************************************************/ +typedef struct slurm_nonstop_ops { + void (*job_begin) ( struct job_record *job_ptr ); + void (*job_fini) ( struct job_record *job_ptr ); + void (*node_fail) ( struct job_record *job_ptr, + struct node_record *node_ptr); +} slurm_nonstop_ops_t; +extern slurm_nonstop_ops_t nonstop_ops; + +/* + * Initialize the slurmctld plugstack plugin. + * + * Returns a SLURM errno. + */ +extern int slurmctld_plugstack_init(void); + +/* + * Terminate the slurmctld plugstack plugin. Free memory. + * + * Returns a SLURM errno. 
+ */ +extern int slurmctld_plugstack_fini(void); + +#endif /* !_SLURMCTLD_PLUGSTACK_H */ diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c index 42c5800db51a5b97105ac8eea17c01352e81f5f6..a2bc0bb5b3b1b8a46f9a76d980542ffb5e3d715f 100644 --- a/src/slurmctld/srun_comm.c +++ b/src/slurmctld/srun_comm.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -172,6 +172,8 @@ extern void srun_node_fail (uint32_t job_id, char *node_name) step_iterator = list_iterator_create(job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next(step_iterator))) { + if (step_ptr->step_node_bitmap == NULL) /* pending step */ + continue; if ((bit_position >= 0) && (!bit_test(step_ptr->step_node_bitmap, bit_position))) continue; /* job step not on this node */ @@ -331,16 +333,18 @@ extern int srun_user_message(struct job_record *job_ptr, char *msg) return ESLURM_DISABLED; /* no allocated nodes */ agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t)); agent_arg_ptr->hostlist = hostlist_create(job_ptr->batch_host); + if (!agent_arg_ptr->hostlist) + fatal("Invalid srun host: %s", job_ptr->batch_host); #else node_ptr = find_first_node_record(job_ptr->node_bitmap); if (node_ptr == NULL) return ESLURM_DISABLED; /* no allocated nodes */ agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t)); agent_arg_ptr->hostlist = hostlist_create(node_ptr->name); + if (!agent_arg_ptr->hostlist) + fatal("Invalid srun host: %s", node_ptr->name); #endif - if (agent_arg_ptr->hostlist == NULL) - fatal("hostlist_create: malloc failure"); - notify_msg_ptr = (job_notify_msg_t *) + notify_msg_ptr = (job_notify_msg_t *) xmalloc(sizeof(job_notify_msg_t)); notify_msg_ptr->job_id = 
job_ptr->job_id; notify_msg_ptr->message = xstrdup(msg); @@ -532,4 +536,3 @@ extern void srun_response(uint32_t job_id, uint32_t step_id) return; job_ptr->time_last_active = now; } - diff --git a/src/slurmctld/srun_comm.h b/src/slurmctld/srun_comm.h index 1bf5e6d4936fc9cce24ed8e3e74aa3094ef4c4dd..e764db55e116ba2fbacd5299d0a4c72e5ceda2bc 100644 --- a/src/slurmctld/srun_comm.h +++ b/src/slurmctld/srun_comm.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/state_save.c b/src/slurmctld/state_save.c index 45c43fbd5d9722947c2e5138ab3ec83da7c78c8d..3cd06757cfd39e05c7cd399dda455e6210ccbed9 100644 --- a/src/slurmctld/state_save.c +++ b/src/slurmctld/state_save.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -51,7 +51,10 @@ #include "src/slurmctld/slurmctld.h" #include "src/slurmctld/trigger_mgr.h" -#define SAVE_MAX_WAIT 2 /* Maximum time in seconds to wait for save */ +/* Maximum delay for pending state save to be processed, in seconds */ +#ifndef SAVE_MAX_WAIT +#define SAVE_MAX_WAIT 5 +#endif static pthread_mutex_t state_save_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t state_save_cond = PTHREAD_COND_INITIALIZER; diff --git a/src/slurmctld/state_save.h b/src/slurmctld/state_save.h index 32dc75e0c62cd76fb018eca6016aacc5a292da63..4abaff851ee510f5bd916e5741f6c1dbda769010 100644 --- a/src/slurmctld/state_save.h +++ b/src/slurmctld/state_save.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/statistics.c b/src/slurmctld/statistics.c index eda10619426b0e4249ff8d87c2208d20a8e0f7fd..c71613950d835b8f3a6d3d86f267885a42d9711c 100644 --- a/src/slurmctld/statistics.c +++ b/src/slurmctld/statistics.c @@ -5,7 +5,7 @@ * Written by Alejandro Lucero <alucero@bsc.es> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 5bcf3eb46faffed48682568fcd8343ea3e966dcd..abfe49c9b7233c8461f92425f0be1417f41c8571 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -3,12 +3,13 @@ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2010 Lawrence Livermore National Security. + * Copyright (C) 2012 SchedMD LLC. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>, et. al. * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -48,6 +49,7 @@ #include <stdio.h> #include <stdlib.h> #include <sys/types.h> +#include <sys/param.h> #include <string.h> #include <strings.h> #include <unistd.h> @@ -65,6 +67,7 @@ #include "src/common/slurm_protocol_interface.h" #include "src/common/switch.h" #include "src/common/xstring.h" +#include "src/common/slurm_ext_sensors.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/locks.h" @@ -75,6 +78,8 @@ #define MAX_RETRIES 10 +static void _build_pending_step(struct job_record *job_ptr, + job_step_create_request_msg_t *step_specs); static int _count_cpus(struct job_record *job_ptr, bitstr_t *bitmap, uint32_t *usable_cpu_cnt); static struct step_record * _create_step_record(struct job_record *job_ptr); @@ -171,20 +176,60 @@ static struct step_record * _create_step_record(struct job_record *job_ptr) step_ptr = (struct step_record *) xmalloc(sizeof(struct step_record)); last_job_update = time(NULL); - step_ptr->job_ptr = job_ptr; - 
step_ptr->start_time = time(NULL); + step_ptr->job_ptr = job_ptr; + step_ptr->exit_code = NO_VAL; step_ptr->time_limit = INFINITE; - step_ptr->jobacct = jobacctinfo_create(NULL); - step_ptr->requid = -1; - if (list_append (job_ptr->step_list, step_ptr) == NULL) - fatal ("_create_step_record: unable to allocate memory"); + step_ptr->jobacct = jobacctinfo_create(NULL); + step_ptr->requid = -1; + (void) list_append (job_ptr->step_list, step_ptr); return step_ptr; } +/* The step with a state of PENDING is used as a placeholder for a host and + * port that can be used to wake a pending srun as soon another step ends */ +static void _build_pending_step(struct job_record *job_ptr, + job_step_create_request_msg_t *step_specs) +{ + struct step_record *step_ptr; + + if ((step_specs->host == NULL) || (step_specs->port == 0)) + return; + + step_ptr = _create_step_record(job_ptr); + if (step_ptr == NULL) + return; + + step_ptr->port = step_specs->port; + step_ptr->host = xstrdup(step_specs->host); + step_ptr->state = JOB_PENDING; + step_ptr->cpu_count = step_specs->num_tasks; + step_ptr->time_last_active = time(NULL); + step_ptr->step_id = NO_VAL; +} + +static void _internal_step_complete( + struct job_record *job_ptr, + struct step_record *step_ptr, bool terminated) +{ + jobacct_storage_g_step_complete(acct_db_conn, step_ptr); + job_ptr->derived_ec = MAX(job_ptr->derived_ec, + step_ptr->exit_code); + if (!terminated) { + /* These operations are not needed for + * terminated jobs */ + select_g_step_finish(step_ptr); + _step_dealloc_lps(step_ptr); + gres_plugin_step_dealloc(step_ptr->gres_list, + job_ptr->gres_list, job_ptr->job_id, + step_ptr->step_id); + } +} /* - * delete_step_records - delete step record for specified job_ptr + * delete_step_records - Delete step record for specified job_ptr. + * This function is called when a job terminates abnormally, say when it's + * allocated nodes go DOWN and active steps can not be properly terminated. 
* IN job_ptr - pointer to job table entry to have step records removed */ extern void delete_step_records (struct job_record *job_ptr) @@ -197,6 +242,7 @@ extern void delete_step_records (struct job_record *job_ptr) last_job_update = time(NULL); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + _internal_step_complete(job_ptr, step_ptr, true); list_remove (step_iterator); _free_step_rec(step_ptr); } @@ -234,6 +280,7 @@ static void _free_step_rec(struct step_record *step_ptr) if (step_ptr->gres_list) list_destroy(step_ptr->gres_list); select_g_select_jobinfo_free(step_ptr->select_jobinfo); + xfree(step_ptr->ext_sensors); xfree(step_ptr); } @@ -279,6 +326,14 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id) void dump_step_desc(job_step_create_request_msg_t *step_spec) { + uint32_t mem_value = step_spec->pn_min_memory; + char *mem_type = "node"; + + if (mem_value & MEM_PER_CPU) { + mem_value &= (~MEM_PER_CPU); + mem_type = "cpu"; + } + debug3("StepDesc: user_id=%u job_id=%u node_count=%u-%u cpu_count=%u", step_spec->user_id, step_spec->job_id, step_spec->min_nodes, step_spec->max_nodes, @@ -291,8 +346,8 @@ dump_step_desc(job_step_create_request_msg_t *step_spec) step_spec->network, step_spec->exclusive); debug3(" checkpoint-dir=%s checkpoint_int=%u", step_spec->ckpt_dir, step_spec->ckpt_interval); - debug3(" mem_per_cpu=%u resv_port_cnt=%u immediate=%u no_kill=%u", - step_spec->mem_per_cpu, step_spec->resv_port_cnt, + debug3(" mem_per_%s=%u resv_port_cnt=%u immediate=%u no_kill=%u", + mem_type, mem_value, step_spec->resv_port_cnt, step_spec->immediate, step_spec->no_kill); debug3(" overcommit=%d time_limit=%u gres=%s constraints=%s", step_spec->overcommit, step_spec->time_limit, step_spec->gres, @@ -360,7 +415,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id, job_ptr = find_job_record(job_id); if (job_ptr == NULL) { - error("job_step_cancel: invalid job id %u", job_id); + error("job_step_signal: invalid job id %u", 
job_id); return ESLURM_INVALID_JOB_ID; } @@ -386,7 +441,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id, if (step_ptr == NULL) { if (signal != SIG_NODE_FAIL) { rc = ESLURM_INVALID_JOB_ID; - info("job_step_cancel step %u.%u not found", + info("job_step_signal step %u.%u not found", job_id, step_id); return rc; } @@ -395,7 +450,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id, where jobs could be started on these nodes and fail. */ - debug("job_step_cancel step %u.%u not found, but got " + debug("job_step_signal step %u.%u not found, but got " "SIG_NODE_FAIL, so failing all nodes in allocation.", job_id, step_id); memset(&step_rec, 0, sizeof(struct step_record)); @@ -458,8 +513,6 @@ void signal_step_tasks(struct step_record *step_ptr, uint16_t signal, agent_args->msg_type = msg_type; agent_args->retry = 1; agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t)); kill_tasks_msg->job_id = step_ptr->job_ptr->job_id; kill_tasks_msg->job_step_id = step_ptr->step_id; @@ -468,8 +521,6 @@ void signal_step_tasks(struct step_record *step_ptr, uint16_t signal, #ifdef HAVE_FRONT_END xassert(step_ptr->job_ptr->batch_host); hostlist_push(agent_args->hostlist, step_ptr->job_ptr->batch_host); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); agent_args->node_count = 1; #else for (i = 0; i < node_record_count; i++) { @@ -515,12 +566,14 @@ void signal_step_tasks_on_node(char* node_name, struct step_record *step_ptr, xassert(step_ptr->job_ptr->batch_host); agent_args->node_count++; agent_args->hostlist = hostlist_create(step_ptr->job_ptr->batch_host); + if (!agent_args->hostlist) + fatal("Invalid batch_host: %s", step_ptr->job_ptr->batch_host); #else agent_args->node_count++; agent_args->hostlist = hostlist_create(node_name); + if (!agent_args->hostlist) + fatal("Invalid node_name: %s", node_name); #endif - if 
(agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t)); kill_tasks_msg->job_id = step_ptr->job_ptr->job_id; kill_tasks_msg->job_step_id = step_ptr->step_id; @@ -530,6 +583,34 @@ void signal_step_tasks_on_node(char* node_name, struct step_record *step_ptr, return; } +/* A step just completed, signal srun processes with pending steps to retry */ +static void _wake_pending_steps(struct job_record *job_ptr) +{ + ListIterator step_iterator; + struct step_record *step_ptr; + int start_count = 0; + time_t max_age = time(NULL) - 60; /* Wake step after 60 seconds */ + + if (!job_ptr->step_list) + return; + + /* We do not know which steps can use currently available resources. + * Try to start a bit more based upon step sizes. Effectiveness + * varies with step sizes, constraints and order. */ + step_iterator = list_iterator_create(job_ptr->step_list); + while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if ((step_ptr->state == JOB_PENDING) && + ((start_count < 8) || + (step_ptr->time_last_active <= max_age))) { + srun_step_signal(step_ptr, 0); + list_remove (step_iterator); + _free_step_rec(step_ptr); + start_count++; + } + } + list_iterator_destroy (step_iterator); +} + /* * job_step_complete - note normal completion the specified job step * IN job_id - id of the job to be completed @@ -564,14 +645,7 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid, if (step_ptr == NULL) return ESLURM_INVALID_JOB_ID; - select_g_step_finish(step_ptr); - - jobacct_storage_g_step_complete(acct_db_conn, step_ptr); - job_ptr->derived_ec = MAX(job_ptr->derived_ec, step_ptr->exit_code); - - _step_dealloc_lps(step_ptr); - gres_plugin_step_dealloc(step_ptr->gres_list, job_ptr->gres_list, - job_id, step_id); + _internal_step_complete(job_ptr, step_ptr, false); last_job_update = time(NULL); error_code = delete_step_record(job_ptr, step_id); @@ -580,6 +654,7 @@ int 
job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid, step_id); return ESLURM_ALREADY_DONE; } + _wake_pending_steps(job_ptr); return SLURM_SUCCESS; } @@ -752,8 +827,6 @@ _pick_step_nodes (struct job_record *job_ptr, return NULL; #endif nodes_avail = bit_copy (job_ptr->node_bitmap); - if (nodes_avail == NULL) - fatal("bit_copy malloc failure"); bit_and (nodes_avail, up_node_bitmap); if (step_spec->features) { /* We only select for a single feature name here. @@ -767,13 +840,14 @@ _pick_step_nodes (struct job_record *job_ptr, bit_nclear(nodes_avail, 0, (bit_size(nodes_avail)-1)); } - if (step_spec->mem_per_cpu && + if (step_spec->pn_min_memory && ((job_resrcs_ptr->memory_allocated == NULL) || (job_resrcs_ptr->memory_used == NULL))) { error("_pick_step_nodes: job lacks memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); - step_spec->mem_per_cpu = 0; - } + step_spec->pn_min_memory = 0; + } else if (step_spec->pn_min_memory == MEM_PER_CPU) + step_spec->pn_min_memory = 0; /* clear MEM_PER_CPU flag */ if (job_ptr->next_step_id == 0) { if (job_ptr->details && job_ptr->details->prolog_running) { @@ -867,21 +941,38 @@ _pick_step_nodes (struct job_record *job_ptr, avail_tasks = step_spec->num_tasks; total_tasks = step_spec->num_tasks; } - if (step_spec->mem_per_cpu && _is_mem_resv()) { + if (_is_mem_resv() && + (step_spec->pn_min_memory & MEM_PER_CPU)) { + uint32_t mem_use = step_spec->pn_min_memory; + mem_use &= (~MEM_PER_CPU); + avail_mem = job_resrcs_ptr-> memory_allocated[node_inx] - job_resrcs_ptr->memory_used[node_inx]; - task_cnt = avail_mem / step_spec->mem_per_cpu; + task_cnt = avail_mem / mem_use; if (cpus_per_task > 0) task_cnt /= cpus_per_task; avail_tasks = MIN(avail_tasks, task_cnt); total_mem = job_resrcs_ptr-> memory_allocated[node_inx]; - task_cnt = total_mem / step_spec->mem_per_cpu; + task_cnt = total_mem / mem_use; if (cpus_per_task > 0) task_cnt /= cpus_per_task; total_tasks = MIN(total_tasks, task_cnt); + } else 
if (_is_mem_resv() && step_spec->pn_min_memory) { + uint32_t mem_use = step_spec->pn_min_memory; + + avail_mem = job_resrcs_ptr-> + memory_allocated[node_inx] - + job_resrcs_ptr->memory_used[node_inx]; + if (avail_mem < mem_use) + avail_tasks = 0; + + total_mem = job_resrcs_ptr-> + memory_allocated[node_inx]; + if (total_mem < mem_use) + total_tasks = 0; } gres_cnt = gres_plugin_step_test(step_gres_list, @@ -970,7 +1061,7 @@ _pick_step_nodes (struct job_record *job_ptr, return NULL; } - if ((step_spec->mem_per_cpu && _is_mem_resv()) || + if ((step_spec->pn_min_memory && _is_mem_resv()) || (step_spec->gres && (step_spec->gres[0]))) { int fail_mode = ESLURM_INVALID_TASK_MEMORY; uint32_t tmp_mem, tmp_cpus, avail_cpus, total_cpus; @@ -988,21 +1079,38 @@ _pick_step_nodes (struct job_record *job_ptr, total_cpus = job_resrcs_ptr->cpus[node_inx]; usable_cpu_cnt[i] = avail_cpus = total_cpus; - if (step_spec->mem_per_cpu) { + if (step_spec->pn_min_memory & MEM_PER_CPU) { + uint32_t mem_use = step_spec->pn_min_memory; + mem_use &= (~MEM_PER_CPU); /* ignore current step allocations */ tmp_mem = job_resrcs_ptr-> memory_allocated[node_inx]; - tmp_cpus = tmp_mem / step_spec->mem_per_cpu; + tmp_cpus = tmp_mem / mem_use; total_cpus = MIN(total_cpus, tmp_cpus); /* consider current step allocations */ tmp_mem -= job_resrcs_ptr-> memory_used[node_inx]; - tmp_cpus = tmp_mem / step_spec->mem_per_cpu; + tmp_cpus = tmp_mem / mem_use; if (tmp_cpus < avail_cpus) { avail_cpus = tmp_cpus; usable_cpu_cnt[i] = avail_cpus; fail_mode = ESLURM_INVALID_TASK_MEMORY; } + } else if (step_spec->pn_min_memory) { + uint32_t mem_use = step_spec->pn_min_memory; + /* ignore current step allocations */ + tmp_mem = job_resrcs_ptr-> + memory_allocated[node_inx]; + if (tmp_mem < mem_use) + total_cpus = 0; + /* consider current step allocations */ + tmp_mem -= job_resrcs_ptr-> + memory_used[node_inx]; + if ((tmp_mem < mem_use) && (avail_cpus > 0)) { + avail_cpus = 0; + usable_cpu_cnt[i] = avail_cpus; + fail_mode 
= ESLURM_INVALID_TASK_MEMORY; + } } if (step_spec->gres) { @@ -1137,8 +1245,6 @@ _pick_step_nodes (struct job_record *job_ptr, } else if (step_spec->min_nodes && (node_cnt > step_spec->min_nodes)) { nodes_picked = bit_alloc(bit_size(nodes_avail)); - if (nodes_picked == NULL) - fatal("bit_alloc malloc failure"); FREE_NULL_BITMAP(nodes_avail); nodes_avail = selected_nodes; selected_nodes = NULL; @@ -1151,8 +1257,6 @@ _pick_step_nodes (struct job_record *job_ptr, } } else { nodes_picked = bit_alloc(bit_size(nodes_avail)); - if (nodes_picked == NULL) - fatal("bit_alloc malloc failure"); } /* In case we are in relative mode, do not look for idle nodes @@ -1175,11 +1279,11 @@ _pick_step_nodes (struct job_record *job_ptr, FREE_NULL_BITMAP (relative_nodes); } else { nodes_idle = bit_alloc (bit_size (nodes_avail) ); - if (nodes_idle == NULL) - fatal("bit_alloc malloc failure"); step_iterator = list_iterator_create(job_ptr->step_list); while ((step_p = (struct step_record *) list_next(step_iterator))) { + if (step_p->state != JOB_RUNNING) + continue; bit_or(nodes_idle, step_p->step_node_bitmap); if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) { char *temp; @@ -1509,7 +1613,9 @@ static void _pick_step_cores(struct step_record *step_ptr, return; } } - if (use_all_cores) + /* The test for cores==0 is just to avoid CLANG errors. + * It should never happen */ + if (use_all_cores || (cores == 0)) return; /* We need to over-subscribe one or more cores. 
@@ -1595,12 +1701,12 @@ extern void step_alloc_lps(struct step_record *step_ptr) } #endif - if (step_ptr->mem_per_cpu && _is_mem_resv() && + if (step_ptr->pn_min_memory && _is_mem_resv() && ((job_resrcs_ptr->memory_allocated == NULL) || (job_resrcs_ptr->memory_used == NULL))) { error("step_alloc_lps: lack memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); - step_ptr->mem_per_cpu = 0; + step_ptr->pn_min_memory = 0; } for (i_node = i_first; i_node <= i_last; i_node++) { @@ -1627,9 +1733,16 @@ extern void step_alloc_lps(struct step_record *step_ptr) gres_plugin_step_alloc(step_ptr->gres_list, job_ptr->gres_list, job_node_inx, cpus_alloc, job_ptr->job_id, step_ptr->step_id); - if (step_ptr->mem_per_cpu && _is_mem_resv()) { - job_resrcs_ptr->memory_used[job_node_inx] += - (step_ptr->mem_per_cpu * cpus_alloc); + if (step_ptr->pn_min_memory && _is_mem_resv()) { + if (step_ptr->pn_min_memory & MEM_PER_CPU) { + uint32_t mem_use = step_ptr->pn_min_memory; + mem_use &= (~MEM_PER_CPU); + job_resrcs_ptr->memory_used[job_node_inx] += + (mem_use * cpus_alloc); + } else { + job_resrcs_ptr->memory_used[job_node_inx] += + step_ptr->pn_min_memory; + } } if (pick_step_cores) { _pick_step_cores(step_ptr, job_resrcs_ptr, @@ -1712,12 +1825,12 @@ static void _step_dealloc_lps(struct step_record *step_ptr) if (i_first == -1) /* empty bitmap */ return; - if (step_ptr->mem_per_cpu && _is_mem_resv() && + if (step_ptr->pn_min_memory && _is_mem_resv() && ((job_resrcs_ptr->memory_allocated == NULL) || (job_resrcs_ptr->memory_used == NULL))) { error("_step_dealloc_lps: lack memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); - step_ptr->mem_per_cpu = 0; + step_ptr->pn_min_memory = 0; } for (i_node = i_first; i_node <= i_last; i_node++) { @@ -1745,9 +1858,14 @@ static void _step_dealloc_lps(struct step_record *step_ptr) job_ptr->job_id, step_ptr->step_id); job_resrcs_ptr->cpus_used[job_node_inx] = 0; } - if (step_ptr->mem_per_cpu 
&& _is_mem_resv()) { - uint32_t mem_use = step_ptr->mem_per_cpu * cpus_alloc; - if (job_resrcs_ptr->memory_used[job_node_inx] >= mem_use) { + if (step_ptr->pn_min_memory && _is_mem_resv()) { + uint32_t mem_use = step_ptr->pn_min_memory; + if (mem_use & MEM_PER_CPU) { + mem_use &= (~MEM_PER_CPU); + mem_use *= cpus_alloc; + } + if (job_resrcs_ptr->memory_used[job_node_inx] >= + mem_use) { job_resrcs_ptr->memory_used[job_node_inx] -= mem_use; } else { @@ -1875,7 +1993,7 @@ step_create(job_step_create_request_msg_t *step_specs, return ESLURM_TASKDIST_ARBITRARY_UNSUPPORTED; } - if (_test_strlen(step_specs->ckpt_dir, "ckpt_dir", 1024) || + if (_test_strlen(step_specs->ckpt_dir, "ckpt_dir", MAXPATHLEN) || _test_strlen(step_specs->gres, "gres", 1024) || _test_strlen(step_specs->host, "host", 1024) || _test_strlen(step_specs->name, "name", 1024) || @@ -1919,10 +2037,8 @@ step_create(job_step_create_request_msg_t *step_specs, if (cpus_per_mp == (uint16_t)NO_VAL) select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT, &cpus_per_mp); - /* Below is done to get the correct cpu_count and then we need - to set the cpu_count to 0 later so just pretend we are - overcommitting. - */ + /* Below is done to get the correct cpu_count and then we need to set + * the cpu_count to 0 later so just pretend we are overcommitting. 
*/ step_specs->cpu_count = node_count * cpus_per_mp; step_specs->overcommit = 1; step_specs->exclusive = 0; @@ -1947,7 +2063,7 @@ step_create(job_step_create_request_msg_t *step_specs, return ESLURM_BAD_TASK_COUNT; /* we set cpus_per_task to 0 if we can't spread them evenly - over the nodes (hetergeneous systems) */ + * over the nodes (hetergeneous systems) */ if (!step_specs->cpu_count || (step_specs->cpu_count % step_specs->num_tasks)) cpus_per_task = 0; @@ -1985,6 +2101,10 @@ step_create(job_step_create_request_msg_t *step_specs, if (step_gres_list) list_destroy(step_gres_list); select_g_select_jobinfo_free(select_jobinfo); + if ((ret_code == ESLURM_NODES_BUSY) || + (ret_code == ESLURM_PORTS_BUSY) || + (ret_code == ESLURM_INTERCONNECT_BUSY)) + _build_pending_step(job_ptr, step_specs); return ret_code; } #ifdef HAVE_CRAY @@ -1993,8 +2113,7 @@ step_create(job_step_create_request_msg_t *step_specs, #endif #ifdef HAVE_BGQ /* Things might of changed here since sometimes users ask for - the wrong size in cnodes to make a block. - */ + * the wrong size in cnodes to make a block. 
*/ select_g_select_jobinfo_get(select_jobinfo, SELECT_JOBDATA_NODE_CNT, &node_count); @@ -2030,7 +2149,9 @@ step_create(job_step_create_request_msg_t *step_specs, select_g_select_jobinfo_free(select_jobinfo); return ESLURMD_TOOMANYSTEPS; } - step_ptr->step_id = job_ptr->next_step_id++; + step_ptr->start_time = time(NULL); + step_ptr->state = JOB_RUNNING; + step_ptr->step_id = job_ptr->next_step_id++; /* Here is where the node list is set for the step */ if (step_specs->node_list && @@ -2072,7 +2193,7 @@ step_create(job_step_create_request_msg_t *step_specs, step_ptr->batch_step = batch_step; step_ptr->cpu_freq = step_specs->cpu_freq; step_ptr->cpus_per_task = (uint16_t)cpus_per_task; - step_ptr->mem_per_cpu = step_specs->mem_per_cpu; + step_ptr->pn_min_memory = step_specs->pn_min_memory; step_ptr->ckpt_interval = step_specs->ckpt_interval; step_ptr->ckpt_time = now; step_ptr->cpu_count = orig_cpu_count; @@ -2080,6 +2201,7 @@ step_create(job_step_create_request_msg_t *step_specs, step_ptr->exclusive = step_specs->exclusive; step_ptr->ckpt_dir = xstrdup(step_specs->ckpt_dir); step_ptr->no_kill = step_specs->no_kill; + step_ptr->ext_sensors = ext_sensors_alloc(); /* step's name and network default to job's values if not * specified in the step specification */ @@ -2126,7 +2248,7 @@ step_create(job_step_create_request_msg_t *step_specs, xfree(step_node_list); if (!step_ptr->step_layout) { delete_step_record (job_ptr, step_ptr->step_id); - if (step_specs->mem_per_cpu) + if (step_specs->pn_min_memory) return ESLURM_INVALID_TASK_MEMORY; return SLURM_ERROR; } @@ -2206,13 +2328,15 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, xassert(job_resrcs_ptr->cpus); xassert(job_resrcs_ptr->cpus_used); - if (step_ptr->mem_per_cpu && _is_mem_resv() && + if (step_ptr->pn_min_memory && _is_mem_resv() && ((job_resrcs_ptr->memory_allocated == NULL) || (job_resrcs_ptr->memory_used == NULL))) { error("step_layout_create: lack memory allocation details " "to 
enforce memory limits for job %u", job_ptr->job_id); - step_ptr->mem_per_cpu = 0; - } + step_ptr->pn_min_memory = 0; + } else if (step_ptr->pn_min_memory == MEM_PER_CPU) + step_ptr->pn_min_memory = 0; /* clear MEM_PER_CPU flag */ + #ifdef HAVE_BGQ /* Since we have to deal with a conversion between cnodes and midplanes here the math is really easy, and already has @@ -2243,11 +2367,14 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, job_resrcs_ptr->cpus_used[pos]; } else usable_cpus = job_resrcs_ptr->cpus[pos]; - if (step_ptr->mem_per_cpu && _is_mem_resv()) { + if ((step_ptr->pn_min_memory & MEM_PER_CPU) && + _is_mem_resv()) { + uint32_t mem_use = step_ptr->pn_min_memory; + mem_use &= (~MEM_PER_CPU); usable_mem = - job_resrcs_ptr->memory_allocated[pos]- + job_resrcs_ptr->memory_allocated[pos] - job_resrcs_ptr->memory_used[pos]; - usable_mem /= step_ptr->mem_per_cpu; + usable_mem /= mem_use; usable_cpus = MIN(usable_cpus, usable_mem); } @@ -2339,7 +2466,7 @@ static void _pack_ctld_job_step_info(struct step_record *step_ptr, Buf buffer, if (step_ptr->job_ptr->total_cpus) cpu_cnt = step_ptr->job_ptr->total_cpus; - else if(step_ptr->job_ptr->details) + else if (step_ptr->job_ptr->details) cpu_cnt = step_ptr->job_ptr->details->min_cpus; else cpu_cnt = step_ptr->job_ptr->cpu_cnt; @@ -2358,7 +2485,9 @@ static void _pack_ctld_job_step_info(struct step_record *step_ptr, Buf buffer, cpu_cnt = step_ptr->cpu_count; #endif - if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + pack32(step_ptr->job_ptr->array_job_id, buffer); + pack16(step_ptr->job_ptr->array_task_id, buffer); pack32(step_ptr->job_ptr->job_id, buffer); pack32(step_ptr->step_id, buffer); pack16(step_ptr->ckpt_interval, buffer); @@ -2367,6 +2496,7 @@ static void _pack_ctld_job_step_info(struct step_record *step_ptr, Buf buffer, pack32(step_ptr->cpu_freq, buffer); pack32(task_cnt, buffer); pack32(step_ptr->time_limit, buffer); 
+ pack16(step_ptr->state, buffer); pack_time(step_ptr->start_time, buffer); if (IS_JOB_SUSPENDED(step_ptr->job_ptr)) { @@ -2389,7 +2519,38 @@ static void _pack_ctld_job_step_info(struct step_record *step_ptr, Buf buffer, packstr(step_ptr->gres, buffer); select_g_select_jobinfo_pack(step_ptr->select_jobinfo, buffer, protocol_version); - } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + pack32(step_ptr->job_ptr->job_id, buffer); + pack32(step_ptr->step_id, buffer); + pack16(step_ptr->ckpt_interval, buffer); + pack32(step_ptr->job_ptr->user_id, buffer); + pack32(cpu_cnt, buffer); + pack32(step_ptr->cpu_freq, buffer); + pack32(task_cnt, buffer); + pack32(step_ptr->time_limit, buffer); + + pack_time(step_ptr->start_time, buffer); + if (IS_JOB_SUSPENDED(step_ptr->job_ptr)) { + run_time = step_ptr->pre_sus_time; + } else { + begin_time = MAX(step_ptr->start_time, + step_ptr->job_ptr->suspend_time); + run_time = step_ptr->pre_sus_time + + difftime(time(NULL), begin_time); + } + pack_time(run_time, buffer); + + packstr(step_ptr->job_ptr->partition, buffer); + packstr(step_ptr->resv_ports, buffer); + packstr(node_list, buffer); + packstr(step_ptr->name, buffer); + packstr(step_ptr->network, buffer); + pack_bit_fmt(pack_bitstr, buffer); + packstr(step_ptr->ckpt_dir, buffer); + packstr(step_ptr->gres, buffer); + select_g_select_jobinfo_pack(step_ptr->select_jobinfo, buffer, + protocol_version); + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { pack32(step_ptr->job_ptr->job_id, buffer); pack32(step_ptr->step_id, buffer); pack16(step_ptr->ckpt_interval, buffer); @@ -2455,12 +2616,13 @@ extern int pack_ctld_job_step_info_response_msg( job_iterator = list_iterator_create(job_list); while ((job_ptr = list_next(job_iterator))) { - if ((job_id != NO_VAL) && (job_ptr->job_id != job_id)) + if ((job_id != NO_VAL) && (job_id != job_ptr->job_id) && + (job_id != job_ptr->array_job_id)) continue; - if 
(((show_flags & SHOW_ALL) == 0) - && (job_ptr->part_ptr) - && (job_ptr->part_ptr->flags & PART_FLAG_HIDDEN)) + if (((show_flags & SHOW_ALL) == 0) && + (job_ptr->part_ptr) && + (job_ptr->part_ptr->flags & PART_FLAG_HIDDEN)) continue; if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && @@ -2473,8 +2635,8 @@ extern int pack_ctld_job_step_info_response_msg( step_iterator = list_iterator_create(job_ptr->step_list); while ((step_ptr = list_next(step_iterator))) { - if ((step_id != NO_VAL) - && (step_ptr->step_id != step_id)) + if ((step_id != NO_VAL) && + (step_ptr->step_id != step_id)) continue; _pack_ctld_job_step_info(step_ptr, buffer, protocol_version); @@ -2522,6 +2684,8 @@ extern int kill_step_on_node(struct job_record *job_ptr, bit_position = node_ptr - node_record_table_ptr; step_iterator = list_iterator_create (job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; if (bit_test(step_ptr->step_node_bitmap, bit_position) == 0) continue; if (node_fail && !step_ptr->no_kill) @@ -2794,6 +2958,7 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, return EINVAL; } + ext_sensors_g_get_stependdata(step_ptr); jobacctinfo_aggregate(step_ptr->jobacct, req->jobacct); /* we have been adding task average frequencies for @@ -2812,8 +2977,6 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, req->range_last = nodes - 1; #endif step_ptr->exit_node_bitmap = bit_alloc(nodes); - if (step_ptr->exit_node_bitmap == NULL) - fatal("bit_alloc: %m"); step_ptr->exit_code = req->step_rc; } else { nodes = _bitstr_bits(step_ptr->exit_node_bitmap); @@ -2936,6 +3099,8 @@ extern int step_epilog_complete(struct job_record *job_ptr, step_iterator = list_iterator_create(job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; if ((!step_ptr->switch_job) || 
(bit_test(step_ptr->step_node_bitmap, node_inx) == 0)) continue; @@ -2986,6 +3151,8 @@ suspend_job_step(struct job_record *job_ptr) step_iterator = list_iterator_create (job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; _suspend_job_step(job_ptr, step_ptr, now); } list_iterator_destroy (step_iterator); @@ -3015,6 +3182,8 @@ resume_job_step(struct job_record *job_ptr) step_iterator = list_iterator_create (job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; _resume_job_step(job_ptr, step_ptr, now); } list_iterator_destroy (step_iterator); @@ -3037,11 +3206,12 @@ extern void dump_job_step_state(struct job_record *job_ptr, pack16(step_ptr->ckpt_interval, buffer); pack16(step_ptr->cpus_per_task, buffer); pack16(step_ptr->resv_port_cnt, buffer); + pack16(step_ptr->state, buffer); pack8(step_ptr->no_kill, buffer); pack32(step_ptr->cpu_count, buffer); - pack32(step_ptr->mem_per_cpu, buffer); + pack32(step_ptr->pn_min_memory, buffer); pack32(step_ptr->exit_code, buffer); if (step_ptr->exit_code != NO_VAL) { pack_bit_fmt(step_ptr->exit_node_bitmap, buffer); @@ -3054,8 +3224,9 @@ extern void dump_job_step_state(struct job_record *job_ptr, pack_bit_fmt(step_ptr->core_bitmap_job, buffer); } else pack32((uint32_t) 0, buffer); - pack32(step_ptr->time_limit, buffer); + pack32(step_ptr->cpu_freq, buffer); + pack_time(step_ptr->start_time, buffer); pack_time(step_ptr->pre_sus_time, buffer); pack_time(step_ptr->tot_sus_time, buffer); @@ -3097,9 +3268,9 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, struct step_record *step_ptr = NULL; uint8_t no_kill; uint16_t cyclic_alloc, port, batch_step, bit_cnt; - uint16_t ckpt_interval, cpus_per_task, resv_port_cnt; - uint32_t core_size, cpu_count, exit_code, mem_per_cpu, name_len; - uint32_t step_id, time_limit; + uint16_t ckpt_interval, 
cpus_per_task, resv_port_cnt, state; + uint32_t core_size, cpu_count, exit_code, pn_min_memory, name_len; + uint32_t step_id, time_limit, cpu_freq; time_t start_time, pre_sus_time, tot_sus_time, ckpt_time; char *host = NULL, *ckpt_dir = NULL, *core_job = NULL; char *resv_ports = NULL, *name = NULL, *network = NULL; @@ -3110,18 +3281,19 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, List gres_list = NULL; dynamic_plugin_data_t *select_jobinfo = NULL; - if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpack32(&step_id, buffer); safe_unpack16(&cyclic_alloc, buffer); safe_unpack16(&port, buffer); safe_unpack16(&ckpt_interval, buffer); safe_unpack16(&cpus_per_task, buffer); safe_unpack16(&resv_port_cnt, buffer); + safe_unpack16(&state, buffer); safe_unpack8(&no_kill, buffer); safe_unpack32(&cpu_count, buffer); - safe_unpack32(&mem_per_cpu, buffer); + safe_unpack32(&pn_min_memory, buffer); safe_unpack32(&exit_code, buffer); if (exit_code != NO_VAL) { safe_unpackstr_xmalloc(&bit_fmt, &name_len, buffer); @@ -3130,8 +3302,9 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, safe_unpack32(&core_size, buffer); if (core_size) safe_unpackstr_xmalloc(&core_job, &name_len, buffer); - safe_unpack32(&time_limit, buffer); + safe_unpack32(&cpu_freq, buffer); + safe_unpack_time(&start_time, buffer); safe_unpack_time(&pre_sus_time, buffer); safe_unpack_time(&tot_sus_time, buffer); @@ -3167,23 +3340,84 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, if (select_g_select_jobinfo_unpack(&select_jobinfo, buffer, protocol_version)) goto unpack_error; - /* validity test as possible */ - if (cyclic_alloc > 1) { - error("Invalid data for job %u.%u: cyclic_alloc=%u", - job_ptr->job_id, step_id, cyclic_alloc); - goto unpack_error; + } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + safe_unpack32(&step_id, buffer); + safe_unpack16(&cyclic_alloc, buffer); + 
safe_unpack16(&port, buffer); + safe_unpack16(&ckpt_interval, buffer); + safe_unpack16(&cpus_per_task, buffer); + safe_unpack16(&resv_port_cnt, buffer); + + safe_unpack8(&no_kill, buffer); + + safe_unpack32(&cpu_count, buffer); + safe_unpack32(&pn_min_memory, buffer); + safe_unpack32(&exit_code, buffer); + if (exit_code != NO_VAL) { + safe_unpackstr_xmalloc(&bit_fmt, &name_len, buffer); + safe_unpack16(&bit_cnt, buffer); } - if (no_kill > 1) { - error("Invalid data for job %u.%u: no_kill=%u", - job_ptr->job_id, step_id, no_kill); + safe_unpack32(&core_size, buffer); + if (core_size) + safe_unpackstr_xmalloc(&core_job, &name_len, buffer); + + safe_unpack32(&time_limit, buffer); + safe_unpack_time(&start_time, buffer); + safe_unpack_time(&pre_sus_time, buffer); + safe_unpack_time(&tot_sus_time, buffer); + safe_unpack_time(&ckpt_time, buffer); + + safe_unpackstr_xmalloc(&host, &name_len, buffer); + safe_unpackstr_xmalloc(&resv_ports, &name_len, buffer); + safe_unpackstr_xmalloc(&name, &name_len, buffer); + safe_unpackstr_xmalloc(&network, &name_len, buffer); + safe_unpackstr_xmalloc(&ckpt_dir, &name_len, buffer); + + safe_unpackstr_xmalloc(&gres, &name_len, buffer); + if (gres_plugin_step_state_unpack(&gres_list, buffer, + job_ptr->job_id, step_id, + protocol_version) + != SLURM_SUCCESS) goto unpack_error; + + safe_unpack16(&batch_step, buffer); + if (!batch_step) { + if (unpack_slurm_step_layout(&step_layout, buffer, + protocol_version)) + goto unpack_error; + switch_alloc_jobinfo(&switch_tmp); + if (switch_unpack_jobinfo(switch_tmp, buffer)) + goto unpack_error; } + checkpoint_alloc_jobinfo(&check_tmp); + if (checkpoint_unpack_jobinfo(check_tmp, buffer, + protocol_version)) + goto unpack_error; + + if (select_g_select_jobinfo_unpack(&select_jobinfo, buffer, + protocol_version)) + goto unpack_error; + /* Variables added since version 2.4 */ + cpu_freq = NO_VAL; + state = JOB_RUNNING; } else { error("load_step_state: protocol_version " "%hu not supported", 
protocol_version); goto unpack_error; } + /* validity test as possible */ + if (cyclic_alloc > 1) { + error("Invalid data for job %u.%u: cyclic_alloc=%u", + job_ptr->job_id, step_id, cyclic_alloc); + goto unpack_error; + } + if (no_kill > 1) { + error("Invalid data for job %u.%u: no_kill=%u", + job_ptr->job_id, step_id, no_kill); + goto unpack_error; + } + step_ptr = find_step_record(job_ptr, step_id); if (step_ptr == NULL) step_ptr = _create_step_record(job_ptr); @@ -3205,7 +3439,7 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, step_ptr->gres_list = gres_list; step_ptr->port = port; step_ptr->ckpt_interval= ckpt_interval; - step_ptr->mem_per_cpu = mem_per_cpu; + step_ptr->pn_min_memory= pn_min_memory; step_ptr->host = host; host = NULL; /* re-used, nothing left to free */ step_ptr->batch_step = batch_step; @@ -3225,6 +3459,11 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, step_ptr->switch_job = switch_tmp; step_ptr->check_job = check_tmp; + step_ptr->cpu_freq = cpu_freq; + step_ptr->state = state; + + if (!step_ptr->ext_sensors) + step_ptr->ext_sensors = ext_sensors_alloc(); step_ptr->exit_code = exit_code; if (bit_fmt) { @@ -3232,8 +3471,6 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer, * is actively in progress at step save time. Otherwise * the bitmap is NULL. 
*/ step_ptr->exit_node_bitmap = bit_alloc(bit_cnt); - if (step_ptr->exit_node_bitmap == NULL) - fatal("bit_alloc: %m"); if (bit_unfmt(step_ptr->exit_node_bitmap, bit_fmt)) { error("error recovering exit_node_bitmap from %s", bit_fmt); @@ -3333,11 +3570,11 @@ extern void step_checkpoint(void) continue; /* ignore periodic step ckpt */ } step_iterator = list_iterator_create (job_ptr->step_list); - if (!step_iterator) - fatal("list_iterator_create: malloc failure"); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { char *image_dir = NULL; + if (step_ptr->state != JOB_RUNNING) + continue; if (step_ptr->ckpt_interval == 0) continue; ckpt_due = step_ptr->ckpt_time + @@ -3405,8 +3642,6 @@ static void _signal_step_timelimit(struct job_record *job_ptr, agent_args->msg_type = REQUEST_KILL_TIMELIMIT; agent_args->retry = 1; agent_args->hostlist = hostlist_create(""); - if (agent_args->hostlist == NULL) - fatal("hostlist_create: malloc failure"); kill_step = xmalloc(sizeof(kill_job_msg_t)); kill_step->job_id = job_ptr->job_id; kill_step->step_id = step_ptr->step_id; @@ -3462,7 +3697,8 @@ check_job_step_time_limit (struct job_record *job_ptr, time_t now) step_iterator = list_iterator_create (job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { - + if (step_ptr->state != JOB_RUNNING) + continue; if (step_ptr->time_limit == INFINITE || step_ptr->time_limit == NO_VAL) continue; @@ -3502,19 +3738,60 @@ static bool _is_mem_resv(void) extern int update_step(step_update_request_msg_t *req, uid_t uid) { struct job_record *job_ptr; - struct step_record *step_ptr; + struct step_record *step_ptr = NULL; ListIterator step_iterator; int mod_cnt = 0; + bool new_step = 0; job_ptr = find_job_record(req->job_id); if (job_ptr == NULL) { error("update_step: invalid job id %u", req->job_id); return ESLURM_INVALID_JOB_ID; } - - if ((job_ptr->user_id != uid) && !validate_operator(uid) && - !assoc_mgr_is_user_acct_coord(acct_db_conn, uid, - 
job_ptr->account)) { + if (req->jobacct) { + if (!validate_slurm_user(uid)) { + error("Security violation, STEP_UPDATE RPC " + "from uid %d", uid); + return ESLURM_USER_ID_MISSING; + } + /* need to create step (using some other launch mech + that didn't use srun to launch). Don't use + _create_step_record though since we don't want to + push it on the job's step_list. + */ + if (req->step_id == NO_VAL) { + step_ptr = xmalloc(sizeof(struct step_record)); + step_ptr->job_ptr = job_ptr; + step_ptr->exit_code = NO_VAL; + step_ptr->time_limit = INFINITE; + step_ptr->jobacct = jobacctinfo_create(NULL); + step_ptr->requid = -1; + step_ptr->step_node_bitmap = + bit_copy(job_ptr->node_bitmap); + req->step_id = step_ptr->step_id = + job_ptr->next_step_id++; + new_step = 1; + } else { + if (req->step_id >= job_ptr->next_step_id) + return ESLURM_INVALID_JOB_ID; + if (!(step_ptr + = find_step_record(job_ptr, req->step_id))) { + /* If updating this after the fact we + need to remake the step so we can + send the updated parts to + accounting. 
+ */ + step_ptr = xmalloc(sizeof(struct step_record)); + step_ptr->job_ptr = job_ptr; + step_ptr->jobacct = jobacctinfo_create(NULL); + step_ptr->requid = -1; + step_ptr->step_id = req->step_id; + new_step = 1; + } + } + } else if ((job_ptr->user_id != uid) && !validate_operator(uid) && + !assoc_mgr_is_user_acct_coord(acct_db_conn, uid, + job_ptr->account)) { error("Security violation, STEP_UPDATE RPC from uid %d", uid); return ESLURM_USER_ID_MISSING; } @@ -3523,10 +3800,10 @@ extern int update_step(step_update_request_msg_t *req, uid_t uid) * any steps with any time limit */ if (req->step_id == NO_VAL) { step_iterator = list_iterator_create(job_ptr->step_list); - if (step_iterator == NULL) - fatal("list_iterator_create: malloc failure"); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; step_ptr->time_limit = req->time_limit; mod_cnt++; info("Updating step %u.%u time limit to %u", @@ -3534,14 +3811,45 @@ extern int update_step(step_update_request_msg_t *req, uid_t uid) } list_iterator_destroy (step_iterator); } else { - step_ptr = find_step_record(job_ptr, req->step_id); - if (step_ptr) { + if (!step_ptr) + step_ptr = find_step_record(job_ptr, req->step_id); + + if (!step_ptr) + return ESLURM_INVALID_JOB_ID; + + if (req->jobacct) { + jobacctinfo_aggregate(step_ptr->jobacct, req->jobacct); + if (new_step) { + step_ptr->start_time = req->start_time; + step_ptr->name = xstrdup(req->name); + jobacct_storage_g_step_start( + acct_db_conn, step_ptr); + } else if (!step_ptr->exit_node_bitmap) { + /* If the exit_code is not NO_VAL then + * we need to initialize the node bitmap for + * exited nodes for packing. 
*/ + int nodes = bit_set_count( + step_ptr->step_node_bitmap); + step_ptr->exit_node_bitmap = bit_alloc(nodes); + if (!step_ptr->exit_node_bitmap) + fatal("bit_alloc: %m"); + } + step_ptr->exit_code = req->exit_code; + + jobacct_storage_g_step_complete(acct_db_conn, step_ptr); + + if (new_step) + _free_step_rec(step_ptr); + + mod_cnt++; + info("Updating step %u.%u jobacct info", + req->job_id, req->step_id); + } else { step_ptr->time_limit = req->time_limit; mod_cnt++; info("Updating step %u.%u time limit to %u", req->job_id, req->step_id, req->time_limit); - } else - return ESLURM_INVALID_JOB_ID; + } } if (mod_cnt) last_job_update = time(NULL); @@ -3585,10 +3893,10 @@ extern void rebuild_step_bitmaps(struct job_record *job_ptr, return; step_iterator = list_iterator_create(job_ptr->step_list); - if (step_iterator == NULL) - fatal("list_iterator_create: malloc failure"); while ((step_ptr = (struct step_record *) list_next (step_iterator))) { + if (step_ptr->state != JOB_RUNNING) + continue; gres_plugin_step_state_rebase(step_ptr->gres_list, orig_job_node_bitmap, job_ptr->job_resrcs->node_bitmap); diff --git a/src/slurmctld/trigger_mgr.c b/src/slurmctld/trigger_mgr.c index 596f001acd4cb3f8acfa1691df378b360df082e8..5cb7db3930704fabcf554c591d888a2047abc082 100644 --- a/src/slurmctld/trigger_mgr.c +++ b/src/slurmctld/trigger_mgr.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -218,8 +218,6 @@ extern int trigger_pull(trigger_info_msg_t *msg) if (trigger_list == NULL) { trigger_list = list_create(_trig_del); - if (trigger_list == NULL) - fatal("list_create: malloc failure"); } /* validate the request, designated trigger must be set */ @@ -885,9 +883,9 @@ static int _open_resv_state_file(char **state_file) return state_fd; } -extern int trigger_state_restore(void) +extern void trigger_state_restore(void) { - int data_allocated, data_read = 0, error_code = 0; + int data_allocated, data_read = 0; uint32_t data_size = 0; uint16_t protocol_version = (uint16_t) NO_VAL; int state_fd, trigger_cnt = 0; @@ -902,7 +900,6 @@ extern int trigger_state_restore(void) state_fd = _open_resv_state_file(&state_file); if (state_fd < 0) { info("No trigger state file (%s) to recover", state_file); - error_code = ENOENT; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); @@ -942,7 +939,7 @@ extern int trigger_state_restore(void) "incompatible"); xfree(ver_str); free_buf(buffer); - return EFAULT; + return; } xfree(ver_str); @@ -950,8 +947,8 @@ extern int trigger_state_restore(void) if (trigger_list) list_delete_all (trigger_list, _match_all_triggers, NULL); while (remaining_buf(buffer) > 0) { - error_code = _load_trigger_state(buffer, protocol_version); - if (error_code != SLURM_SUCCESS) + if (_load_trigger_state(buffer, protocol_version) != + SLURM_SUCCESS) goto unpack_error; trigger_cnt++; } @@ -961,7 +958,6 @@ unpack_error: error("Incomplete trigger data checkpoint file"); fini: verbose("State of %d triggers recovered", trigger_cnt); free_buf(buffer); - return SLURM_FAILURE; } static bool _front_end_job_test(bitstr_t *front_end_bitmap, @@ -989,7 +985,7 @@ static void _trigger_job_event(trig_mgr_info_t *trig_in, time_t now) if ((trig_in->job_ptr == NULL) || (trig_in->job_ptr->magic != JOB_MAGIC) || (trig_in->job_ptr->job_id != trig_in->job_id)) - trig_in->job_ptr = 
find_job_record(trig_in->job_ptr->job_id); + trig_in->job_ptr = find_job_record(trig_in->job_id); if ((trig_in->trig_type & TRIGGER_TYPE_FINI) && ((trig_in->job_ptr == NULL) || diff --git a/src/slurmctld/trigger_mgr.h b/src/slurmctld/trigger_mgr.h index bb4eb6decdfff1eb0196d890c7279ed6dd024ec9..faa9436616b32117930e05d58d484923670c71e7 100644 --- a/src/slurmctld/trigger_mgr.h +++ b/src/slurmctld/trigger_mgr.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -75,7 +75,7 @@ extern void trigger_primary_db_res_op(void); /* Save and restore state for slurmctld fail-over or restart */ extern int trigger_state_save(void); -extern int trigger_state_restore(void); +extern void trigger_state_restore(void); /* Free all allocated memory */ extern void trigger_fini(void); diff --git a/src/slurmd/Makefile.in b/src/slurmd/Makefile.in index 2af4b5132d8804ef8503ef4723b4df25e32ca6bb..6af7fd9d2e9fadace4640cfaf251d95d82a5bf39 100644 --- a/src/slurmd/Makefile.in +++ b/src/slurmd/Makefile.in @@ -55,6 +55,7 @@ subdir = src/slurmd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ 
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = 
@OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/slurmd/common/Makefile.in b/src/slurmd/common/Makefile.in index 4ea62d5ba3dc0f37067e269d51f1f7bda3d61674..9519646e87fe2baa6e7f2c3b511062c3a8a06ef0 100644 --- a/src/slurmd/common/Makefile.in +++ b/src/slurmd/common/Makefile.in @@ -58,6 +58,7 @@ subdir = src/slurmd/common DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -150,6 +154,8 @@ BLUEGENE_LOADED = 
@BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -170,6 +176,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -179,6 +188,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -186,6 +197,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -220,6 +240,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -247,6 +270,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/slurmd/common/proctrack.c b/src/slurmd/common/proctrack.c index 7621410ff5bcbc1730637c05ec34304753a84582..831f30e9c6690c8760dbd631a94d3a5817ae3e92 100644 --- 
a/src/slurmd/common/proctrack.c +++ b/src/slurmd/common/proctrack.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/proctrack.h b/src/slurmd/common/proctrack.h index 4ca94998ea70dcd14d1b3f071f03a42521955320..8c7e49051fb2acc412c5822f999344bc1579e068 100644 --- a/src/slurmd/common/proctrack.h +++ b/src/slurmd/common/proctrack.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/reverse_tree.h b/src/slurmd/common/reverse_tree.h index 66d6af95b4dfd0f5940009aa071d4ba491802460..5fc004e728ad542d3e5156301fb3a9e43866bd31 100644 --- a/src/slurmd/common/reverse_tree.h +++ b/src/slurmd/common/reverse_tree.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/run_script.c b/src/slurmd/common/run_script.c index 2eade8c1c97892968dae54a146f5448cd18e7817..bf7c28ad34f1fe8e1d7fc4fcf3019eef66733fd8 100644 --- a/src/slurmd/common/run_script.c +++ b/src/slurmd/common/run_script.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -183,8 +183,6 @@ static List _script_list_create (const char *pattern) switch (rc) { case 0: l = list_create ((ListDelF) _xfree_f); - if (l == NULL) - fatal("run_script: list_create: Out of memory"); for (i = 0; i < gl.gl_pathc; i++) list_push (l, xstrdup (gl.gl_pathv[i])); break; @@ -222,9 +220,6 @@ int run_script(const char *name, const char *pattern, uint32_t jobid, return error ("Unable to run %s [%s]", name, pattern); i = list_iterator_create (l); - if (i == NULL) - fatal ("run_script: list_iterator_create: Out of memory"); - while ((s = list_next (i))) { rc = run_one_script (name, s, jobid, max_wait, env); if (rc) { diff --git a/src/slurmd/common/run_script.h b/src/slurmd/common/run_script.h index 635487082eb153601b78cb68f01bbadcaf8ff427..0ba175ad319d1a550d34cbbb0d609de0c88a49a9 100644 --- a/src/slurmd/common/run_script.h +++ b/src/slurmd/common/run_script.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/set_oomadj.c b/src/slurmd/common/set_oomadj.c index 81dab1069d657a42e8bd9403abb6caecb3a329f8..6bcb80d95f3168581dec7d02bb70250e4ebc90ac 100644 --- a/src/slurmd/common/set_oomadj.c +++ b/src/slurmd/common/set_oomadj.c @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/set_oomadj.h b/src/slurmd/common/set_oomadj.h index 99aabf85f75c71d63f4e40e29a26cf1ec9104126..4384cebfc40581c46a4ab4a4ec2589c03ad0ed4b 100644 --- a/src/slurmd/common/set_oomadj.h +++ b/src/slurmd/common/set_oomadj.h @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/setproctitle.c b/src/slurmd/common/setproctitle.c index 1f701d5eb1111b5765ce04cc1b4142a052687296..0b838934e8b97c33eb30461570af4fc1da7ca59b 100644 --- a/src/slurmd/common/setproctitle.c +++ b/src/slurmd/common/setproctitle.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -85,6 +85,7 @@ # endif #endif +#include <errno.h> #if defined(__NetBSD__) #include <stdlib.h> #include <string.h> @@ -153,6 +154,7 @@ static const size_t ps_buffer_size = sizeof(ps_buffer); #else static char *ps_buffer; /* will point to argv area */ static size_t ps_buffer_size; /* space determined at run time */ +static char **new_environ = (char **) NULL; #endif /* save the original argv[] location here */ @@ -260,7 +262,6 @@ init_setproctitle(int argc, char *argv[]) { #if SETPROCTITLE_STRATEGY == PS_USE_CLOBBER_ARGV char *end_of_area = NULL; - char **new_environ; int i; #endif @@ -309,12 +310,15 @@ init_setproctitle(int argc, char *argv[]) * Duplicate and move the environment out of the way */ new_environ = malloc(sizeof(char *) * (i + 1)); + if (!new_environ) { + fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", + __FILE__, __LINE__, "init_setproctitle", + strerror(errno)); + abort(); + } for (i = 0; environ[i] != NULL; i++) { new_environ[i] = strdup(environ[i]); - //free(environ[i]); } - /* if(environ) */ -/* free(environ); */ new_environ[i] = NULL; environ = new_environ; #endif /* PS_USE_CLOBBER_ARGV */ @@ -327,11 +331,15 @@ void fini_setproctitle(void) #if SETPROCTITLE_STRATEGY == PS_USE_CLOBBER_ARGV int i; - for (i = 0; environ[i] != NULL; i++) { - free(environ[i]); + if (!new_environ) + return; + + for (i = 0; new_environ[i] != NULL; i++) { + free(new_environ[i]); } - free(environ); - environ = (char **) NULL; + free(new_environ); + new_environ = (char **) NULL; + environ = new_environ; #endif /* PS_USE_CLOBBER_ARGV */ } diff --git a/src/slurmd/common/setproctitle.h b/src/slurmd/common/setproctitle.h index eda7743f7f3c724bd1a4705b08c69b7521d54611..2b6a440f8659cf4daccd5c5306521ba490bf4b75 100644 --- a/src/slurmd/common/setproctitle.h +++ b/src/slurmd/common/setproctitle.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/slurmstepd_init.c b/src/slurmd/common/slurmstepd_init.c index 708e3c6d80fa143be32baba916db74f5b304a41c..02e598f322dee763480d8df3774e6da23c82b51d 100644 --- a/src/slurmd/common/slurmstepd_init.c +++ b/src/slurmd/common/slurmstepd_init.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -45,6 +45,7 @@ extern void pack_slurmd_conf_lite(slurmd_conf_t *conf, Buf buffer) { xassert(conf != NULL); packstr(conf->hostname, buffer); + pack16(conf->cpus, buffer); pack16(conf->boards, buffer); pack16(conf->sockets, buffer); pack16(conf->cores, buffer); @@ -58,7 +59,7 @@ extern void pack_slurmd_conf_lite(slurmd_conf_t *conf, Buf buffer) packstr(conf->logfile, buffer); packstr(conf->task_prolog, buffer); packstr(conf->task_epilog, buffer); - pack16(conf->job_acct_gather_freq, buffer); + packstr(conf->job_acct_gather_freq, buffer); packstr(conf->job_acct_gather_type, buffer); pack16(conf->propagate_prio, buffer); pack32(conf->debug_flags, buffer); @@ -77,6 +78,7 @@ extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer) uint32_t uint32_tmp; safe_unpackstr_xmalloc(&conf->hostname, &uint32_tmp, buffer); + safe_unpack16(&conf->cpus, buffer); safe_unpack16(&conf->boards, buffer); safe_unpack16(&conf->sockets, buffer); safe_unpack16(&conf->cores, buffer); @@ -90,7 +92,8 @@ extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf 
buffer) safe_unpackstr_xmalloc(&conf->logfile, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&conf->task_prolog, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&conf->task_epilog, &uint32_tmp, buffer); - safe_unpack16(&conf->job_acct_gather_freq, buffer); + safe_unpackstr_xmalloc(&conf->job_acct_gather_freq, &uint32_tmp, + buffer); safe_unpackstr_xmalloc(&conf->job_acct_gather_type, &uint32_tmp, buffer); safe_unpack16(&conf->propagate_prio, buffer); @@ -111,6 +114,8 @@ extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer) unpack_error: error("unpack_error in unpack_slurmd_conf_lite_no_alloc: %m"); + xfree(conf->job_acct_gather_freq); + xfree(conf->job_acct_gather_type); xfree(conf->hostname); xfree(conf->spooldir); xfree(conf->node_name); diff --git a/src/slurmd/common/slurmstepd_init.h b/src/slurmd/common/slurmstepd_init.h index 493f41ccfa568e9aab8ee2928da15d3747e086aa..60a0e2a1a0450ddcc06cbbe77a909c091dd90c6c 100644 --- a/src/slurmd/common/slurmstepd_init.h +++ b/src/slurmd/common/slurmstepd_init.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -48,6 +48,10 @@ #include "src/slurmd/slurmstepd/slurmstepd_job.h" #include "src/slurmd/slurmd/slurmd.h" +/* If you want to run memcheck on slurmstepd switch this comment */ +//#define SLURMSTEPD_MEMCHECK 1 +#undef SLURMSTEPD_MEMCHECK + typedef enum slurmd_step_tupe { LAUNCH_BATCH_JOB = 0, LAUNCH_TASKS, diff --git a/src/slurmd/common/task_plugin.c b/src/slurmd/common/task_plugin.c index 5d9c129e0e04a429a0388f019a832c51cf99d817..cedd4d1291ff4d4b6997146fc1fb4d287373376a 100644 --- a/src/slurmd/common/task_plugin.c +++ b/src/slurmd/common/task_plugin.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. 
All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/common/task_plugin.h b/src/slurmd/common/task_plugin.h index 730017735a81f9dccfa5546d7684ac1d00598c40..00034b7905c699f01728f0fdc6ea10bbed0b3b5b 100644 --- a/src/slurmd/common/task_plugin.h +++ b/src/slurmd/common/task_plugin.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/Makefile.in b/src/slurmd/slurmd/Makefile.in index 02e43f85c3797226b68fa0266ea1f8897942e57b..1d5206e8acf9882e2add2f5091f93fd1182b4a72 100644 --- a/src/slurmd/slurmd/Makefile.in +++ b/src/slurmd/slurmd/Makefile.in @@ -60,6 +60,7 @@ subdir = src/slurmd/slurmd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ 
$(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 
+281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/slurmd/slurmd/get_mach_stat.c b/src/slurmd/slurmd/get_mach_stat.c index edd5a0266345d80bf83eb012cd8229bcdc902ab6..1d4b782e4db317dbf18e0bd27b35940647733c99 100644 --- a/src/slurmd/slurmd/get_mach_stat.c +++ b/src/slurmd/slurmd/get_mach_stat.c @@ -13,7 +13,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/get_mach_stat.h b/src/slurmd/slurmd/get_mach_stat.h index a0bb36dbe5d6e0874178dd5ae913fd5a3c31ac29..83bf9a4336fc98d524bb7e0f11fda22d2ebc1325 100644 --- a/src/slurmd/slurmd/get_mach_stat.h +++ b/src/slurmd/slurmd/get_mach_stat.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/read_proc.c b/src/slurmd/slurmd/read_proc.c index c8d786127705310e63d80a0827eb186d8546b696..dfaa80ffa4824bf47ea5d02fc7df922f5fbd1107 100644 --- a/src/slurmd/slurmd/read_proc.c +++ b/src/slurmd/slurmd/read_proc.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 543feb42efc4f1e2bd9b6e2d10801f2aaba8d64a..b1015d05767cff773cd1da6d0fd2f6b809768229 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -165,6 +165,7 @@ static int _rpc_file_bcast(slurm_msg_t *msg); static int _rpc_ping(slurm_msg_t *); static int _rpc_health_check(slurm_msg_t *); static int _rpc_acct_gather_update(slurm_msg_t *); +static int _rpc_acct_gather_energy(slurm_msg_t *); static int _rpc_step_complete(slurm_msg_t *msg); static int _rpc_stat_jobacct(slurm_msg_t *msg); static int _rpc_list_pids(slurm_msg_t *msg); @@ -189,6 +190,7 @@ static bool _steps_completed_now(uint32_t jobid); static int _valid_sbcast_cred(file_bcast_msg_t *req, uid_t req_uid, uint16_t block_no); static void _wait_state_completed(uint32_t jobid, int max_delay); +static slurmstepd_info_t *_get_job_step_info(uint32_t jobid); static long _get_job_uid(uint32_t jobid); static gids_t *_gids_cache_lookup(char *user, gid_t gid); @@ -386,6 +388,11 @@ slurmd_req(slurm_msg_t *msg) last_slurmctld_msg = time(NULL); /* No body to free */ break; + case REQUEST_ACCT_GATHER_ENERGY: + debug2("Processing RPC: REQUEST_ACCT_GATHER_ENERGY"); + _rpc_acct_gather_energy(msg); + slurm_free_acct_gather_energy_req_msg(msg->data); + break; case REQUEST_JOB_ID: _rpc_pid2jid(msg); slurm_free_job_id_request_msg(msg->data); @@ -671,7 +678,9 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, return 
SLURM_FAILURE; } else if (pid > 0) { int rc = 0; +#ifndef SLURMSTEPD_MEMCHECK time_t start_time = time(NULL); +#endif /* * Parent sends initialization data to the slurmstepd * over the to_stepd pipe, and waits for the return code @@ -688,6 +697,11 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, error("Unable to init slurmstepd"); goto done; } + + /* If running under memcheck stdout doesn't work correctly so + * just skip it. + */ +#ifndef SLURMSTEPD_MEMCHECK if (read(to_slurmd[0], &rc, sizeof(int)) != sizeof(int)) { error("Error reading return code message " "from slurmstepd: %m"); @@ -700,7 +714,7 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, "memory", delta_time); } } - +#endif done: if (_remove_starting_step(type, req)) error("Error cleaning up starting_step list"); @@ -714,7 +728,12 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, error("close read to_slurmd in parent: %m"); return rc; } else { +#ifndef SLURMSTEPD_MEMCHECK char *const argv[2] = { (char *)conf->stepd_loc, NULL}; +#else + char *const argv[3] = {"memcheck", + (char *)conf->stepd_loc, NULL}; +#endif int failed = 0; /* inform slurmstepd about our config */ setenv("SLURM_CONF", conf->conffile, 1); @@ -759,7 +778,7 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, } fd_set_noclose_on_exec(STDERR_FILENO); log_fini(); - if(!failed) { + if (!failed) { execvp(argv[0], argv); error("exec of slurmstepd failed: %m"); } @@ -788,7 +807,7 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, uint32_t jobid = req->job_id; uint32_t stepid = req->job_step_id; int tasks_to_launch = req->tasks_to_launch[node_id]; - uint32_t job_cores=0, step_cores=0; + uint32_t job_cpus = 0, step_cpus = 0; /* * First call slurm_cred_verify() so that all valid @@ -909,11 +928,11 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, for (i=i_first_bit, j=0; i<i_last_bit; i++, j++) { char *who_has = NULL; if (bit_test(arg.job_core_bitmap, i)) { - job_cores++; 
+ job_cpus++; who_has = "Job"; } if (bit_test(arg.step_core_bitmap, i)) { - step_cores++; + step_cpus++; who_has = "Step"; } if (cpu_log && who_has) { @@ -923,11 +942,11 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, } if (cpu_log) info("===================="); - if (step_cores == 0) { + if (step_cpus == 0) { error("cons_res: zero processors allocated to step"); - step_cores = 1; + step_cpus = 1; } - /* NOTE: step_cores is the count of allocated resources + /* NOTE: step_cpus is the count of allocated resources * (typically cores). Convert to CPU count as needed */ if (i_last_bit <= i_first_bit) error("step credential has no CPUs selected"); @@ -935,20 +954,21 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, i = conf->cpus / (i_last_bit - i_first_bit); if (i > 1) { info("scaling CPU count by factor of %d", i); - step_cores *= i; + step_cpus *= i; + job_cpus *= i; } } - if (tasks_to_launch > step_cores) { + if (tasks_to_launch > step_cpus) { /* This is expected with the --overcommit option * or hyperthreads */ debug("cons_res: More than one tasks per logical " "processor (%d > %u) on host [%u.%u %ld %s] ", - tasks_to_launch, step_cores, arg.jobid, + tasks_to_launch, step_cpus, arg.jobid, arg.stepid, (long) arg.uid, arg.step_hostlist); } } else { - step_cores = 1; - job_cores = 1; + step_cpus = 1; + job_cpus = 1; } /* Overwrite any memory limits in the RPC with contents of the @@ -958,27 +978,27 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, if (arg.step_mem_limit & MEM_PER_CPU) { req->step_mem_lim = arg.step_mem_limit & (~MEM_PER_CPU); - req->step_mem_lim *= step_cores; + req->step_mem_lim *= step_cpus; } else req->step_mem_lim = arg.step_mem_limit; } else { if (arg.job_mem_limit & MEM_PER_CPU) { req->step_mem_lim = arg.job_mem_limit & (~MEM_PER_CPU); - req->step_mem_lim *= job_cores; + req->step_mem_lim *= job_cpus; } else req->step_mem_lim = arg.job_mem_limit; } if (arg.job_mem_limit & MEM_PER_CPU) { 
req->job_mem_lim = arg.job_mem_limit & (~MEM_PER_CPU); - req->job_mem_lim *= job_cores; + req->job_mem_lim *= job_cpus; } else req->job_mem_lim = arg.job_mem_limit; - req->cpus_allocated[node_id] = step_cores; + req->cpus_allocated[node_id] = step_cpus; #if 0 info("%u.%u node_id:%d mem orig:%u cpus:%u limit:%u", jobid, stepid, node_id, arg.job_mem_limit, - step_cores, req->job_mem_lim); + step_cpus, req->job_mem_lim); #endif *step_hset = s_hset; @@ -1755,7 +1775,7 @@ _cancel_step_mem_limit(uint32_t job_id, uint32_t step_id) kill_req.job_id = job_id; kill_req.job_step_id = step_id; kill_req.signal = SIGKILL; - kill_req.batch_flag = (uint16_t) 0; + kill_req.flags = (uint16_t) 0; msg.msg_type = REQUEST_CANCEL_JOB_STEP; msg.data = &kill_req; slurm_send_only_controller_msg(&msg); @@ -1842,17 +1862,27 @@ _enforce_job_mem_limit(void) acct_req.job_id = stepd->jobid; acct_req.step_id = stepd->stepid; resp = xmalloc(sizeof(job_step_stat_t)); - if ((!stepd_stat_jobacct(fd, &acct_req, resp)) && + + if (!stepd->stepd_info) + stepd->stepd_info = stepd_get_info(fd); + + if ((!stepd_stat_jobacct( + fd, &acct_req, resp, + stepd->stepd_info->protocol_version)) && (resp->jobacct)) { /* resp->jobacct is NULL if account is disabled */ jobacctinfo_getinfo((struct jobacctinfo *) resp->jobacct, JOBACCT_DATA_TOT_RSS, - &step_rss); + &step_rss, + stepd->stepd_info-> + protocol_version); jobacctinfo_getinfo((struct jobacctinfo *) resp->jobacct, JOBACCT_DATA_TOT_VSIZE, - &step_vsize); + &step_vsize, + stepd->stepd_info-> + protocol_version); #if _LIMIT_INFO info("Step:%u.%u RSS:%u KB VSIZE:%u KB", stepd->jobid, stepd->stepid, @@ -2043,6 +2073,57 @@ _rpc_acct_gather_update(slurm_msg_t *msg) return rc; } +static int +_rpc_acct_gather_energy(slurm_msg_t *msg) +{ + int rc = SLURM_SUCCESS; + uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + static bool first_msg = true; + + if (!_slurm_authorized_user(req_uid)) { + error("Security violation, acct_gather_update RPC from uid %d", + 
req_uid); + if (first_msg) { + error("Do you have SlurmUser configured as uid %d?", + req_uid); + } + rc = ESLURM_USER_ID_MISSING; /* or bad in this case */ + } + first_msg = false; + + if (rc != SLURM_SUCCESS) { + if (slurm_send_rc_msg(msg, rc) < 0) + error("Error responding to energy request: %m"); + } else { + slurm_msg_t resp_msg; + acct_gather_node_resp_msg_t acct_msg; + time_t now = time(NULL), last_poll = 0; + int data_type = ENERGY_DATA_STRUCT; + acct_gather_energy_req_msg_t *req = msg->data; + + acct_gather_energy_g_get_data(ENERGY_DATA_LAST_POLL, + &last_poll); + + /* If we polled later than delta seconds then force a + new poll. + */ + if ((now - last_poll) > req->delta) + data_type = ENERGY_DATA_JOULES_TASK; + + memset(&acct_msg, 0, sizeof(acct_gather_node_resp_msg_t)); + acct_msg.energy = acct_gather_energy_alloc(); + acct_gather_energy_g_get_data(data_type, acct_msg.energy); + + slurm_msg_t_copy(&resp_msg, msg); + resp_msg.msg_type = RESPONSE_ACCT_GATHER_ENERGY; + resp_msg.data = &acct_msg; + + slurm_send_node_msg(msg->conn_fd, &resp_msg); + + acct_gather_energy_destroy(acct_msg.energy); + } + return rc; +} static int _signal_jobstep(uint32_t jobid, uint32_t stepid, uid_t req_uid, @@ -2337,13 +2418,27 @@ _rpc_stat_jobacct(slurm_msg_t *msg) int fd; uid_t req_uid; long job_uid; + slurmstepd_info_t *stepd_info = NULL; + uint16_t protocol_version; debug3("Entering _rpc_stat_jobacct"); /* step completion messages are only allowed from other slurmstepd, so only root or SlurmUser is allowed here */ req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); - job_uid = _get_job_uid(req->job_id); + stepd_info = _get_job_step_info(req->job_id); + if (!stepd_info) { + error("stat_jobacct For invalid job_id: %u", + req->job_id); + if (msg->conn_fd >= 0) + slurm_send_rc_msg(msg, ESLURM_INVALID_JOB_ID); + return ESLURM_INVALID_JOB_ID; + } + + protocol_version = stepd_info->protocol_version; + job_uid = stepd_info->uid; + xfree(stepd_info); + if (job_uid < 0) { 
error("stat_jobacct for invalid job_id: %u", req->job_id); @@ -2382,7 +2477,8 @@ _rpc_stat_jobacct(slurm_msg_t *msg) } - if (stepd_stat_jobacct(fd, req, resp) == SLURM_ERROR) { + if (stepd_stat_jobacct(fd, req, resp, protocol_version) + == SLURM_ERROR) { debug("accounting for nonexistent job %u.%u requested", req->job_id, req->step_id); } @@ -2421,7 +2517,8 @@ _rpc_list_pids(slurm_msg_t *msg) so only root or SlurmUser is allowed here */ req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); - job_uid = _get_job_uid(req->job_id); + job_uid = _get_job_uid(req->job_id); + if (job_uid < 0) { error("stat_pid for invalid job_id: %u", req->job_id); @@ -2433,7 +2530,8 @@ _rpc_list_pids(slurm_msg_t *msg) /* * check that requesting user ID is the SLURM UID or root */ - if ((req_uid != job_uid) && (!_slurm_authorized_user(req_uid))) { + if ((req_uid != job_uid) + && (!_slurm_authorized_user(req_uid))) { error("stat_pid from uid %ld for job %u " "owned by uid %ld", (long) req_uid, req->job_id, job_uid); @@ -2679,9 +2777,11 @@ _rpc_file_bcast(slurm_msg_t *msg) #endif #endif - if ((rc = _valid_sbcast_cred(req, req_uid, req->block_no)) != - SLURM_SUCCESS) - return rc; + if (!_slurm_authorized_user(req_uid)) { + rc = _valid_sbcast_cred(req, req_uid, req->block_no); + if (rc != SLURM_SUCCESS) + return rc; + } info("sbcast req_uid=%u fname=%s block_no=%u", req_uid, req->fname, req->block_no); @@ -2885,15 +2985,13 @@ done: slurm_free_reattach_tasks_response_msg(resp); } -static long -_get_job_uid(uint32_t jobid) +static slurmstepd_info_t *_get_job_step_info(uint32_t jobid) { List steps; ListIterator i; step_loc_t *stepd; slurmstepd_info_t *info = NULL; int fd; - long uid = -1; steps = stepd_available(conf->spooldir, conf->node_name); i = list_iterator_create(steps); @@ -2917,13 +3015,25 @@ _get_job_uid(uint32_t jobid) stepd->jobid, stepd->stepid); continue; } - uid = (long)info->uid; break; } list_iterator_destroy(i); list_destroy(steps); - xfree(info); + return info; +} + +static 
long +_get_job_uid(uint32_t jobid) +{ + slurmstepd_info_t *info = NULL; + long uid = -1; + + if ((info = _get_job_step_info(jobid))) { + uid = (long)info->uid; + xfree(info); + } + return uid; } @@ -4156,7 +4266,7 @@ _destroy_env(char **env) { int i=0; - if(env) { + if (env) { for(i=0; env[i]; i++) { xfree(env[i]); } @@ -4518,7 +4628,7 @@ _getgroups(void) return NULL; } gg = (gid_t *)xmalloc(n * sizeof(gid_t)); - if(getgroups(n, gg) == -1) { + if (getgroups(n, gg) == -1) { error("_getgroups: couldn't get %d groups: %m", n); xfree(gg); return NULL; @@ -4559,7 +4669,7 @@ init_gids_cache(int cache) return; } orig_gids = (gid_t *)xmalloc(ngids * sizeof(gid_t)); - if(getgroups(ngids, orig_gids) == -1) { + if (getgroups(ngids, orig_gids) == -1) { error("init_gids_cache: couldn't get %d groups: %m", ngids); xfree(orig_gids); return; diff --git a/src/slurmd/slurmd/req.h b/src/slurmd/slurmd/req.h index 0ec5f45c6a0ff1a9bddb875d42c875c5882a7de1..9fa60f344c410b2c991421cff46936b761f6bc80 100644 --- a/src/slurmd/slurmd/req.h +++ b/src/slurmd/slurmd/req.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/reverse_tree_math.c b/src/slurmd/slurmd/reverse_tree_math.c index 73f5757b678d25e3da5ea4a1ae61c382eb962e69..7c8f554038d7a1da92cbd758ef44618e3ef32da6 100644 --- a/src/slurmd/slurmd/reverse_tree_math.c +++ b/src/slurmd/slurmd/reverse_tree_math.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/reverse_tree_math.h b/src/slurmd/slurmd/reverse_tree_math.h index 454962798065863a30f126da13e2514be39e21d1..1e72a48a6161a69b8a862ff9ef1db0286a32fa9c 100644 --- a/src/slurmd/slurmd/reverse_tree_math.h +++ b/src/slurmd/slurmd/reverse_tree_math.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 39126ade7eb031870b7c51efdc075521c64f61dc..1d5f4dbc1cf3f1f59e69c90673b65bd6ba1c5d92 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -169,7 +169,7 @@ static void _term_handler(int); static void _update_logging(void); static void _update_nice(void); static void _usage(void); -static void _wait_for_all_threads(void); +static void _wait_for_all_threads(int secs); int @@ -293,6 +293,8 @@ main (int argc, char *argv[]) if (!conf->cleanstart && (_restore_cred_state(conf->vctx) < 0)) return SLURM_FAILURE; + if (jobacct_gather_init() != SLURM_SUCCESS) + fatal("Unable to initialize jobacct_gather"); if (interconnect_node_init() < 0) fatal("Unable to initialize interconnect."); if (conf->cleanstart && switch_g_clear_node_state()) @@ -306,11 +308,9 @@ main (int argc, char *argv[]) so we keep the write lock of the pidfile. 
*/ pidfd = create_pidfile(conf->pidfile, 0); - if (pidfd >= 0) - fd_set_close_on_exec(pidfd); rfc2822_timestamp(time_stamp, sizeof(time_stamp)); - info("%s started on %s", xbasename(argv[0]), time_stamp); + info("%s started on %s", slurm_prog_name, time_stamp); _install_fork_handlers(); list_install_fork_handlers(); @@ -328,10 +328,7 @@ main (int argc, char *argv[]) if (unlink(conf->pidfile) < 0) error("Unable to remove pidfile `%s': %m", conf->pidfile); - _wait_for_all_threads(); - - interconnect_node_fini(); - + _wait_for_all_threads(120); _slurmd_fini(); _destroy_conf(); slurm_crypto_fini(); /* must be after _destroy_conf() */ @@ -403,6 +400,7 @@ _msg_engine(void) while (!_shutdown) { if (_reconfig) { verbose("got reconfigure request"); + _wait_for_all_threads(5); /* Wait for RPCs to finish */ _reconfigure(); } @@ -452,15 +450,16 @@ _increment_thd_count(void) slurm_mutex_unlock(&active_mutex); } +/* secs IN - wait up to this number of seconds for all threads to complete */ static void -_wait_for_all_threads(void) +_wait_for_all_threads(int secs) { struct timespec ts; int rc; ts.tv_sec = time(NULL); ts.tv_nsec = 0; - ts.tv_sec += 120; /* 2 minutes allowed for shutdown */ + ts.tv_sec += secs; slurm_mutex_lock(&active_mutex); while (active_threads > 0) { @@ -530,7 +529,7 @@ _service_connection(void *arg) debug3("in the service_connection"); slurm_msg_t_init(msg); - if((rc = slurm_receive_msg_and_forward(con->fd, con->cli_addr, msg, 0)) + if ((rc = slurm_receive_msg_and_forward(con->fd, con->cli_addr, msg, 0)) != SLURM_SUCCESS) { error("service_connection: slurm_receive_msg: %m"); /* if this fails we need to make sure the nodes we forward @@ -619,13 +618,13 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) if (first_msg) { first_msg = false; - info("Procs=%u Boards=%u Sockets=%u Cores=%u Threads=%u " + info("CPUs=%u Boards=%u Sockets=%u Cores=%u Threads=%u " "Memory=%u TmpDisk=%u Uptime=%u", msg->cpus, msg->boards, msg->sockets, msg->cores, 
msg->threads, msg->real_memory, msg->tmp_disk, msg->up_time); } else { - debug3("Procs=%u Boards=%u Sockets=%u Cores=%u Threads=%u " + debug3("CPUs=%u Boards=%u Sockets=%u Cores=%u Threads=%u " "Memory=%u TmpDisk=%u Uptime=%u", msg->cpus, msg->boards, msg->sockets, msg->cores, msg->threads, msg->real_memory, msg->tmp_disk, @@ -671,6 +670,7 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) close(fd); continue; } + close(fd); if (stepd->stepid == NO_VAL) debug("found apparently running job %u", stepd->jobid); @@ -724,6 +724,8 @@ _read_config(void) { char *path_pubkey = NULL; slurm_ctl_conf_t *cf = NULL; + uint16_t tmp16 = 0; + #ifndef HAVE_FRONT_END bool cr_flag = false, gang_flag = false; #endif @@ -786,8 +788,6 @@ _read_config(void) xfree(conf->block_map); xfree(conf->block_map_inv); - conf->block_map_size = 0; - _update_logging(); _update_nice(); @@ -840,7 +840,7 @@ _read_config(void) if (cf->fast_schedule) { info("Node configuration differs from hardware: " "CPUs=%u:%u(hw) Boards=%u:%u(hw) " - "Sockets=%u:%u(hw) CoresPerSocket=%u:%u(hw) " + "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) " "ThreadsPerCore=%u:%u(hw)", conf->cpus, conf->actual_cpus, conf->boards, conf->actual_boards, @@ -855,7 +855,7 @@ _read_config(void) "the bitmaps the slurmctld must create before " "the slurmd registers.\n" " CPUs=%u:%u(hw) Boards=%u:%u(hw) " - "Sockets=%u:%u(hw) CoresPerSocket=%u:%u(hw) " + "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) " "ThreadsPerCore=%u:%u(hw)", conf->cpus, conf->actual_cpus, conf->boards, conf->actual_boards, @@ -887,10 +887,24 @@ _read_config(void) conf->debug_flags = cf->debug_flags; conf->propagate_prio = cf->propagate_prio_process; - conf->job_acct_gather_freq = cf->job_acct_gather_freq; + + _free_and_set(&conf->job_acct_gather_freq, + xstrdup(cf->job_acct_gather_freq)); + + conf->acct_freq_task = (uint16_t)NO_VAL; + tmp16 = acct_gather_parse_freq(PROFILE_TASK, + conf->job_acct_gather_freq); + if (tmp16 != -1) + 
conf->acct_freq_task = tmp16; _free_and_set(&conf->acct_gather_energy_type, xstrdup(cf->acct_gather_energy_type)); + _free_and_set(&conf->acct_gather_filesystem_type, + xstrdup(cf->acct_gather_filesystem_type)); + _free_and_set(&conf->acct_gather_infiniband_type, + xstrdup(cf->acct_gather_infiniband_type)); + _free_and_set(&conf->acct_gather_profile_type, + xstrdup(cf->acct_gather_profile_type)); _free_and_set(&conf->job_acct_gather_type, xstrdup(cf->job_acct_gather_type)); @@ -964,7 +978,7 @@ _reconfigure(void) stepd->jobid, stepd->stepid); if (fd == -1) continue; - if(stepd_reconfig(fd) != SLURM_SUCCESS) + if (stepd_reconfig(fd) != SLURM_SUCCESS) debug("Reconfig jobid=%u.%u failed: %m", stepd->jobid, stepd->stepid); close(fd); @@ -1106,12 +1120,16 @@ _destroy_conf(void) { if (conf) { xfree(conf->acct_gather_energy_type); + xfree(conf->acct_gather_filesystem_type); + xfree(conf->acct_gather_infiniband_type); + xfree(conf->acct_gather_profile_type); xfree(conf->block_map); xfree(conf->block_map_inv); xfree(conf->conffile); xfree(conf->epilog); xfree(conf->health_check_program); xfree(conf->hostname); + xfree(conf->job_acct_gather_freq); xfree(conf->job_acct_gather_type); xfree(conf->logfile); xfree(conf->node_name); @@ -1156,7 +1174,7 @@ _print_config(void) &conf->actual_threads, &conf->block_map_size, &conf->block_map, &conf->block_map_inv); - printf("CPUs=%u Boards=%u Sockets=%u CoresPerSocket=%u " + printf("CPUs=%u Boards=%u SocketsPerBoard=%u CoresPerSocket=%u " "ThreadsPerCore=%u ", conf->actual_cpus, conf->actual_boards, conf->actual_sockets, conf->actual_cores, conf->actual_threads); @@ -1223,6 +1241,7 @@ _process_cmdline(int ac, char **av) break; case 'v': conf->debug_level++; + conf->debug_level_set = 1; break; case 'V': print_slurm_version(); @@ -1369,8 +1388,7 @@ _slurmd_init(void) cpu_freq_init(conf); _print_conf(); - if (jobacct_gather_init() != SLURM_SUCCESS) - return SLURM_FAILURE; + if (slurm_proctrack_init() != SLURM_SUCCESS) return SLURM_FAILURE; 
if (slurmd_task_init() != SLURM_SUCCESS) @@ -1476,11 +1494,10 @@ _slurmd_init(void) init_gids_cache(0); slurm_conf_unlock(); - if ((devnull = open("/dev/null", O_RDWR)) < 0) { + if ((devnull = open_cloexec("/dev/null", O_RDWR)) < 0) { error("Unable to open /dev/null: %m"); return SLURM_FAILURE; } - fd_set_close_on_exec(devnull); /* make sure we have slurmstepd installed */ if (stat(conf->stepd_loc, &stat_buf)) @@ -1544,6 +1561,7 @@ cleanup: static int _slurmd_fini(void) { + interconnect_node_fini(); save_cred_state(conf->vctx); switch_fini(); slurmd_task_fini(); @@ -1556,10 +1574,9 @@ _slurmd_fini(void) slurmd_req(NULL); /* purge memory allocated by slurmd_req() */ fini_setproctitle(); slurm_select_fini(); - jobacct_gather_fini(); - acct_gather_energy_fini(); spank_slurmd_exit(); cpu_freq_fini(); + acct_gather_conf_destroy(); return SLURM_SUCCESS; } @@ -1603,11 +1620,11 @@ int save_cred_state(slurm_cred_ctx_t ctx) goto cleanup; } (void) unlink(old_file); - if(link(reg_file, old_file)) + if (link(reg_file, old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink(reg_file); - if(link(new_file, reg_file)) + if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink(new_file); @@ -1741,12 +1758,9 @@ static void _update_logging(void) log_options_t *o = &conf->log_opts; slurm_ctl_conf_t *cf; - /* - * Initialize debug level if not already set - */ + /* Preserve execute line verbose arguments (if any) */ cf = slurm_conf_lock(); - if ( (conf->debug_level == LOG_LEVEL_INFO) - && (cf->slurmd_debug != (uint16_t) NO_VAL) ) + if (!conf->debug_level_set && (cf->slurmd_debug != (uint16_t) NO_VAL)) conf->debug_level = cf->slurmd_debug; slurm_conf_unlock(); diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h index cf819278a0eb94c9388f734f3542111906599d10..b721e3596a8a2d60ac718459e6e8b81c7b9aac69 100644 --- a/src/slurmd/slurmd/slurmd.h +++ b/src/slurmd/slurmd/slurmd.h @@ -9,7 
+9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -123,6 +123,7 @@ typedef struct slurmd_config { pid_t pid; /* server pid */ log_options_t log_opts; /* current logging options */ int debug_level; /* logging detail level */ + uint16_t debug_level_set; /* debug_level set on command line */ uint32_t debug_flags; /* DebugFlags configured */ int daemonize:1; /* daemonize flag */ int cleanstart:1; /* clean start requested (-c) */ @@ -133,9 +134,13 @@ typedef struct slurmd_config { uint16_t slurmd_timeout; /* SlurmdTimeout */ uid_t slurm_user_id; /* UID that slurmctld runs as */ pthread_mutex_t config_mutex; /* lock for slurmd_config access */ - uint16_t job_acct_gather_freq; - char *job_acct_gather_type; /* job accounting gather type */ + uint16_t acct_freq_task; + char *job_acct_gather_freq; + char *job_acct_gather_type; /* job accounting gather type */ char *acct_gather_energy_type; /* */ + char *acct_gather_filesystem_type; /* */ + char *acct_gather_infiniband_type; /* */ + char *acct_gather_profile_type; /* */ uint16_t use_pam; uint16_t task_plugin_param; /* TaskPluginParams, expressed * using cpu_bind_type_t flags */ diff --git a/src/slurmd/slurmd/xcpu.c b/src/slurmd/slurmd/xcpu.c index db8f419ce76d8d00405b7d393d9ce8a95adc7e22..2ecac5f8fda50da061c7ea2f454c334d9ae43453 100644 --- a/src/slurmd/slurmd/xcpu.c +++ b/src/slurmd/slurmd/xcpu.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmd/xcpu.h b/src/slurmd/slurmd/xcpu.h index d2223395d2d9fdef6c4cc0f0dae86ac3095b6481..5ed081390603a350d81855a814dd1a092f0b6d8f 100644 --- a/src/slurmd/slurmd/xcpu.h +++ b/src/slurmd/slurmd/xcpu.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/Makefile.in b/src/slurmd/slurmstepd/Makefile.in index e62800e6a51105f41a8590745a9223da53077c77..ec5b6a10e4ab9f621de17f5133671ee2fe3d1daa 100644 --- a/src/slurmd/slurmstepd/Makefile.in +++ b/src/slurmd/slurmstepd/Makefile.in @@ -60,6 +60,7 @@ subdir = src/slurmd/slurmstepd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ 
$(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -163,6 +167,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -183,6 +189,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -192,6 +201,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -199,6 +210,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -233,6 +253,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -260,6 +283,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = 
@RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/slurmd/slurmstepd/fname.c b/src/slurmd/slurmstepd/fname.c index 83aa3297559a665f89482a3904c920c0617f6886..bf0f38f9289b4ba18869a40a30198581b2d17b81 100644 --- a/src/slurmd/slurmstepd/fname.c +++ b/src/slurmd/slurmstepd/fname.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -49,9 +49,10 @@ #include "src/slurmd/slurmd/slurmd.h" #include "src/slurmd/slurmstepd/fname.h" +#include "src/common/uid.h" +#include "src/common/xassert.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" -#include "src/common/xassert.h" /* * Max zero-padding width @@ -67,6 +68,7 @@ fname_create(slurmd_job_t *job, const char *format, int taskid) unsigned int wid = 0; char *name = NULL; char *orig = xstrdup(format); + char *uname; char *p, *q; int id; @@ -83,7 +85,7 @@ fname_create(slurmd_job_t *job, const char *format, int taskid) } q = p = orig; - while(*p != '\0') { + while (*p != '\0') { if (*p == '%') { if (isdigit(*(++p))) { unsigned long in_width = 0; @@ -98,6 +100,18 @@ fname_create(slurmd_job_t *job, const char *format, int taskid) } switch (*p) { + case 'a': /* '%a' => array task id */ + xmemcat(name, q, p - 1); + xstrfmtcat(name, "%0*d", wid, + job->array_task_id); + q = ++p; + break; + case 'A': /* '%A' => array master job id */ + xmemcat(name, q, p - 1); + xstrfmtcat(name, "%0*d", wid, + job->array_job_id); + q = ++p; + break; case 's': /* '%s' => step id */ xmemcat(name, q, p - 1); xstrfmtcat(name, "%0*d", wid, job->stepid); @@ -118,8 +132,15 @@ fname_create(slurmd_job_t *job, const char *format, int taskid) xstrfmtcat(name, "%s", conf->hostname); q = ++p; break; - case 'J': - case 'j': 
+ case 'u': /* '%u' => user name */ + uname = uid_to_string(job->uid); + xmemcat(name, q, p - 1); + xstrfmtcat(name, "%s", uname); + xfree(uname); + q = ++p; + break; + case 'J': /* '%J' => jobid.stepid */ + case 'j': /* '%j' => jobid */ xmemcat(name, q, p - 1); xstrfmtcat(name, "%0*d", wid, job->jobid); diff --git a/src/slurmd/slurmstepd/fname.h b/src/slurmd/slurmstepd/fname.h index d4358d1e06a83cca7003e8d198dc884d5216d371..61356caf6a638fafba775fc6dcbba6b8891c9214 100644 --- a/src/slurmd/slurmstepd/fname.h +++ b/src/slurmd/slurmstepd/fname.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c index e118d950e2b083f883c6a911cff3440f4995fc93..668623dfe936663c6225560c07b5fb0dc38c6709 100644 --- a/src/slurmd/slurmstepd/io.c +++ b/src/slurmd/slurmstepd/io.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -72,18 +72,20 @@ #include <unistd.h> #include <errno.h> +#include "src/common/cbuf.h" #include "src/common/eio.h" +#include "src/common/fd.h" #include "src/common/io_hdr.h" -#include "src/common/cbuf.h" +#include "src/common/list.h" #include "src/common/log.h" #include "src/common/macros.h" -#include "src/common/fd.h" -#include "src/common/list.h" +#include "src/common/net.h" #include "src/common/read_config.h" +#include "src/common/write_labelled_message.h" #include "src/common/xmalloc.h" #include "src/common/xsignal.h" #include "src/common/xstring.h" -#include "src/common/write_labelled_message.h" + #include "src/slurmd/slurmd/slurmd.h" #include "src/slurmd/slurmstepd/io.h" @@ -284,9 +286,6 @@ _client_writable(eio_obj_t *obj) struct io_buf *msg; client->msg_queue = list_create(NULL); /* need destructor */ msgs = list_iterator_create(client->job->outgoing_cache); - if (!msgs) - fatal("Could not allocate iterator"); - while ((msg = list_next(msgs))) { msg->ref_count++; list_enqueue(client->msg_queue, msg); @@ -556,7 +555,7 @@ _local_file_write(eio_obj_t *obj, List objs) if (!header_tmp_buf) fatal("Failure to allocate memory for a message header"); io_hdr_unpack(&header, header_tmp_buf); - header_tmp_buf->head = NULL; + header_tmp_buf->head = NULL; /* CLANG false positive bug here */ free_buf(header_tmp_buf); /* A zero-length message indicates the end of a stream from one @@ -1227,10 +1226,7 @@ _send_connection_okay_response(slurmd_job_t *job) } clients = list_iterator_create(job->clients); - if (!clients) - fatal("Could not allocate memory"); - - while((eio = list_next(clients))) { + while ((eio = list_next(clients))) { client = (struct client_io_info *)eio->arg; if (client->out_eof || client->is_local_file) continue; @@ -1273,7 +1269,7 @@ _build_connection_okay_message(slurmd_job_t *job) msg->ref_count = 0; /* make certain it is initialized */ /* free the Buf packbuf, but not the memory 
to which it points */ - packbuf->head = NULL; + packbuf->head = NULL; /* CLANG false positive bug here */ free_buf(packbuf); return msg; @@ -1300,10 +1296,7 @@ _route_msg_task_to_client(eio_obj_t *obj) /* Add message to the msg_queue of all clients */ clients = list_iterator_create(out->job->clients); - if (!clients) - fatal("Could not allocate iterator"); - - while((eio = list_next(clients))) { + while ((eio = list_next(clients))) { client = (struct client_io_info *)eio->arg; if (client->out_eof == true) continue; @@ -1385,14 +1378,14 @@ _free_all_outgoing_msgs(List msg_queue, slurmd_job_t *job) struct io_buf *msg; msgs = list_iterator_create(msg_queue); - if (!msgs) - fatal("Could not allocate iterator"); while((msg = list_next(msgs))) { _free_outgoing_msg(msg, job); } list_iterator_destroy(msgs); } +/* Close I/O file descriptors created by slurmstepd. The connections have + * all been moved to the spawned tasks stdin/out/err file descriptors. */ extern void io_close_task_fds(slurmd_job_t *job) { @@ -1448,8 +1441,6 @@ io_close_local_fds(slurmd_job_t *job) return; clients = list_iterator_create(job->clients); - if (!clients) - fatal("Could not allocate iterator"); while((eio = list_next(clients))) { client = (struct client_io_info *)eio->arg; if (client->is_local_file) { @@ -1761,10 +1752,12 @@ _send_eof_msg(struct task_read_info *out) msg->length = io_hdr_packed_size() + header.length; msg->ref_count = 0; /* make certain it is initialized */ + /* free the Buf packbuf, but not the memory to which it points */ + packbuf->head = NULL; /* CLANG false positive bug here */ + free_buf(packbuf); + /* Add eof message to the msg_queue of all clients */ clients = list_iterator_create(out->job->clients); - if (!clients) - fatal("Could not allocate iterator"); while((eio = list_next(clients))) { client = (struct client_io_info *)eio->arg; debug5("======================== Enqueued eof message"); @@ -1853,7 +1846,7 @@ _task_build_message(struct task_read_info *out, slurmd_job_t 
*job, cbuf_t cbuf) msg->ref_count = 0; /* make certain it is initialized */ /* free the Buf packbuf, but not the memory to which it points */ - packbuf->head = NULL; + packbuf->head = NULL; /* CLANG false positive bug here */ free_buf(packbuf); debug4("Leaving _task_build_message"); @@ -1949,6 +1942,7 @@ _user_managed_io_connect(srun_info_t *srun, uint32_t gtid) if (fd == -1) return -1; + net_set_keep_alive(fd); if (slurm_send_node_msg(fd, &msg) == -1) { close(fd); return -1; diff --git a/src/slurmd/slurmstepd/io.h b/src/slurmd/slurmstepd/io.h index 9c5ac04cf58e685f289e39a359091a78ef841a91..086c4f31fbd2961184ee21df926d041ab38a243c 100644 --- a/src/slurmd/slurmstepd/io.h +++ b/src/slurmd/slurmstepd/io.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index c7d3063424e5d2e260e733db68dccd03e4a6e335..eabb0d7f14567ccdbb6c237f7112e9212bc0ce68 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -96,6 +96,7 @@ #include "src/common/plugstack.h" #include "src/common/safeopen.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/switch.h" #include "src/common/util-net.h" #include "src/common/xmalloc.h" @@ -179,7 +180,7 @@ static void _set_prio_process (slurmd_job_t *job); static void _set_job_log_prefix(slurmd_job_t *job); static int _setup_normal_io(slurmd_job_t *job); static int _drop_privileges(slurmd_job_t *job, bool do_setuid, - struct priv_state *state); + struct priv_state *state, bool get_list); static int _reclaim_privileges(struct priv_state *state); static void _send_launch_resp(slurmd_job_t *job, int rc); static int _slurmd_job_log_init(slurmd_job_t *job); @@ -258,7 +259,7 @@ static uint32_t _get_exit_code(slurmd_job_t *job) for (i = 0; i < job->node_tasks; i++) { /* If signalled we only need to check one and then * break out of the loop */ - if(WIFSIGNALED(job->task[i]->estatus)) { + if (WIFSIGNALED(job->task[i]->estatus)) { step_rc = job->task[i]->estatus; break; } @@ -440,7 +441,7 @@ _setup_normal_io(slurmd_job_t *job) * descriptors (which may be connected to files), then * reclaim privileges. 
*/ - if (_drop_privileges(job, true, &sprivs) < 0) + if (_drop_privileges(job, true, &sprivs, true) < 0) return ESLURMD_SET_UID_OR_GID_ERROR; if (io_init_tasks_stdio(job) != SLURM_SUCCESS) { @@ -554,7 +555,7 @@ _setup_normal_io(slurmd_job_t *job) } } - if(io_initial_client_connect(srun, job, srun_stdout_tasks, + if (io_initial_client_connect(srun, job, srun_stdout_tasks, srun_stderr_tasks) < 0) { rc = ESLURMD_IO_ERROR; goto claim; @@ -725,7 +726,8 @@ _one_step_complete_msg(slurmd_job_t *job, int first, int last) if (!acct_sent) { jobacctinfo_aggregate(step_complete.jobacct, job->jobacct); jobacctinfo_getinfo(step_complete.jobacct, - JOBACCT_DATA_TOTAL, msg.jobacct); + JOBACCT_DATA_TOTAL, msg.jobacct, + SLURM_PROTOCOL_VERSION); acct_sent = true; } /*********************************************/ @@ -900,6 +902,9 @@ job_manager(slurmd_job_t *job) debug ("Unable to set dumpable to 1"); #endif /* PR_SET_DUMPABLE */ + /* run now so we don't drop permissions on any of the gather plugins */ + acct_gather_conf_init(); + /* * Preload plugins. 
*/ @@ -992,7 +997,7 @@ job_manager(slurmd_job_t *job) rc = SLURM_FAILURE; goto fail2; } - + /* calls pam_setup() and requires pam_finish() if successful */ if ((rc = _fork_all_tasks(job, &io_initialized)) < 0) { debug("_fork_all_tasks failed"); @@ -1024,7 +1029,9 @@ job_manager(slurmd_job_t *job) _send_launch_resp(job, 0); _wait_for_all_tasks(job); - jobacct_gather_endpoll(); + acct_gather_profile_endpoll(); + acct_gather_profile_g_node_step_end(); + acct_gather_profile_fini(); job->state = SLURMSTEPD_STEP_ENDING; @@ -1100,10 +1107,11 @@ fail1: _send_launch_resp(job, rc); } - if (job->aborted) - info("job_manager exiting with aborted job"); - else if (!job->batch && (step_complete.rank > -1)) { - _wait_for_children_slurmstepd(job); + if (!job->batch && (step_complete.rank > -1)) { + if (job->aborted) + info("job_manager exiting with aborted job"); + else + _wait_for_children_slurmstepd(job); _send_step_complete_msgs(job); } @@ -1123,7 +1131,7 @@ _pre_task_privileged(slurmd_job_t *job, int taskid, struct priv_state *sp) if (pre_launch_priv(job) < 0) return error("pre_launch_priv failed"); - return(_drop_privileges (job, true, sp)); + return(_drop_privileges (job, true, sp, false)); } struct exec_wait_info { @@ -1166,6 +1174,7 @@ static void exec_wait_info_destroy (struct exec_wait_info *e) close (e->childfd); e->id = -1; e->pid = -1; + xfree(e); } static pid_t exec_wait_get_pid (struct exec_wait_info *e) @@ -1321,7 +1330,7 @@ _fork_all_tasks(slurmd_job_t *job, bool *io_initialized) /* Temporarily drop effective privileges, except for the euid. * We need to wait until after pam_setup() to drop euid. 
*/ - if (_drop_privileges (job, false, &sprivs) < 0) + if (_drop_privileges (job, false, &sprivs, true) < 0) return ESLURMD_SET_UID_OR_GID_ERROR; if (pam_setup(job->pwd->pw_name, conf->hostname) @@ -1361,7 +1370,7 @@ _fork_all_tasks(slurmd_job_t *job, bool *io_initialized) /* * Temporarily drop effective privileges */ - if (_drop_privileges (job, true, &sprivs) < 0) { + if (_drop_privileges (job, true, &sprivs, true) < 0) { error ("_drop_privileges: %m"); rc = SLURM_ERROR; goto fail2; @@ -1398,6 +1407,7 @@ _fork_all_tasks(slurmd_job_t *job, bool *io_initialized) pid_t pid; struct exec_wait_info *ei; + acct_gather_profile_g_task_start(i); if ((ei = fork_child_with_wait_info (i)) == NULL) { error("child fork: %m"); exec_wait_kill_children (exec_wait_list); @@ -1659,7 +1669,7 @@ static int _wait_for_any_task(slurmd_job_t *job, bool waitflag) { slurmd_task_info_t *t = NULL; - int status; + int status = 0; pid_t pid; int completed = 0; jobacctinfo_t *jobacct = NULL; @@ -1687,7 +1697,8 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) jobacct = jobacct_gather_remove_task(pid); if (jobacct) { jobacctinfo_setinfo(jobacct, - JOBACCT_DATA_RUSAGE, &rusage); + JOBACCT_DATA_RUSAGE, &rusage, + SLURM_PROTOCOL_VERSION); /* Since we currently don't track energy usage per task (only per step). We take into account only the last poll of the last task. 
@@ -1703,6 +1714,7 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) jobacctinfo_aggregate(job->jobacct, jobacct); jobacctinfo_destroy(jobacct); } + acct_gather_profile_g_task_end(pid); /*********************************************/ if ((t = job_task_info_by_pid(job, pid))) { @@ -2106,7 +2118,8 @@ _send_complete_batch_script_msg(slurmd_job_t *job, int err, int status) static int -_drop_privileges(slurmd_job_t *job, bool do_setuid, struct priv_state *ps) +_drop_privileges(slurmd_job_t *job, bool do_setuid, + struct priv_state *ps, bool get_list) { ps->saved_uid = getuid(); ps->saved_gid = getgid(); @@ -2117,14 +2130,15 @@ _drop_privileges(slurmd_job_t *job, bool do_setuid, struct priv_state *ps) } ps->ngids = getgroups(0, NULL); - - ps->gid_list = (gid_t *) xmalloc(ps->ngids * sizeof(gid_t)); - - if(getgroups(ps->ngids, ps->gid_list) == -1) { - error("_drop_privileges: couldn't get %d groups: %m", - ps->ngids); - xfree(ps->gid_list); - return -1; + if (get_list) { + ps->gid_list = (gid_t *) xmalloc(ps->ngids * sizeof(gid_t)); + + if (getgroups(ps->ngids, ps->gid_list) == -1) { + error("_drop_privileges: couldn't get %d groups: %m", + ps->ngids); + xfree(ps->gid_list); + return -1; + } } /* @@ -2153,27 +2167,25 @@ _drop_privileges(slurmd_job_t *job, bool do_setuid, struct priv_state *ps) static int _reclaim_privileges(struct priv_state *ps) { + int rc = SLURM_SUCCESS; + /* * No need to reclaim privileges if our uid == pwd->pw_uid */ if (geteuid() == ps->saved_uid) - return SLURM_SUCCESS; - - if (seteuid(ps->saved_uid) < 0) { + goto done; + else if (seteuid(ps->saved_uid) < 0) { error("seteuid: %m"); - return -1; - } - - if (setegid(ps->saved_gid) < 0) { + rc = -1; + } else if (setegid(ps->saved_gid) < 0) { error("setegid: %m"); - return -1; - } - - setgroups(ps->ngids, ps->gid_list); - + rc = -1; + } else + setgroups(ps->ngids, ps->gid_list); +done: xfree(ps->gid_list); - return SLURM_SUCCESS; + return rc; } @@ -2418,7 +2430,7 @@ _run_script_as_user(const char 
*name, const char *path, slurmd_job_t *job, argv[0] = (char *)xstrdup(path); argv[1] = NULL; - if (_drop_privileges(job, true, &sprivs) < 0) { + if (_drop_privileges(job, true, &sprivs, false) < 0) { error("run_script_as_user _drop_privileges: %m"); /* child process, should not return */ exit(127); @@ -2430,7 +2442,7 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, exit(127); } - if(chdir(job->cwd) == -1) + if (chdir(job->cwd) == -1) error("run_script_as_user: couldn't " "change working dir to %s: %m", job->cwd); #ifdef SETPGRP_TWO_ARGS diff --git a/src/slurmd/slurmstepd/mgr.h b/src/slurmd/slurmstepd/mgr.h index 69d495428cb9c4544e45b5a6573358e8433eca31..fa303017cd8899ef61c687ebccefbd734eb6b6a9 100644 --- a/src/slurmd/slurmstepd/mgr.h +++ b/src/slurmd/slurmstepd/mgr.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/multi_prog.c b/src/slurmd/slurmstepd/multi_prog.c index 54be8ec5421c1c5530dcda43a626d24ce6c7764d..afbbe743fbcc2f1ab98f7d38c380ac0a89ec99f6 100644 --- a/src/slurmd/slurmstepd/multi_prog.c +++ b/src/slurmd/slurmstepd/multi_prog.c @@ -14,7 +14,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/multi_prog.h b/src/slurmd/slurmstepd/multi_prog.h index 71c2b26ac325f9e2bbc3bcbdea3ae85ac208fc8c..3d7d204f0f3e1d9167dc4f7664883dc9702f5ba5 100644 --- a/src/slurmd/slurmstepd/multi_prog.h +++ b/src/slurmd/slurmstepd/multi_prog.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/pam_ses.c b/src/slurmd/slurmstepd/pam_ses.c index 93afa8f8c5f628fcad82773401e9d947a8afec10..84da0f7ab4332574105aee6477444036f73297ea 100644 --- a/src/slurmd/slurmstepd/pam_ses.c +++ b/src/slurmd/slurmstepd/pam_ses.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/pam_ses.h b/src/slurmd/slurmstepd/pam_ses.h index 81160e2431140c5b48eba6932855093a49f1155d..a5b26e99ce2cb1a81148f85165f799b5db81ee55 100644 --- a/src/slurmd/slurmstepd/pam_ses.h +++ b/src/slurmd/slurmstepd/pam_ses.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/pdebug.c b/src/slurmd/slurmstepd/pdebug.c index b5dd8935064a2dbc46c6739b7baabc792149514d..7a421b84d844a33c5861679cd437412e79bcd139 100644 --- a/src/slurmd/slurmstepd/pdebug.c +++ b/src/slurmd/slurmstepd/pdebug.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/pdebug.h b/src/slurmd/slurmstepd/pdebug.h index d40a9f7c0f56ae0fb7d7d8f3f605a82d0547ccf8..574282dfe29b4d780e6c158250f8773cdefe5c0f 100644 --- a/src/slurmd/slurmstepd/pdebug.h +++ b/src/slurmd/slurmstepd/pdebug.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index d4ab8d45d0e61fb0e7b0c4ad45015460e6ed59aa..7f84c8ca82b1da536036224701537edc8e73f4b2 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -48,6 +48,7 @@ #include <unistd.h> #include <signal.h> #include <time.h> +#include <stdlib.h> #include "src/common/cpu_frequency.h" #include "src/common/fd.h" @@ -56,6 +57,7 @@ #include "src/slurmd/common/proctrack.h" #include "src/common/slurm_auth.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather.h" #include "src/common/stepd_api.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -591,6 +593,7 @@ _handle_signal_process_group(int fd, slurmd_job_t *job, uid_t uid) { int rc = SLURM_SUCCESS; int signal; + char *ptr = NULL; debug3("_handle_signal_process_group for job %u.%u", job->jobid, job->stepid); @@ -628,8 +631,11 @@ _handle_signal_process_group(int fd, slurmd_job_t *job, uid_t uid) /* * Print a message in the step output before killing when * SIGTERM or SIGKILL are sent + * hjcao: print JOB/STEP KILLED msg on specific node id only */ - if ((signal == SIGTERM) || (signal == SIGKILL)) { + ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID"); + if ((!ptr || atoi(ptr) == job->nodeid) && + ((signal == SIGTERM) || (signal == SIGKILL))) { time_t now = time(NULL); char entity[24], time_str[24]; if (job->stepid == SLURM_BATCH_SCRIPT) { @@ -669,13 +675,11 @@ _handle_signal_task_local(int fd, slurmd_job_t *job, uid_t uid) int signal; int ltaskid; /* local task index */ - debug("_handle_signal_task_local for job %u.%u", - job->jobid, job->stepid); - safe_read(fd, &signal, sizeof(int)); safe_read(fd, <askid, sizeof(int)); + debug("_handle_signal_task_local for step=%u.%u uid=%d signal=%d", + job->jobid, job->stepid, (int) uid, signal); - debug3(" uid = %d", uid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("kill req from uid %ld for job %u.%u owned by uid %ld", (long)uid, job->jobid, job->stepid, (long)job->uid); @@ -745,12 +749,9 @@ _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid) int sig; static int 
msg_sent = 0; - debug("_handle_signal_container for job %u.%u", - job->jobid, job->stepid); - safe_read(fd, &sig, sizeof(int)); - - debug3(" uid = %d", uid); + debug("_handle_signal_container for step=%u.%u uid=%d signal=%d", + job->jobid, job->stepid, (int) uid, sig); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("kill container req from uid %ld for job %u.%u " "owned by uid %ld", @@ -979,11 +980,10 @@ _handle_terminate(int fd, slurmd_job_t *job, uid_t uid) int rc = SLURM_SUCCESS; int errnum = 0; - debug("_handle_terminate for job %u.%u", - job->jobid, job->stepid); + debug("_handle_terminate for step=%u.%u uid=%d", + job->jobid, job->stepid, uid); step_terminate_monitor_start(job->jobid, job->stepid); - debug3(" uid = %d", uid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("terminate req from uid %ld for job %u.%u " "owned by uid %ld", @@ -1103,9 +1103,14 @@ done: xfree(gtids); for (i = 0; i < job->node_tasks; i++) { - len = strlen(job->task[i]->argv[0]) + 1; - safe_write(fd, &len, sizeof(int)); - safe_write(fd, job->task[i]->argv[0], len); + if (job->task[i] && job->task[i]->argv) { + len = strlen(job->task[i]->argv[0]) + 1; + safe_write(fd, &len, sizeof(int)); + safe_write(fd, job->task[i]->argv[0], len); + } else { + len = 0; + safe_write(fd, &len, sizeof(int)); + } } } @@ -1153,8 +1158,8 @@ _handle_suspend(int fd, slurmd_job_t *job, uid_t uid) int rc = SLURM_SUCCESS; int errnum = 0; - debug("_handle_suspend for job %u.%u", job->jobid, job->stepid); - debug3(" uid = %d", uid); + debug("_handle_suspend for step=%u.%u uid=%d", + job->jobid, job->stepid, (int) uid); if (!_slurm_authorized_user(uid)) { debug("job step suspend request from uid %ld for job %u.%u ", (long)uid, job->jobid, job->stepid); @@ -1171,7 +1176,7 @@ _handle_suspend(int fd, slurmd_job_t *job, uid_t uid) goto done; } - jobacct_gather_suspend_poll(); + acct_gather_suspend_poll(); if (launch_poe == -1) { char *launch_type = slurm_get_launch_type(); if 
(!strcmp(launch_type, "launch/poe")) @@ -1259,7 +1264,7 @@ _handle_resume(int fd, slurmd_job_t *job, uid_t uid) goto done; } - jobacct_gather_resume_poll(); + acct_gather_resume_poll(); /* * Signal the container */ @@ -1418,7 +1423,8 @@ _handle_stat_jobacct(int fd, slurmd_job_t *job, uid_t uid) "owned by uid %ld", (long)uid, job->jobid, job->stepid, (long)job->uid); /* Send NULL */ - jobacctinfo_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd); + jobacctinfo_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd, + SLURM_PROTOCOL_VERSION); return SLURM_ERROR; } @@ -1427,13 +1433,14 @@ _handle_stat_jobacct(int fd, slurmd_job_t *job, uid_t uid) for (i = 0; i < job->node_tasks; i++) { temp_jobacct = jobacct_gather_stat_task(job->task[i]->pid); - if(temp_jobacct) { + if (temp_jobacct) { jobacctinfo_aggregate(jobacct, temp_jobacct); jobacctinfo_destroy(temp_jobacct); num_tasks++; } } - jobacctinfo_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd); + jobacctinfo_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd, + SLURM_PROTOCOL_VERSION); safe_write(fd, &num_tasks, sizeof(int)); jobacctinfo_destroy(jobacct); return SLURM_SUCCESS; diff --git a/src/slurmd/slurmstepd/req.h b/src/slurmd/slurmstepd/req.h index af674b754dcd686ecbaa834116422da0abedcf52..f876a55ac2696a9e7c2810fa73391569c9bc6885 100644 --- a/src/slurmd/slurmstepd/req.h +++ b/src/slurmd/slurmstepd/req.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index e6a4fe313235f11fd1399f533173362dd14838b8..5640d03eaf8d257e1667a5dbf360f5b79d92b488 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. 
All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -50,6 +50,7 @@ #include "src/common/cpu_frequency.h" #include "src/common/gres.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_rlimits_info.h" #include "src/common/stepd_api.h" #include "src/common/switch.h" @@ -162,8 +163,11 @@ main (int argc, char *argv[]) ending: #ifdef MEMORY_LEAK_DEBUG + acct_gather_conf_destroy(); _step_cleanup(job, msg, rc); + fini_setproctitle(); + xfree(cli); xfree(self); xfree(conf->hostname); @@ -187,6 +191,7 @@ static slurmd_conf_t * read_slurmd_conf_lite (int fd) int len; Buf buffer; slurmd_conf_t *confl; + int tmp_int = 0; /* First check to see if we've already initialized the * global slurmd_conf_t in 'conf'. Allocate memory if not. @@ -221,6 +226,13 @@ static slurmd_conf_t * read_slurmd_conf_lite (int fd) } else confl->log_opts.syslog_level = LOG_LEVEL_QUIET; + confl->acct_freq_task = (uint16_t)NO_VAL; + tmp_int = acct_gather_parse_freq(PROFILE_TASK, + confl->job_acct_gather_freq); + if (tmp_int != -1) + confl->acct_freq_task = tmp_int; + + return (confl); rwfail: return (NULL); @@ -279,8 +291,7 @@ static int handle_spank_mode (int argc, char *argv[]) log_alter (conf->log_opts, 0, conf->logfile); close (STDIN_FILENO); - if (slurm_conf_init(NULL) != SLURM_SUCCESS) - return error ("Failed to read slurm config"); + slurm_conf_init(NULL); if (get_jobid_uid_from_env (&jobid, &uid) < 0) return error ("spank environment invalid"); @@ -324,11 +335,16 @@ static int process_cmdline (int argc, char *argv[]) static void _send_ok_to_slurmd(int sock) { + /* If running under memcheck stdout doesn't work correctly so + * just skip it. 
+ */ +#ifndef SLURMSTEPD_MEMCHECK int ok = SLURM_SUCCESS; safe_write(sock, &ok, sizeof(int)); return; rwfail: error("Unable to send \"ok\" to slurmd"); +#endif } static void @@ -391,8 +407,6 @@ _init_from_slurmd(int sock, char **argv, log_alter(conf->log_opts, 0, conf->logfile); debug2("debug level is %d.", conf->debug_level); - /* acct info */ - jobacct_gather_startpoll(conf->job_acct_gather_freq); switch_g_slurmd_step_init(); @@ -406,7 +420,7 @@ _init_from_slurmd(int sock, char **argv, safe_read(sock, incoming_buffer, len); buffer = create_buf(incoming_buffer,len); cli = xmalloc(sizeof(slurm_addr_t)); - if(slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR) + if (slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR) fatal("slurmstepd: problem with unpack of slurmd_conf"); free_buf(buffer); @@ -453,7 +467,7 @@ _init_from_slurmd(int sock, char **argv, fatal("Unrecognized launch RPC"); break; } - if(unpack_msg(msg, buffer) == SLURM_ERROR) + if (unpack_msg(msg, buffer) == SLURM_ERROR) fatal("slurmstepd: we didn't unpack the request correctly"); free_buf(buffer); diff --git a/src/slurmd/slurmstepd/slurmstepd.h b/src/slurmd/slurmstepd/slurmstepd.h index fd107e6b090519370e3305ed6cc7393982f911c3..522130e51e5435b9c270c3525513167c90013e0f 100644 --- a/src/slurmd/slurmstepd/slurmstepd.h +++ b/src/slurmd/slurmstepd/slurmstepd.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 0ece10d9ca5d9ea0828ef37074ea6e7d6982e111..df8528c6cc979542fde8bd3739320edc6d07cea4 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -48,6 +48,7 @@ #include <grp.h> #include <signal.h> +#include <stdlib.h> #include <sys/types.h> #include "src/common/eio.h" @@ -56,6 +57,7 @@ #include "src/common/log.h" #include "src/common/node_select.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xassert.h" #include "src/common/xmalloc.h" @@ -74,6 +76,30 @@ static void _job_init_task_info(slurmd_job_t *job, uint32_t *gtid, char *ifname, char *ofname, char *efname); static void _task_info_destroy(slurmd_task_info_t *t, uint16_t multi_prog); +static int _check_acct_freq_task(uint32_t job_mem_lim, char *acctg_freq) +{ + int task_freq; + + if (!job_mem_lim || !conf->acct_freq_task) + return 0; + + task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq); + + if (task_freq == -1) + return 0; + + if ((task_freq == 0) || (task_freq > conf->acct_freq_task)) { + error("Can't set frequency to %d, it is higher than %u. 
" + "We need it to be at least at this level to " + "monitor memory usage.", + task_freq, conf->acct_freq_task); + slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ); + return 1; + } + + return 0; +} + static struct passwd * _pwd_create(uid_t uid) { @@ -162,7 +188,7 @@ job_create(launch_tasks_request_msg_t *msg) srun_info_t *srun = NULL; slurm_addr_t resp_addr; slurm_addr_t io_addr; - int nodeid = NO_VAL; + int i, nodeid = NO_VAL; xassert(msg != NULL); xassert(msg->complete_nodelist != NULL); @@ -178,13 +204,7 @@ job_create(launch_tasks_request_msg_t *msg) return NULL; } - if (msg->job_mem_lim && (msg->acctg_freq != (uint16_t) NO_VAL) - && (msg->acctg_freq > conf->job_acct_gather_freq)) { - error("Can't set frequency to %u, it is higher than %u. " - "We need it to be at least at this level to " - "monitor memory usage.", - msg->acctg_freq, conf->job_acct_gather_freq); - slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ); + if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq)) { _pwd_destroy(pwd); return NULL; } @@ -197,7 +217,7 @@ job_create(launch_tasks_request_msg_t *msg) nodeid = 0; job->node_name = xstrdup(msg->complete_nodelist); #endif - if(nodeid < 0) { + if (nodeid < 0) { error("couldn't find node %s in %s", job->node_name, msg->complete_nodelist); job_destroy(job); @@ -226,6 +246,17 @@ job_create(launch_tasks_request_msg_t *msg) job->cpus_per_task = msg->cpus_per_task; job->env = _array_copy(msg->envc, msg->env); + job->array_job_id = msg->job_id; + job->array_task_id = (uint16_t) NO_VAL; + for (i = 0; i < msg->envc; i++) { + /* 1234567890123456789 */ + if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19)) + job->array_job_id = atoi(msg->env[i] + 19); + /* 12345678901234567890 */ + if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20)) + job->array_task_id = atoi(msg->env[i] + 20); + } + job->eio = eio_handle_create(); job->sruns = list_create((ListDelF) _srun_info_destructor); job->clients = list_create(NULL); /* FIXME! 
Needs destructor */ @@ -269,6 +300,7 @@ job_create(launch_tasks_request_msg_t *msg) job->buffered_stdio = msg->buffered_stdio; job->labelio = msg->labelio; + job->profile = msg->profile; job->task_prolog = xstrdup(msg->task_prolog); job->task_epilog = xstrdup(msg->task_epilog); @@ -279,8 +311,15 @@ job_create(launch_tasks_request_msg_t *msg) job->nodeid = nodeid; job->debug = msg->slurmd_debug; job->cpus = msg->cpus_allocated[nodeid]; - if (msg->acctg_freq != (uint16_t) NO_VAL) - jobacct_gather_change_poll(msg->acctg_freq); + + /* This needs to happen before acct_gather_profile_startpoll + and only really looks at the profile in the job. + */ + acct_gather_profile_g_node_step_start(job); + + acct_gather_profile_startpoll(msg->acctg_freq, + conf->job_acct_gather_freq); + job->multi_prog = msg->multi_prog; job->timelimit = (time_t) -1; job->task_flags = msg->task_flags; @@ -288,7 +327,7 @@ job_create(launch_tasks_request_msg_t *msg) job->pty = msg->pty; job->open_mode = msg->open_mode; job->options = msg->options; - format_core_allocs(msg->cred, conf->node_name, + format_core_allocs(msg->cred, conf->node_name, conf->cpus, &job->job_alloc_cores, &job->step_alloc_cores, &job->job_mem, &job->step_mem); if (job->step_mem) { @@ -327,9 +366,12 @@ job_create(launch_tasks_request_msg_t *msg) static char * _batchfilename(slurmd_job_t *job, const char *name) { - if (name == NULL) - return fname_create(job, "slurm-%J.out", 0); - else + if (name == NULL) { + if (job->array_task_id == (uint16_t) NO_VAL) + return fname_create(job, "slurm-%J.out", 0); + else + return fname_create(job, "slurm-%A_%a.out", 0); + } else return fname_create(job, name, 0); } @@ -355,13 +397,8 @@ job_batch_job_create(batch_job_launch_msg_t *msg) _pwd_destroy(pwd); return NULL; } - if(msg->job_mem && (msg->acctg_freq != (uint16_t) NO_VAL) - && (msg->acctg_freq > conf->job_acct_gather_freq)) { - error("Can't set frequency to %u, it is higher than %u. 
" - "We need it to be at least at this level to " - "monitor memory usage.", - msg->acctg_freq, conf->job_acct_gather_freq); - slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ); + + if (_check_acct_freq_task(msg->job_mem, msg->acctg_freq)) { _pwd_destroy(pwd); return NULL; } @@ -376,10 +413,18 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->ntasks = msg->ntasks; job->jobid = msg->job_id; job->stepid = msg->step_id; + job->array_job_id = msg->array_job_id; + job->array_task_id = msg->array_task_id; job->batch = true; - if (msg->acctg_freq != (uint16_t) NO_VAL) - jobacct_gather_change_poll(msg->acctg_freq); + /* This needs to happen before acct_gather_profile_startpoll + and only really looks at the profile in the job. + */ + acct_gather_profile_g_node_step_start(job); + /* needed for the jobacct_gather plugin to start */ + acct_gather_profile_startpoll(msg->acctg_freq, + conf->job_acct_gather_freq); + job->multi_prog = 0; job->open_mode = msg->open_mode; job->overcommit = (bool) msg->overcommit; @@ -412,7 +457,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg) if (msg->cpus_per_node) job->cpus = msg->cpus_per_node[0]; - format_core_allocs(msg->cred, conf->node_name, + format_core_allocs(msg->cred, conf->node_name, conf->cpus, &job->job_alloc_cores, &job->step_alloc_cores, &job->job_mem, &job->step_mem); if (job->step_mem) diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 8204690041d91b56eb827afe8a809db33c500f2e..41d30918d495e32528d9b339fb5046118fdaec7a 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -114,8 +114,10 @@ typedef struct slurmd_job { slurmstepd_state_t state; uint32_t jobid; /* Current SLURM job id */ uint32_t stepid; /* Current step id (or NO_VAL) */ + uint32_t array_job_id; /* job array master job ID */ + uint16_t array_task_id; /* job array ID */ uint32_t nnodes; /* number of nodes in current job */ - uint32_t ntasks; /* total number of tasks in current job */ + uint32_t ntasks; /* total number of tasks in current job */ uint32_t nodeid; /* relative position of this node in job */ uint32_t node_tasks; /* number of tasks on *this* node */ uint32_t cpus_per_task; /* number of cpus desired per task */ @@ -145,6 +147,7 @@ typedef struct slurmd_job { bool run_prolog; /* true if need to run prolog */ bool user_managed_io; time_t timelimit; /* time at which job must stop */ + uint32_t profile; /* Level of acct_gather_profile */ char *task_prolog; /* per-task prolog */ char *task_epilog; /* per-task epilog */ struct passwd *pwd; /* saved passwd struct for user job */ diff --git a/src/slurmd/slurmstepd/step_terminate_monitor.c b/src/slurmd/slurmstepd/step_terminate_monitor.c index 818cbbb64186c55c44216af0e23c0369723af836..4185ee6c3990d80f43d9b470ac0301fb83e7a924 100644 --- a/src/slurmd/slurmstepd/step_terminate_monitor.c +++ b/src/slurmd/slurmstepd/step_terminate_monitor.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/step_terminate_monitor.h b/src/slurmd/slurmstepd/step_terminate_monitor.h index c358fb8925a8a4200e75d295e8563b4b58b4e132..d5b56746a346fac4d2d65c7a50b80130926751b4 100644 --- a/src/slurmd/slurmstepd/step_terminate_monitor.h +++ b/src/slurmd/slurmstepd/step_terminate_monitor.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index c0bd0b216cbb9ff6e7e1f5d4fa003dd760dd1216..9450ea96f3b8e97a8801b50854046fa996ddf285 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -215,7 +215,7 @@ _run_script_and_set_env(const char *name, const char *path, slurmd_job_t *job) argv[0] = xstrdup(path); argv[1] = NULL; close(1); - if(dup(pfd[1]) == -1) + if (dup(pfd[1]) == -1) error("couldn't do the dup: %m"); close(2); close(0); @@ -534,7 +534,7 @@ static char *_uint32_array_to_str(int array_len, const uint32_t *array) char *sep = ","; /* seperator */ char *str = xstrdup(""); - if(array == NULL) + if (array == NULL) return str; for (i = 0; i < array_len; i++) { diff --git a/src/slurmd/slurmstepd/task.h b/src/slurmd/slurmstepd/task.h index 78c0b6058cdc402411bff87e710adba65f9538ea..ed7e1b42168c1e277e4da076215fb993867c9409 100644 --- a/src/slurmd/slurmstepd/task.h +++ b/src/slurmd/slurmstepd/task.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmd/slurmstepd/ulimits.c b/src/slurmd/slurmstepd/ulimits.c index bd806a097085953ff07d7182efa7e7cc72b68025..f92a0299767c86e308600318d058cb906781e53f 100644 --- a/src/slurmd/slurmstepd/ulimits.c +++ b/src/slurmd/slurmstepd/ulimits.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -146,8 +146,8 @@ set_umask(slurmd_job_t *job) return SLURM_ERROR; } - unsetenvp(job->env, "SLURM_UMASK"); mask = strtol(val, (char **)NULL, 8); + unsetenvp(job->env, "SLURM_UMASK"); umask(mask); return SLURM_SUCCESS; } diff --git a/src/slurmd/slurmstepd/ulimits.h b/src/slurmd/slurmstepd/ulimits.h index 82cffa72ce9f881a8e212a73ac09ea7ff7bbaedc..718f4e4bc3079b1cd55b052fe1832121b8db5ff2 100644 --- a/src/slurmd/slurmstepd/ulimits.h +++ b/src/slurmd/slurmstepd/ulimits.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmdbd/Makefile.in b/src/slurmdbd/Makefile.in index 08c4f7733cab2f37d71630f874b4b568d63dcbae..272de1bc953f3b60944f1be097c9742cbed8260b 100644 --- a/src/slurmdbd/Makefile.in +++ b/src/slurmdbd/Makefile.in @@ -60,6 +60,7 @@ subdir = src/slurmdbd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -77,6 +78,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -85,11 +87,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ 
+ $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -159,6 +163,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -179,6 +185,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -188,6 +197,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -195,6 +206,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -229,6 +249,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -256,6 +279,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = 
@REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/slurmdbd/agent.c b/src/slurmdbd/agent.c index 01141586b1debfb483853a7dc6d380e13f207e98..a9383d102190c1fa95a66443247dc997c0104b3a 100644 --- a/src/slurmdbd/agent.c +++ b/src/slurmdbd/agent.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmdbd/agent.h b/src/slurmdbd/agent.h index ac68f24a60a8722772716e78c25889aff0399ba8..7a60d73108538d0720acce07034b074af67ddcb3 100644 --- a/src/slurmdbd/agent.h +++ b/src/slurmdbd/agent.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmdbd/backup.c b/src/slurmdbd/backup.c index 13e61adb9238d2a5fe03ad4861a636574cd3fb06..ae83d593c559a70294e69f5ab83cd7cd3d65c844 100644 --- a/src/slurmdbd/backup.c +++ b/src/slurmdbd/backup.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -82,9 +82,9 @@ static void _reopen_slurmdbd_fd(slurm_addr_t dbd_addr) _open_slurmdbd_fd(dbd_addr); } -/* run_backup - this is the backup controller, it should run in standby +/* run_dbd_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */ -extern void run_backup(void) +extern void run_dbd_backup(void) { slurm_addr_t dbd_addr; diff --git a/src/slurmdbd/backup.h b/src/slurmdbd/backup.h index f5531005f151eaed006db35182cefc99e725c64c..10b60b2b90b704457ddb8057871154798299e317 100644 --- a/src/slurmdbd/backup.h +++ b/src/slurmdbd/backup.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -47,9 +47,9 @@ extern bool primary_resumed; extern bool backup; extern bool have_control; -/* run_backup - this is the backup dbd, it should run in standby +/* run_dbd_backup - this is the backup dbd, it should run in standby * mode, assuming control when the primary dbd stops responding */ -extern void run_backup(void); +extern void run_dbd_backup(void); #endif diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index c83383c7527c2b3a068042edb4c8701a9f47916a..7043c18525e6a4a9a742d2cecdead068e45908f3 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -907,6 +907,8 @@ static int _archive_dump(slurmdbd_conn_t *slurmdbd_conn, arch_cond->purge_event = slurmdbd_conf->purge_event; if (arch_cond->purge_job == NO_VAL) arch_cond->purge_job = slurmdbd_conf->purge_job; + if (arch_cond->purge_resv == NO_VAL) + arch_cond->purge_resv = slurmdbd_conf->purge_resv; if (arch_cond->purge_step == NO_VAL) arch_cond->purge_step = slurmdbd_conf->purge_step; if (arch_cond->purge_suspend == NO_VAL) @@ -2165,18 +2167,6 @@ static int _modify_job(slurmdbd_conn_t *slurmdbd_conn, debug2("DBD_MODIFY_JOB: called"); - if ((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) - && assoc_mgr_get_admin_level(slurmdbd_conn->db_conn, *uid) - < SLURMDB_ADMIN_SUPER_USER) { - comment = "Your user doesn't have privilege to preform this action"; - error("CONN:%u %s", slurmdbd_conn->newsockfd, comment); - *out_buffer = make_dbd_rc_msg(slurmdbd_conn->rpc_version, - ESLURM_ACCESS_DENIED, - comment, DBD_MODIFY_JOB); - - return ESLURM_ACCESS_DENIED; - } - if (slurmdbd_unpack_modify_msg(&get_msg, slurmdbd_conn->rpc_version, DBD_MODIFY_JOB, in_buffer) != SLURM_SUCCESS) { @@ -2375,9 +2365,15 @@ is_same_user: if ((user_rec->admin_level != SLURMDB_ADMIN_NOTSET) && (*uid != slurmdbd_conf->slurm_user_id && *uid != 0) - && (admin_level < user_rec->admin_level)) { - comment = "You have to be the same or higher admin level to change another persons"; - user_rec->admin_level = SLURMDB_ADMIN_NOTSET; + && (admin_level < SLURMDB_ADMIN_SUPER_USER)) { + comment = "You must be a super user to modify a users admin level"; + error("CONN:%u %s", slurmdbd_conn->newsockfd, comment); + *out_buffer = make_dbd_rc_msg(slurmdbd_conn-> + rpc_version, + ESLURM_ACCESS_DENIED, + comment, + DBD_MODIFY_USERS); + return ESLURM_ACCESS_DENIED; } if (!(list_msg.my_list = acct_storage_g_modify_users( @@ -2632,6 +2628,7 @@ static void _process_job_start(slurmdbd_conn_t *slurmdbd_conn, job.network = 
job_start_msg->node_inx; job.partition = job_start_msg->partition; details.min_cpus = job_start_msg->req_cpus; + details.pn_min_memory = job_start_msg->req_mem; job.qos_id = job_start_msg->qos_id; job.resv_id = job_start_msg->resv_id; job.priority = job_start_msg->priority; @@ -3578,6 +3575,7 @@ static int _step_start(slurmdbd_conn_t *slurmdbd_conn, step.step_id = step_start_msg->step_id; step.cpu_count = step_start_msg->total_cpus; details.num_tasks = step_start_msg->total_tasks; + step.cpu_freq = step_start_msg->req_cpufreq; layout.node_cnt = step_start_msg->node_cnt; layout.task_dist = step_start_msg->task_dist; diff --git a/src/slurmdbd/proc_req.h b/src/slurmdbd/proc_req.h index 1800dd7ce15be94a4c3098a18ea2cafe4da703c9..ddd1e76e623632d7aeaca17a9fc97ca5dd01e69b 100644 --- a/src/slurmdbd/proc_req.h +++ b/src/slurmdbd/proc_req.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c index df3f73b364e4e251e0d3eea987b2a90c6f46f6d2..982f284b79ca1d6201603c3187e43a3637a93db6 100644 --- a/src/slurmdbd/read_config.c +++ b/src/slurmdbd/read_config.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -59,6 +59,7 @@ #include "src/common/xstring.h" #include "src/common/slurmdb_defs.h" #include "src/slurmdbd/read_config.h" +#include "src/common/slurm_strcasestr.h" /* Global variables */ pthread_mutex_t conf_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -66,7 +67,6 @@ pthread_mutex_t conf_mutex = PTHREAD_MUTEX_INITIALIZER; /* Local functions */ static void _clear_slurmdbd_conf(void); -static char * _get_conf_path(void); static time_t boot_time; @@ -101,6 +101,7 @@ static void _clear_slurmdbd_conf(void) slurmdbd_conf->private_data = 0; slurmdbd_conf->purge_event = 0; slurmdbd_conf->purge_job = 0; + slurmdbd_conf->purge_resv = 0; slurmdbd_conf->purge_step = 0; slurmdbd_conf->purge_suspend = 0; slurmdbd_conf->slurm_user_id = NO_VAL; @@ -129,6 +130,7 @@ extern int read_slurmdbd_conf(void) {"ArchiveDir", S_P_STRING}, {"ArchiveEvents", S_P_BOOLEAN}, {"ArchiveJobs", S_P_BOOLEAN}, + {"ArchiveResvs", S_P_BOOLEAN}, {"ArchiveScript", S_P_STRING}, {"ArchiveSteps", S_P_BOOLEAN}, {"ArchiveSuspend", S_P_BOOLEAN}, @@ -148,6 +150,7 @@ extern int read_slurmdbd_conf(void) {"PrivateData", S_P_STRING}, {"PurgeEventAfter", S_P_STRING}, {"PurgeJobAfter", S_P_STRING}, + {"PurgeResvAfter", S_P_STRING}, {"PurgeStepAfter", S_P_STRING}, {"PurgeSuspendAfter", S_P_STRING}, {"PurgeEventMonths", S_P_UINT32}, @@ -181,11 +184,12 @@ extern int read_slurmdbd_conf(void) _clear_slurmdbd_conf(); /* Get the slurmdbd.conf path and validate the file */ - conf_path = _get_conf_path(); + conf_path = get_extra_conf_path("slurmdbd.conf"); if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) { info("No slurmdbd.conf file (%s)", conf_path); } else { - bool a_events = 0, a_jobs = 0, a_steps = 0, a_suspend = 0; + bool a_events = 0, a_jobs = 0, a_resv = 0, + a_steps = 0, a_suspend = 0; debug("Reading slurmdbd.conf file %s", conf_path); tbl = s_p_hashtbl_create(options); @@ -201,6 +205,7 @@ extern int read_slurmdbd_conf(void) 
xstrdup(DEFAULT_SLURMDBD_ARCHIVE_DIR); s_p_get_boolean(&a_events, "ArchiveEvents", tbl); s_p_get_boolean(&a_jobs, "ArchiveJobs", tbl); + s_p_get_boolean(&a_resv, "ArchiveResvs", tbl); s_p_get_string(&slurmdbd_conf->archive_script, "ArchiveScript", tbl); s_p_get_boolean(&a_steps, "ArchiveSteps", tbl); @@ -243,28 +248,28 @@ extern int read_slurmdbd_conf(void) slurmdbd_conf->private_data = 0; /* default visible to all */ if (s_p_get_string(&temp_str, "PrivateData", tbl)) { - if (strstr(temp_str, "account")) + if (slurm_strcasestr(temp_str, "account")) slurmdbd_conf->private_data |= PRIVATE_DATA_ACCOUNTS; - if (strstr(temp_str, "job")) + if (slurm_strcasestr(temp_str, "job")) slurmdbd_conf->private_data |= PRIVATE_DATA_JOBS; - if (strstr(temp_str, "node")) + if (slurm_strcasestr(temp_str, "node")) slurmdbd_conf->private_data |= PRIVATE_DATA_NODES; - if (strstr(temp_str, "partition")) + if (slurm_strcasestr(temp_str, "partition")) slurmdbd_conf->private_data |= PRIVATE_DATA_PARTITIONS; - if (strstr(temp_str, "reservation")) + if (slurm_strcasestr(temp_str, "reservation")) slurmdbd_conf->private_data |= PRIVATE_DATA_RESERVATIONS; - if (strstr(temp_str, "usage")) + if (slurm_strcasestr(temp_str, "usage")) slurmdbd_conf->private_data |= PRIVATE_DATA_USAGE; - if (strstr(temp_str, "user")) + if (slurm_strcasestr(temp_str, "user")) slurmdbd_conf->private_data |= PRIVATE_DATA_USERS; - if (strstr(temp_str, "all")) + if (slurm_strcasestr(temp_str, "all")) slurmdbd_conf->private_data = 0xffff; xfree(temp_str); } @@ -286,6 +291,15 @@ extern int read_slurmdbd_conf(void) } xfree(temp_str); } + if (s_p_get_string(&temp_str, "PurgeResvAfter", tbl)) { + /* slurmdb_parse_purge will set SLURMDB_PURGE_FLAGS */ + if ((slurmdbd_conf->purge_resv = + slurmdb_parse_purge(temp_str)) == NO_VAL) { + fatal("Bad value \"%s\" for PurgeResvAfter", + temp_str); + } + xfree(temp_str); + } if (s_p_get_string(&temp_str, "PurgeStepAfter", tbl)) { /* slurmdb_parse_purge will set SLURMDB_PURGE_FLAGS */ if 
((slurmdbd_conf->purge_step = @@ -379,6 +393,8 @@ extern int read_slurmdbd_conf(void) slurmdbd_conf->purge_event |= SLURMDB_PURGE_ARCHIVE; if (a_jobs) slurmdbd_conf->purge_job |= SLURMDB_PURGE_ARCHIVE; + if (a_resv) + slurmdbd_conf->purge_resv |= SLURMDB_PURGE_ARCHIVE; if (a_steps) slurmdbd_conf->purge_step |= SLURMDB_PURGE_ARCHIVE; if (a_suspend) @@ -418,6 +434,11 @@ extern int read_slurmdbd_conf(void) if (slurmdbd_conf->storage_type == NULL) fatal("StorageType must be specified"); + if (!strcmp(slurmdbd_conf->storage_type, + "accounting_storage/slurmdbd")) { + fatal("StorageType=%s is invalid in slurmdbd.conf", + slurmdbd_conf->storage_type); + } if (!slurmdbd_conf->storage_host) slurmdbd_conf->storage_host = xstrdup(DEFAULT_STORAGE_HOST); @@ -482,7 +503,7 @@ extern int read_slurmdbd_conf(void) slurmdbd_conf->purge_step = NO_VAL; if (!slurmdbd_conf->purge_suspend) slurmdbd_conf->purge_suspend = NO_VAL; - + slurm_mutex_unlock(&conf_mutex); return SLURM_SUCCESS; } @@ -527,6 +548,13 @@ extern void log_config(void) sprintf(tmp_str, "NONE"); debug2("PurgeJobAfter = %s", tmp_str); + if (slurmdbd_conf->purge_resv != NO_VAL) + slurmdb_purge_string(slurmdbd_conf->purge_resv, + tmp_str, sizeof(tmp_str), 1); + else + sprintf(tmp_str, "NONE"); + debug2("PurgeResvAfter = %s", tmp_str); + if (slurmdbd_conf->purge_step != NO_VAL) slurmdb_purge_string(slurmdbd_conf->purge_step, tmp_str, sizeof(tmp_str), 1); @@ -539,7 +567,7 @@ extern void log_config(void) tmp_str, sizeof(tmp_str), 1); else sprintf(tmp_str, "NONE"); - debug2("PurgeSuspendAfter = %s", tmp_str); + debug2("PurgeSuspendAfter = %s", tmp_str); debug2("SlurmUser = %s(%u)", slurmdbd_conf->slurm_user_name, slurmdbd_conf->slurm_user_id); @@ -578,31 +606,6 @@ extern void slurmdbd_conf_unlock(void) } -/* Return the pathname of the slurmdbd.conf file. 
- * xfree() the value returned */ -static char * _get_conf_path(void) -{ - char *val = getenv("SLURM_CONF"); - char *path = NULL; - int i; - - if (!val) - val = default_slurm_config_file; - - /* Replace file name on end of path */ - i = strlen(val) + 15; - path = xmalloc(i); - strcpy(path, val); - val = strrchr(path, (int)'/'); - if (val) /* absolute path */ - val++; - else /* not absolute path */ - val = path; - strcpy(val, "slurmdbd.conf"); - - return path; -} - /* Dump the configuration in name,value pairs for output to * "statsmgr show config", caller must call list_destroy() */ extern List dump_config(void) @@ -610,9 +613,6 @@ extern List dump_config(void) config_key_pair_t *key_pair; List my_list = list_create(destroy_config_key_pair); - if (!my_list) - fatal("malloc failure on list_create"); - key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ArchiveDir"); key_pair->value = xstrdup(slurmdbd_conf->archive_dir); @@ -748,6 +748,16 @@ extern List dump_config(void) key_pair->value = xstrdup("NONE"); list_append(my_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("PurgeResvAfter"); + if (slurmdbd_conf->purge_resv != NO_VAL) { + key_pair->value = xmalloc(32); + slurmdb_purge_string(slurmdbd_conf->purge_resv, + key_pair->value, 32, 1); + } else + key_pair->value = xstrdup("NONE"); + list_append(my_list, key_pair); + key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PurgeStepAfter"); if (slurmdbd_conf->purge_step != NO_VAL) { @@ -770,7 +780,7 @@ extern List dump_config(void) key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SLURMDBD_CONF"); - key_pair->value = _get_conf_path(); + key_pair->value = get_extra_conf_path("slurmdbd.conf"); list_append(my_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); diff --git a/src/slurmdbd/read_config.h b/src/slurmdbd/read_config.h index 
f8567460e1a19e92083dcc3390486f3e589c3f00..eacba760169fc3521bf8b05f403cd607afefe871 100644 --- a/src/slurmdbd/read_config.h +++ b/src/slurmdbd/read_config.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -91,6 +91,7 @@ typedef struct slurm_dbd_conf { uint32_t purge_event; /* purge events older than * this in months or days */ uint32_t purge_job; /* purge time for job info */ + uint32_t purge_resv; /* purge time for reservation info */ uint32_t purge_step; /* purge time for step info */ uint32_t purge_suspend; /* purge suspend data older * than this in months or days */ diff --git a/src/slurmdbd/rpc_mgr.c b/src/slurmdbd/rpc_mgr.c index 41ccaf1faab2eccacfd6fd1c45a56703d6adb23e..c0b98a18395729aefe4b3554281ec4af5900e223 100644 --- a/src/slurmdbd/rpc_mgr.c +++ b/src/slurmdbd/rpc_mgr.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmdbd/rpc_mgr.h b/src/slurmdbd/rpc_mgr.h index 308d5539038ba735bf7a8d94ba48c75d53838f13..b8a1f6918175294a06dc54f1eea7965fb0263806 100644 --- a/src/slurmdbd/rpc_mgr.h +++ b/src/slurmdbd/rpc_mgr.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c index 5513f01707e487cf49886df7bfba7d3588ffb9a4..185e89a1a5cdafb5b3f0fa150707316cc5399467 100644 --- a/src/slurmdbd/slurmdbd.c +++ b/src/slurmdbd/slurmdbd.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -188,7 +188,7 @@ int main(int argc, char *argv[]) info("slurmdbd running in background mode"); have_control = false; backup = true; - run_backup(); + run_dbd_backup(); if (!shutdown_time) assoc_mgr_refresh_lists(db_conn, NULL); } else if (slurmdbd_conf->dbd_host && @@ -228,7 +228,7 @@ int main(int argc, char *argv[]) info("slurmdbd version %s started", SLURM_VERSION_STRING); if (backup) - run_backup(); + run_dbd_backup(); } _request_registrations(db_conn); @@ -605,6 +605,8 @@ static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec) slurm_msg_t_init(&out_msg); out_msg.msg_type = ACCOUNTING_REGISTER_CTLD; out_msg.flags = SLURM_GLOBAL_AUTH_KEY; + out_msg.protocol_version + = slurmdbd_translate_rpc(cluster_rec->rpc_version); slurm_send_node_msg(fd, &out_msg); /* We probably need to add matching recv_msg function * for an arbitray fd or should these be fire diff --git a/src/slurmdbd/slurmdbd.h b/src/slurmdbd/slurmdbd.h index ad4a3d79c3efbade600af8f7390c4ace3e37399f..b500c270f92cce2790d1f17723b220da779a0649 100644 --- a/src/slurmdbd/slurmdbd.h +++ b/src/slurmdbd/slurmdbd.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/smap/Makefile.in b/src/smap/Makefile.in index 6040b2d10b3296bb5d7e39d086d8fd705143f797..5edf8e093d67322c9e5ff7954cf06404cf566cb3 100644 --- a/src/smap/Makefile.in +++ b/src/smap/Makefile.in @@ -67,6 +67,7 @@ DIST_COMMON = $(am__noinst_HEADERS_DIST) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -84,6 +85,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -92,11 +94,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -179,6 +183,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -199,6 +205,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = 
@FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -208,6 +217,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -215,6 +226,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -249,6 +269,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -276,6 +299,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c index 3ac4a2c7f0de007c63690b6e3268d2f32abe67d1..aa4841f528b385a938b7dc938add3a4bde533f20 100644 --- a/src/smap/configure_functions.c +++ b/src/smap/configure_functions.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -587,7 +587,7 @@ static int _create_allocation(char *com, List allocated_blocks) } } start_request: - if(!_full_request(request, NULL, allocated_blocks)) + if (!_full_request(request, NULL, allocated_blocks)) destroy_select_ba_request(request); } @@ -1133,7 +1133,7 @@ static int _add_bg_record(select_ba_request_t *blockreq, List allocated_blocks) memcpy(blockreq->start, best_start, sizeof(blockreq->start)); - if(!_full_request(blockreq, mark_bitmap, allocated_blocks)) + if (!_full_request(blockreq, mark_bitmap, allocated_blocks)) destroy_select_ba_request(blockreq); fini: FREE_NULL_BITMAP(mark_bitmap); diff --git a/src/smap/grid_functions.c b/src/smap/grid_functions.c index e02af52a204ba296f65975c43d05dedac1af47e9..aaa57e75ad46c9009cc5d794da31d6710bc40984 100644 --- a/src/smap/grid_functions.c +++ b/src/smap/grid_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -124,6 +124,8 @@ extern void set_grid_inx(int start, int end, int count) return; for (i = 0; i < smap_system_ptr->node_cnt; i++) { + if (!smap_system_ptr->grid[i]) /* Null node name */ + continue; if ((smap_system_ptr->grid[i]->index < start) || (smap_system_ptr->grid[i]->index > end)) continue; @@ -145,6 +147,8 @@ extern int set_grid_bg(int *start, int *end, int count, int set) return 0; for (i = 0; i < smap_system_ptr->node_cnt; i++) { + if (!smap_system_ptr->grid[i]) /* Null node name */ + continue; for (j = 0; j < params.cluster_dims; j++) { if ((smap_system_ptr->grid[i]->coord[j] < start[j]) || (smap_system_ptr->grid[i]->coord[j] > end[j])) @@ -269,6 +273,8 @@ extern void init_grid(node_info_msg_t *node_info_ptr, int cols) cols = 80; for (i = 0; i < smap_system_ptr->node_cnt; i++) { smap_node = smap_system_ptr->grid[i]; + if (!smap_node) /* Null node name */ + continue; if (params.cluster_dims == 1) { smap_node->grid_xcord = (i % cols) + 1; smap_node->grid_ycord = (i / cols) + 1; @@ -322,6 +328,8 @@ extern void clear_grid(void) for (i = 0; i < smap_system_ptr->node_cnt; i++) { smap_node = smap_system_ptr->grid[i]; + if (!smap_node) /* Null node name */ + continue; if ((smap_node->state == NODE_STATE_DOWN) || (smap_node->state & NODE_STATE_DRAIN)) { smap_node->color = COLOR_BLACK; @@ -343,6 +351,8 @@ extern void free_grid(void) if (smap_system_ptr->grid) { for (i = 0; i < smap_system_ptr->node_cnt; i++) { smap_node_t *smap_node = smap_system_ptr->grid[i]; + if (!smap_node) /* Null node name */ + continue; xfree(smap_node->coord); xfree(smap_node); } @@ -361,6 +371,8 @@ extern void print_grid(void) return; for (i = 0; i < smap_system_ptr->node_cnt; i++) { + if (!smap_system_ptr->grid[i]) /* Null node name */ + continue; if (smap_system_ptr->grid[i]->color) init_pair(smap_system_ptr->grid[i]->color, smap_system_ptr->grid[i]->color, COLOR_BLACK); diff --git a/src/smap/job_functions.c 
b/src/smap/job_functions.c index 234ec0b184e8c0710da6fa8f51641d4de74e9201..d30b09d72bcc1cb2778e4a707bd4463295a6c7b6 100644 --- a/src/smap/job_functions.c +++ b/src/smap/job_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -83,11 +83,11 @@ extern void get_job(void) if (!params.commandline) { mvwprintw(text_win, main_ycord, 1, - "slurm_load_job: %s", + "slurm_load_jobs: %s", slurm_strerror(slurm_get_errno())); main_ycord++; } else { - printf("slurm_load_job: %s\n", + printf("slurm_load_jobs: %s\n", slurm_strerror(slurm_get_errno())); } } diff --git a/src/smap/opts.c b/src/smap/opts.c index de47963204aa140c00b41d3e9a1f5d176d19e2ad..5e0c37eaa68f40c193d5578619c3c0cd9ab56a3f 100644 --- a/src/smap/opts.c +++ b/src/smap/opts.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -126,16 +126,16 @@ extern void parse_command_line(int argc, char *argv[]) * pset size. This number should be large enough. 
*/ params.io_bit = bit_alloc(128); - if(bit_unfmt(params.io_bit, optarg) == -1) { + if (bit_unfmt(params.io_bit, optarg) == -1) { error("'%s' invalid entry for --ionodes", optarg); exit(1); } break; case 'M': - if(params.clusters) + if (params.clusters) list_destroy(params.clusters); - if(!(params.clusters = + if (!(params.clusters = slurmdb_get_info_cluster(optarg))) { print_db_notok(optarg, 0); exit(1); diff --git a/src/smap/partition_functions.c b/src/smap/partition_functions.c index b3723a89dbbf511cb29ed48e3f289914b46fdc71..77c3e2bb665eb8b08cd7a5eb33bea6cd6c5a4778 100644 --- a/src/smap/partition_functions.c +++ b/src/smap/partition_functions.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -257,10 +257,6 @@ extern void get_bg_part(void) list_flush(block_list); } else { block_list = list_create(_block_list_del); - if (!block_list) { - fprintf(stderr, "malloc error\n"); - return; - } } if (!params.commandline) if ((new_bg_ptr->record_count - text_line_cnt) @@ -762,7 +758,6 @@ static int _print_text_part(partition_info_t *part_ptr, sizeof(time_buf)); } - width = strlen(time_buf); printf("%9.9s ", time_buf); } } diff --git a/src/smap/reservation_functions.c b/src/smap/reservation_functions.c index 72a5f89a0055deb9e680002219198cc0d6c6d266..48ea66a703e13fe02baea460e43b81d11273bef7 100644 --- a/src/smap/reservation_functions.c +++ b/src/smap/reservation_functions.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/smap/smap.c b/src/smap/smap.c index 03dd4e760a7e6ae277e40a3642d5485c4b86c1ec..595f3cbf7c1c82356e776cc207eb1cc0cf417d49 100644 --- a/src/smap/smap.c +++ b/src/smap/smap.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/smap/smap.h b/src/smap/smap.h index e887f10cc3a4a6cfecc01a62a3964da32b83f369..a112b2cb4003d715cdcd2745542e5f8917cc1850 100644 --- a/src/smap/smap.h +++ b/src/smap/smap.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sprio/Makefile.in b/src/sprio/Makefile.in index a38575270baafb91e4e6f086a9737b834aee8813..ecd9e0b10fd39d2559f3486db6c821202ee318da 100644 --- a/src/sprio/Makefile.in +++ b/src/sprio/Makefile.in @@ -62,6 +62,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -79,6 +80,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -87,11 +89,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ 
GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sprio/opts.c b/src/sprio/opts.c index e8cf37a0914f00ffc73bfdcd3fb0c276217e3c6f..3163bafcaa6445d86b4c0268faccad49564abe61 100644 --- a/src/sprio/opts.c +++ b/src/sprio/opts.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -138,9 +138,9 @@ parse_command_line( int argc, char* argv[] ) params.long_list = true; break; case (int) 'M': - if(params.clusters) + if (params.clusters) list_destroy(params.clusters); - if(!(params.clusters = + if (!(params.clusters = slurmdb_get_info_cluster(optarg))) { print_db_notok(optarg, 0); exit(1); @@ -431,9 +431,8 @@ _build_job_list( char* str ) my_list = list_create( NULL ); my_job_list = xstrdup( str ); job = strtok_r( my_job_list, ",", &tmp_char ); - while (job) - { - i = strtol( job, (char **) NULL, 10 ); + while (job) { + i = slurm_xlate_job_id(job); if (i <= 0) { error( "Invalid job id: %s", job ); exit( 1 ); diff --git a/src/sprio/print.c b/src/sprio/print.c index 75d6ede1863cab27bbade4e8ef9fc6046886aa8f..29b0f4f8e51797c5d9bdd36b09900a4364884bf0 100644 --- a/src/sprio/print.c +++ b/src/sprio/print.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sprio/print.h b/src/sprio/print.h index 49c57cde61c66781b907219d9bf0be36702d97a1..558bad14a6f6054a5d377ddeb7187a203277310e 100644 --- a/src/sprio/print.h +++ b/src/sprio/print.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sprio/sprio.c b/src/sprio/sprio.c index 099b17cfa60c7a21dc154697baef4a863f831c4a..ff6e39047ad1f63c4080c1b9346620cd4c2448c3 100644 --- a/src/sprio/sprio.c +++ b/src/sprio/sprio.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -192,13 +192,13 @@ int main (int argc, char *argv[]) /* create the format list from the format */ parse_format(params.format); - if (params.jobs && (!resp_msg->priority_factors_list || - !list_count(resp_msg->priority_factors_list))) + if (params.jobs && (!resp_msg || !resp_msg->priority_factors_list || + !list_count(resp_msg->priority_factors_list))) { printf("Unable to find jobs matching user/id(s) specified\n"); - else + } else if (resp_msg) { print_jobs_array(resp_msg->priority_factors_list, params.format_list); - + } #if 0 /* Free storage here if we want to verify that logic. * Since we exit next, this is not important */ diff --git a/src/sprio/sprio.h b/src/sprio/sprio.h index d8ef2a315e35add026f8f6de3e0b7da62ac71d59..f62fca3ebb5ae0971f0d23c8e054fbfdcc7c5791 100644 --- a/src/sprio/sprio.h +++ b/src/sprio/sprio.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/squeue/Makefile.in b/src/squeue/Makefile.in index b9868700fd4923694da8d49652770f6c68c583b8..75f9e919f7998284a848515c50b408e05108d389 100644 --- a/src/squeue/Makefile.in +++ b/src/squeue/Makefile.in @@ -62,6 +62,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -79,6 +80,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -87,11 +89,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -161,6 +165,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -181,6 +187,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ 
GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -190,6 +199,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -197,6 +208,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -231,6 +251,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -258,6 +281,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/squeue/opts.c b/src/squeue/opts.c index 2f1ec2750495e67b7acba2be09225da5e061b9d5..38852b5765d9af1ceb162fcfa68649bcd6802593 100644 --- a/src/squeue/opts.c +++ b/src/squeue/opts.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -79,7 +79,6 @@ static List _build_step_list( char* str ); static List _build_user_list( char* str ); static char *_get_prefix(char *token); static void _help( void ); -static int _max_cpus_per_node(void); static int _parse_state( char* str, uint16_t* states ); static void _parse_token( char *token, char *field, int *field_size, bool *right_justify, char **suffix); @@ -99,6 +98,7 @@ parse_command_line( int argc, char* argv[] ) static struct option long_options[] = { {"accounts", required_argument, 0, 'A'}, {"all", no_argument, 0, 'a'}, + {"array", no_argument, 0, 'r'}, {"format", required_argument, 0, 'o'}, {"help", no_argument, 0, OPT_LONG_HELP}, {"hide", no_argument, 0, OPT_LONG_HIDE}, @@ -129,6 +129,8 @@ parse_command_line( int argc, char* argv[] ) if (getenv("SQUEUE_ALL")) params.all_flag = true; + if (getenv("SQUEUE_ARRAY")) + params.array_flag = true; if ( ( env_val = getenv("SQUEUE_SORT") ) ) params.sort = xstrdup(env_val); if ( ( env_val = getenv("SLURM_CLUSTERS") ) ) { @@ -140,7 +142,7 @@ parse_command_line( int argc, char* argv[] ) } while ((opt_char = getopt_long(argc, argv, - "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vVw:", + "A:ahi:j::ln:M:o:p:q:R:rs::S:t:u:U:vVw:", long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': @@ -217,6 +219,9 @@ parse_command_line( int argc, char* argv[] ) xfree(params.reservation); params.reservation = xstrdup(optarg); break; + case (int)'r': + params.array_flag = true; + break; case (int) 's': if (optarg) { params.steps = xstrdup(optarg); @@ -309,8 +314,6 @@ parse_command_line( int argc, char* argv[] ) char *name1 = NULL; char *name2 = NULL; hostset_t nodenames = hostset_create(NULL); - if (nodenames == NULL) - fatal("malloc failure"); while ( hostset_count(params.nodes) > 0 ) { name1 = hostset_pop(params.nodes); @@ -377,7 +380,7 @@ parse_command_line( int argc, char* argv[] ) if ( params.start_flag && !params.step_flag ) { /* Set more 
defaults */ if (params.format == NULL) - params.format = xstrdup("%.7i %.9P %.8j %.8u %.2t %.19S %.6D %R"); + params.format = xstrdup("%.7i %.9P %.8j %.8u %.2t %.19S %.6D %R"); if (params.sort == NULL) params.sort = xstrdup("S"); if (params.states == NULL) { @@ -386,30 +389,27 @@ parse_command_line( int argc, char* argv[] ) } } - params.max_cpus = _max_cpus_per_node(); - - if ( params.verbose ) - _print_options(); -} - -/* Return the maximum number of processors for any node in the cluster */ -static int _max_cpus_per_node(void) -{ - int error_code, max_cpus = 1; - node_info_msg_t *node_info_ptr = NULL; - - error_code = slurm_load_node ((time_t) NULL, &node_info_ptr, - params.all_flag); - if (error_code == SLURM_SUCCESS) { - int i; - node_info_t *node_ptr = node_info_ptr->node_array; - for (i=0; i<node_info_ptr->record_count; i++) { - max_cpus = MAX(max_cpus, node_ptr[i].cpus); + if (params.job_list && (list_count(params.job_list) == 1)) { + ListIterator iterator; + uint32_t *job_id_ptr; + iterator = list_iterator_create(params.job_list); + job_id_ptr = list_next(iterator); + params.job_id = *job_id_ptr; + list_iterator_destroy(iterator); + } + if (params.user_list && (list_count(params.user_list) == 1)) { + ListIterator iterator; + uint32_t *uid_ptr; + iterator = list_iterator_create(params.user_list); + while ((uid_ptr = list_next(iterator))) { + params.user_id = *uid_ptr; + break; } - slurm_free_node_info_msg (node_info_ptr); + list_iterator_destroy(iterator); } - return max_cpus; + if ( params.verbose ) + _print_options(); } /* @@ -557,6 +557,11 @@ extern int parse_format( char* format ) field_size, right_justify, suffix ); + else if (field[0] == 'A') + job_format_add_job_id2(params.format_list, + field_size, + right_justify, + suffix); else if (field[0] == 'b') job_format_add_gres( params.format_list, field_size, right_justify, @@ -602,6 +607,12 @@ extern int parse_format( char* format ) field_size, right_justify, suffix ); + else if (field[0] == 'F') + 
job_format_add_array_job_id( + params.format_list, + field_size, + right_justify, + suffix ); else if (field[0] == 'g') job_format_add_group_name( params.format_list, field_size, @@ -645,6 +656,12 @@ extern int parse_format( char* format ) field_size, right_justify, suffix ); + else if (field[0] == 'K') + job_format_add_array_task_id( + params.format_list, + field_size, + right_justify, + suffix ); else if (field[0] == 'l') job_format_add_time_limit( params.format_list, field_size, @@ -852,7 +869,6 @@ _print_options(void) uint32_t *user; enum job_states *state_id; squeue_job_step_t *job_step_id; - uint32_t *job_id; char hostlist[8192]; if (params.nodes) { @@ -863,11 +879,11 @@ _print_options(void) printf( "-----------------------------\n" ); printf( "all = %s\n", params.all_flag ? "true" : "false"); + printf( "array = %s\n", params.array_flag ? "true" : "false"); printf( "format = %s\n", params.format ); printf( "iterate = %d\n", params.iterate ); printf( "job_flag = %d\n", params.job_flag ); printf( "jobs = %s\n", params.jobs ); - printf( "max_cpus = %d\n", params.max_cpus ) ; printf( "names = %s\n", params.names ); printf( "nodes = %s\n", hostlist ) ; printf( "partitions = %s\n", params.partitions ) ; @@ -883,8 +899,15 @@ _print_options(void) if ((params.verbose > 1) && params.job_list) { i = 0; iterator = list_iterator_create( params.job_list ); - while ( (job_id = list_next( iterator )) ) { - printf( "job_list[%d] = %u\n", i++, *job_id); + while ( (job_step_id = list_next( iterator )) ) { + if (job_step_id->array_id == (uint16_t) NO_VAL) { + printf( "job_list[%d] = %u\n", i++, + job_step_id->job_id ); + } else { + printf( "job_list[%d] = %u_%u\n", i++, + job_step_id->job_id, + job_step_id->array_id ); + } } list_iterator_destroy( iterator ); } @@ -922,8 +945,16 @@ _print_options(void) i = 0; iterator = list_iterator_create( params.step_list ); while ( (job_step_id = list_next( iterator )) ) { - printf( "step_list[%d] = %u.%u\n", i++, - job_step_id->job_id, 
job_step_id->step_id ); + if (job_step_id->array_id == (uint16_t) NO_VAL) { + printf( "step_list[%d] = %u.%u\n", i++, + job_step_id->job_id, + job_step_id->step_id ); + } else { + printf( "step_list[%d] = %u_%u.%u\n", i++, + job_step_id->job_id, + job_step_id->array_id, + job_step_id->step_id ); + } } list_iterator_destroy( iterator ); } @@ -950,9 +981,10 @@ static List _build_job_list( char* str ) { List my_list; - char *job = NULL, *tmp_char = NULL, *my_job_list = NULL; - int i; - uint32_t *job_id = NULL; + char *end_ptr = NULL, *job = NULL, *tmp_char = NULL; + char *my_job_list = NULL; + int job_id, array_id; + squeue_job_step_t *job_step_id; if ( str == NULL ) return NULL; @@ -960,14 +992,20 @@ _build_job_list( char* str ) my_job_list = xstrdup( str ); job = strtok_r( my_job_list, ",", &tmp_char ); while (job) { - i = strtol( job, (char **) NULL, 10 ); - if (i <= 0) { + job_id = strtol( job, &end_ptr, 10 ); + if (end_ptr[0] == '_') + array_id = strtol( end_ptr + 1, &end_ptr, 10 ); + else + array_id = (uint16_t) NO_VAL; + if (job_id <= 0) { error( "Invalid job id: %s", job ); exit( 1 ); } - job_id = xmalloc( sizeof( uint32_t ) ); - *job_id = (uint32_t) i; - list_append( my_list, job_id ); + + job_step_id = xmalloc( sizeof( squeue_job_step_t ) ); + job_step_id->job_id = (uint32_t) job_id; + job_step_id->array_id = (uint16_t) array_id; + list_append( my_list, job_step_id ); job = strtok_r (NULL, ",", &tmp_char); } return my_list; @@ -1060,16 +1098,16 @@ _build_all_states_list( void ) /* * _build_step_list- build a list of job/step_ids - * IN str - comma separated list of job_id.step_ids + * IN str - comma separated list of job_id[array_id].step_id values * RET List of job/step_ids (structure of uint32_t's) */ static List _build_step_list( char* str ) { List my_list; - char *step = NULL, *tmp_char = NULL, *tmps_char = NULL; + char *end_ptr = NULL, *step = NULL, *tmp_char = NULL, *tmps_char = NULL; char *job_name = NULL, *step_name = NULL, *my_step_list = NULL; - 
int i, j; + int job_id, array_id, step_id; squeue_job_step_t *job_step_id = NULL; if ( str == NULL) @@ -1077,25 +1115,29 @@ _build_step_list( char* str ) my_list = list_create( NULL ); my_step_list = xstrdup( str ); step = strtok_r( my_step_list, ",", &tmp_char ); - while (step) - { + while (step) { job_name = strtok_r( step, ".", &tmps_char ); step_name = strtok_r( NULL, ".", &tmps_char ); - i = strtol( job_name, (char **) NULL, 10 ); + job_id = strtol( job_name, &end_ptr, 10 ); + if (end_ptr[0] == '_') + array_id = strtol( end_ptr + 1, &end_ptr, 10 ); + else + array_id = (uint16_t) NO_VAL; if (step_name == NULL) { error ( "Invalid job_step id: %s.??", job_name ); exit( 1 ); } - j = strtol( step_name, (char **) NULL, 10 ); - if ((i <= 0) || (j < 0)) { + step_id = strtol( step_name, &end_ptr, 10 ); + if ((job_id <= 0) || (step_id < 0)) { error( "Invalid job_step id: %s.%s", job_name, step_name ); exit( 1 ); } job_step_id = xmalloc( sizeof( squeue_job_step_t ) ); - job_step_id->job_id = (uint32_t) i; - job_step_id->step_id = (uint32_t) j; + job_step_id->job_id = (uint32_t) job_id; + job_step_id->array_id = (uint16_t) array_id; + job_step_id->step_id = (uint32_t) step_id; list_append( my_list, job_step_id ); step = strtok_r( NULL, ",", &tmp_char); } diff --git a/src/squeue/print.c b/src/squeue/print.c index 8e76b1d86f2adfcd695618d984516cea79982062..f5955a03be9168633ff245ffa5bf455c774b54c4 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -58,6 +58,7 @@ static int _filter_job(job_info_t * job); static int _filter_step(job_step_info_t * step); static int _get_node_cnt(job_info_t * job); +static bool _merge_job_array(List l, job_info_t * job_ptr); static int _nodes_in_list(char *node_list); static int _print_str(char *str, int width, bool right, bool cut_output); @@ -83,7 +84,7 @@ int print_steps(List steps, List format) int print_jobs_array(job_info_t * jobs, int size, List format) { - int i = 0; + int i; List l; l = list_create(NULL); @@ -91,9 +92,11 @@ int print_jobs_array(job_info_t * jobs, int size, List format) print_job_from_format(NULL, format); /* Filter out the jobs of interest */ - for (; i < size; i++) { + for (i = 0; i < size; i++) { if (_filter_job(&jobs[i])) continue; + if (_merge_job_array(l, &jobs[i])) + continue; list_append(l, (void *) &jobs[i]); } sort_jobs_by_start_time (l); @@ -113,7 +116,7 @@ int print_steps_array(job_step_info_t * steps, int size, List format) print_step_from_format(NULL, format); if (size > 0) { - int i = 0; + int i; List step_list; ListIterator step_iterator; job_step_info_t *step_ptr; @@ -121,7 +124,7 @@ int print_steps_array(job_step_info_t * steps, int size, List format) step_list = list_create(NULL); /* Filter out the jobs of interest */ - for (; i < size; i++) { + for (i = 0; i < size; i++) { if (_filter_step(&steps[i])) continue; list_append(step_list, (void *) &steps[i]); @@ -141,6 +144,47 @@ int print_steps_array(job_step_info_t * steps, int size, List format) return SLURM_SUCCESS; } +static bool _merge_job_array(List l, job_info_t * job_ptr) +{ + job_info_t *list_job_ptr; + ListIterator iter; + bool merge = false; + + if (params.array_flag) + return merge; + if (job_ptr->array_task_id == (uint16_t) NO_VAL) + return merge; + if (!IS_JOB_PENDING(job_ptr)) + return merge; + xfree(job_ptr->node_inx); + if (!l) + return merge; + + iter = list_iterator_create(l); + while 
((list_job_ptr = list_next(iter))) { + if ((list_job_ptr->array_task_id == (uint16_t) NO_VAL) || + (job_ptr->array_job_id != list_job_ptr->array_job_id) || + (!IS_JOB_PENDING(list_job_ptr))) + continue; + /* We re-purpose the job's node_inx array to store the + * array_task_id values */ + if (!list_job_ptr->node_inx) { + list_job_ptr->node_inx = xmalloc(sizeof(int) * 0xffff); + list_job_ptr->node_inx[0] = 1; /* offset */ + list_job_ptr->node_inx[1] = + list_job_ptr->array_task_id; + } + list_job_ptr->node_inx[0]++; + list_job_ptr->node_inx[list_job_ptr->node_inx[0]] = + job_ptr->array_task_id; + merge = true; + break; + } + list_iterator_destroy(iter); + + return merge; +} + static int _print_str(char *str, int width, bool right, bool cut_output) { char format[64]; @@ -287,6 +331,40 @@ job_format_add_function(List list, int width, bool right, char *suffix, return SLURM_SUCCESS; } +int _print_job_array_job_id(job_info_t * job, int width, bool right, + char* suffix) +{ + if (job == NULL) { /* Print the Header instead */ + _print_str("ARRAY_JOB_ID", width, right, true); + } else if (job->array_task_id != (uint16_t) NO_VAL) { + char id[FORMAT_STRING_SIZE]; + snprintf(id, FORMAT_STRING_SIZE, "%u", job->array_job_id); + _print_str(id, width, right, true); + } else { + _print_str("N/A", width, right, true); + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + +int _print_job_array_task_id(job_info_t * job, int width, bool right, + char* suffix) +{ + if (job == NULL) { /* Print the Header instead */ + _print_str("ARRAY_TASK_ID", width, right, true); + } else if (job->array_task_id != (uint16_t) NO_VAL) { + char id[FORMAT_STRING_SIZE]; + snprintf(id, FORMAT_STRING_SIZE, "%u", job->array_task_id); + _print_str(id, width, right, true); + } else { + _print_str("N/A", width, right, true); + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + int _print_job_batch_host(job_info_t * job, int width, bool right, char* suffix) { if (job == NULL) /* 
Print the Header instead */ @@ -305,9 +383,41 @@ int _print_job_batch_host(job_info_t * job, int width, bool right, char* suffix) int _print_job_job_id(job_info_t * job, int width, bool right, char* suffix) { - if (job == NULL) /* Print the Header instead */ + if (job == NULL) { /* Print the Header instead */ _print_str("JOBID", width, right, true); - else { + } else if ((job->array_task_id != (uint16_t) NO_VAL) && + !params.array_flag && IS_JOB_PENDING(job) && + job->node_inx) { + int i; + char id[FORMAT_STRING_SIZE], task_str[FORMAT_STRING_SIZE]; + bitstr_t *task_bits = bit_alloc(0xffff); + for (i = 1; i <= job->node_inx[0]; i++) + bit_set(task_bits, job->node_inx[i]); + bit_fmt(task_str, sizeof(task_str), task_bits); + snprintf(id, FORMAT_STRING_SIZE, "%u_[%s]", + job->array_job_id, task_str); + _print_str(id, width, right, true); + bit_free(task_bits); + } else if (job->array_task_id != (uint16_t) NO_VAL) { + char id[FORMAT_STRING_SIZE]; + snprintf(id, FORMAT_STRING_SIZE, "%u_%u", + job->array_job_id, job->array_task_id); + _print_str(id, width, right, true); + } else { + char id[FORMAT_STRING_SIZE]; + snprintf(id, FORMAT_STRING_SIZE, "%u", job->job_id); + _print_str(id, width, right, true); + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + +int _print_job_job_id2(job_info_t * job, int width, bool right, char* suffix) +{ + if (job == NULL) { /* Print the Header instead */ + _print_str("JOBID", width, right, true); + } else { char id[FORMAT_STRING_SIZE]; snprintf(id, FORMAT_STRING_SIZE, "%u", job->job_id); _print_str(id, width, right, true); @@ -609,7 +719,7 @@ int _print_job_nodes(job_info_t * job, int width, bool right, char* suffix) { if (job == NULL) { /* Print the Header instead */ char *title = "NODELIST"; - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) title = "MIDPLANELIST"; _print_str(title, width, right, false); } else { @@ -640,7 +750,7 @@ int _print_job_reason_list(job_info_t * job, int 
width, bool right, { if (job == NULL) { /* Print the Header instead */ char *title = "NODELIST(REASON)"; - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) title = "MIDPLANELIST(REASON)"; _print_str(title, width, right, false); } else if (!IS_JOB_COMPLETING(job) @@ -661,7 +771,7 @@ int _print_job_reason_list(job_info_t * job, int width, bool right, select_g_select_jobinfo_get(job->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); - if(ionodes) { + if (ionodes) { xstrfmtcat(nodes, "[%s]", ionodes); xfree(ionodes); _print_str(nodes, width, right, false); @@ -701,7 +811,7 @@ int _print_job_num_cpus(job_info_t * job, int width, bool right, char* suffix) if (job == NULL) /* Print the Header instead */ _print_str("CPUS", width, right, true); else { - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)job->num_cpus, tmp_char, sizeof(tmp_char), UNIT_NONE); else @@ -723,7 +833,7 @@ int _print_job_num_nodes(job_info_t * job, int width, bool right_justify, if (job == NULL) /* Print the Header instead */ _print_str("NODES", width, right_justify, true); else { - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) select_g_select_jobinfo_get(job->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &node_cnt); @@ -731,7 +841,7 @@ int _print_job_num_nodes(job_info_t * job, int width, bool right_justify, if ((node_cnt == 0) || (node_cnt == NO_VAL)) node_cnt = _get_node_cnt(job); - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)node_cnt, tmp_char, sizeof(tmp_char), UNIT_NONE); else @@ -746,7 +856,7 @@ int _print_job_num_nodes(job_info_t * job, int width, bool right_justify, static int _get_node_cnt(job_info_t * job) { - int node_cnt = 0, round; + int node_cnt = 0; /* For PENDING jobs, return the maximum of the requested nodelist, * requested maximum number of nodes, or requested CPUs rounded 
@@ -761,9 +871,18 @@ static int _get_node_cnt(job_info_t * job) if (IS_JOB_PENDING(job)) { node_cnt = _nodes_in_list(job->req_nodes); node_cnt = MAX(node_cnt, job->num_nodes); - round = job->num_cpus + params.max_cpus - 1; - round /= params.max_cpus; /* round up */ - node_cnt = MAX(node_cnt, round); + if ((node_cnt == 1) && (job->num_cpus > 1) + && job->ntasks_per_node + && (job->ntasks_per_node != (uint16_t) NO_VAL)) { + int num_tasks = job->num_cpus; + if (job->cpus_per_task != (uint16_t) NO_VAL) + num_tasks /= job->cpus_per_task; + node_cnt = (num_tasks + 1) / job->ntasks_per_node; + if (node_cnt > num_tasks) + node_cnt = num_tasks; + else if (!node_cnt) + node_cnt = 1; + } } else node_cnt = _nodes_in_list(job->nodes); return node_cnt; @@ -1192,9 +1311,14 @@ int _print_step_id(job_step_info_t * step, int width, bool right, char* suffix) { char id[FORMAT_STRING_SIZE]; - if (step == NULL) /* Print the Header instead */ + if (step == NULL) { /* Print the Header instead */ _print_str("STEPID", width, right, true); - else { + } else if (step->array_job_id) { + snprintf(id, FORMAT_STRING_SIZE, "%u_%u.%u", + step->array_job_id, step->array_task_id, + step->step_id); + _print_str(id, width, right, true); + } else { snprintf(id, FORMAT_STRING_SIZE, "%u.%u", step->job_id, step->step_id); _print_str(id, width, right, true); @@ -1312,7 +1436,7 @@ int _print_step_nodes(job_step_info_t * step, int width, bool right, { if (step == NULL) { /* Print the Header instead */ char *title = "NODELIST"; - if(params.cluster_flags & CLUSTER_FLAG_BG) + if (params.cluster_flags & CLUSTER_FLAG_BG) title = "MIDPLANELIST"; _print_str(title, width, right, false); @@ -1369,15 +1493,20 @@ static int _filter_job(job_info_t * job) { int filter; ListIterator iterator; - uint32_t *job_id, *user; + uint32_t *user; uint16_t *state_id; char *account, *part, *qos, *name; + squeue_job_step_t *job_step_id; if (params.job_list) { filter = 1; iterator = list_iterator_create(params.job_list); - while 
((job_id = list_next(iterator))) { - if (*job_id == job->job_id) { + while ((job_step_id = list_next(iterator))) { + if (((job_step_id->array_id == (uint16_t) NO_VAL) && + ((job_step_id->job_id == job->array_job_id) || + (job_step_id->job_id == job->job_id))) || + ((job_step_id->array_id == job->array_task_id) && + (job_step_id->job_id == job->array_job_id))) { filter = 0; break; } @@ -1515,15 +1644,22 @@ static int _filter_step(job_step_info_t * step) { int filter; ListIterator iterator; - uint32_t *job_id, *user; + uint32_t *user; char *part; squeue_job_step_t *job_step_id; + if (step->state == JOB_PENDING) + return 1; + if (params.job_list) { filter = 1; iterator = list_iterator_create(params.job_list); - while ((job_id = list_next(iterator))) { - if (*job_id == step->job_id) { + while ((job_step_id = list_next(iterator))) { + if (((job_step_id->array_id == (uint16_t) NO_VAL) && + ((job_step_id->job_id == step->array_job_id) || + (job_step_id->job_id == step->job_id))) || + ((job_step_id->array_id == step->array_task_id) && + (job_step_id->job_id == step->array_job_id))) { filter = 0; break; } @@ -1551,8 +1687,13 @@ static int _filter_step(job_step_info_t * step) filter = 1; iterator = list_iterator_create(params.step_list); while ((job_step_id = list_next(iterator))) { - if ((job_step_id->job_id == step->job_id) && - (job_step_id->step_id == step->step_id)) { + if (job_step_id->step_id != step->step_id) + continue; + if (((job_step_id->array_id == (uint16_t) NO_VAL) && + ((job_step_id->job_id == step->array_job_id) || + (job_step_id->job_id == step->job_id))) || + ((job_step_id->array_id == step->array_task_id) && + (job_step_id->job_id == step->array_job_id))) { filter = 0; break; } diff --git a/src/squeue/print.h b/src/squeue/print.h index 1aa3c9d9fc7611c85cdde222922c89f5d788e077..35240ba566550aa6e080780984c41cee62059277 100644 --- a/src/squeue/print.h +++ b/src/squeue/print.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -80,11 +80,16 @@ int print_step_from_format(job_step_info_t * job_step, List list); int job_format_add_function(List list, int width, bool right_justify, char *suffix, int (*function) (job_info_t *, int, bool, char*)); - +#define job_format_add_array_job_id(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_array_job_id) +#define job_format_add_array_task_id(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_array_task_id) #define job_format_add_batch_host(list,wid,right,suffix) \ job_format_add_function(list,wid,right,suffix,_print_job_batch_host) #define job_format_add_job_id(list,wid,right,suffix) \ job_format_add_function(list,wid,right,suffix,_print_job_job_id) +#define job_format_add_job_id2(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_job_id2) #define job_format_add_partition(list,wid,right,suffix) \ job_format_add_function(list,wid,right,suffix,_print_job_partition) #define job_format_add_prefix(list,wid,right,prefix) \ @@ -184,10 +189,16 @@ int job_format_add_function(List list, int width, bool right_justify, /***************************************************************************** * Job Line Print Functions *****************************************************************************/ +int _print_job_array_job_id(job_info_t * job, int width, bool right_justify, + char* suffix); +int _print_job_array_task_id(job_info_t * job, int width, bool right_justify, + char* suffix); int _print_job_batch_host(job_info_t * job, int width, bool right_justify, char* suffix); int _print_job_job_id(job_info_t * job, int width, bool right_justify, char* suffix); +int 
_print_job_job_id2(job_info_t * job, int width, bool right_justify, + char* suffix); int _print_job_prefix(job_info_t * job, int width, bool right_justify, char* suffix); int _print_job_reason(job_info_t * job, int width, bool right_justify, diff --git a/src/squeue/sort.c b/src/squeue/sort.c index 210753974cb3e7b2240f040bad536d52563dbec6..eed201241c0f52f8ba5dd2b771756a8196e314c6 100644 --- a/src/squeue/sort.c +++ b/src/squeue/sort.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/squeue/squeue.c b/src/squeue/squeue.c index 6f89397ec32beba0dc43d0778c99916e3bfc1948..f8c73a87913ef7ac312dc4f79ab1a11d5b413271 100644 --- a/src/squeue/squeue.c +++ b/src/squeue/squeue.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -99,8 +99,7 @@ main (int argc, char *argv[]) if ( params.iterate ) { printf( "\n"); sleep( params.iterate ); - } - else + } else break; } @@ -117,8 +116,6 @@ static int _multi_cluster(List clusters) int rc = 0, rc2; itr = list_iterator_create(clusters); - if (!itr) - fatal("list_iterator_create: malloc failure"); while ((working_cluster_rec = list_next(itr))) { if (first) first = false; @@ -172,7 +169,6 @@ _print_job ( bool clear_old ) static job_info_msg_t * old_job_ptr = NULL, * new_job_ptr; int error_code; uint16_t show_flags = 0; - uint32_t job_id = 0; if (params.all_flag || (params.job_list && list_count(params.job_list))) show_flags |= SHOW_ALL; @@ -181,22 +177,17 @@ _print_job ( bool clear_old ) if (params.format && strstr(params.format, "C")) show_flags |= SHOW_DETAIL; - if (params.job_list && (list_count(params.job_list) == 1)) { - ListIterator iterator; - uint32_t *job_id_ptr; - iterator = list_iterator_create(params.job_list); - job_id_ptr = list_next(iterator); - job_id = *job_id_ptr; - list_iterator_destroy(iterator); - } - if (old_job_ptr) { if (clear_old) old_job_ptr->last_update = 0; - if (job_id) { + if (params.job_id) { error_code = slurm_load_job( - &new_job_ptr, job_id, + &new_job_ptr, params.job_id, show_flags); + } else if (params.user_id) { + error_code = slurm_load_job_user(&new_job_ptr, + params.user_id, + show_flags); } else { error_code = slurm_load_jobs( old_job_ptr->last_update, @@ -208,8 +199,12 @@ _print_job ( bool clear_old ) error_code = SLURM_SUCCESS; new_job_ptr = old_job_ptr; } - } else if (job_id) { - error_code = slurm_load_job(&new_job_ptr, job_id, show_flags); + } else if (params.job_id) { + error_code = slurm_load_job(&new_job_ptr, params.job_id, + show_flags); + } else if (params.user_id) { + error_code = slurm_load_job_user(&new_job_ptr, params.user_id, + show_flags); } else { error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr, show_flags); 
@@ -220,26 +215,29 @@ _print_job ( bool clear_old ) return SLURM_ERROR; } old_job_ptr = new_job_ptr; - if (job_id) + if (params.job_id || params.job_id) old_job_ptr->last_update = (time_t) 0; if (params.verbose) { - printf ("last_update_time=%ld\n", - (long) new_job_ptr->last_update); + printf ("last_update_time=%ld records=%u\n", + (long) new_job_ptr->last_update, + new_job_ptr->record_count); } if (params.format == NULL) { - if (params.long_list) - params.format = "%.7i %.9P %.8j %.8u %.8T %.10M %.9l " - "%.6D %R"; - else - params.format = "%.7i %.9P %.8j %.8u %.2t %.10M %.6D %R"; + if (params.long_list) { + xstrcat(params.format, + "%.18i %.9P %.8j %.8u %.8T %.10M %.9l %.6D %R"); + } else { + xstrcat(params.format, + "%.18i %.9P %.8j %.8u %.2t %.10M %.6D %R"); + } } if (params.format_list == NULL) parse_format(params.format); - print_jobs_array( new_job_ptr->job_array, new_job_ptr->record_count , - params.format_list ) ; + print_jobs_array(new_job_ptr->job_array, new_job_ptr->record_count, + params.format_list) ; return SLURM_SUCCESS; } @@ -280,12 +278,13 @@ _print_job_steps( bool clear_old ) old_step_ptr = new_step_ptr; if (params.verbose) { - printf ("last_update_time=%ld\n", - (long) new_step_ptr->last_update); + printf ("last_update_time=%ld records=%u\n", + (long) new_step_ptr->last_update, + new_step_ptr->job_step_count); } if (params.format == NULL) - params.format = "%10i %.8j %.9P %.8u %.9M %N"; + params.format = "%.15i %.8j %.9P %.8u %.9M %N"; if (params.format_list == NULL) parse_format(params.format); diff --git a/src/squeue/squeue.h b/src/squeue/squeue.h index 1df5449f6f86c300c4999a596953db53848146c6..e392cc5c59ed3b45eaa2264c078d9b6286f3e28f 100644 --- a/src/squeue/squeue.h +++ b/src/squeue/squeue.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -70,21 +70,21 @@ #include "src/common/slurmdb_defs.h" #include "src/squeue/print.h" -struct job_step { +typedef struct job_step { uint32_t job_id; + uint16_t array_id; uint32_t step_id; -}; -typedef struct job_step squeue_job_step_t; +} squeue_job_step_t; struct squeue_parameters { bool all_flag; + bool array_flag; bool job_flag; bool start_flag; bool step_flag; bool long_list; bool no_header; int iterate; - int max_cpus; int verbose; char* accounts; @@ -102,6 +102,9 @@ struct squeue_parameters { char* steps; char* users; + uint32_t job_id; /* set if request for a single job ID */ + uint32_t user_id; /* set if request for a single user ID */ + List account_list; List format_list; List job_list; diff --git a/src/sreport/Makefile.in b/src/sreport/Makefile.in index 07a6009160ce67a29962f7cf855464c13bb38ead..18ff5a583264f84db615761556229b3c7b28abd1 100644 --- a/src/sreport/Makefile.in +++ b/src/sreport/Makefile.in @@ -59,6 +59,7 @@ subdir = src/sreport DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ 
$(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -158,6 +162,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -178,6 +184,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -187,6 +196,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -194,6 +205,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -228,6 +248,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -255,6 +278,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ 
+RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sreport/assoc_reports.c b/src/sreport/assoc_reports.c index 55bd7c148966b3ff98d562fe4417829cf395f070..b60d5d06fc4d60e8fd965987d7df54bcac52121b 100644 --- a/src/sreport/assoc_reports.c +++ b/src/sreport/assoc_reports.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sreport/assoc_reports.h b/src/sreport/assoc_reports.h index 67fcf6bfe6e9a9368efb26ec1d658d2c48d8c5c9..92ba3679fcddeb9105eace7c813de73bf36e7188 100644 --- a/src/sreport/assoc_reports.h +++ b/src/sreport/assoc_reports.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 2f4a71358fe52fb06c66012446b1b184f00ca9a8..5c645bac8fbcf3e00d4628e9a6b1a775eb2cf819 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -81,7 +81,7 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], int local_cluster_flag = all_clusters_flag; time_t start_time, end_time; - if(!wckey_cond) { + if (!wckey_cond) { error("No wckey_cond given"); return -1; } @@ -89,12 +89,12 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], wckey_cond->with_usage = 1; wckey_cond->with_deleted = 1; - if(!wckey_cond->cluster_list) + if (!wckey_cond->cluster_list) wckey_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -103,28 +103,28 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && !strncasecmp(argv[i], "all_clusters", MAX(command_len, 1))) { local_cluster_flag = 1; - } else if(!end && !strncasecmp(argv[i], "withdeleted", + } else if (!end && !strncasecmp(argv[i], "withdeleted", MAX(command_len, 5))) { wckey_cond->with_deleted = 1; set = 1; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "WCKeys", MAX(command_len, 3))) { - if(!wckey_cond->name_list) + if (!wckey_cond->name_list) wckey_cond->name_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->name_list, + if (slurm_addto_char_list(wckey_cond->name_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "Clusters", MAX(command_len, 3))) { - if(!wckey_cond->cluster_list) + if (!wckey_cond->cluster_list) wckey_cond->cluster_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->cluster_list, + if (slurm_addto_char_list(wckey_cond->cluster_list, argv[i]+end)) set = 1; } else if (!strncasecmp (argv[i], "End", MAX(command_len, 1))) { @@ -132,7 +132,7 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], 
"Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Start", MAX(command_len, 1))) { @@ -140,10 +140,10 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "User", MAX(command_len, 1))) { - if(!wckey_cond->user_list) + if (!wckey_cond->user_list) wckey_cond->user_list = list_create(slurm_destroy_char); - if(slurm_addto_char_list(wckey_cond->user_list, + if (slurm_addto_char_list(wckey_cond->user_list, argv[i]+end)) set = 1; } else { @@ -154,9 +154,9 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], (*start) = i; - if(!local_cluster_flag && !list_count(wckey_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(wckey_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(wckey_cond->cluster_list, temp); } @@ -184,7 +184,7 @@ static int _set_assoc_cond(int *start, int argc, char *argv[], time_t start_time, end_time; int command_len = 0; - if(!assoc_cond) { + if (!assoc_cond) { error("We need an slurmdb_association_cond to call this"); return SLURM_ERROR; } @@ -192,11 +192,11 @@ static int _set_assoc_cond(int *start, int argc, char *argv[], assoc_cond->with_usage = 1; assoc_cond->with_deleted = 1; - if(!assoc_cond->cluster_list) + if (!assoc_cond->cluster_list) assoc_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -205,16 +205,16 @@ static int _set_assoc_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && !strncasecmp(argv[i], "all_clusters", MAX(command_len, 1))) { local_cluster_flag = 1; } else if (!end && !strncasecmp (argv[i], "Tree", MAX(command_len, 4))) { tree_display = 1; - } else if(!end + } else if (!end || !strncasecmp (argv[i], 
"Users", MAX(command_len, 1))) { - if(!assoc_cond->user_list) + if (!assoc_cond->user_list) assoc_cond->user_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->user_list, @@ -224,7 +224,7 @@ static int _set_assoc_cond(int *start, int argc, char *argv[], MAX(command_len, 2)) || !strncasecmp(argv[i], "Acct", MAX(command_len, 4))) { - if(!assoc_cond->acct_list) + if (!assoc_cond->acct_list) assoc_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->acct_list, @@ -240,7 +240,7 @@ static int _set_assoc_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Start", @@ -255,9 +255,9 @@ static int _set_assoc_cond(int *start, int argc, char *argv[], } (*start) = i; - if(!local_cluster_flag && !list_count(assoc_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(assoc_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(assoc_cond->cluster_list, temp); } @@ -285,7 +285,7 @@ static int _set_cluster_cond(int *start, int argc, char *argv[], time_t start_time, end_time; int command_len = 0; - if(!cluster_cond) { + if (!cluster_cond) { error("We need an slurmdb_cluster_cond to call this"); return SLURM_ERROR; } @@ -293,11 +293,11 @@ static int _set_cluster_cond(int *start, int argc, char *argv[], cluster_cond->with_deleted = 1; cluster_cond->with_usage = 1; - if(!cluster_cond->cluster_list) + if (!cluster_cond->cluster_list) cluster_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -306,10 +306,10 @@ static int _set_cluster_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && 
!strncasecmp(argv[i], "all_clusters", MAX(command_len, 1))) { local_cluster_flag = 1; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "Clusters", MAX(command_len, 1))) { slurm_addto_char_list(cluster_cond->cluster_list, @@ -320,7 +320,7 @@ static int _set_cluster_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Start", @@ -335,9 +335,9 @@ static int _set_cluster_cond(int *start, int argc, char *argv[], } (*start) = i; - if(!local_cluster_flag && !list_count(cluster_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(cluster_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(cluster_cond->cluster_list, temp); } @@ -360,14 +360,14 @@ static int _setup_print_fields_list(List format_list) print_field_t *field = NULL; char *object = NULL; - if(!format_list || !list_count(format_list)) { + if (!format_list || !list_count(format_list)) { exit_code=1; fprintf(stderr, " we need a format list " "to set up the print.\n"); return SLURM_ERROR; } - if(!print_fields_list) + if (!print_fields_list) print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -376,7 +376,7 @@ static int _setup_print_fields_list(List format_list) int command_len = 0; int newlen = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -384,125 +384,125 @@ static int _setup_print_fields_list(List format_list) command_len = strlen(object); field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("Accounts", object, MAX(command_len, 2))) { + if (!strncasecmp("Accounts", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_ACCT; field->name = xstrdup("Account"); - if(tree_display) + if (tree_display) field->len = -20; 
else field->len = 15; field->print_routine = print_fields_str; - } else if(!strncasecmp("allocated", object, + } else if (!strncasecmp("allocated", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_ACPU; field->name = xstrdup("Allocated"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 12; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("Cluster", object, + } else if (!strncasecmp("Cluster", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_NAME; field->name = xstrdup("Cluster"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("cpucount", object, + } else if (!strncasecmp("cpucount", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_CPUS; field->name = xstrdup("CPU count"); field->len = 9; field->print_routine = print_fields_uint; - } else if(!strncasecmp("down", object, MAX(command_len, 1))) { + } else if (!strncasecmp("down", object, MAX(command_len, 1))) { field->type = PRINT_CLUSTER_DCPU; field->name = xstrdup("Down"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 18; else field->len = 10; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("idle", object, MAX(command_len, 1))) { + } else if (!strncasecmp("idle", object, MAX(command_len, 1))) { field->type = PRINT_CLUSTER_ICPU; field->name = xstrdup("Idle"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 12; field->print_routine = slurmdb_report_print_time; - } else 
if(!strncasecmp("Login", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Login", object, MAX(command_len, 1))) { field->type = PRINT_CLUSTER_USER_LOGIN; field->name = xstrdup("Login"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("overcommited", object, + } else if (!strncasecmp("overcommited", object, MAX(command_len, 1))) { field->type = PRINT_CLUSTER_OCPU; field->name = xstrdup("Over Comm"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 18; else field->len = 9; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("PlannedDown", object, + } else if (!strncasecmp("PlannedDown", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_PDCPU; field->name = xstrdup("PLND Down"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 18; else field->len = 10; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("Proper", object, MAX(command_len, 2))) { + } else if (!strncasecmp("Proper", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_USER_PROPER; field->name = xstrdup("Proper Name"); field->len = 15; field->print_routine = print_fields_str; - } else if(!strncasecmp("reported", object, + } else if (!strncasecmp("reported", object, MAX(command_len, 3))) { field->type = PRINT_CLUSTER_TOTAL; field->name = xstrdup("Reported"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 12; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("reserved", object, + } else 
if (!strncasecmp("reserved", object, MAX(command_len, 3))) { field->type = PRINT_CLUSTER_RCPU; field->name = xstrdup("Reserved"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 18; else field->len = 9; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("Used", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Used", object, MAX(command_len, 1))) { field->type = PRINT_CLUSTER_AMOUNT_USED; field->name = xstrdup("Used"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 18; else field->len = 10; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("WCKey", object, MAX(command_len, 2))) { + } else if (!strncasecmp("WCKey", object, MAX(command_len, 2))) { field->type = PRINT_CLUSTER_WCKEY; field->name = xstrdup("WCKey"); - if(tree_display) + if (tree_display) field->len = 20; else field->len = 15; @@ -514,7 +514,7 @@ static int _setup_print_fields_list(List format_list) continue; } - if(newlen) + if (newlen) field->len = newlen; list_append(print_fields_list, field); @@ -539,13 +539,13 @@ static List _get_cluster_list(int argc, char *argv[], uint32_t *total_time, _set_cluster_cond(&i, argc, argv, cluster_cond, format_list); cluster_list = slurmdb_clusters_get(db_conn, cluster_cond); - if(!cluster_list) { + if (!cluster_list) { exit_code=1; fprintf(stderr, " Problem with cluster query.\n"); return NULL; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = cluster_cond->usage_start; @@ -606,20 +606,20 @@ extern int cluster_account_by_user(int argc, char *argv[]) _set_assoc_cond(&i, argc, argv, assoc_cond, format_list); - 
if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cluster,Ac,Login,Proper,Used"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_list = + if (!(slurmdb_report_cluster_list = slurmdb_report_cluster_account_by_user(db_conn, assoc_cond))) { exit_code = 1; goto end_it; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = assoc_cond->usage_start; @@ -656,7 +656,7 @@ extern int cluster_account_by_user(int argc, char *argv[]) while((slurmdb_report_cluster = list_next(cluster_itr))) { //list_sort(slurmdb_report_cluster->assoc_list, // (ListCmpF)sort_assoc_dec); - if(tree_list) + if (tree_list) list_flush(tree_list); else tree_list = list_create(slurmdb_destroy_print_tree); @@ -664,17 +664,17 @@ extern int cluster_account_by_user(int argc, char *argv[]) itr = list_iterator_create(slurmdb_report_cluster->assoc_list); while((slurmdb_report_assoc = list_next(itr))) { int curr_inx = 1; - if(!slurmdb_report_assoc->cpu_secs) + if (!slurmdb_report_assoc->cpu_secs) continue; while((field = list_next(itr2))) { char *tmp_char = NULL; struct passwd *pwd = NULL; switch(field->type) { case PRINT_CLUSTER_ACCT: - if(tree_display) { + if (tree_display) { char *local_acct = NULL; char *parent_acct = NULL; - if(slurmdb_report_assoc->user) { + if (slurmdb_report_assoc->user) { local_acct = xstrdup_printf( "|%s", @@ -716,14 +716,14 @@ extern int cluster_account_by_user(int argc, char *argv[]) (curr_inx == field_count)); break; case PRINT_CLUSTER_USER_PROPER: - if(slurmdb_report_assoc->user) + if (slurmdb_report_assoc->user) pwd = getpwnam( slurmdb_report_assoc->user); - if(pwd) { + if (pwd) { tmp_char = strtok(pwd->pw_gecos, ","); - if(!tmp_char) + if (!tmp_char) tmp_char = pwd->pw_gecos; } @@ -756,12 +756,12 @@ extern int cluster_account_by_user(int argc, char *argv[]) end_it: slurmdb_destroy_association_cond(assoc_cond); - 
if(slurmdb_report_cluster_list) { + if (slurmdb_report_cluster_list) { list_destroy(slurmdb_report_cluster_list); slurmdb_report_cluster_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } @@ -792,20 +792,20 @@ extern int cluster_user_by_account(int argc, char *argv[]) _set_assoc_cond(&i, argc, argv, assoc_cond, format_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cluster,Login,Proper,Ac,Used"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_list = + if (!(slurmdb_report_cluster_list = slurmdb_report_cluster_user_by_account(db_conn, assoc_cond))) { exit_code = 1; goto end_it; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = assoc_cond->usage_start; @@ -846,7 +846,7 @@ extern int cluster_user_by_account(int argc, char *argv[]) int curr_inx = 1; /* we don't care if they didn't use any time */ - if(!slurmdb_report_user->cpu_secs) + if (!slurmdb_report_user->cpu_secs) continue; while((field = list_next(itr2))) { @@ -873,11 +873,11 @@ extern int cluster_user_by_account(int argc, char *argv[]) break; case PRINT_CLUSTER_USER_PROPER: pwd = getpwnam(slurmdb_report_user->name); - if(pwd) { + if (pwd) { tmp_char = strtok(pwd->pw_gecos, ","); - if(!tmp_char) + if (!tmp_char) tmp_char = pwd->pw_gecos; } @@ -910,12 +910,12 @@ extern int cluster_user_by_account(int argc, char *argv[]) end_it: slurmdb_destroy_association_cond(assoc_cond); - if(slurmdb_report_cluster_list) { + if (slurmdb_report_cluster_list) { list_destroy(slurmdb_report_cluster_list); slurmdb_report_cluster_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } @@ -946,20 +946,20 @@ extern int cluster_user_by_wckey(int argc, char *argv[]) _set_wckey_cond(&i, argc, argv, wckey_cond, 
format_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cluster,Login,Proper,WCkey,Used"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_list = + if (!(slurmdb_report_cluster_list = slurmdb_report_cluster_user_by_wckey(db_conn, wckey_cond))) { exit_code = 1; goto end_it; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = wckey_cond->usage_start; @@ -1000,7 +1000,7 @@ extern int cluster_user_by_wckey(int argc, char *argv[]) int curr_inx = 1; /* we don't care if they didn't use any time */ - if(!slurmdb_report_user->cpu_secs) + if (!slurmdb_report_user->cpu_secs) continue; while((field = list_next(itr2))) { @@ -1027,11 +1027,11 @@ extern int cluster_user_by_wckey(int argc, char *argv[]) break; case PRINT_CLUSTER_USER_PROPER: pwd = getpwnam(slurmdb_report_user->name); - if(pwd) { + if (pwd) { tmp_char = strtok(pwd->pw_gecos, ","); - if(!tmp_char) + if (!tmp_char) tmp_char = pwd->pw_gecos; } @@ -1064,12 +1064,12 @@ extern int cluster_user_by_wckey(int argc, char *argv[]) end_it: slurmdb_destroy_wckey_cond(wckey_cond); - if(slurmdb_report_cluster_list) { + if (slurmdb_report_cluster_list) { list_destroy(slurmdb_report_cluster_list); slurmdb_report_cluster_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } @@ -1096,12 +1096,12 @@ extern int cluster_utilization(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); - if(!(cluster_list = _get_cluster_list(argc, argv, &total_time, + if (!(cluster_list = _get_cluster_list(argc, argv, &total_time, "Cluster Utilization", format_list))) goto end_it; - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,al,d,planned,i,res,rep"); _setup_print_fields_list(format_list); @@ -1121,7 +1121,7 @@ extern int 
cluster_utilization(int argc, char *argv[]) uint64_t local_total_time = 0; int curr_inx = 1; - if(!cluster->accounting_list + if (!cluster->accounting_list || !list_count(cluster->accounting_list)) continue; @@ -1227,12 +1227,12 @@ extern int cluster_utilization(int argc, char *argv[]) list_iterator_destroy(itr); end_it: - if(cluster_list) { + if (cluster_list) { list_destroy(cluster_list); cluster_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } @@ -1264,20 +1264,20 @@ extern int cluster_wckey_by_user(int argc, char *argv[]) _set_wckey_cond(&i, argc, argv, wckey_cond, format_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cluster,WCKey,Login,Proper,Used"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_list = + if (!(slurmdb_report_cluster_list = slurmdb_report_cluster_wckey_by_user(db_conn, wckey_cond))) { exit_code = 1; goto end_it; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = wckey_cond->usage_start; @@ -1314,7 +1314,7 @@ extern int cluster_wckey_by_user(int argc, char *argv[]) while((slurmdb_report_cluster = list_next(cluster_itr))) { //list_sort(slurmdb_report_cluster->wckey_list, // (ListCmpF)sort_wckey_dec); - if(tree_list) + if (tree_list) list_flush(tree_list); else tree_list = list_create(slurmdb_destroy_print_tree); @@ -1322,7 +1322,7 @@ extern int cluster_wckey_by_user(int argc, char *argv[]) itr = list_iterator_create(slurmdb_report_cluster->assoc_list); while((slurmdb_report_assoc = list_next(itr))) { int curr_inx = 1; - if(!slurmdb_report_assoc->cpu_secs) + if (!slurmdb_report_assoc->cpu_secs) continue; while((field = list_next(itr2))) { char *tmp_char = NULL; @@ -1348,14 +1348,14 @@ extern int cluster_wckey_by_user(int argc, char *argv[]) (curr_inx == field_count)); break; case 
PRINT_CLUSTER_USER_PROPER: - if(slurmdb_report_assoc->user) + if (slurmdb_report_assoc->user) pwd = getpwnam( slurmdb_report_assoc->user); - if(pwd) { + if (pwd) { tmp_char = strtok(pwd->pw_gecos, ","); - if(!tmp_char) + if (!tmp_char) tmp_char = pwd->pw_gecos; } @@ -1388,12 +1388,12 @@ extern int cluster_wckey_by_user(int argc, char *argv[]) end_it: slurmdb_destroy_wckey_cond(wckey_cond); - if(slurmdb_report_cluster_list) { + if (slurmdb_report_cluster_list) { list_destroy(slurmdb_report_cluster_list); slurmdb_report_cluster_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } diff --git a/src/sreport/cluster_reports.h b/src/sreport/cluster_reports.h index dee5ee63854eab1817f017d08757d74a8737918c..8502b27a8c615498b2be4a95ac4146f21f44393f 100644 --- a/src/sreport/cluster_reports.h +++ b/src/sreport/cluster_reports.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sreport/common.c b/src/sreport/common.c index cc9eaef1a298c7754b806fae5b509e94b6192e6b..6507494fabefed6c385dfab6d2b6a1f32c8fa6ac 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -45,16 +45,16 @@ extern void slurmdb_report_print_time(print_field_t *field, uint64_t value, { int abs_len = abs(field->len); - if(!total_time) + if (!total_time) total_time = 1; /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print + if ((value == NO_VAL) || (value == INFINITE)) { + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) ; - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("|"); else printf("%-*s ", abs_len, " "); @@ -106,13 +106,13 @@ extern void slurmdb_report_print_time(print_field_t *field, uint64_t value, break; } - if(print_fields_parsable_print + if (print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING && last) printf("%s", output); - else if(print_fields_parsable_print) + else if (print_fields_parsable_print) printf("%s|", output); - else if(field->len == abs_len) + else if (field->len == abs_len) printf("%*.*s ", abs_len, abs_len, output); else printf("%-*.*s ", abs_len, abs_len, output); @@ -125,12 +125,12 @@ extern int parse_option_end(char *option) { int end = 0; - if(!option) + if (!option) return 0; while(option[end] && option[end] != '=') end++; - if(!option[end]) + if (!option[end]) return 0; end++; return end; @@ -143,7 +143,7 @@ extern char *strip_quotes(char *option, int *increased) int i=0, start=0; char *meat = NULL; - if(!option) + if (!option) return NULL; /* first strip off the ("|')'s */ @@ -152,7 +152,7 @@ extern char *strip_quotes(char *option, int *increased) start = i; while(option[i]) { - if(option[i] == '\"' || option[i] == '\'') { + if (option[i] == '\"' || option[i] == '\'') { end++; break; } @@ -163,7 +163,7 @@ extern char *strip_quotes(char *option, int *increased) meat = xmalloc((i-start)+1); memcpy(meat, option+start, (i-start)); - if(increased) + if (increased) (*increased) += end; return meat; @@ 
-175,30 +175,30 @@ extern void addto_char_list(List char_list, char *names) char *name = NULL, *tmp_char = NULL; ListIterator itr = NULL; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') i++; start = i; while(names[i]) { - if(names[i] == '\"' || names[i] == '\'') + if (names[i] == '\"' || names[i] == '\'') break; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) + if (!tmp_char) list_append(char_list, name); else xfree(name); @@ -209,15 +209,15 @@ extern void addto_char_list(List char_list, char *names) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) + if (!tmp_char) list_append(char_list, name); else xfree(name); @@ -237,14 +237,14 @@ extern int sort_user_dec(slurmdb_report_user_rec_t *user_a, { int diff = 0; - if(sort_flag == SLURMDB_REPORT_SORT_TIME) { + if (sort_flag == SLURMDB_REPORT_SORT_TIME) { if (user_a->cpu_secs > user_b->cpu_secs) return -1; else if (user_a->cpu_secs < user_b->cpu_secs) return 1; } - if(!user_a->name || !user_b->name) + if (!user_a->name || !user_b->name) return 0; diff = strcmp(user_a->name, user_b->name); @@ -270,7 +270,7 @@ extern int sort_cluster_dec(slurmdb_report_cluster_rec_t *cluster_a, { int diff = 0; - if(!cluster_a->name || !cluster_b->name) + if (!cluster_a->name || !cluster_b->name) return 0; diff = strcmp(cluster_a->name, cluster_b->name); @@ -298,7 +298,7 @@ extern int sort_assoc_dec(slurmdb_report_assoc_rec_t *assoc_a, { int 
diff = 0; - if(!assoc_a->acct || !assoc_b->acct) + if (!assoc_a->acct || !assoc_b->acct) return 0; diff = strcmp(assoc_a->acct, assoc_b->acct); @@ -308,9 +308,9 @@ extern int sort_assoc_dec(slurmdb_report_assoc_rec_t *assoc_a, else if (diff < 0) return -1; - if(!assoc_a->user && assoc_b->user) + if (!assoc_a->user && assoc_b->user) return 1; - else if(!assoc_b->user) + else if (!assoc_b->user) return -1; diff = strcmp(assoc_a->user, assoc_b->user); @@ -335,7 +335,7 @@ extern int sort_reservations_dec(slurmdb_reservation_rec_t *resv_a, { int diff = 0; - if(!resv_a->cluster || !resv_b->cluster) + if (!resv_a->cluster || !resv_b->cluster) return 0; diff = strcmp(resv_a->cluster, resv_b->cluster); @@ -345,7 +345,7 @@ extern int sort_reservations_dec(slurmdb_reservation_rec_t *resv_a, else if (diff < 0) return -1; - if(!resv_a->name || !resv_b->name) + if (!resv_a->name || !resv_b->name) return 0; diff = strcmp(resv_a->name, resv_b->name); @@ -355,9 +355,9 @@ extern int sort_reservations_dec(slurmdb_reservation_rec_t *resv_a, else if (diff < 0) return -1; - if(resv_a->time_start < resv_b->time_start) + if (resv_a->time_start < resv_b->time_start) return 1; - else if(resv_a->time_start > resv_b->time_start) + else if (resv_a->time_start > resv_b->time_start) return -1; return 0; @@ -368,7 +368,7 @@ extern int get_uint(char *in_value, uint32_t *out_value, char *type) char *ptr = NULL, *meat = NULL; long num; - if(!(meat = strip_quotes(in_value, NULL))) { + if (!(meat = strip_quotes(in_value, NULL))) { error("Problem with strip_quotes"); return SLURM_ERROR; } diff --git a/src/sreport/job_reports.c b/src/sreport/job_reports.c index c632469531ccb8393e165ef2813c9a982b521cf5..46455200b13c6d5e4ea5d496e54de2746829e35a 100644 --- a/src/sreport/job_reports.c +++ b/src/sreport/job_reports.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -73,7 +73,7 @@ static int _sort_cluster_grouping_dec( { int diff = 0; - if(!cluster_a->cluster || !cluster_b->cluster) + if (!cluster_a->cluster || !cluster_b->cluster) return 0; diff = strcmp(cluster_a->cluster, cluster_b->cluster); @@ -103,7 +103,7 @@ static int _sort_acct_grouping_dec(slurmdb_report_acct_grouping_t *acct_a, char tmp_acct_b[200]; char *wckey_a = NULL, *wckey_b = NULL; - if(!acct_a->acct || !acct_b->acct) + if (!acct_a->acct || !acct_b->acct) return 0; snprintf(tmp_acct_a, sizeof(tmp_acct_a), "%s", acct_a->acct); @@ -121,7 +121,7 @@ static int _sort_acct_grouping_dec(slurmdb_report_acct_grouping_t *acct_a, else if (diff < 0) return -1; - if(!wckey_a || !wckey_b) + if (!wckey_a || !wckey_b) return 0; diff = strcmp(wckey_a, wckey_b); @@ -157,13 +157,13 @@ static int _addto_uid_char_list(List char_list, char *names) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -172,23 +172,23 @@ static int _addto_uid_char_list(List char_list, char *names) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, (i-start)); //info("got %s %d", name, i-start); name = _string_to_uid( name ); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, 
name); count++; } else @@ -197,7 +197,7 @@ static int _addto_uid_char_list(List char_list, char *names) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. It appears you " "have spaces inside your list."); @@ -206,17 +206,17 @@ static int _addto_uid_char_list(List char_list, char *names) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); name = _string_to_uid( name ); while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -238,21 +238,21 @@ static int _set_cond(int *start, int argc, char *argv[], time_t start_time, end_time; int command_len = 0; - if(!job_cond->cluster_list) + if (!job_cond->cluster_list) job_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else command_len=end-1; - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && !strncasecmp(argv[i], "all_clusters", MAX(command_len, 1))) { local_cluster_flag = 1; continue; - } else if(!end && !strncasecmp(argv[i], "PrintJobCount", + } else if (!end && !strncasecmp(argv[i], "PrintJobCount", MAX(command_len, 2))) { print_job_count = 1; continue; @@ -260,7 +260,7 @@ static int _set_cond(int *start, int argc, char *argv[], MAX(command_len, 2))) { flat_view = true; continue; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "Clusters", MAX(command_len, 1))) { slurm_addto_char_list(job_cond->cluster_list, @@ -270,7 +270,7 @@ static int _set_cond(int *start, int argc, char *argv[], MAX(command_len, 2)) || !strncasecmp(argv[i], "Acct", MAX(command_len, 4))) { - if(!job_cond->acct_list) + if (!job_cond->acct_list) job_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->acct_list, @@ 
-278,7 +278,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Associations", MAX(command_len, 2))) { - if(!job_cond->associd_list) + if (!job_cond->associd_list) job_cond->associd_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->associd_list, @@ -289,10 +289,10 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 2))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Gid", MAX(command_len, 2))) { - if(!job_cond->groupid_list) + if (!job_cond->groupid_list) job_cond->groupid_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->groupid_list, @@ -300,9 +300,9 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "grouping", MAX(command_len, 2))) { - if(!strncasecmp(argv[i]+end, "individual", 1)) { + if (!strncasecmp(argv[i]+end, "individual", 1)) { individual_grouping = 1; - } else if(grouping_list) + } else if (grouping_list) slurm_addto_char_list(grouping_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Jobs", @@ -310,7 +310,7 @@ static int _set_cond(int *start, int argc, char *argv[], char *end_char = NULL, *start_char = argv[i]+end; slurmdb_selected_step_t *selected_step = NULL; char *dot = NULL; - if(!job_cond->step_list) + if (!job_cond->step_list) job_cond->step_list = list_create(slurm_destroy_char); @@ -319,7 +319,7 @@ static int _set_cond(int *start, int argc, char *argv[], *end_char = 0; while (isspace(*start_char)) start_char++; /* discard whitespace */ - if(!(int)*start_char) + if (!(int)*start_char) continue; selected_step = xmalloc( sizeof(slurmdb_selected_step_t)); @@ -338,9 +338,9 @@ static int _set_cond(int *start, int argc, char *argv[], } set = 1; - } else if(!strncasecmp (argv[i], "Nodes", + } else if (!strncasecmp (argv[i], "Nodes", MAX(command_len, 
1))) { - if(job_cond->used_nodes) { + if (job_cond->used_nodes) { error("You already specified nodes '%s' " " combine your request into 1 nodes=.", job_cond->used_nodes); @@ -351,7 +351,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Partitions", MAX(command_len, 2))) { - if(!job_cond->partition_list) + if (!job_cond->partition_list) job_cond->partition_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->partition_list, @@ -363,7 +363,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Users", MAX(command_len, 1))) { - if(!job_cond->userid_list) + if (!job_cond->userid_list) job_cond->userid_list = list_create(slurm_destroy_char); _addto_uid_char_list(job_cond->userid_list, @@ -371,7 +371,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Wckeys", MAX(command_len, 2))) { - if(!job_cond->wckey_list) + if (!job_cond->wckey_list) job_cond->wckey_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->wckey_list, @@ -385,9 +385,9 @@ static int _set_cond(int *start, int argc, char *argv[], } (*start) = i; - if(!local_cluster_flag && !list_count(job_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(job_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(job_cond->cluster_list, temp); } @@ -411,14 +411,14 @@ static int _setup_print_fields_list(List format_list) print_field_t *field = NULL; char *object = NULL; - if(!format_list || !list_count(format_list)) { + if (!format_list || !list_count(format_list)) { exit_code=1; fprintf(stderr, " We need a format list to set up the print.\n"); return SLURM_ERROR; } - if(!print_fields_list) + if (!print_fields_list) print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -427,7 +427,7 @@ static int _setup_print_fields_list(List 
format_list) int command_len = 0; int newlen = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -435,49 +435,49 @@ static int _setup_print_fields_list(List format_list) command_len = strlen(object); field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("Account", object, MAX(command_len, 1)) + if (!strncasecmp("Account", object, MAX(command_len, 1)) || !strncasecmp("Acct", object, MAX(command_len, 4))) { field->type = PRINT_JOB_ACCOUNT; field->name = xstrdup("Account"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("Cluster", object, + } else if (!strncasecmp("Cluster", object, MAX(command_len, 2))) { field->type = PRINT_JOB_CLUSTER; field->name = xstrdup("Cluster"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("CpuCount", object, + } else if (!strncasecmp("CpuCount", object, MAX(command_len, 2))) { field->type = PRINT_JOB_CPUS; field->name = xstrdup("CPU Count"); field->len = 9; field->print_routine = print_fields_uint; - } else if(!strncasecmp("Duration", object, + } else if (!strncasecmp("Duration", object, MAX(command_len, 1))) { field->type = PRINT_JOB_DUR; field->name = xstrdup("Duration"); field->len = 12; field->print_routine = print_fields_time; - } else if(!strncasecmp("JobCount", object, + } else if (!strncasecmp("JobCount", object, MAX(command_len, 2))) { field->type = PRINT_JOB_COUNT; field->name = xstrdup("Job Count"); field->len = 9; field->print_routine = print_fields_uint; - } else if(!strncasecmp("NodeCount", object, + } else if (!strncasecmp("NodeCount", object, MAX(command_len, 2))) { field->type = PRINT_JOB_NODES; field->name = xstrdup("Node Count"); field->len = 9; field->print_routine = print_fields_uint; - } else if(!strncasecmp("User", object, + } else if (!strncasecmp("User", object, MAX(command_len, 1))) { field->type = PRINT_JOB_USER; field->name = xstrdup("User"); 
field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("Wckey", object, + } else if (!strncasecmp("Wckey", object, MAX(command_len, 1))) { field->type = PRINT_JOB_WCKEY; field->name = xstrdup("Wckey"); @@ -490,7 +490,7 @@ static int _setup_print_fields_list(List format_list) continue; } - if(newlen) + if (newlen) field->len = newlen; list_append(print_fields_list, field); @@ -510,72 +510,72 @@ static int _setup_grouping_print_fields_list(List grouping_list) uint32_t size = 0; char *tmp_char = NULL; - if(!grouping_list || !list_count(grouping_list)) { + if (!grouping_list || !list_count(grouping_list)) { exit_code=1; fprintf(stderr, " We need a grouping list to " "set up the print.\n"); return SLURM_ERROR; } - if(!grouping_print_fields_list) + if (!grouping_print_fields_list) grouping_print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(grouping_list); while((object = list_next(itr))) { field = xmalloc(sizeof(print_field_t)); size = atoi(object); - if(print_job_count) + if (print_job_count) field->type = PRINT_JOB_COUNT; else field->type = PRINT_JOB_SIZE; - if(individual_grouping) + if (individual_grouping) field->name = xstrdup_printf("%u cpus", size); else field->name = xstrdup_printf("%u-%u cpus", last_size, size-1); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 13; - if(print_job_count) + if (print_job_count) field->print_routine = print_fields_uint; else field->print_routine = slurmdb_report_print_time; last_size = size; last_object = object; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { int newlen = atoi(tmp_char+1); - if(newlen) + if (newlen) field->len = newlen; } list_append(grouping_print_fields_list, field); } list_iterator_destroy(itr); - if(last_size && !individual_grouping) { + 
if (last_size && !individual_grouping) { field = xmalloc(sizeof(print_field_t)); - if(print_job_count) + if (print_job_count) field->type = PRINT_JOB_COUNT; else field->type = PRINT_JOB_SIZE; field->name = xstrdup_printf(">= %u cpus", last_size); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 13; - if(print_job_count) + if (print_job_count) field->print_routine = print_fields_uint; else field->print_routine = slurmdb_report_print_time; - if((tmp_char = strstr(last_object, "\%"))) { + if ((tmp_char = strstr(last_object, "\%"))) { int newlen = atoi(tmp_char+1); - if(newlen) + if (newlen) field->len = newlen; } list_append(grouping_print_fields_list, field); @@ -621,16 +621,16 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) _set_cond(&i, argc, argv, job_cond, format_list, grouping_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,a"); - if(!individual_grouping && !list_count(grouping_list)) + if (!individual_grouping && !list_count(grouping_list)) slurm_addto_char_list(grouping_list, "50,250,500,1000"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_grouping_list = + if (!(slurmdb_report_cluster_grouping_list = slurmdb_report_job_sizes_grouped_by_top_account(db_conn, job_cond, grouping_list, flat_view))) { exit_code = 1; @@ -639,7 +639,7 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) _setup_grouping_print_fields_list(grouping_list); - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = job_cond->usage_start; @@ -652,7 +652,7 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) printf("Job Sizes %s - %s (%d secs)\n", start_char, end_char, 
(int)(job_cond->usage_end - job_cond->usage_start)); - if(print_job_count) + if (print_job_count) printf("Units are in number of jobs ran\n"); else printf("Time reported in %s\n", time_format_string); @@ -757,35 +757,35 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) end_it: xfree(total_field.name); - if(print_job_count) + if (print_job_count) print_job_count = 0; - if(individual_grouping) + if (individual_grouping) individual_grouping = 0; slurmdb_destroy_job_cond(job_cond); - if(grouping_list) { + if (grouping_list) { list_destroy(grouping_list); grouping_list = NULL; } - if(assoc_list) { + if (assoc_list) { list_destroy(assoc_list); assoc_list = NULL; } - if(slurmdb_report_cluster_grouping_list) { + if (slurmdb_report_cluster_grouping_list) { list_destroy(slurmdb_report_cluster_grouping_list); slurmdb_report_cluster_grouping_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } - if(grouping_print_fields_list) { + if (grouping_print_fields_list) { list_destroy(grouping_print_fields_list); grouping_print_fields_list = NULL; } @@ -829,16 +829,16 @@ extern int job_sizes_grouped_by_wckey(int argc, char *argv[]) _set_cond(&i, argc, argv, job_cond, format_list, grouping_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,wc"); - if(!individual_grouping && !list_count(grouping_list)) + if (!individual_grouping && !list_count(grouping_list)) slurm_addto_char_list(grouping_list, "50,250,500,1000"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_grouping_list = + if (!(slurmdb_report_cluster_grouping_list = slurmdb_report_job_sizes_grouped_by_wckey(db_conn, job_cond, grouping_list))) { exit_code = 1; @@ -847,7 +847,7 @@ extern int job_sizes_grouped_by_wckey(int argc, char *argv[]) _setup_grouping_print_fields_list(grouping_list); - if(print_fields_have_header) { + if 
(print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = job_cond->usage_start; @@ -860,7 +860,7 @@ extern int job_sizes_grouped_by_wckey(int argc, char *argv[]) printf("Job Sizes by Wckey %s - %s (%d secs)\n", start_char, end_char, (int)(job_cond->usage_end - job_cond->usage_start)); - if(print_job_count) + if (print_job_count) printf("Units are in number of jobs ran\n"); else printf("Time reported in %s\n", time_format_string); @@ -965,35 +965,35 @@ extern int job_sizes_grouped_by_wckey(int argc, char *argv[]) end_it: xfree(total_field.name); - if(print_job_count) + if (print_job_count) print_job_count = 0; - if(individual_grouping) + if (individual_grouping) individual_grouping = 0; slurmdb_destroy_job_cond(job_cond); - if(grouping_list) { + if (grouping_list) { list_destroy(grouping_list); grouping_list = NULL; } - if(wckey_list) { + if (wckey_list) { list_destroy(wckey_list); wckey_list = NULL; } - if(slurmdb_report_cluster_grouping_list) { + if (slurmdb_report_cluster_grouping_list) { list_destroy(slurmdb_report_cluster_grouping_list); slurmdb_report_cluster_grouping_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } - if(grouping_print_fields_list) { + if (grouping_print_fields_list) { list_destroy(grouping_print_fields_list); grouping_print_fields_list = NULL; } @@ -1038,16 +1038,16 @@ extern int job_sizes_grouped_by_top_acct_and_wckey(int argc, char *argv[]) _set_cond(&i, argc, argv, job_cond, format_list, grouping_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,a%-20"); - if(!individual_grouping && !list_count(grouping_list)) + if (!individual_grouping && !list_count(grouping_list)) slurm_addto_char_list(grouping_list, "50,250,500,1000"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_grouping_list = + if 
(!(slurmdb_report_cluster_grouping_list = slurmdb_report_job_sizes_grouped_by_top_account_then_wckey( db_conn, job_cond, grouping_list, flat_view))) { exit_code = 1; @@ -1056,7 +1056,7 @@ extern int job_sizes_grouped_by_top_acct_and_wckey(int argc, char *argv[]) _setup_grouping_print_fields_list(grouping_list); - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = job_cond->usage_start; @@ -1069,7 +1069,7 @@ extern int job_sizes_grouped_by_top_acct_and_wckey(int argc, char *argv[]) printf("Job Sizes %s - %s (%d secs)\n", start_char, end_char, (int)(job_cond->usage_end - job_cond->usage_start)); - if(print_job_count) + if (print_job_count) printf("Units are in number of jobs ran\n"); else printf("Time reported in %s\n", time_format_string); @@ -1174,35 +1174,35 @@ extern int job_sizes_grouped_by_top_acct_and_wckey(int argc, char *argv[]) end_it: xfree(total_field.name); - if(print_job_count) + if (print_job_count) print_job_count = 0; - if(individual_grouping) + if (individual_grouping) individual_grouping = 0; slurmdb_destroy_job_cond(job_cond); - if(grouping_list) { + if (grouping_list) { list_destroy(grouping_list); grouping_list = NULL; } - if(assoc_list) { + if (assoc_list) { list_destroy(assoc_list); assoc_list = NULL; } - if(slurmdb_report_cluster_grouping_list) { + if (slurmdb_report_cluster_grouping_list) { list_destroy(slurmdb_report_cluster_grouping_list); slurmdb_report_cluster_grouping_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } - if(grouping_print_fields_list) { + if (grouping_print_fields_list) { list_destroy(grouping_print_fields_list); grouping_print_fields_list = NULL; } diff --git a/src/sreport/job_reports.h b/src/sreport/job_reports.h index 39af5ba501a26c4e49d571a04700f58469778955..324dff318aa9bfa287551bbf254ce4f7c5011dae 100644 --- a/src/sreport/job_reports.h +++ b/src/sreport/job_reports.h 
@@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sreport/resv_reports.c b/src/sreport/resv_reports.c index 39205c641940a0cdf310823e461fd1c5692fa621..a5944e33d1216c926060dba14971e75f6e979ac9 100644 --- a/src/sreport/resv_reports.c +++ b/src/sreport/resv_reports.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -79,18 +79,18 @@ static int _set_resv_cond(int *start, int argc, char *argv[], time_t start_time, end_time; int command_len = 0; - if(!resv_cond) { + if (!resv_cond) { error("We need an slurmdb_reservation_cond to call this"); return SLURM_ERROR; } resv_cond->with_usage = 1; - if(!resv_cond->cluster_list) + if (!resv_cond->cluster_list) resv_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else { command_len=end-1; @@ -99,13 +99,13 @@ static int _set_resv_cond(int *start, int argc, char *argv[], } } - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && !strncasecmp(argv[i], "all_clusters", MAX(command_len, 1))) { local_cluster_flag = 1; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "Names", MAX(command_len, 1))) { - if(!resv_cond->name_list) + if (!resv_cond->name_list) resv_cond->name_list = list_create(slurm_destroy_char); slurm_addto_char_list(resv_cond->name_list, @@ -126,19 +126,19 @@ static 
int _set_resv_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 2))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Ids", MAX(command_len, 1))) { - if(!resv_cond->id_list) + if (!resv_cond->id_list) resv_cond->id_list = list_create(slurm_destroy_char); slurm_addto_char_list(resv_cond->id_list, argv[i]+end); set = 1; - } else if(!strncasecmp (argv[i], "Nodes", + } else if (!strncasecmp (argv[i], "Nodes", MAX(command_len, 1))) { - if(resv_cond->nodes) { + if (resv_cond->nodes) { error("You already specified nodes '%s' " " combine your request into 1 nodes=.", resv_cond->nodes); @@ -159,9 +159,9 @@ static int _set_resv_cond(int *start, int argc, char *argv[], } (*start) = i; - if(!local_cluster_flag && !list_count(resv_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(resv_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(resv_cond->cluster_list, temp); } @@ -184,14 +184,14 @@ static int _setup_print_fields_list(List format_list) print_field_t *field = NULL; char *object = NULL; - if(!format_list || !list_count(format_list)) { + if (!format_list || !list_count(format_list)) { exit_code=1; fprintf(stderr, " we need a format list " "to set up the print.\n"); return SLURM_ERROR; } - if(!print_fields_list) + if (!print_fields_list) print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -200,7 +200,7 @@ static int _setup_print_fields_list(List format_list) int command_len = 0; int newlen = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -208,85 +208,85 @@ static int _setup_print_fields_list(List format_list) command_len = strlen(object); field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("allocated", object, + if (!strncasecmp("allocated", 
object, MAX(command_len, 2))) { field->type = PRINT_RESV_ACPU; field->name = xstrdup("Allocated"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 9; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("Associations", + } else if (!strncasecmp("Associations", object, MAX(command_len, 2))) { field->type = PRINT_RESV_ASSOCS; field->name = xstrdup("Associations"); field->len = 15; field->print_routine = print_fields_str; - } else if(!strncasecmp("Cluster", object, + } else if (!strncasecmp("Cluster", object, MAX(command_len, 2))) { field->type = PRINT_RESV_CLUSTER; field->name = xstrdup("Cluster"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("cpucount", object, + } else if (!strncasecmp("cpucount", object, MAX(command_len, 2))) { field->type = PRINT_RESV_CPUS; field->name = xstrdup("CPU count"); field->len = 9; field->print_routine = print_fields_uint; - } else if(!strncasecmp("down", object, MAX(command_len, 1))) { + } else if (!strncasecmp("down", object, MAX(command_len, 1))) { field->type = PRINT_RESV_DCPU; field->name = xstrdup("Down"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else field->len = 9; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("idle", object, MAX(command_len, 1))) { + } else if (!strncasecmp("idle", object, MAX(command_len, 1))) { field->type = PRINT_RESV_ICPU; field->name = xstrdup("Idle"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 20; else 
field->len = 9; field->print_routine = slurmdb_report_print_time; - } else if(!strncasecmp("Nodes", object, MAX(command_len, 2))) { + } else if (!strncasecmp("Nodes", object, MAX(command_len, 2))) { field->type = PRINT_RESV_NODES; field->name = xstrdup("Nodes"); field->len = 15; field->print_routine = print_fields_str; - } else if(!strncasecmp("Name", object, + } else if (!strncasecmp("Name", object, MAX(command_len, 2))) { field->type = PRINT_RESV_NAME; field->name = xstrdup("Name"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("Start", object, + } else if (!strncasecmp("Start", object, MAX(command_len, 2))) { field->type = PRINT_RESV_START; field->name = xstrdup("Start"); field->len = 19; field->print_routine = print_fields_date; - } else if(!strncasecmp("End", object, + } else if (!strncasecmp("End", object, MAX(command_len, 2))) { field->type = PRINT_RESV_END; field->name = xstrdup("End"); field->len = 19; field->print_routine = print_fields_date; - } else if(!strncasecmp("TotalTime", object, + } else if (!strncasecmp("TotalTime", object, MAX(command_len, 2))) { field->type = PRINT_RESV_TIME; field->name = xstrdup("TotalTime"); field->len = 9; field->print_routine = print_fields_time_from_secs; - } else if(!strncasecmp("CPUTime", object, + } else if (!strncasecmp("CPUTime", object, MAX(command_len, 2))) { field->type = PRINT_RESV_CPUTIME; field->name = xstrdup("CPUTime"); @@ -299,7 +299,7 @@ static int _setup_print_fields_list(List format_list) continue; } - if(newlen) + if (newlen) field->len = newlen; list_append(print_fields_list, field); @@ -322,13 +322,13 @@ static List _get_resv_list(int argc, char *argv[], _set_resv_cond(&i, argc, argv, resv_cond, format_list); resv_list = slurmdb_reservations_get(db_conn, resv_cond); - if(!resv_list) { + if (!resv_list) { exit_code=1; fprintf(stderr, " Problem with resv query.\n"); return NULL; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; 
char end_char[20]; time_t my_start = resv_cond->time_start; @@ -380,12 +380,12 @@ extern int resv_utilization(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); - if(!(resv_list = _get_resv_list(argc, argv, + if (!(resv_list = _get_resv_list(argc, argv, "Reservation Utilization", format_list))) goto end_it; - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,name,start,end,al,i"); @@ -412,7 +412,7 @@ extern int resv_utilization(int argc, char *argv[]) */ while((resv = list_next(itr))) { while((tot_resv = list_next(tot_itr))) { - if(tot_resv->id == resv->id) { + if (tot_resv->id == resv->id) { /* get an average of cpus if the reservation changes we will just get an average. @@ -421,14 +421,14 @@ extern int resv_utilization(int argc, char *argv[]) tot_resv->cpus /= 2; tot_resv->alloc_secs += resv->alloc_secs; tot_resv->down_secs += resv->down_secs; - if(resv->time_start < tot_resv->time_start) + if (resv->time_start < tot_resv->time_start) tot_resv->time_start = resv->time_start; - if(resv->time_end > tot_resv->time_end) + if (resv->time_end > tot_resv->time_end) tot_resv->time_end = resv->time_end; break; } } - if(!tot_resv) + if (!tot_resv) list_append(tot_resv_list, resv); list_iterator_reset(tot_itr); @@ -441,7 +441,7 @@ extern int resv_utilization(int argc, char *argv[]) int curr_inx = 1; total_time = tot_resv->time_end - tot_resv->time_start; - if(total_time <= 0) + if (total_time <= 0) continue; total_reported = (uint64_t)(total_time * tot_resv->cpus); @@ -542,15 +542,15 @@ extern int resv_utilization(int argc, char *argv[]) list_iterator_destroy(itr); end_it: - if(resv_list) { + if (resv_list) { list_destroy(resv_list); resv_list = NULL; } - if(tot_resv_list) { + if (tot_resv_list) { list_destroy(tot_resv_list); tot_resv_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } diff --git 
a/src/sreport/resv_reports.h b/src/sreport/resv_reports.h index 8464a95835651c126fa36b0f42bfe264f874ef07..0c8096f2a935f191de99c9a38ebf327df80300a2 100644 --- a/src/sreport/resv_reports.h +++ b/src/sreport/resv_reports.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sreport/sreport.c b/src/sreport/sreport.c index fb8c281c3469fc3ff8c0d942419435c10fd52baa..12f4604f55d47e192fca748dd5b171a817e2875c 100644 --- a/src/sreport/sreport.c +++ b/src/sreport/sreport.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -105,7 +105,7 @@ main (int argc, char *argv[]) /* Check to see if we are running a supported accounting plugin */ temp = slurm_get_accounting_storage_type(); - if(strcasecmp(temp, "accounting_storage/slurmdbd") + if (strcasecmp(temp, "accounting_storage/slurmdbd") && strcasecmp(temp, "accounting_storage/mysql")) { fprintf (stderr, "You are not running a supported " "accounting_storage plugin\n(%s).\n" @@ -180,7 +180,7 @@ main (int argc, char *argv[]) db_conn = slurmdb_connection_get(); - if(errno) { + if (errno) { error("Problem talking to the database: %m"); exit(1); } @@ -213,18 +213,23 @@ static char *_getline(const char *prompt) char buf[4096]; char *line; int len; + printf("%s", prompt); - /* Set "line" here to avoid a warning and discard it later. 
*/ + /* Set "line" here to avoid a warning, discard later */ line = fgets(buf, 4096, stdin); if (line == NULL) return NULL; len = strlen(buf); - if ((len > 0) && (buf[len-1] == '\n')) + if ((len == 0) || (len >= 4096)) + return NULL; + if (buf[len-1] == '\n') buf[len-1] = '\0'; else len++; - line = malloc (len * sizeof(char)); + line = malloc(len * sizeof(char)); + if (!line) + return NULL; return strncpy(line, buf, len); } #endif diff --git a/src/sreport/sreport.h b/src/sreport/sreport.h index 53cede7234f517a64331b4255a8c8eae619fa02b..51bb1ad0a58d8b8680aec5757fdbcb7a7b6ec247 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index cd54a715336eb7547773aa0bc06a773364990743..5abdbe012ab6d3d87e42c0eeaafcf06da6e97360 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -63,40 +63,40 @@ static int _set_cond(int *start, int argc, char *argv[], time_t start_time, end_time; int command_len = 0; - if(!user_cond) { + if (!user_cond) { error("We need an slurmdb_user_cond to call this"); return SLURM_ERROR; } user_cond->with_deleted = 1; user_cond->with_assocs = 1; - if(!user_cond->assoc_cond) { + if (!user_cond->assoc_cond) { user_cond->assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); user_cond->assoc_cond->with_usage = 1; } assoc_cond = user_cond->assoc_cond; - if(!assoc_cond->cluster_list) + if (!assoc_cond->cluster_list) assoc_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if(!end) + if (!end) command_len=strlen(argv[i]); else command_len=end-1; - if(!end && !strncasecmp(argv[i], "all_clusters", + if (!end && !strncasecmp(argv[i], "all_clusters", MAX(command_len, 1))) { local_cluster_flag = 1; continue; } else if (!end && !strncasecmp(argv[i], "group", MAX(command_len, 1))) { group_accts = 1; - } else if(!end + } else if (!end || !strncasecmp (argv[i], "Users", MAX(command_len, 1))) { - if(!assoc_cond->user_list) + if (!assoc_cond->user_list) assoc_cond->user_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->user_list, @@ -106,7 +106,7 @@ static int _set_cond(int *start, int argc, char *argv[], MAX(command_len, 2)) || !strncasecmp(argv[i], "Acct", MAX(command_len, 4))) { - if(!assoc_cond->acct_list) + if (!assoc_cond->acct_list) assoc_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(assoc_cond->acct_list, @@ -122,7 +122,7 @@ static int _set_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { - if(format_list) + if (format_list) slurm_addto_char_list(format_list, argv[i]+end); } else if (!strncasecmp (argv[i], "Start", MAX(command_len, 1))) { @@ -141,9 +141,9 @@ 
static int _set_cond(int *start, int argc, char *argv[], } (*start) = i; - if(!local_cluster_flag && !list_count(assoc_cond->cluster_list)) { + if (!local_cluster_flag && !list_count(assoc_cond->cluster_list)) { char *temp = slurm_get_cluster_name(); - if(temp) + if (temp) list_append(assoc_cond->cluster_list, temp); } @@ -166,14 +166,14 @@ static int _setup_print_fields_list(List format_list) print_field_t *field = NULL; char *object = NULL; - if(!format_list || !list_count(format_list)) { + if (!format_list || !list_count(format_list)) { exit_code=1; fprintf(stderr, " We need a format list to set up the print.\n"); return SLURM_ERROR; } - if(!print_fields_list) + if (!print_fields_list) print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -182,7 +182,7 @@ static int _setup_print_fields_list(List format_list) int command_len = 0; int newlen = 0; - if((tmp_char = strstr(object, "\%"))) { + if ((tmp_char = strstr(object, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } @@ -190,31 +190,31 @@ static int _setup_print_fields_list(List format_list) command_len = strlen(object); field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("Accounts", object, MAX(command_len, 1))) { + if (!strncasecmp("Accounts", object, MAX(command_len, 1))) { field->type = PRINT_USER_ACCT; field->name = xstrdup("Account"); field->len = 15; field->print_routine = print_fields_str; - } else if(!strncasecmp("Cluster", object, + } else if (!strncasecmp("Cluster", object, MAX(command_len, 1))) { field->type = PRINT_USER_CLUSTER; field->name = xstrdup("Cluster"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("Login", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Login", object, MAX(command_len, 1))) { field->type = PRINT_USER_LOGIN; field->name = xstrdup("Login"); field->len = 9; field->print_routine = print_fields_str; - } else if(!strncasecmp("Proper", object, MAX(command_len, 1))) { + } else 
if (!strncasecmp("Proper", object, MAX(command_len, 1))) { field->type = PRINT_USER_PROPER; field->name = xstrdup("Proper Name"); field->len = 15; field->print_routine = print_fields_str; - } else if(!strncasecmp("Used", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Used", object, MAX(command_len, 1))) { field->type = PRINT_USER_USED; field->name = xstrdup("Used"); - if(time_format == SLURMDB_REPORT_TIME_SECS_PER + if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) field->len = 18; @@ -228,7 +228,7 @@ static int _setup_print_fields_list(List format_list) continue; } - if(newlen) + if (newlen) field->len = newlen; list_append(print_fields_list, field); @@ -260,19 +260,19 @@ extern int user_top(int argc, char *argv[]) _set_cond(&i, argc, argv, user_cond, format_list); - if(!list_count(format_list)) + if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,L,P,A,U"); _setup_print_fields_list(format_list); list_destroy(format_list); - if(!(slurmdb_report_cluster_list = + if (!(slurmdb_report_cluster_list = slurmdb_report_user_top_usage(db_conn, user_cond, group_accts))) { exit_code = 1; goto end_it; } - if(print_fields_have_header) { + if (print_fields_have_header) { char start_char[20]; char end_char[20]; time_t my_start = user_cond->assoc_cond->usage_start; @@ -321,7 +321,7 @@ extern int user_top(int argc, char *argv[]) itr3 = list_iterator_create( slurmdb_report_user->acct_list); while((object = list_next(itr3))) { - if(tmp_char) + if (tmp_char) xstrfmtcat(tmp_char, ", %s", object); @@ -350,10 +350,10 @@ extern int user_top(int argc, char *argv[]) break; case PRINT_USER_PROPER: pwd = getpwnam(slurmdb_report_user->name); - if(pwd) { + if (pwd) { tmp_char = strtok(pwd->pw_gecos, ","); - if(!tmp_char) + if (!tmp_char) tmp_char = pwd->pw_gecos; } @@ -380,7 +380,7 @@ extern int user_top(int argc, char *argv[]) list_iterator_reset(itr2); printf("\n"); 
count++; - if(count >= top_limit) + if (count >= top_limit) break; } list_iterator_destroy(itr); @@ -393,12 +393,12 @@ end_it: group_accts = 0; slurmdb_destroy_user_cond(user_cond); - if(slurmdb_report_cluster_list) { + if (slurmdb_report_cluster_list) { list_destroy(slurmdb_report_cluster_list); slurmdb_report_cluster_list = NULL; } - if(print_fields_list) { + if (print_fields_list) { list_destroy(print_fields_list); print_fields_list = NULL; } diff --git a/src/sreport/user_reports.h b/src/sreport/user_reports.h index d373a95d92c38ac9e69167ef299b2139aa806ed9..29848fe2ce98127bbd64e1bff7782b442a0a5d10 100644 --- a/src/sreport/user_reports.h +++ b/src/sreport/user_reports.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/Makefile.in b/src/srun/Makefile.in index 2d52eb1d4f746f32f597687481cf1cb2b22bdf26..026dbdb3ea3a5c47a79c39ff3f7255732a58de4b 100644 --- a/src/srun/Makefile.in +++ b/src/srun/Makefile.in @@ -63,6 +63,7 @@ subdir = src/srun DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -80,6 +81,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -88,11 +90,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 
\ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -198,6 +202,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -218,6 +224,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -227,6 +236,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -234,6 +245,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -268,6 +288,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = 
@OTOOL64@ PACKAGE = @PACKAGE@ @@ -295,6 +318,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/srun/libsrun/Makefile.in b/src/srun/libsrun/Makefile.in index 85a0ef86eb64fb728fe73b6bb9f214567004ff3b..df870c6da937089e35372c859abb6ec16a31ea78 100644 --- a/src/srun/libsrun/Makefile.in +++ b/src/srun/libsrun/Makefile.in @@ -58,6 +58,7 @@ subdir = src/srun/libsrun DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -75,6 +76,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -83,11 +85,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -154,6 +158,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ 
+CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -174,6 +180,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -183,6 +192,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -190,6 +201,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -224,6 +244,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -251,6 +274,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c index 242c8f1e9d6e224dfc5cde858f720eefdf2e85ce..c82afda59d12253340c3a53cefd0e8eab5ca3b46 100644 --- a/src/srun/libsrun/allocate.c +++ b/src/srun/libsrun/allocate.c @@ -8,7 +8,7 @@ * 
CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -184,7 +184,7 @@ static bool _retry(void) static char *msg = "Slurm controller not responding, " "sleeping and retrying."; - if (errno == ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) { + if ((errno == ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) || (errno == EAGAIN)) { if (retries == 0) error("%s", msg); else if (retries < MAX_RETRIES) @@ -207,6 +207,16 @@ static bool _retry(void) slurm_strerror(ESLURM_NODES_BUSY)); error_exit = immediate_exit; return false; + } else if ((errno == SLURM_PROTOCOL_AUTHENTICATION_ERROR) || + (errno == SLURM_UNEXPECTED_MSG_ERROR) || + (errno == SLURM_PROTOCOL_INSANE_MSG_LENGTH)) { + static int external_msg_count = 0; + error("Srun communication socket apparently being written to " + "by something other than Slurm"); + if (external_msg_count++ < 4) + return true; + error("Unable to allocate resources: %m"); + return false; } else { error("Unable to allocate resources: %m"); return false; @@ -300,7 +310,7 @@ static int _blocks_dealloc(void) return -1; } for (i=0; i<new_bg_ptr->record_count; i++) { - if(new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { + if (new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { rc = 1; break; } @@ -379,7 +389,7 @@ allocate_test(void) { int rc; job_desc_msg_t *j = job_desc_msg_create_from_opts(); - if(!j) + if (!j) return SLURM_ERROR; rc = slurm_job_will_run(j); @@ -441,14 +451,44 @@ allocate_nodes(bool handle_signals) * Allocation granted! */ pending_job_id = resp->job_id; + + /* + * These values could be changed while the job was + * pending so overwrite the request with what was + * allocated so we don't have issues when we use them + * in the step creation. 
+ */ + if (opt.pn_min_memory != NO_VAL) + opt.pn_min_memory = (resp->pn_min_memory & + (~MEM_PER_CPU)); + else if (opt.mem_per_cpu != NO_VAL) + opt.mem_per_cpu = (resp->pn_min_memory & + (~MEM_PER_CPU)); + /* + * FIXME: timelimit should probably also be updated + * here since it could also change. + */ + #ifdef HAVE_BG + uint32_t node_cnt = 0; + select_g_select_jobinfo_get(resp->select_jobinfo, + SELECT_JOBDATA_NODE_CNT, + &node_cnt); + if ((node_cnt == 0) || (node_cnt == NO_VAL)) { + opt.min_nodes = node_cnt; + opt.max_nodes = node_cnt; + } /* else we just use the original request */ + if (!_wait_bluegene_block_ready(resp)) { - if(!destroy_job) + if (!destroy_job) error("Something is wrong with the " "boot of the block."); goto relinquish; } #else + opt.min_nodes = resp->node_cnt; + opt.max_nodes = resp->node_cnt; + if (!_wait_nodes_ready(resp)) { if (!destroy_job) error("Something is wrong with the " @@ -468,10 +508,11 @@ allocate_nodes(bool handle_signals) return resp; relinquish: - - slurm_free_resource_allocation_response_msg(resp); - if (!destroy_job) - slurm_complete_job(resp->job_id, 1); + if (resp) { + if (!destroy_job) + slurm_complete_job(resp->job_id, 1); + slurm_free_resource_allocation_response_msg(resp); + } exit(error_exit); return NULL; } @@ -593,8 +634,8 @@ job_desc_msg_create_from_opts (void) j->argv = (char **) xmalloc(sizeof(char *) * 2); j->argv[0] = xstrdup(opt.argv[0]); } - if (opt.acctg_freq >= 0) - j->acctg_freq = opt.acctg_freq; + if (opt.acctg_freq) + j->acctg_freq = xstrdup(opt.acctg_freq); j->reservation = opt.reservation; j->wckey = opt.wckey; @@ -602,7 +643,7 @@ job_desc_msg_create_from_opts (void) /* simplify the job allocation nodelist, * not laying out tasks until step */ - if(j->req_nodes) { + if (j->req_nodes) { hl = hostlist_create(j->req_nodes); xfree(opt.nodelist); opt.nodelist = hostlist_ranged_string_xmalloc(hl); @@ -613,7 +654,7 @@ job_desc_msg_create_from_opts (void) } - if(opt.distribution == SLURM_DIST_ARBITRARY + if 
(opt.distribution == SLURM_DIST_ARBITRARY && !j->req_nodes) { error("With Arbitrary distribution you need to " "specify a nodelist or hostfile with the -w option"); @@ -663,6 +704,8 @@ job_desc_msg_create_from_opts (void) j->licenses = opt.licenses; if (opt.network) j->network = opt.network; + if (opt.profile) + j->profile = opt.profile; if (opt.account) j->account = opt.account; if (opt.comment) diff --git a/src/srun/libsrun/allocate.h b/src/srun/libsrun/allocate.h index a105a991cdd0d119f5a98a90485c8e35313166b2..13aa11f590522872addd35c48bf9c48b6e29533b 100644 --- a/src/srun/libsrun/allocate.h +++ b/src/srun/libsrun/allocate.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/libsrun/debugger.c b/src/srun/libsrun/debugger.c index 9a43d49bbd8aa048c90dc8ae514479b264ac731f..b1444ff42a9a31588ed3b170921c060a1760c37a 100644 --- a/src/srun/libsrun/debugger.c +++ b/src/srun/libsrun/debugger.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/libsrun/fname.c b/src/srun/libsrun/fname.c index 5c05cf8b6e8b35d78a900d24b148e00bd98ab3b3..afb3a80a19bb1054c607530b8a78485e76752f0b 100644 --- a/src/srun/libsrun/fname.c +++ b/src/srun/libsrun/fname.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -69,7 +69,9 @@ fname_create(srun_job_t *job, char *format) unsigned int wid = 0; unsigned long int taskid = 0; fname_t *fname = NULL; - char *p, *q, *name; + char *p, *q, *name, *tmp_env; + uint32_t array_job_id = job->jobid; + uint16_t array_task_id = (uint16_t) NO_VAL; fname = xmalloc(sizeof(*fname)); fname->type = IO_ALL; @@ -125,6 +127,23 @@ fname_create(srun_job_t *job, char *format) } switch (*p) { + case 'a': /* '%a' => array task id */ + tmp_env = getenv("SLURM_ARRAY_TASK_ID"); + if (tmp_env) + array_task_id = atoi(tmp_env); + xmemcat(name, q, p - 1); + xstrfmtcat(name, "%0*d", wid, array_task_id); + q = ++p; + break; + case 'A': /* '%A' => array master job id */ + tmp_env = getenv("SLURM_ARRAY_JOB_ID"); + if (tmp_env) + array_job_id = atoi(tmp_env); + xmemcat(name, q, p - 1); + xstrfmtcat(name, "%0*d", wid, array_job_id); + q = ++p; + break; + case 't': /* '%t' => taskid */ case 'n': /* '%n' => nodeid */ case 'N': /* '%N' => node name */ @@ -152,6 +171,12 @@ fname_create(srun_job_t *job, char *format) q = ++p; break; + case 'u': /* '%u' => username */ + xmemcat(name, q, p - 1); + xstrfmtcat(name, "%s", opt.user); + q = ++p; + break; + default: break; } diff --git a/src/srun/libsrun/fname.h b/src/srun/libsrun/fname.h index d5b07dabaf35b39a7c6e01ebd805d1eff5090de2..afd403e93143db451113715f9e425c4f572ff892 100644 --- a/src/srun/libsrun/fname.h +++ b/src/srun/libsrun/fname.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/libsrun/launch.c b/src/srun/libsrun/launch.c index 596ba83159ee173e6a5c33a6671a87dba4c6599b..b181a8692f11d54019876f94d9dfc6db1cfefebe 100644 --- a/src/srun/libsrun/launch.c +++ b/src/srun/libsrun/launch.c @@ -4,7 +4,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. -* For details, see <http://www.schedmd.com/slurmdocs/>. +* For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -163,7 +163,7 @@ extern int launch_common_create_job_step(srun_job_t *job, bool use_all_cpus, sig_atomic_t *destroy_job) { int i, rc; - unsigned long my_sleep = 0; + unsigned long my_sleep = 0; time_t begin_time; if (!job) { @@ -202,7 +202,9 @@ extern int launch_common_create_job_step(srun_job_t *job, bool use_all_cpus, job->ctx_params.task_count = opt.ntasks; if (opt.mem_per_cpu != NO_VAL) - job->ctx_params.mem_per_cpu = opt.mem_per_cpu; + job->ctx_params.pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU; + else if (opt.pn_min_memory != NO_VAL) + job->ctx_params.pn_min_memory = opt.pn_min_memory; if (opt.gres) job->ctx_params.gres = opt.gres; else @@ -273,12 +275,20 @@ extern int launch_common_create_job_step(srun_job_t *job, bool use_all_cpus, begin_time = time(NULL); for (i=0; (!(*destroy_job)); i++) { + bool blocking_step_create = false; if (opt.no_alloc) { job->step_ctx = slurm_step_ctx_create_no_alloc( &job->ctx_params, job->stepid); - } else + } else if (opt.immediate) { job->step_ctx = slurm_step_ctx_create( &job->ctx_params); + } else { + /* Wait 60 to 70 seconds for response */ + my_sleep = (getpid() % 10) * 1000 + 60000; + job->step_ctx = slurm_step_ctx_create_timeout( + &job->ctx_params, my_sleep); + blocking_step_create = true; + } if (job->step_ctx != NULL) { if (i > 0) info("Job step created"); @@ -311,14 
+321,18 @@ extern int launch_common_create_job_step(srun_job_t *job, bool use_all_cpus, xsignal_unblock(sig_array); for (i = 0; sig_array[i]; i++) xsignal(sig_array[i], signal_function); - - my_sleep = (getpid() % 1000) * 100 + 100000; + if (!blocking_step_create) + my_sleep = (getpid() % 1000) * 100 + 100000; } else { verbose("Job step creation still disabled, retrying"); - my_sleep = MIN((my_sleep * 2), 29000000); + if (!blocking_step_create) + my_sleep *= 2; + } + if (!blocking_step_create) { + /* sleep 0.1 to 29 secs with exponential back-off */ + my_sleep = MIN(my_sleep, 29000000); + usleep(my_sleep); } - /* sleep 0.1 to 29 secs with exponential back-off */ - usleep(my_sleep); if (*destroy_job) { /* cancelled by signal */ break; diff --git a/src/srun/libsrun/launch.h b/src/srun/libsrun/launch.h index 60b1d93af431ce4ad7d024cd2ff231e2c3d17d5b..50b0318280e77fe51b54363eebf54dd5b963171f 100644 --- a/src/srun/libsrun/launch.h +++ b/src/srun/libsrun/launch.h @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/libsrun/multi_prog.c b/src/srun/libsrun/multi_prog.c index 803636a6e27d9b7a6cafb73b88c93aeb545cfbc1..7ee902573ab5c6be8cc831a778611fd30205ef9a 100644 --- a/src/srun/libsrun/multi_prog.c +++ b/src/srun/libsrun/multi_prog.c @@ -14,7 +14,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/libsrun/multi_prog.h b/src/srun/libsrun/multi_prog.h index 5ffb58c4deef1ed0182fb4fb85b8eaff4f146446..655fe9e9bdc7d9aa96aff33b5b08ac7d4e740613 100644 --- a/src/srun/libsrun/multi_prog.h +++ b/src/srun/libsrun/multi_prog.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index a71a1f2d617fcbe220377b9b282038422addd3d1..b731dc3f784fb900051fb11832dd8256720a4856 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -82,6 +82,7 @@ #include "src/common/slurm_protocol_interface.h" #include "src/common/slurm_rlimits_info.h" #include "src/common/slurm_resource_info.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -119,6 +120,7 @@ #define OPT_SIGNAL 0x17 #define OPT_TIME_VAL 0x18 #define OPT_CPU_FREQ 0x19 +#define OPT_PROFILE 0x20 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_HELP 0x100 @@ -188,6 +190,7 @@ #define LONG_OPT_LAUNCHER_OPTS 0x154 #define LONG_OPT_CPU_FREQ 0x155 #define LONG_OPT_LAUNCH_CMD 0x156 +#define LONG_OPT_PROFILE 0x157 extern char **environ; @@ -319,12 +322,12 @@ static bool _valid_node_list(char **node_list_pptr) procs to use then we need exactly this many since we are saying, lay it out this way! Same for max and min nodes. Other than that just read in as many in the hostfile */ - if(opt.ntasks_set) + if (opt.ntasks_set) count = opt.ntasks; - else if(opt.nodes_set) { - if(opt.max_nodes) + else if (opt.nodes_set) { + if (opt.max_nodes) count = opt.max_nodes; - else if(opt.min_nodes) + else if (opt.min_nodes) count = opt.min_nodes; } @@ -474,7 +477,7 @@ static void _opt_default() for (i=0; i<HIGHEST_DIMENSIONS; i++) { opt.conn_type[i] = (uint16_t) NO_VAL; - opt.geometry[i] = (uint16_t) NO_VAL; + opt.geometry[i] = 0; } opt.reboot = false; opt.no_rotate = false; @@ -487,6 +490,7 @@ static void _opt_default() opt.egid = (gid_t) -1; opt.propagate = NULL; /* propagate specific rlimits */ + opt.profile = ACCT_GATHER_PROFILE_NOT_SET; opt.prolog = slurm_get_srun_prolog(); opt.epilog = slurm_get_srun_epilog(); @@ -507,7 +511,7 @@ static void _opt_default() opt.pty = false; opt.open_mode = 0; - opt.acctg_freq = -1; + opt.acctg_freq = NULL; opt.cpu_freq = NO_VAL; opt.reservation = NULL; opt.wckey = NULL; @@ -538,7 +542,7 @@ struct env_vars { env_vars_t 
env_vars[] = { {"SLURMD_DEBUG", OPT_INT, &opt.slurmd_debug, NULL }, {"SLURM_ACCOUNT", OPT_STRING, &opt.account, NULL }, -{"SLURM_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, +{"SLURM_ACCTG_FREQ", OPT_STRING, &opt.acctg_freq, NULL }, {"SLURM_BLRTS_IMAGE", OPT_STRING, &opt.blrtsimage, NULL }, {"SLURM_CHECKPOINT", OPT_STRING, &opt.ckpt_interval_str, NULL }, {"SLURM_CHECKPOINT_DIR",OPT_STRING, &opt.ckpt_dir, NULL }, @@ -581,6 +585,7 @@ env_vars_t env_vars[] = { {"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL }, +{"SLURM_PROFILE", OPT_PROFILE, NULL, NULL }, {"SLURM_PROLOG", OPT_STRING, &opt.prolog, NULL }, {"SLURM_QOS", OPT_STRING, &opt.qos, NULL }, {"SLURM_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL }, @@ -761,7 +766,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_TIME_VAL: opt.wait4switch = time_str2secs(val); break; - + case OPT_PROFILE: + opt.profile = acct_gather_profile_from_string((char *)val); + break; default: /* do nothing */ break; @@ -885,6 +892,7 @@ static void _set_options(const int argc, char **argv) {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, + {"profile", required_argument, 0, LONG_OPT_PROFILE}, {"prolog", required_argument, 0, LONG_OPT_PROLOG}, {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, {"pty", no_argument, 0, LONG_OPT_PTY}, @@ -1501,8 +1509,8 @@ static void _set_options(const int argc, char **argv) } break; case LONG_OPT_ACCTG_FREQ: - opt.acctg_freq = _get_int(optarg, "acctg-freq", - false); + xfree(opt.acctg_freq); + opt.acctg_freq = xstrdup(optarg); break; case LONG_OPT_CPU_FREQ: if (cpu_freq_verify_param(optarg, &opt.cpu_freq)) @@ -1513,6 +1521,9 @@ static void _set_options(const int argc, char **argv) xfree(opt.wckey); opt.wckey = xstrdup(optarg); break; + 
case LONG_OPT_PROFILE: + opt.profile = acct_gather_profile_from_string(optarg); + break; case LONG_OPT_RESERVATION: xfree(opt.reservation); opt.reservation = xstrdup(optarg); @@ -1603,7 +1614,7 @@ static void _opt_args(int argc, char **argv) if (opt.distribution == SLURM_DIST_PLANE && opt.plane_size) { if ((opt.ntasks/opt.plane_size) < opt.min_nodes) { if (((opt.min_nodes-1)*opt.plane_size) >= opt.ntasks) { -#if(0) +#if (0) info("Too few processes ((n/plane_size) %d < N %d) " "and ((N-1)*(plane_size) %d >= n %d)) ", opt.ntasks/opt.plane_size, opt.min_nodes, @@ -1688,7 +1699,7 @@ static void _opt_args(int argc, char **argv) xassert((command_pos + command_args) <= opt.argc); for (i = command_pos; i < opt.argc; i++) { - if (!rest[i-command_pos]) + if (!rest || !rest[i-command_pos]) break; opt.argv[i] = xstrdup(rest[i-command_pos]); } @@ -1767,11 +1778,11 @@ static bool _opt_verify(void) opt.cmd_name = base_name(opt.argv[0]); if (!opt.nodelist) { - if((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) { + if ((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) { /* make sure the file being read in has a / in it to make sure it is a file in the valid_node_list function */ - if(!strstr(opt.nodelist, "/")) { + if (!strstr(opt.nodelist, "/")) { char *add_slash = xstrdup("./"); xstrcat(add_slash, opt.nodelist); xfree(opt.nodelist); @@ -1789,7 +1800,7 @@ static bool _opt_verify(void) } } } else { - if(strstr(opt.nodelist, "/")) + if (strstr(opt.nodelist, "/")) opt.hostfile = xstrdup(opt.nodelist); if (!_valid_node_list(&opt.nodelist)) exit(error_exit); @@ -1797,14 +1808,14 @@ static bool _opt_verify(void) /* set up the proc and node counts based on the arbitrary list of nodes */ - if((opt.distribution == SLURM_DIST_ARBITRARY) + if ((opt.distribution == SLURM_DIST_ARBITRARY) && (!opt.nodes_set || !opt.ntasks_set)) { hostlist_t hl = hostlist_create(opt.nodelist); - if(!opt.ntasks_set) { + if (!opt.ntasks_set) { opt.ntasks_set = true; opt.ntasks = hostlist_count(hl); } - 
if(!opt.nodes_set) { + if (!opt.nodes_set) { opt.nodes_set = true; opt.nodes_set_opt = true; hostlist_uniq(hl); @@ -1817,11 +1828,11 @@ static bool _opt_verify(void) * nodelist but only if it isn't arbitrary since the user has * laid it out how it should be so don't mess with it print an * error later if it doesn't work the way they wanted */ - if(opt.max_nodes && opt.nodelist + if (opt.max_nodes && opt.nodelist && opt.distribution != SLURM_DIST_ARBITRARY) { hostlist_t hl = hostlist_create(opt.nodelist); int count = hostlist_count(hl); - if(count > opt.max_nodes) { + if (count > opt.max_nodes) { int i = 0; error("Required nodelist includes more nodes than " "permitted by max-node count (%d > %d). " @@ -1830,7 +1841,7 @@ static bool _opt_verify(void) count -= opt.max_nodes; while(i<count) { char *name = hostlist_pop(hl); - if(name) + if (name) free(name); else break; @@ -1949,7 +1960,8 @@ static bool _opt_verify(void) opt.ntasks *= opt.cores_per_socket; opt.ntasks *= opt.threads_per_core; opt.ntasks_set = true; - } + } else if (opt.ntasks_per_node > 0) + opt.ntasks *= opt.ntasks_per_node; /* massage the numbers */ if (opt.nodelist) { @@ -1960,7 +1972,7 @@ static bool _opt_verify(void) error("memory allocation failure"); exit(error_exit); } - if(opt.distribution == SLURM_DIST_ARBITRARY + if (opt.distribution == SLURM_DIST_ARBITRARY && !opt.ntasks_set) { opt.ntasks = hostlist_count(hl); opt.ntasks_set = true; @@ -2213,16 +2225,16 @@ static char *print_constraints() { char *buf = xstrdup(""); - if (opt.pn_min_cpus > 0) + if (opt.pn_min_cpus != NO_VAL) xstrfmtcat(buf, "mincpus-per-node=%d ", opt.pn_min_cpus); - if (opt.pn_min_memory > 0) + if (opt.pn_min_memory != NO_VAL) xstrfmtcat(buf, "mem-per-node=%dM ", opt.pn_min_memory); - if (opt.mem_per_cpu > 0) + if (opt.mem_per_cpu != NO_VAL) xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); - if (opt.pn_min_tmp_disk > 0) + if (opt.pn_min_tmp_disk != NO_VAL) xstrfmtcat(buf, "tmp-per-node=%ld ", opt.pn_min_tmp_disk); if 
(opt.contiguous == true) @@ -2244,7 +2256,6 @@ static char *print_constraints() static void _opt_list(void) { - int i; char *str; info("defined options for program `%s'", opt.progname); @@ -2268,13 +2279,15 @@ static void _opt_list(void) opt.jobid_set ? "(set)" : "(default)"); info("partition : %s", opt.partition == NULL ? "default" : opt.partition); + info("profile : `%s'", + acct_gather_profile_to_string(opt.profile)); info("job name : `%s'", opt.job_name); info("reservation : `%s'", opt.reservation); info("wckey : `%s'", opt.wckey); info("switches : %d", opt.req_switch); info("wait-for-switches : %d", opt.wait4switch); info("distribution : %s", format_task_dist_states(opt.distribution)); - if(opt.distribution == SLURM_DIST_PLANE) + if (opt.distribution == SLURM_DIST_PLANE) info("plane size : %u", opt.plane_size); info("cpu_bind : %s", opt.cpu_bind == NULL ? "default" : opt.cpu_bind); @@ -2318,10 +2331,10 @@ static void _opt_list(void) str = print_constraints(); info("constraints : %s", str); xfree(str); - for (i = 0; i < HIGHEST_DIMENSIONS; i++) { - if (opt.conn_type[i] == (uint16_t) NO_VAL) - break; - info("conn_type[%d] : %u", i, opt.conn_type[i]); + if (opt.conn_type[0] != (uint16_t) NO_VAL) { + str = conn_type_string_full(opt.conn_type); + info("conn_type : %s", str); + xfree(str); } str = print_geometry(opt.geometry); info("geometry : %s", str); @@ -2410,6 +2423,7 @@ static void _usage(void) " [--cpu_bind=...] [--mem_bind=...] 
[--network=type]\n" " [--ntasks-per-node=n] [--ntasks-per-socket=n] [reservation=name]\n" " [--ntasks-per-core=n] [--mem-per-cpu=MB] [--preserve-env]\n" +" [--profile=...]\n" #ifdef HAVE_BG /* Blue gene specific options */ #ifdef HAVE_BG_L_P " [--geometry=XxYxZ] " @@ -2484,6 +2498,9 @@ static void _help(void) " -O, --overcommit overcommit resources\n" " -p, --partition=partition partition requested\n" " --prolog=program run \"program\" before launching job step\n" +" --profile=value enable acct_gather_profile for detailed data\n" +" value is all or none or any combination of\n" +" energy, lustre, network or task\n" " --propagate[=rlimits] propagate all [or specific list of] rlimits\n" #ifdef HAVE_PTY_H " --pty run task zero in pseudo terminal\n" diff --git a/src/srun/libsrun/opt.h b/src/srun/libsrun/opt.h index a813c43c1fa3f5c52c7ce39dc8dec5e418afcce7..5cb901a7f51f52e33d66f090e9551cd2b0fbf9ab 100644 --- a/src/srun/libsrun/opt.h +++ b/src/srun/libsrun/opt.h @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -173,6 +173,7 @@ typedef struct srun_options { bool parallel_debug; /* srun controlled by debugger */ bool debugger_test; /* --debugger-test */ bool test_only; /* --test-only */ + uint32_t profile; /* --profile=[all | none} */ char *propagate; /* --propagate[=RLIMIT_CORE,...]*/ char *task_epilog; /* --task-epilog= */ char *task_prolog; /* --task-prolog= */ @@ -219,7 +220,8 @@ typedef struct srun_options { uint16_t mail_type; /* --mail-type */ char *mail_user; /* --mail-user */ uint8_t open_mode; /* --open-mode=append|truncate */ - int acctg_freq; /* --acctg-freq=secs */ + char *acctg_freq; /* --acctg-freq=<type1>=<freq1>,*/ + /* <type2>=<freq2>,... 
*/ uint32_t cpu_freq; /* --cpu_freq=kilohertz */ bool pty; /* --pty */ char *restart_dir; /* --restart */ diff --git a/src/srun/libsrun/srun_job.c b/src/srun/libsrun/srun_job.c index 9837e39f2261a3d38cd096e42e6a2858c5c09c8b..03d8c3c5ed24caa673d2b6d1c9e7572dbec52b7d 100644 --- a/src/srun/libsrun/srun_job.c +++ b/src/srun/libsrun/srun_job.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -305,7 +305,7 @@ job_step_create_allocation(resource_allocation_response_msg_t *resp) * know it is less than the number of nodes * in the allocation */ - if(opt.ntasks_set && (opt.ntasks < ai->nnodes)) + if (opt.ntasks_set && (opt.ntasks < ai->nnodes)) opt.min_nodes = opt.ntasks; else opt.min_nodes = ai->nnodes; @@ -503,7 +503,7 @@ extern void create_srun_job(srun_job_t **p_job, bool *got_alloc, * and a node went DOWN or it used a node count range * specification, was checkpointed from one size and * restarted at a different size */ - error("SLURM_NNODES environment varariable " + error("SLURM_NNODES environment variable " "conflicts with allocated node count (%u!=%u).", opt.min_nodes, resp->node_cnt); /* Modify options to match resource allocation. 
@@ -1214,20 +1214,21 @@ static int _set_rlimit_env(void) return rc; } -/* Set SLURM_SUBMIT_DIR environment variable with current state */ +/* Set SLURM_SUBMIT_DIR and SLURM_SUBMIT_HOST environment variables within + * current state */ static void _set_submit_dir_env(void) { - char buf[MAXPATHLEN + 1]; + char buf[MAXPATHLEN + 1], host[256]; - if ((getcwd(buf, MAXPATHLEN)) == NULL) { + if ((getcwd(buf, MAXPATHLEN)) == NULL) error("getcwd failed: %m"); - exit(error_exit); - } - - if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", buf) < 0) { + else if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", buf) < 0) error("unable to set SLURM_SUBMIT_DIR in environment"); - return; - } + + if ((gethostname(host, sizeof(host)))) + error("gethostname_short failed: %m"); + else if (setenvf(NULL, "SLURM_SUBMIT_HOST", "%s", host) < 0) + error("unable to set SLURM_SUBMIT_HOST in environment"); } /* Set some environment variables with current state */ diff --git a/src/srun/libsrun/srun_job.h b/src/srun/libsrun/srun_job.h index cbe01ec957a53d4e4a0a354118738e2d5cc3342c..628b6c4485eacd6e5cde56b671f6206611be7e25 100644 --- a/src/srun/libsrun/srun_job.h +++ b/src/srun/libsrun/srun_job.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/srun.c b/src/srun/srun.c index 2f069c8d6511a4241f7aee3b0f028fc0443a19a0..e7f4f950c248ef8481264bdba0c34ec629a9c339 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -305,7 +305,7 @@ static char *_uint16_array_to_str(int array_len, const uint16_t *array) char *sep = ","; /* seperator */ char *str = xstrdup(""); - if(array == NULL) + if (array == NULL) return str; for (i = 0; i < array_len; i++) { diff --git a/src/srun/srun.wrapper.c b/src/srun/srun.wrapper.c index 2d2e229e5d5aaf34f7ce96968cb0f9b722ef447c..f80de251ab78180e966766a74042e8a367a95946 100644 --- a/src/srun/srun.wrapper.c +++ b/src/srun/srun.wrapper.c @@ -7,7 +7,7 @@ * Type "<ctrl-a>" to specify arguments for srun * Type "g" to start the program * - * Information for other debuggers may be submitted to slurm-dev@lists.llnl.gov + * Information for other debuggers may be submitted to slurm-dev@schedmd.com */ extern int srun(int argc, char **argv); diff --git a/src/srun/srun_pty.c b/src/srun/srun_pty.c index daebf6302c7861c254ec1ced856abffe31883f5d..7c3602d5956a66a44fdbfcd922f41786c7e794f5 100644 --- a/src/srun/srun_pty.c +++ b/src/srun/srun_pty.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun/srun_pty.h b/src/srun/srun_pty.h index 5dcb1451c31f2a9847ffdb792127223fca05b384..fd4798cc23fe5c65f5a06b53ca13c7a47e3165d3 100644 --- a/src/srun/srun_pty.h +++ b/src/srun/srun_pty.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/srun_cr/Makefile.am b/src/srun_cr/Makefile.am index 4040626ec2f823c3783bc77092109aadf9b1fbad..cd296570db235f50aa18a4961bff7cf3b243022d 100644 --- a/src/srun_cr/Makefile.am +++ b/src/srun_cr/Makefile.am @@ -1,4 +1,4 @@ -# +# Makefile for srun_cr AUTOMAKE_OPTIONS = foreign CLEANFILES = core.* diff --git a/src/srun_cr/Makefile.in b/src/srun_cr/Makefile.in index 4aafb1b2d633523ad7b318a073059c23506de18e..6a73a0e3594d49ef578445f0358ee71bee00b2c5 100644 --- a/src/srun_cr/Makefile.in +++ b/src/srun_cr/Makefile.in @@ -15,7 +15,7 @@ @SET_MAKE@ -# +# Makefile for srun_cr VPATH = @srcdir@ am__make_dryrun = \ @@ -59,6 +59,7 @@ subdir = src/srun_cr DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -162,6 +166,8 @@ 
BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -182,6 +188,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -191,6 +200,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -198,6 +209,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -232,6 +252,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -259,6 +282,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/srun_cr/srun_cr.c b/src/srun_cr/srun_cr.c index 707350a2deb397a930390c61165a2e76a2a2392f..ad2b8f22c4092700432fcb01f27388d28fc2055b 100644 --- 
a/src/srun_cr/srun_cr.c +++ b/src/srun_cr/srun_cr.c @@ -6,7 +6,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -544,9 +544,6 @@ _read_info_from_srun(int srun_fd) xfree(nodelist); nodelist = (char *)xmalloc(len + 1); - if (!nodelist) { - fatal("failed to malloc nodelist: %m"); - } if (read(srun_fd, nodelist, len + 1) != len + 1) { fatal("failed to read nodelist: %m"); } diff --git a/src/sshare/Makefile.in b/src/sshare/Makefile.in index bc5fe751943c49aabeb8ec4d987e0d9bacf69a75..788b11d9907f5a6873aa3e6c7c0f4e1819737dd7 100644 --- a/src/sshare/Makefile.in +++ b/src/sshare/Makefile.in @@ -59,6 +59,7 @@ subdir = src/sshare DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + 
$(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -156,6 +160,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -176,6 +182,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -185,6 +194,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -192,6 +203,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -226,6 +246,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -253,6 +276,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = 
@SEMAPHORE_LIBS@ diff --git a/src/sshare/process.c b/src/sshare/process.c index 760653dd51118426eed92daf14d7b0ffb2996e05..4adbfa1fe2f755e434178021c99d81e5f7446424 100644 --- a/src/sshare/process.c +++ b/src/sshare/process.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -78,8 +78,6 @@ extern int process(shares_response_msg_t *resp) return SLURM_ERROR; format_list = list_create(slurm_destroy_char); - if (!format_list) - fatal("list_create: malloc failure"); if (long_flag) { slurm_addto_char_list(format_list, "A,User,RawShares,NormShares," @@ -92,11 +90,7 @@ extern int process(shares_response_msg_t *resp) } print_fields_list = list_create(destroy_print_field); - if (!print_fields_list) - fatal("list_create: malloc failure"); itr = list_iterator_create(format_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); while ((object = list_next(itr))) { char *tmp_char = NULL; field = xmalloc(sizeof(print_field_t)); @@ -183,8 +177,6 @@ extern int process(shares_response_msg_t *resp) } itr2 = list_iterator_create(print_fields_list); - if (!itr2) - fatal("list_iterator_create: malloc failure"); print_fields_header(print_fields_list); field_count = list_count(print_fields_list); @@ -193,11 +185,7 @@ extern int process(shares_response_msg_t *resp) return SLURM_SUCCESS; tree_list = list_create(slurmdb_destroy_print_tree); - if (!tree_list) - fatal("list_create: malloc failure"); itr = list_iterator_create(resp->assoc_shares_list); - if (!itr) - fatal("list_iterator_create: malloc failure"); while ((share = list_next(itr))) { int curr_inx = 1; char *tmp_char = NULL; diff --git a/src/sshare/sshare.c b/src/sshare/sshare.c index 
451a042a74ebbc7382286546dc0c24172c5bc80a..11eaaf0b5737eef76441b6a5c4afaef3c920c063 100644 --- a/src/sshare/sshare.c +++ b/src/sshare/sshare.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -107,7 +107,7 @@ main (int argc, char *argv[]) all_users = 1; break; case 'A': - if(!req_msg.acct_list) + if (!req_msg.acct_list) req_msg.acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(req_msg.acct_list, optarg); @@ -121,9 +121,9 @@ main (int argc, char *argv[]) long_flag = 1; break; case 'M': - if(clusters) + if (clusters) list_destroy(clusters); - if(!(clusters = + if (!(clusters = slurmdb_get_info_cluster(optarg))) { print_db_notok(optarg, 0); exit(1); @@ -142,12 +142,12 @@ main (int argc, char *argv[]) PRINT_FIELDS_PARSABLE_NO_ENDING; break; case 'u': - if(!strcmp(optarg, "-1")) { + if (!strcmp(optarg, "-1")) { all_users = 1; break; } all_users = 0; - if(!req_msg.user_list) + if (!req_msg.user_list) req_msg.user_list = list_create(slurm_destroy_char); _addto_name_char_list(req_msg.user_list, optarg, 0); @@ -178,13 +178,13 @@ main (int argc, char *argv[]) log_alter(opts, 0, NULL); } - if(all_users) { - if(req_msg.user_list + if (all_users) { + if (req_msg.user_list && list_count(req_msg.user_list)) { list_destroy(req_msg.user_list); req_msg.user_list = NULL; } - if(verbosity) + if (verbosity) fprintf(stderr, "Users requested:\n\t: all\n"); } else if (verbosity && req_msg.user_list && list_count(req_msg.user_list)) { @@ -193,40 +193,40 @@ main (int argc, char *argv[]) while((temp = list_next(itr))) fprintf(stderr, "\t: %s\n", temp); list_iterator_destroy(itr); - } else if(!req_msg.user_list || !list_count(req_msg.user_list)) { + } else if 
(!req_msg.user_list || !list_count(req_msg.user_list)) { struct passwd *pwd = getpwuid(getuid()); - if(!req_msg.user_list) + if (!req_msg.user_list) req_msg.user_list = list_create(slurm_destroy_char); temp = xstrdup(pwd->pw_name); list_append(req_msg.user_list, temp); - if(verbosity) { + if (verbosity) { fprintf(stderr, "Users requested:\n"); fprintf(stderr, "\t: %s\n", temp); } } - if(req_msg.acct_list && list_count(req_msg.acct_list)) { + if (req_msg.acct_list && list_count(req_msg.acct_list)) { fprintf(stderr, "Accounts requested:\n"); ListIterator itr = list_iterator_create(req_msg.acct_list); while((temp = list_next(itr))) fprintf(stderr, "\t: %s\n", temp); list_iterator_destroy(itr); } else { - if(req_msg.acct_list + if (req_msg.acct_list && list_count(req_msg.acct_list)) { list_destroy(req_msg.acct_list); req_msg.acct_list = NULL; } - if(verbosity) + if (verbosity) fprintf(stderr, "Accounts requested:\n\t: all\n"); } error_code = _get_info(&req_msg, &resp_msg); - if(req_msg.acct_list) + if (req_msg.acct_list) list_destroy(req_msg.acct_list); - if(req_msg.user_list) + if (req_msg.user_list) list_destroy(req_msg.user_list); if (error_code) { @@ -287,13 +287,13 @@ static int _addto_name_char_list(List char_list, char *names, bool gid) int quote = 0; int count = 0; - if(!char_list) { + if (!char_list) { error("No list was given to fill in"); return 0; } itr = list_iterator_create(char_list); - if(names) { + if (names) { if (names[i] == '\"' || names[i] == '\'') { quote_c = names[i]; quote = 1; @@ -302,12 +302,12 @@ static int _addto_name_char_list(List char_list, char *names, bool gid) start = i; while(names[i]) { //info("got %d - %d = %d", i, start, i-start); - if(quote && names[i] == quote_c) + if (quote && names[i] == quote_c) break; else if (names[i] == '\"' || names[i] == '\'') names[i] = '`'; - else if(names[i] == ',') { - if((i-start) > 0) { + else if (names[i] == ',') { + if ((i-start) > 0) { name = xmalloc((i-start+1)); memcpy(name, names+start, 
(i-start)); //info("got %s %d", name, i-start); @@ -319,11 +319,11 @@ static int _addto_name_char_list(List char_list, char *names, bool gid) } while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -332,7 +332,7 @@ static int _addto_name_char_list(List char_list, char *names, bool gid) } i++; start = i; - if(!names[i]) { + if (!names[i]) { info("There is a problem with " "your request. It appears you " "have spaces inside your list."); @@ -341,7 +341,7 @@ static int _addto_name_char_list(List char_list, char *names, bool gid) } i++; } - if((i-start) > 0) { + if ((i-start) > 0) { name = xmalloc((i-start)+1); memcpy(name, names+start, (i-start)); @@ -352,11 +352,11 @@ static int _addto_name_char_list(List char_list, char *names, bool gid) } while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) + if (!strcasecmp(tmp_char, name)) break; } - if(!tmp_char) { + if (!tmp_char) { list_append(char_list, name); count++; } else @@ -371,7 +371,7 @@ static char *_convert_to_name(int id, bool gid) { char *name = NULL; - if(gid) { + if (gid) { struct group *grp; if (!(grp=getgrgid(id))) { fprintf(stderr, "Invalid group id: %s\n", name); diff --git a/src/sshare/sshare.h b/src/sshare/sshare.h index d5fe656509a5508d1b9e9ba8119d59251bd6c69e..324050251fca1b54ecfe558b40a92c5df8ffbdf8 100644 --- a/src/sshare/sshare.h +++ b/src/sshare/sshare.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sstat/Makefile.in b/src/sstat/Makefile.in index da06b73d33fcb395a2c7ddf6c88969122de10234..266e8784c0dff48f9cfc376effc7fd151f9902de 100644 --- a/src/sstat/Makefile.in +++ b/src/sstat/Makefile.in @@ -59,6 +59,7 @@ subdir = src/sstat DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -157,6 +161,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -177,6 +183,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ 
GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -186,6 +195,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -193,6 +204,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -227,6 +247,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -254,6 +277,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/sstat/options.c b/src/sstat/options.c index 93909ccfc27e1dc1dbff678b8c37ae30fa2e20b9..d639d466feaa49e2afb7ca8fe14d3c11d36a6526 100644 --- a/src/sstat/options.c +++ b/src/sstat/options.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -54,10 +54,10 @@ void _help_fields_msg(void) for (i = 0; fields[i].name; i++) { if (i & 3) - printf(" "); + printf(" "); else if (i) printf("\n"); - printf("%-13s", fields[i].name); + printf("%-17s", fields[i].name); } printf("\n"); return; @@ -189,7 +189,8 @@ static int _addto_job_list(List job_list, char *names) selected_step->stepid = atoi(dot); } - selected_step->jobid = atoi(name); + selected_step->jobid = + slurm_xlate_job_id(name); xfree(name); while ((curr_step = list_next(itr))) { @@ -233,7 +234,7 @@ static int _addto_job_list(List job_list, char *names) else selected_step->stepid = atoi(dot); } - selected_step->jobid = atoi(name); + selected_step->jobid = slurm_xlate_job_id(name); xfree(name); while ((curr_step = list_next(itr))) { @@ -456,13 +457,6 @@ void parse_command_line(int argc, char **argv) } field_count = list_count(print_fields_list); - if (optind < argc) { - debug2("Error: Unknown arguments:"); - for (i=optind; i<argc; i++) - debug2(" %s", argv[i]); - debug2("\n"); - exit(1); - } return; } diff --git a/src/sstat/print.c b/src/sstat/print.c index 0faea9b83a0aa2312bdb1a266c1c3af1188bbfa6..21defe08887ca6f9683c1b7b84954a858939e56e 100644 --- a/src/sstat/print.c +++ b/src/sstat/print.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -51,7 +51,7 @@ char *_elapsed_time(long secs, long usecs) long subsec = 0; char *str = NULL; - if(secs < 0 || secs == NO_VAL) + if ((secs < 0) || (secs == NO_VAL)) return NULL; @@ -59,7 +59,7 @@ char *_elapsed_time(long secs, long usecs) secs++; usecs -= 1E6; } - if(usecs > 0) { + if (usecs > 0) { /* give me 3 significant digits to tack onto the sec */ subsec = (usecs/1000); } @@ -80,6 +80,20 @@ char *_elapsed_time(long secs, long usecs) return str; } +static void _print_small_double( + char *outbuf, int buf_size, double dub, int units) +{ + if (fuzzy_equal(dub, NO_VAL)) + return; + + if (dub > 1) + convert_num_unit((float)dub, outbuf, buf_size, units); + else if (dub > 0) + snprintf(outbuf, buf_size, "%.2fM", dub); + else + snprintf(outbuf, buf_size, "0"); +} + void print_fields(slurmdb_step_rec_t *step) { print_field_t *field = NULL; @@ -103,9 +117,9 @@ void print_fields(slurmdb_step_rec_t *step) break; case PRINT_ACT_CPUFREQ: - convert_num_unit((float)step->stats.act_cpufreq, - outbuf, sizeof(outbuf), - UNIT_NONE); + convert_num_unit2((float)step->stats.act_cpufreq, + outbuf, sizeof(outbuf), + UNIT_KILO, 1000, false); field->print_routine(field, outbuf, @@ -113,11 +127,34 @@ void print_fields(slurmdb_step_rec_t *step) break; case PRINT_CONSUMED_ENERGY: if (!fuzzy_equal(step->stats.consumed_energy, NO_VAL)) { - convert_num_unit((float) - step->stats.consumed_energy, - outbuf, sizeof(outbuf), - UNIT_NONE); + convert_num_unit2((float) + step->stats.consumed_energy, + outbuf, sizeof(outbuf), + UNIT_NONE, 1000, false); } + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; + case PRINT_CONSUMED_ENERGY_RAW: + field->print_routine(field, + step->stats.consumed_energy, + (curr_inx == field_count)); + break; + case PRINT_AVEDISKREAD: + _print_small_double(outbuf, sizeof(outbuf), + step->stats.disk_read_ave, + UNIT_MEGA); + + field->print_routine(field, + outbuf, + 
(curr_inx == field_count)); + break; + case PRINT_AVEDISKWRITE: + _print_small_double(outbuf, sizeof(outbuf), + step->stats.disk_write_ave, + UNIT_MEGA); + field->print_routine(field, outbuf, (curr_inx == field_count)); @@ -162,6 +199,52 @@ void print_fields(slurmdb_step_rec_t *step) outbuf, (curr_inx == field_count)); break; + case PRINT_MAXDISKREAD: + _print_small_double(outbuf, sizeof(outbuf), + step->stats.disk_read_max, + UNIT_MEGA); + + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; + case PRINT_MAXDISKREADNODE: + tmp_char = find_hostname( + step->stats.disk_read_max_nodeid, + step->nodes); + field->print_routine(field, + tmp_char, + (curr_inx == field_count)); + xfree(tmp_char); + break; + case PRINT_MAXDISKREADTASK: + field->print_routine(field, + step->stats.disk_read_max_taskid, + (curr_inx == field_count)); + break; + case PRINT_MAXDISKWRITE: + _print_small_double(outbuf, sizeof(outbuf), + step->stats.disk_write_max, + UNIT_MEGA); + + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; + case PRINT_MAXDISKWRITENODE: + tmp_char = find_hostname( + step->stats.disk_write_max_nodeid, + step->nodes); + field->print_routine(field, + tmp_char, + (curr_inx == field_count)); + xfree(tmp_char); + break; + case PRINT_MAXDISKWRITETASK: + field->print_routine(field, + step->stats.disk_write_max_taskid, + (curr_inx == field_count)); + break; case PRINT_MAXPAGES: convert_num_unit((float)step->stats.pages_max, outbuf, sizeof(outbuf), @@ -268,6 +351,21 @@ void print_fields(slurmdb_step_rec_t *step) step->pid_str, (curr_inx == field_count)); break; + case PRINT_REQ_CPUFREQ: + if (step->req_cpufreq == CPU_FREQ_LOW) + snprintf(outbuf, sizeof(outbuf), "Low"); + else if (step->req_cpufreq == CPU_FREQ_MEDIUM) + snprintf(outbuf, sizeof(outbuf), "Medium"); + else if (step->req_cpufreq == CPU_FREQ_HIGH) + snprintf(outbuf, sizeof(outbuf), "High"); + else if (!fuzzy_equal(step->req_cpufreq, NO_VAL)) + 
convert_num_unit2((float)step->req_cpufreq, + outbuf, sizeof(outbuf), + UNIT_KILO, 1000, false); + field->print_routine(field, + outbuf, + (curr_inx == field_count)); + break; default: break; } diff --git a/src/sstat/process.c b/src/sstat/process.c index 5627cf46752fc320f95de91e7b469435a0cbbf05..aa4b0aa349b5ec0ded768bda3198377b28d02359 100644 --- a/src/sstat/process.c +++ b/src/sstat/process.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -95,4 +95,16 @@ void aggregate_stats(slurmdb_stats_t *dest, slurmdb_stats_t *from) else dest->consumed_energy += from->consumed_energy; dest->act_cpufreq += from->act_cpufreq; + if (dest->disk_read_max < from->disk_read_max) { + dest->disk_read_max = from->disk_read_max; + dest->disk_read_max_nodeid = from->disk_read_max_nodeid; + dest->disk_read_max_taskid = from->disk_read_max_taskid; + } + dest->disk_read_ave += from->disk_read_ave; + if (dest->disk_write_max < from->disk_write_max) { + dest->disk_write_max = from->disk_write_max; + dest->disk_write_max_nodeid = from->disk_write_max_nodeid; + dest->disk_write_max_taskid = from->disk_write_max_taskid; + } + dest->disk_write_ave += from->disk_write_ave; } diff --git a/src/sstat/sstat.c b/src/sstat/sstat.c index 287855e2478b8ae2b638a3ff1c5adc0ad918127b..643347373049d519573459b7d6799c9a19379c18 100644 --- a/src/sstat/sstat.c +++ b/src/sstat/sstat.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -44,7 +44,8 @@ void *_stat_thread(void *args); int _sstat_query(slurm_step_layout_t *step_layout, uint32_t job_id, uint32_t step_id); int _process_results(); -int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist); +int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist, + uint32_t req_cpufreq); /* * Globals @@ -53,11 +54,21 @@ sstat_parameters_t params; print_field_t fields[] = { {10, "AveCPU", print_fields_str, PRINT_AVECPU}, {10, "AveCPUFreq", print_fields_str, PRINT_ACT_CPUFREQ}, + {12, "AveDiskRead", print_fields_str, PRINT_AVEDISKREAD}, + {12, "AveDiskWrite", print_fields_str, PRINT_AVEDISKWRITE}, {10, "AvePages", print_fields_str, PRINT_AVEPAGES}, {10, "AveRSS", print_fields_str, PRINT_AVERSS}, {10, "AveVMSize", print_fields_str, PRINT_AVEVSIZE}, {14, "ConsumedEnergy", print_fields_str, PRINT_CONSUMED_ENERGY}, + {17, "ConsumedEnergyRaw", print_fields_double, + PRINT_CONSUMED_ENERGY_RAW}, {-12, "JobID", print_fields_str, PRINT_JOBID}, + {12, "MaxDiskRead", print_fields_str, PRINT_MAXDISKREAD}, + {15, "MaxDiskReadNode", print_fields_str, PRINT_MAXDISKREADNODE}, + {15, "MaxDiskReadTask", print_fields_uint, PRINT_MAXDISKREADTASK}, + {12, "MaxDiskWrite", print_fields_str, PRINT_MAXDISKWRITE}, + {16, "MaxDiskWriteNode", print_fields_str, PRINT_MAXDISKWRITENODE}, + {16, "MaxDiskWriteTask", print_fields_uint, PRINT_MAXDISKWRITETASK}, {8, "MaxPages", print_fields_str, PRINT_MAXPAGES}, {12, "MaxPagesNode", print_fields_str, PRINT_MAXPAGESNODE}, {14, "MaxPagesTask", print_fields_uint, PRINT_MAXPAGESTASK}, @@ -73,6 +84,7 @@ print_field_t fields[] = { {20, "Nodelist", print_fields_str, PRINT_NODELIST}, {8, "NTasks", print_fields_uint, PRINT_NTASKS}, {20, "Pids", print_fields_str, PRINT_PIDS}, + {10, "ReqCPUFreq", print_fields_str, PRINT_REQ_CPUFREQ}, {0, NULL, NULL, 0}}; List jobs = NULL; @@ -82,7 +94,8 @@ List print_fields_list = NULL; ListIterator print_fields_itr = NULL; int 
field_count = 0; -int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist) +int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist, + uint32_t req_cpufreq) { job_step_stat_response_msg_t *step_stat_response = NULL; int rc = SLURM_SUCCESS; @@ -95,7 +108,7 @@ int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist) debug("requesting info for job %u.%u", jobid, stepid); if ((rc = slurm_job_step_stat(jobid, stepid, nodelist, - &step_stat_response)) != SLURM_SUCCESS) { + &step_stat_response)) != SLURM_SUCCESS) { if (rc == ESLURM_INVALID_JOB_ID) { debug("job step %u.%u has already completed", jobid, stepid); @@ -119,6 +132,7 @@ int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist) step.job_ptr = &job; step.stepid = stepid; step.nodes = xmalloc(BUF_SIZE); + step.req_cpufreq = req_cpufreq; step.stepname = NULL; step.state = JOB_RUNNING; @@ -164,6 +178,8 @@ int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist) step.stats.rss_ave /= (double)tot_tasks; step.stats.vsize_ave /= (double)tot_tasks; step.stats.pages_ave /= (double)tot_tasks; + step.stats.disk_read_ave /= (double)tot_tasks; + step.stats.disk_write_ave /= (double)tot_tasks; step.stats.act_cpufreq /= (double)tot_tasks; step.ntasks = tot_tasks; } @@ -176,7 +192,8 @@ int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist) int main(int argc, char **argv) { ListIterator itr = NULL; - uint32_t stepid = 0; + uint32_t req_cpufreq = NO_VAL; + uint32_t stepid = NO_VAL; slurmdb_selected_step_t *selected_step = NULL; #ifdef HAVE_CRAY @@ -208,7 +225,7 @@ int main(int argc, char **argv) hostlist_t hl; if (slurm_load_job( - &job_ptr, selected_step->jobid, SHOW_ALL)) { + &job_ptr, selected_step->jobid, SHOW_ALL)) { error("couldn't get info for job %u", selected_step->jobid); continue; @@ -226,8 +243,8 @@ int main(int argc, char **argv) job_step_info_response_msg_t *step_ptr = NULL; int i = 0; if (slurm_get_job_steps( - 0, selected_step->jobid, NO_VAL, - &step_ptr, SHOW_ALL)) { + 0, 
selected_step->jobid, NO_VAL, + &step_ptr, SHOW_ALL)) { error("couldn't get steps for job %u", selected_step->jobid); continue; @@ -236,7 +253,8 @@ int main(int argc, char **argv) for (i = 0; i < step_ptr->job_step_count; i++) { _do_stat(selected_step->jobid, step_ptr->job_steps[i].step_id, - step_ptr->job_steps[i].nodes); + step_ptr->job_steps[i].nodes, + step_ptr->job_steps[i].cpu_freq); } slurm_free_job_step_info_response_msg(step_ptr); continue; @@ -244,8 +262,8 @@ int main(int argc, char **argv) /* get the first running step to query against. */ job_step_info_response_msg_t *step_ptr = NULL; if (slurm_get_job_steps( - 0, selected_step->jobid, NO_VAL, - &step_ptr, SHOW_ALL)) { + 0, selected_step->jobid, NO_VAL, + &step_ptr, SHOW_ALL)) { error("couldn't get steps for job %u", selected_step->jobid); continue; @@ -257,8 +275,9 @@ int main(int argc, char **argv) } stepid = step_ptr->job_steps[0].step_id; nodelist = step_ptr->job_steps[0].nodes; + req_cpufreq = step_ptr->job_steps[0].cpu_freq; } - _do_stat(selected_step->jobid, stepid, nodelist); + _do_stat(selected_step->jobid, stepid, nodelist, req_cpufreq); if (free_nodelist && nodelist) free(nodelist); } diff --git a/src/sstat/sstat.h b/src/sstat/sstat.h index bc03aac6ea3f88b57584a648a6e725e21192e023..335e05645f2a3819163881250d05886d5894ae5f 100644 --- a/src/sstat/sstat.h +++ b/src/sstat/sstat.h @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -66,7 +66,7 @@ #define ERROR 2 -#define STAT_FIELDS "jobid,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,avecpufreq,consumedenergy" +#define STAT_FIELDS "jobid,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,avecpufreq,reqcpufreq,consumedenergy,maxdiskread,maxdiskreadnode,maxdiskreadtask,avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite" #define STAT_FIELDS_PID "jobid,nodelist,pids" @@ -83,11 +83,20 @@ typedef enum { PRINT_ACT_CPUFREQ, PRINT_AVECPU, + PRINT_AVEDISKREAD, + PRINT_AVEDISKWRITE, PRINT_AVEPAGES, PRINT_AVERSS, PRINT_AVEVSIZE, PRINT_CONSUMED_ENERGY, + PRINT_CONSUMED_ENERGY_RAW, PRINT_JOBID, + PRINT_MAXDISKREAD, + PRINT_MAXDISKREADNODE, + PRINT_MAXDISKREADTASK, + PRINT_MAXDISKWRITE, + PRINT_MAXDISKWRITENODE, + PRINT_MAXDISKWRITETASK, PRINT_MAXPAGES, PRINT_MAXPAGESNODE, PRINT_MAXPAGESTASK, @@ -103,6 +112,7 @@ typedef enum { PRINT_NODELIST, PRINT_NTASKS, PRINT_PIDS, + PRINT_REQ_CPUFREQ, } sstat_print_types_t; diff --git a/src/strigger/Makefile.in b/src/strigger/Makefile.in index b2e9b1a8650a02544f547e356ecd2fc6e0abb1b0..5d8a45156fb84e56e032a72036b0c91f5e7706be 100644 --- a/src/strigger/Makefile.in +++ b/src/strigger/Makefile.in @@ -62,6 +62,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -79,6 +80,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ 
$(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -87,11 +89,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -160,6 +164,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -180,6 +186,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -189,6 +198,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -196,6 +207,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = 
@HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -230,6 +250,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -257,6 +280,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/src/strigger/opts.c b/src/strigger/opts.c index 45507134a5952012d270ebd3d5e05a098342dbd3..6adb5b3391d156936e9c83f35e6bc0ac8a580197 100644 --- a/src/strigger/opts.c +++ b/src/strigger/opts.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/strigger/strigger.c b/src/strigger/strigger.c index 68b19f05171342474449879ae1169129607aa144..f590725150ac794d8d08e22fcb959f3b6630c869 100644 --- a/src/strigger/strigger.c +++ b/src/strigger/strigger.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/strigger/strigger.h b/src/strigger/strigger.h index 004a9a6d557a31b2ac32816ab8feecd7ddcc142a..bd35993c7a4495d823366253cd0e02a7a5431f12 100644 --- a/src/strigger/strigger.h +++ b/src/strigger/strigger.h @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/Makefile.in b/src/sview/Makefile.in index 7e867fe77680608f402010dbb87866ef4aa5b4e5..c52c02b7fde17a110afafe3aa1fd685c5e41b2bb 100644 --- a/src/sview/Makefile.in +++ b/src/sview/Makefile.in @@ -63,6 +63,7 @@ DIST_COMMON = $(am__noinst_HEADERS_DIST) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -80,6 +81,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -88,11 +90,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + 
$(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -185,6 +189,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -205,6 +211,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -214,6 +223,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -221,6 +232,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -255,6 +275,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -282,6 +305,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = 
@SEMAPHORE_LIBS@ diff --git a/src/sview/admin_info.c b/src/sview/admin_info.c index 55149e2d9cc7b0154cd871967559a3652d9372cd..77697542e79e9eb774086d1a9651a6f64703da01 100644 --- a/src/sview/admin_info.c +++ b/src/sview/admin_info.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/block_info.c b/src/sview/block_info.c index beaba9b1cc6c3dfe472894267191b570eb6b59a0..4c19b92c828c785d5b179958c2339ee57af675d5 100644 --- a/src/sview/block_info.c +++ b/src/sview/block_info.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/common.c b/src/sview/common.c index 91ac4d83dc8aeffd8a8617f02f928ed86315bd20..3f3495876a4acd1c0836c9d02ea392c91f57a586 100644 --- a/src/sview/common.c +++ b/src/sview/common.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -63,9 +63,10 @@ static int _find_node_inx (char *name) for (i = 0; i < g_node_info_ptr->record_count; i++) { - if (!strcmp (name, g_node_info_ptr->node_array[i].name)) { + if (g_node_info_ptr->node_array[i].name == NULL) + continue; /* Future node or other anomaly */ + if (!strcmp(name, g_node_info_ptr->node_array[i].name)) return i; - } } return -1; @@ -740,9 +741,6 @@ extern int build_nodes_bitmap(char *node_names, bitstr_t **bitmap) g_print("...............build_nodes_bitmap............%s\n", node_names); my_bitmap = (bitstr_t *) bit_alloc(g_node_info_ptr->record_count); - if (!my_bitmap) { - fatal("bit_alloc malloc failure"); - } *bitmap = my_bitmap; if (!node_names) { diff --git a/src/sview/config_info.c b/src/sview/config_info.c index df6302f462a546a5889ee94a7aa8810b4dd74ade..38c0ba2cfbc2fdbd1fe93633db5937bd39d264d6 100644 --- a/src/sview/config_info.c +++ b/src/sview/config_info.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/defaults.c b/src/sview/defaults.c index 16d17e6681088d0ba4569a5e1cabd8aca1a838a2..0a98b095804def35d17be3dfa9e225f180cd917f 100644 --- a/src/sview/defaults.c +++ b/src/sview/defaults.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/front_end_info.c b/src/sview/front_end_info.c index 30c41fdc2d86b919e544dfc50b1f4299f14d04c9..01b00b161447e08d85928a597b6f857fd0762e76 100644 --- a/src/sview/front_end_info.c +++ b/src/sview/front_end_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -56,9 +56,13 @@ enum { /* These need to be in alpha order (except POS and CNT) */ enum { SORTID_POS = POS_LOC, + SORTID_ALLOW_GROUPS, + SORTID_ALLOW_USERS, SORTID_BOOT_TIME, SORTID_COLOR, SORTID_COLOR_INX, + SORTID_DENY_GROUPS, + SORTID_DENY_USERS, SORTID_NAME, SORTID_NODE_INX, SORTID_REASON, @@ -93,6 +97,14 @@ static display_data_t display_data_front_end[] = { refresh_front_end, create_model_front_end, admin_edit_front_end}, {G_TYPE_STRING, SORTID_REASON, "Reason", FALSE, EDIT_NONE, refresh_front_end, create_model_front_end, admin_edit_front_end}, + {G_TYPE_STRING, SORTID_ALLOW_GROUPS, "Allow Groups", FALSE, EDIT_NONE, + refresh_front_end, create_model_front_end, admin_edit_front_end}, + {G_TYPE_STRING, SORTID_ALLOW_USERS, "Allow Users", FALSE, EDIT_NONE, + refresh_front_end, create_model_front_end, admin_edit_front_end}, + {G_TYPE_STRING, SORTID_DENY_GROUPS, "Deny Groups", FALSE, EDIT_NONE, + refresh_front_end, create_model_front_end, admin_edit_front_end}, + {G_TYPE_STRING, SORTID_DENY_USERS, "Deny Users", FALSE, EDIT_NONE, + refresh_front_end, create_model_front_end, admin_edit_front_end}, {G_TYPE_INT, SORTID_COLOR_INX, NULL, FALSE, EDIT_NONE, refresh_front_end, create_model_front_end, admin_edit_front_end}, {G_TYPE_POINTER, SORTID_NODE_INX, NULL, FALSE, EDIT_NONE, @@ -139,6 +151,7 @@ static void 
_layout_front_end_record(GtkTreeView *treeview, sview_front_end_info, int update) { + GtkTreeIter iter; front_end_info_t *front_end_ptr = sview_front_end_info->front_end_ptr; @@ -172,6 +185,27 @@ static void _layout_front_end_record(GtkTreeView *treeview, find_col_name(display_data_front_end, SORTID_REASON), sview_front_end_info->reason); + + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_front_end, + SORTID_ALLOW_GROUPS), + front_end_ptr->allow_groups); + + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_front_end, + SORTID_ALLOW_USERS), + front_end_ptr->allow_users); + + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_front_end, + SORTID_DENY_GROUPS), + front_end_ptr->deny_groups); + + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_front_end, + SORTID_DENY_USERS), + front_end_ptr->deny_users); + } static void _update_front_end_record( @@ -185,12 +219,16 @@ static void _update_front_end_record( /* Combining these records provides a slight performance improvement */ gtk_tree_store_set(treestore, iter, + SORTID_ALLOW_GROUPS, front_end_ptr->allow_groups, + SORTID_ALLOW_USERS, front_end_ptr->allow_users, SORTID_BOOT_TIME, sview_front_end_info_ptr->boot_time, SORTID_COLOR, sview_colors[ sview_front_end_info_ptr->color_inx], SORTID_COLOR_INX, sview_front_end_info_ptr->color_inx, + SORTID_DENY_GROUPS, front_end_ptr->deny_groups, + SORTID_DENY_USERS, front_end_ptr->deny_users, SORTID_NODE_INX, sview_front_end_info_ptr->node_inx, SORTID_NAME, front_end_ptr->name, diff --git a/src/sview/grid.c b/src/sview/grid.c index 652dd9a752d7961e7fdd461a20e6a811307b0516..1a5dc175724cffa250ad9d020c9857683af1a883 100644 --- a/src/sview/grid.c +++ b/src/sview/grid.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/gthread_helper.c b/src/sview/gthread_helper.c index 94b49d206f4db914bdee4c50fcaa001630046f7b..5a01e3309281dfd3a74804d77c71b49ea34eef57 100644 --- a/src/sview/gthread_helper.c +++ b/src/sview/gthread_helper.c @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/gthread_helper.h b/src/sview/gthread_helper.h index 987321f72c20591ff5a9caf52d609cb2bea32f0e..a357bb865a3f9717d10b26e401708f7647f8a67d 100644 --- a/src/sview/gthread_helper.h +++ b/src/sview/gthread_helper.h @@ -5,7 +5,7 @@ * Written by Danny Auble <da@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/job_info.c b/src/sview/job_info.c index e863b9bf5808769c3898dbcea3955b77e75acca2..2513e798d1d29d6f129a51df17d8cb7b30a782c2 100644 --- a/src/sview/job_info.c +++ b/src/sview/job_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -97,6 +97,8 @@ enum { SORTID_ALLOC, SORTID_ALLOC_NODE, SORTID_ALPS_RESV_ID, + SORTID_ARRAY_JOB_ID, + SORTID_ARRAY_TASK_ID, SORTID_BATCH, SORTID_BATCH_HOST, SORTID_BLOCK, @@ -207,6 +209,10 @@ static display_data_t display_data_job[] = { EDIT_MODEL, refresh_job, create_model_job, admin_edit_job}, {G_TYPE_INT, SORTID_ALLOC, NULL, FALSE, EDIT_NONE, refresh_job, create_model_job, admin_edit_job}, + {G_TYPE_STRING, SORTID_ARRAY_JOB_ID, "Array_Job_ID", FALSE, EDIT_NONE, + refresh_job, create_model_job, admin_edit_job}, + {G_TYPE_STRING, SORTID_ARRAY_TASK_ID, "Array_Task_ID", FALSE, EDIT_NONE, + refresh_job, create_model_job, admin_edit_job}, {G_TYPE_STRING, SORTID_PARTITION, "Partition", FALSE, EDIT_TEXTBOX, refresh_job, create_model_job, admin_edit_job}, #ifdef HAVE_BG @@ -1221,6 +1227,22 @@ static int _get_node_cnt(job_info_t * job) if (IS_JOB_PENDING(job)) { node_cnt = _nodes_in_list(job->req_nodes); node_cnt = MAX(node_cnt, job->num_nodes); + if ((node_cnt == 1) && (job->num_cpus > 1) + && job->ntasks_per_node + && (job->ntasks_per_node != (uint16_t) NO_VAL)) { + int num_tasks = job->num_cpus; + if (job->cpus_per_task != (uint16_t) NO_VAL) + num_tasks /= job->cpus_per_task; + node_cnt = (num_tasks + 1) / job->ntasks_per_node; + if (node_cnt > num_tasks) + node_cnt = num_tasks; + else if (!node_cnt) + node_cnt = 1; + } else if (sview_max_cpus) { + int round = job->num_cpus + sview_max_cpus - 1; + round /= sview_max_cpus; /* round up */ + node_cnt = MAX(node_cnt, round); + } } else node_cnt = _nodes_in_list(job->nodes); return node_cnt; @@ -1317,6 +1339,29 @@ static void _layout_job_record(GtkTreeView *treeview, tmp_char, sizeof(tmp_char), SELECT_PRINT_DATA)); + + if (job_ptr->array_task_id != (uint16_t) NO_VAL) { + snprintf(tmp_char, sizeof(tmp_char), "%u", + job_ptr->array_job_id); + } else { + snprintf(tmp_char, sizeof(tmp_char), "N/A"); + } + add_display_treestore_line(update, treestore, 
&iter, + find_col_name(display_data_job, + SORTID_ARRAY_JOB_ID), + tmp_char); + + if (job_ptr->array_task_id != (uint16_t) NO_VAL) { + snprintf(tmp_char, sizeof(tmp_char), "%u", + job_ptr->array_task_id); + } else { + snprintf(tmp_char, sizeof(tmp_char), "N/A"); + } + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_job, + SORTID_ARRAY_TASK_ID), + tmp_char); + if (job_ptr->batch_flag) sprintf(tmp_char, "yes"); else @@ -1705,6 +1750,15 @@ static void _layout_job_record(GtkTreeView *treeview, SORTID_STATE), job_state_string(job_ptr->job_state)); + secs2time_str((time_t) job_ptr->wait4switch, time_buf, + sizeof(time_buf)); + snprintf(tmp_char, sizeof(tmp_char), "%u@%s", + job_ptr->req_switch, time_buf); + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_job, + SORTID_SWITCHES), + tmp_char); + slurm_make_time_str((time_t *)&job_ptr->eligible_time, tmp_char, sizeof(tmp_char)); add_display_treestore_line(update, treestore, &iter, @@ -1777,15 +1831,6 @@ static void _layout_job_record(GtkTreeView *treeview, SORTID_TIME_SUSPEND), tmp_char); - secs2time_str((time_t) job_ptr->wait4switch, time_buf, - sizeof(time_buf)); - snprintf(tmp_char, sizeof(tmp_char), "%u@%s\n", - job_ptr->req_switch, time_buf); - add_display_treestore_line(update, treestore, &iter, - find_col_name(display_data_job, - SORTID_SWITCHES), - tmp_char); - uname = uid_to_string((uid_t)job_ptr->user_id); add_display_treestore_line(update, treestore, &iter, find_col_name(display_data_job, @@ -1806,6 +1851,7 @@ static void _layout_job_record(GtkTreeView *treeview, static void _update_job_record(sview_job_info_t *sview_job_info_ptr, GtkTreeStore *treestore) { + char tmp_array_job_id[20], tmp_array_task_id[20]; char tmp_time_run[40], tmp_time_resize[40], tmp_time_submit[40]; char tmp_time_elig[40], tmp_time_start[40], tmp_time_end[40]; char tmp_time_sus[40], tmp_time_limit[40], tmp_alloc_node[40]; @@ -1829,6 +1875,16 @@ static void 
_update_job_record(sview_job_info_t *sview_job_info_ptr, snprintf(tmp_alloc_node, sizeof(tmp_alloc_node), "%s:%u", job_ptr->alloc_node, job_ptr->alloc_sid); + if (job_ptr->array_task_id != (uint16_t) NO_VAL) { + snprintf(tmp_array_job_id, sizeof(tmp_array_job_id), "%u", + job_ptr->array_job_id); + snprintf(tmp_array_task_id, sizeof(tmp_array_task_id), "%u", + job_ptr->array_task_id); + } else { + snprintf(tmp_array_job_id, sizeof(tmp_array_job_id), "N/A"); + snprintf(tmp_array_task_id, sizeof(tmp_array_task_id), "N/A"); + } + if (job_ptr->batch_flag) tmp_batch = "yes"; else @@ -2020,6 +2076,8 @@ static void _update_job_record(sview_job_info_t *sview_job_info_ptr, SORTID_ACCOUNT, job_ptr->account, SORTID_ALLOC, 1, SORTID_ALLOC_NODE, tmp_alloc_node, + SORTID_ARRAY_JOB_ID, tmp_array_job_id, + SORTID_ARRAY_TASK_ID,tmp_array_task_id, SORTID_BATCH, tmp_batch, SORTID_BATCH_HOST, job_ptr->batch_host, SORTID_COLOR, @@ -2629,7 +2687,7 @@ static List _create_job_info_list(job_info_msg_t *job_info_ptr, return NULL; } - for(i=0; i<job_info_ptr->record_count; i++) { + for (i=0; i<job_info_ptr->record_count; i++) { job_ptr = &(job_info_ptr->job_array[i]); sview_job_info_ptr = xmalloc(sizeof(sview_job_info_t)); @@ -2655,9 +2713,10 @@ static List _create_job_info_list(job_info_msg_t *job_info_ptr, just keep tacking on ionodes to a node list */ sview_job_info_ptr->nodes = xstrdup(tmp_char); - } else + } else { sview_job_info_ptr->nodes = xstrdup(job_ptr->nodes); + } xfree(ionodes); } else sview_job_info_ptr->nodes = xstrdup(job_ptr->nodes); @@ -2665,9 +2724,10 @@ static List _create_job_info_list(job_info_msg_t *job_info_ptr, if (!sview_job_info_ptr->node_cnt) sview_job_info_ptr->node_cnt = _get_node_cnt(job_ptr); - for(j = 0; j < step_info_ptr->job_step_count; j++) { + for (j = 0; j < step_info_ptr->job_step_count; j++) { step_ptr = &(step_info_ptr->job_steps[j]); - if (step_ptr->job_id == job_ptr->job_id) { + if ((step_ptr->job_id == job_ptr->job_id) && + (step_ptr->state == 
JOB_RUNNING)) { list_append(sview_job_info_ptr->step_list, step_ptr); } @@ -3231,7 +3291,7 @@ extern void get_info_job(GtkTable *table, display_data_t *display_data) if (display_widget) gtk_widget_destroy(display_widget); view = ERROR_VIEW; - sprintf(error_char, "slurm_load_job: %s", + sprintf(error_char, "slurm_load_jobs: %s", slurm_strerror(slurm_get_errno())); label = gtk_label_new(error_char); gtk_table_attach_defaults(table, label, 0, 1, 0, 1); @@ -3325,7 +3385,7 @@ display_it: if (!display_widget) { tree_view = create_treeview(local_display_data, &grid_button_list); - /*set multiple capability here*/ + /* set multiple capability here */ gtk_tree_selection_set_mode( gtk_tree_view_get_selection(tree_view), GTK_SELECTION_MULTIPLE); @@ -3333,9 +3393,8 @@ display_it: gtk_table_attach_defaults(GTK_TABLE(table), GTK_WIDGET(tree_view), 0, 1, 0, 1); - /* since this function sets the model of the tree_view - to the treestore we don't really care about - the return value */ + /* since this function sets the model of the tree_view to the + * treestore we don't really care about the return value */ create_treestore(tree_view, display_data_job, SORTID_CNT, SORTID_TIME_SUBMIT, SORTID_COLOR); } @@ -3402,7 +3461,7 @@ extern void specific_info_job(popup_info_t *popup_win) if (spec_info->display_widget) gtk_widget_destroy(spec_info->display_widget); - sprintf(error_char, "slurm_load_job: %s", + sprintf(error_char, "slurm_load_jobs: %s", slurm_strerror(slurm_get_errno())); label = gtk_label_new(error_char); gtk_table_attach_defaults(GTK_TABLE(popup_win->table), @@ -3457,9 +3516,8 @@ display_it: gtk_table_attach_defaults(popup_win->table, GTK_WIDGET(tree_view), 0, 1, 0, 1); - /* since this function sets the model of the tree_view - to the treestore we don't really care about - the return value */ + /* since this function sets the model of the tree_view to the + * treestore we don't really care about the return value */ create_treestore(tree_view, popup_win->display_data, 
SORTID_CNT, SORTID_TIME_SUBMIT, SORTID_COLOR); } @@ -3474,7 +3532,7 @@ display_it: /* just linking to another list, don't free the inside, just - the list */ + * the list */ send_info_list = list_create(NULL); itr = list_iterator_create(info_list); i = -1; @@ -3497,19 +3555,21 @@ display_it: if (job_ptr->job_id != search_info->int_data) { continue; } +#if 0 /* if we ever want to display just the step - this is where we would do it */ -/* if (spec_info->search_info->int_data2 */ -/* == NO_VAL) */ -/* break; */ -/* step_itr = list_iterator_create( */ -/* sview_job_info->step_list); */ -/* while ((step_ptr = list_next(itr))) { */ -/* if (step_ptr->step_id */ -/* == spec_info->search_info->int_data2) { */ -/* break; */ -/* } */ -/* } */ + * this is where we would do it */ + if (spec_info->search_info->int_data2 + == NO_VAL) + break; + step_itr = list_iterator_create( + sview_job_info->step_list); + while ((step_ptr = list_next(itr))) { + if (step_ptr->step_id == + spec_info->search_info->int_data2) { + break; + } + } +#endif break; case SEARCH_JOB_USER: if (!search_info->gchar_data) @@ -4135,7 +4195,7 @@ extern void admin_job(GtkTreeModel *model, GtkTreeIter *iter, GTK_STOCK_YES, GTK_RESPONSE_OK); gtk_window_set_default(GTK_WINDOW(popup), label); gtk_dialog_add_button(GTK_DIALOG(popup), - GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL); + GTK_STOCK_NO, GTK_RESPONSE_CANCEL); if (stepid != NO_VAL) snprintf(tmp_char, sizeof(tmp_char), diff --git a/src/sview/node_info.c b/src/sview/node_info.c index b467095e0c19dcc041d79ca933b4d58b4a1a52ce..78d61c1beaa428caac77b5d756f96ddf30183734 100644 --- a/src/sview/node_info.c +++ b/src/sview/node_info.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -42,12 +42,15 @@ int g_node_scaling = 1; enum { SORTID_POS = POS_LOC, SORTID_ARCH, + SORTID_BASE_WATTS, SORTID_BOARDS, SORTID_BOOT_TIME, SORTID_COLOR, SORTID_CPUS, SORTID_CPU_LOAD, + SORTID_CONSUMED_ENERGY, SORTID_CORES, + SORTID_CURRENT_WATTS, SORTID_ERR_CPUS, SORTID_FEATURES, SORTID_GRES, @@ -65,6 +68,7 @@ enum { SORTID_DISK, /* TmpDisk */ SORTID_UPDATED, SORTID_USED_CPUS, + SORTID_USED_MEMORY, SORTID_WEIGHT, SORTID_CNT }; @@ -120,6 +124,8 @@ static display_data_t display_data_node[] = { EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, {G_TYPE_STRING, SORTID_MEMORY, "Real Memory", FALSE, EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, + {G_TYPE_STRING, SORTID_USED_MEMORY, "Used Memory", FALSE, + EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, {G_TYPE_STRING, SORTID_DISK, "Tmp Disk", FALSE, EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, {G_TYPE_INT, SORTID_WEIGHT,"Weight", FALSE, EDIT_NONE, refresh_node, @@ -138,6 +144,12 @@ static display_data_t display_data_node[] = { EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, {G_TYPE_STRING, SORTID_REASON, "Reason", FALSE, EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, + {G_TYPE_STRING, SORTID_BASE_WATTS, "Lowest Joules", FALSE, + EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, + {G_TYPE_STRING, SORTID_CONSUMED_ENERGY,"Consumed Joules", FALSE, + EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, + {G_TYPE_STRING, SORTID_CURRENT_WATTS, "Current Watts", FALSE, + EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, {G_TYPE_INT, SORTID_UPDATED, NULL, FALSE, EDIT_NONE, refresh_node, create_model_node, admin_edit_node}, {G_TYPE_NONE, -1, NULL, FALSE, EDIT_NONE} @@ -180,9 +192,13 @@ static void _layout_node_record(GtkTreeView *treeview, int update) { char tmp_cnt[50]; + char tmp_current_watts[50]; + char tmp_base_watts[50]; + char 
tmp_consumed_energy[50]; char *upper = NULL, *lower = NULL; GtkTreeIter iter; uint16_t err_cpus = 0, alloc_cpus = 0; + uint32_t alloc_memory = 0; node_info_t *node_ptr = sview_node_info_ptr->node_ptr; int idle_cpus = node_ptr->cpus; GtkTreeStore *treestore = @@ -308,6 +324,16 @@ static void _layout_node_record(GtkTreeView *treeview, SORTID_MEMORY), tmp_cnt); + select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, + SELECT_NODEDATA_MEM_ALLOC, + NODE_STATE_ALLOCATED, + &alloc_memory); + snprintf(tmp_cnt, sizeof(tmp_cnt), "%uM", alloc_memory); + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_node, + SORTID_USED_MEMORY), + tmp_cnt); + convert_num_unit((float)node_ptr->tmp_disk, tmp_cnt, sizeof(tmp_cnt), UNIT_MEGA); add_display_treestore_line(update, treestore, &iter, @@ -343,6 +369,36 @@ static void _layout_node_record(GtkTreeView *treeview, find_col_name(display_data_node, SORTID_REASON), sview_node_info_ptr->reason); + + if (node_ptr->energy->current_watts == NO_VAL) { + snprintf(tmp_current_watts, sizeof(tmp_current_watts), + "N/A"); + snprintf(tmp_base_watts, sizeof(tmp_base_watts), + "N/A"); + snprintf(tmp_consumed_energy, sizeof(tmp_consumed_energy), + "N/A"); + } else { + snprintf(tmp_current_watts, sizeof(tmp_current_watts), + "%u", node_ptr->energy->current_watts); + snprintf(tmp_base_watts, sizeof(tmp_base_watts), + "%u", node_ptr->energy->base_watts); + snprintf(tmp_consumed_energy, sizeof(tmp_consumed_energy), + "%u", node_ptr->energy->consumed_energy); + } + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_node, + SORTID_BASE_WATTS), + tmp_base_watts); + + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_node, + SORTID_CONSUMED_ENERGY), + tmp_consumed_energy); + + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_node, + SORTID_CURRENT_WATTS), + tmp_current_watts); return; } @@ -350,11 +406,31 @@ static void 
_update_node_record(sview_node_info_t *sview_node_info_ptr, GtkTreeStore *treestore) { uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus; + uint32_t alloc_memory; node_info_t *node_ptr = sview_node_info_ptr->node_ptr; - char tmp_disk[20], tmp_cpus[20], tmp_err_cpus[20], - tmp_mem[20], tmp_used_cpus[20], tmp_cpu_load[20]; + char tmp_disk[20], tmp_cpus[20], tmp_err_cpus[20]; + char tmp_mem[20], tmp_used_memory[20]; + char tmp_used_cpus[20], tmp_cpu_load[20]; + char tmp_current_watts[50], tmp_base_watts[50], tmp_consumed_energy[50]; char *tmp_state_lower, *tmp_state_upper; + + if (node_ptr->energy->current_watts == NO_VAL) { + snprintf(tmp_current_watts, sizeof(tmp_current_watts), + "N/A"); + snprintf(tmp_base_watts, sizeof(tmp_base_watts), + "N/A"); + snprintf(tmp_consumed_energy, sizeof(tmp_consumed_energy), + "N/A"); + } else { + snprintf(tmp_current_watts, sizeof(tmp_current_watts), + "%u ", node_ptr->energy->current_watts); + snprintf(tmp_base_watts, sizeof(tmp_base_watts), + "%u", node_ptr->energy->base_watts); + snprintf(tmp_consumed_energy, sizeof(tmp_consumed_energy), + "%u", node_ptr->energy->consumed_energy); + } + if (node_ptr->cpu_load == NO_VAL) { strcpy(tmp_cpu_load, "N/A"); } else { @@ -378,6 +454,15 @@ static void _update_node_record(sview_node_info_t *sview_node_info_ptr, alloc_cpus *= cpus_per_node; } idle_cpus = node_ptr->cpus - alloc_cpus; + convert_num_unit((float)alloc_cpus, tmp_used_cpus, + sizeof(tmp_used_cpus), UNIT_NONE); + + select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, + SELECT_NODEDATA_MEM_ALLOC, + NODE_STATE_ALLOCATED, + &alloc_memory); + snprintf(tmp_used_memory, sizeof(tmp_used_memory), "%uM", alloc_memory); + convert_num_unit((float)alloc_cpus, tmp_used_cpus, sizeof(tmp_used_cpus), UNIT_NONE); @@ -412,13 +497,16 @@ static void _update_node_record(sview_node_info_t *sview_node_info_ptr, /* Combining these records provides a slight performance improvement */ gtk_tree_store_set(treestore, &sview_node_info_ptr->iter_ptr, 
SORTID_ARCH, node_ptr->arch, + SORTID_BASE_WATTS,tmp_base_watts, SORTID_BOARDS, node_ptr->boards, SORTID_BOOT_TIME, sview_node_info_ptr->boot_time, SORTID_COLOR, sview_colors[sview_node_info_ptr->pos % sview_colors_cnt], + SORTID_CONSUMED_ENERGY, tmp_consumed_energy, SORTID_CORES, node_ptr->cores, SORTID_CPUS, tmp_cpus, + SORTID_CURRENT_WATTS, tmp_current_watts, SORTID_CPU_LOAD, tmp_cpu_load, SORTID_DISK, tmp_disk, SORTID_ERR_CPUS, tmp_err_cpus, @@ -437,6 +525,7 @@ static void _update_node_record(sview_node_info_t *sview_node_info_ptr, SORTID_STATE_NUM, node_ptr->node_state, SORTID_THREADS, node_ptr->threads, SORTID_USED_CPUS, tmp_used_cpus, + SORTID_USED_MEMORY, tmp_used_memory, SORTID_WEIGHT, node_ptr->weight, SORTID_UPDATED, 1, -1); @@ -829,12 +918,12 @@ extern int get_new_info_node(node_info_msg_t **info_ptr, int force) g_node_scaling = new_node_ptr->node_scaling; cpus_per_node = new_node_ptr->node_array[0].cpus / g_node_scaling; - + sview_max_cpus = 0; for (i=0; i<g_node_info_ptr->record_count; i++) { node_ptr = &(g_node_info_ptr->node_array[i]); if (!node_ptr->name || (node_ptr->name[0] == '\0')) continue; /* bad node */ - + sview_max_cpus = MAX(sview_max_cpus, node_ptr->cpus); idle_cpus = node_ptr->cpus; slurm_get_select_nodeinfo( diff --git a/src/sview/part_info.c b/src/sview/part_info.c index 4a45c6ec22bbd593ec9abc39a1502e46957381a0..2f8715bef3b790dac02249c640880d31a37082c1 100644 --- a/src/sview/part_info.c +++ b/src/sview/part_info.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -84,6 +84,7 @@ enum { SORTID_GROUPS, SORTID_HIDDEN, SORTID_JOB_SIZE, + SORTID_MAX_CPUS_PER_NODE, SORTID_MEM, #ifdef HAVE_BG SORTID_NODELIST, @@ -153,6 +154,8 @@ static display_data_t display_data_part[] = { EDIT_TEXTBOX, refresh_part, create_model_part, admin_edit_part}, {G_TYPE_STRING, SORTID_NODES_MAX, "Nodes Max", FALSE, EDIT_TEXTBOX, refresh_part, create_model_part, admin_edit_part}, + {G_TYPE_STRING, SORTID_MAX_CPUS_PER_NODE, "Max CPUs Per Node", FALSE, + EDIT_TEXTBOX, refresh_part, create_model_part, admin_edit_part}, {G_TYPE_STRING, SORTID_ROOT, "Root", FALSE, EDIT_MODEL, refresh_part, create_model_part, admin_edit_part}, {G_TYPE_STRING, SORTID_SHARE, "Share", FALSE, EDIT_MODEL, refresh_part, @@ -213,6 +216,8 @@ static display_data_t create_data_part[] = { EDIT_TEXTBOX, refresh_part, _create_model_part2, admin_edit_part}, {G_TYPE_STRING, SORTID_NODES_MAX, "Nodes Max", FALSE, EDIT_TEXTBOX, refresh_part, _create_model_part2, admin_edit_part}, + {G_TYPE_STRING, SORTID_MAX_CPUS_PER_NODE, "Max CPUs Per Node", FALSE, + EDIT_TEXTBOX, refresh_part, _create_model_part2, admin_edit_part}, {G_TYPE_STRING, SORTID_ROOT, "Root", FALSE, EDIT_MODEL, refresh_part, _create_model_part2, admin_edit_part}, {G_TYPE_STRING, SORTID_SHARE, "Share", FALSE, @@ -526,6 +531,14 @@ static const char *_set_part_msg(update_part_msg_t *part_msg, type = "name"; part_msg->name = xstrdup(new_text); break; + case SORTID_MAX_CPUS_PER_NODE: + temp_int = strtol(new_text, (char **)NULL, 10); + type = "max_cpus_per_node"; + + if (temp_int <= 0) + goto return_error; + part_msg->max_cpus_per_node = temp_int; + break; case SORTID_NODES_MIN: temp_int = strtol(new_text, (char **)NULL, 10); type = "min_nodes"; @@ -955,6 +968,9 @@ static void _layout_part_record(GtkTreeView *treeview, case SORTID_NODES_MIN: limit_set = part_ptr->min_nodes; break; + case SORTID_MAX_CPUS_PER_NODE: + limit_set = part_ptr->max_cpus_per_node; + break; 
case SORTID_NODE_INX: break; case SORTID_ONLY_LINE: @@ -1018,7 +1034,7 @@ static void _layout_part_record(GtkTreeView *treeview, else temp_char = "down"; up_down = -1; - } if (yes_no != -1) { + } else if (yes_no != -1) { if (yes_no) temp_char = "yes"; else @@ -1058,7 +1074,7 @@ static void _update_part_record(sview_part_info_t *sview_part_info, { char tmp_prio[40], tmp_size[40], tmp_share_buf[40], tmp_time[40]; char tmp_max_nodes[40], tmp_min_nodes[40], tmp_grace[40]; - char tmp_cpu_cnt[40], tmp_node_cnt[40]; + char tmp_cpu_cnt[40], tmp_node_cnt[40], tmp_max_cpus_per_node[40]; char *tmp_alt, *tmp_default, *tmp_groups, *tmp_hidden; char *tmp_root, *tmp_share, *tmp_state; uint16_t tmp_preempt; @@ -1113,6 +1129,13 @@ static void _update_part_record(sview_part_info_t *sview_part_info, tmp_min_nodes, sizeof(tmp_min_nodes), UNIT_NONE); } + if (part_ptr->max_cpus_per_node == INFINITE) { + sprintf(tmp_max_cpus_per_node, "UNLIMITED"); + } else { + sprintf(tmp_max_cpus_per_node, "%u", + part_ptr->max_cpus_per_node); + } + if (cluster_flags & CLUSTER_FLAG_BG) convert_num_unit((float)part_ptr->total_nodes, tmp_node_cnt, sizeof(tmp_node_cnt), UNIT_NONE); @@ -1181,6 +1204,7 @@ static void _update_part_record(sview_part_info_t *sview_part_info, SORTID_GROUPS, tmp_groups, SORTID_HIDDEN, tmp_hidden, SORTID_JOB_SIZE, tmp_size, + SORTID_MAX_CPUS_PER_NODE, tmp_max_cpus_per_node, SORTID_MEM, "", SORTID_NAME, part_ptr->name, SORTID_NODE_INX, part_ptr->node_inx, @@ -2112,6 +2136,7 @@ extern GtkListStore *create_model_part(int type) case SORTID_TIMELIMIT: case SORTID_NODES_MIN: case SORTID_NODES_MAX: + case SORTID_MAX_CPUS_PER_NODE: break; case SORTID_SHARE: model = gtk_list_store_new(2, G_TYPE_STRING, G_TYPE_INT); diff --git a/src/sview/popups.c b/src/sview/popups.c index 7750a20fea7faaf66f19cb48c7693cb3fe4db067..2fb62a585830cd1bf861d08da2527b5627918bd6 100644 --- a/src/sview/popups.c +++ b/src/sview/popups.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -514,7 +514,6 @@ extern void create_create_popup(GtkAction *action, gpointer user_data) if (!strcmp(name, "batch_job")) { sview_search_info.search_type = CREATE_BATCH_JOB; - entry = create_entry(); label = gtk_label_new( "Batch job submission specifications\n\n" "Specify size (task and/or node count) plus the\n" @@ -530,7 +529,6 @@ extern void create_create_popup(GtkAction *action, gpointer user_data) entry = create_job_entry(job_msg, model, &iter); } else if (!strcmp(name, "partition")) { sview_search_info.search_type = CREATE_PARTITION; - entry = create_entry(); label = gtk_label_new( "Partition creation specifications\n\n" "Specify Name. All other fields are optional."); diff --git a/src/sview/resv_info.c b/src/sview/resv_info.c index 4ef0b13ab1d298b40129df02451cea368af2d3fd..3e58b40964b46abe42cea285ab1b4fe32d09afc4 100644 --- a/src/sview/resv_info.c +++ b/src/sview/resv_info.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/submit_info.c b/src/sview/submit_info.c index 922d11c850a406fb8b7b09301ea5df7500da0d8a..e5206a4ad8091cc470aa6f2c2c71d7acf53ac19e 100644 --- a/src/sview/submit_info.c +++ b/src/sview/submit_info.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/src/sview/sview.c b/src/sview/sview.c index 7ca23b0136461661bb66ab134bcc5a5dbb88939a..77365f150dfe98d4271b3e5f1b24d059122640c4 100644 --- a/src/sview/sview.c +++ b/src/sview/sview.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -70,6 +70,7 @@ int global_row_count = 0; bool global_multi_error = 0; gint last_event_x = 0; gint last_event_y = 0; +int sview_max_cpus = 0; GdkCursor* in_process_cursor; gchar *global_edit_error_msg = NULL; GtkWidget *main_notebook = NULL; @@ -102,7 +103,8 @@ static bool debug_inited = 0; static char *orig_cluster_name = NULL; static int g_menu_id = 0; static GtkUIManager *g_ui_manager = NULL; - +static GtkToggleActionEntry *debug_actions = NULL; +static int debug_action_entries = 0; /* popup_positioner_t main_popup_positioner[] = { {0,"Sview Defaults", 150, 700 }, @@ -508,161 +510,21 @@ static void _get_current_debug_flags(GtkToggleAction *action) GtkAction *debug_action = NULL; GtkToggleAction *toggle_action; gboolean orig_state, new_state; + int i; if (err_code != SLURM_ERROR) debug_flags = slurm_ctl_conf_ptr->debug_flags; - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_backfill"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_BACKFILL; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_bg_algo"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state 
= gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_BG_ALGO; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_bg_algo_deep"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_BG_ALGO_DEEP; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_bg_pick"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_BG_PICK; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_bg_wires"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_BG_WIRES; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_cpu_bind"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_CPU_BIND; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_energy"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_ENERGY; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_front_end"); - toggle_action = 
GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_FRONT_END; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_gang"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_GANG; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_gres"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_GRES; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_no_conf_hash"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_NO_CONF_HASH; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_no_real_time"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_NO_REALTIME; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_prio"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_PRIO; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_reservation"); - 
toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_RESERVATION; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_select_type"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_SELECT_TYPE; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_steps"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_STEPS; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_switch"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_SWITCH; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_triggers"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_TRIGGERS; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); - - debug_action = gtk_action_group_get_action(menu_action_group, - "flags_wiki"); - toggle_action = GTK_TOGGLE_ACTION(debug_action); - orig_state = gtk_toggle_action_get_active(toggle_action); - new_state = debug_flags & DEBUG_FLAG_WIKI; - if (orig_state != new_state) - gtk_toggle_action_set_active(toggle_action, new_state); + for (i = 0; i < debug_action_entries; i++) { + debug_action = 
gtk_action_group_get_action( + menu_action_group, debug_actions[i].name); + toggle_action = GTK_TOGGLE_ACTION(debug_action); + orig_state = gtk_toggle_action_get_active(toggle_action); + new_state = debug_flags + & debug_str2flags((char *)debug_actions[i].name); + if (orig_state != new_state) + gtk_toggle_action_set_active(toggle_action, new_state); + } } static void _set_debug(GtkRadioAction *action, @@ -690,10 +552,21 @@ static void _set_debug(GtkRadioAction *action, g_free(temp); } -static void _set_flags(GtkToggleAction *action, uint32_t flag) +static void _set_flags(GtkToggleAction *action) { char *temp = NULL; uint32_t debug_flags_plus = 0, debug_flags_minus = 0; + uint32_t flag = NO_VAL; + const char *name; + + if (!action) + return; + + name = gtk_action_get_name(GTK_ACTION(action)); + if (!name) + return; + + flag = debug_str2flags((char *)name); if (action && gtk_toggle_action_get_active(action)) debug_flags_plus |= flag; @@ -708,83 +581,6 @@ static void _set_flags(GtkToggleAction *action, uint32_t flag) g_free(temp); } -static void _set_flags_backfill(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_BACKFILL); -} -static void _set_flags_bg_algo(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_BG_ALGO); -} -static void _set_flags_bg_algo_deep(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_BG_ALGO_DEEP); -} -static void _set_flags_bg_pick(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_BG_PICK); -} -static void _set_flags_bg_wires(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_BG_WIRES); -} -static void _set_flags_cpu_bind(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_CPU_BIND); -} -static void _set_flags_energy(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_ENERGY); -} -static void _set_flags_front_end(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_FRONT_END); -} -static void _set_flags_gang(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_GANG); 
-} -static void _set_flags_gres(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_GRES); -} -static void _set_flags_no_conf_hash(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_NO_CONF_HASH); -} -static void _set_flags_no_real_time(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_NO_REALTIME); -} -static void _set_flags_prio(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_PRIO); -} -static void _set_flags_reservation(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_RESERVATION); -} -static void _set_flags_select_type(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_SELECT_TYPE); -} -static void _set_flags_steps(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_STEPS); -} -static void _set_flags_switch(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_SWITCH); -} -static void _set_flags_triggers(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_TRIGGERS); -} -static void _set_flags_wiki(GtkToggleAction *action) -{ - _set_flags(action, DEBUG_FLAG_WIKI); -} - static void _tab_pos(GtkRadioAction *action, GtkRadioAction *extra, GtkNotebook *notebook) @@ -822,6 +618,8 @@ static gboolean _delete(GtkWidget *widget, GtkWidget *event, gpointer data) { + int i; + _persist_dynamics(); fini = 1; gtk_main_quit(); @@ -841,6 +639,11 @@ static gboolean _delete(GtkWidget *widget, list_destroy(cluster_list); xfree(orig_cluster_name); #endif + for (i = 0; i<debug_action_entries; i++) { + xfree(debug_actions[i].name); + } + xfree(debug_actions); + return FALSE; } @@ -848,6 +651,7 @@ static char *_get_ui_description() { /* Our menu*/ char *ui_description = NULL; + int i; xstrcat(ui_description, "<ui>" @@ -900,27 +704,14 @@ static char *_get_ui_description() " <menuitem action='debug_debug4'/>" " <menuitem action='debug_debug5'/>" " </menu>" - " <menu action='debugflags'>" - " <menuitem action='flags_backfill'/>" - " <menuitem action='flags_bg_algo'/>" - " <menuitem 
action='flags_bg_algo_deep'/>" - " <menuitem action='flags_bg_pick'/>" - " <menuitem action='flags_bg_wires'/>" - " <menuitem action='flags_cpu_bind'/>" - " <menuitem action='flags_energy'/>" - " <menuitem action='flags_front_end'/>" - " <menuitem action='flags_gang'/>" - " <menuitem action='flags_gres'/>" - " <menuitem action='flags_no_conf_hash'/>" - " <menuitem action='flags_no_real_time'/>" - " <menuitem action='flags_prio'/>" - " <menuitem action='flags_reservation'/>" - " <menuitem action='flags_select_type'/>" - " <menuitem action='flags_steps'/>" - " <menuitem action='flags_switch'/>" - " <menuitem action='flags_triggers'/>" - " <menuitem action='flags_wiki'/>" - " </menu>" + " <menu action='debugflags'>"); + for (i = 0; i < debug_action_entries; i++) { + xstrfmtcat(ui_description, + " <menuitem action='%s'/>", + debug_actions[i].name); + } + xstrcat(ui_description, + " </menu>" " <separator/>" " <menuitem action='exit'/>" " </menu>" @@ -970,7 +761,7 @@ static GtkWidget *_get_menubar_menu(GtkWidget *window, GtkWidget *notebook) { GtkAccelGroup *accel_group = NULL; GError *error = NULL; - char *ui_description = _get_ui_description(); + char *ui_description; GtkActionEntry entries[] = { {"actions", NULL, "_Actions", "<alt>a"}, @@ -1130,46 +921,27 @@ static GtkWidget *_get_menubar_menu(GtkWidget *window, GtkWidget *notebook) {"debug_debug5", NULL, "debug5(9)", "", "Debug5 level", 9}, }; - GtkToggleActionEntry debug_flags[] = { - {"flags_backfill", NULL, "Backfill", NULL, - "Backfill", G_CALLBACK(_set_flags_backfill), FALSE}, - {"flags_bg_algo", NULL, "BgBlockAlgo", NULL, - "BgBlockAlgo", G_CALLBACK(_set_flags_bg_algo), FALSE}, - {"flags_bg_algo_deep", NULL, "BgBlockAlgoDeep", NULL, - "BgBlockAlgoDeep", G_CALLBACK(_set_flags_bg_algo_deep),FALSE}, - {"flags_bg_pick", NULL, "BgBlockPick", NULL, - "BgBlockPick", G_CALLBACK(_set_flags_bg_pick), FALSE}, - {"flags_bg_wires", NULL, "BgBlockWires", NULL, - "BgBlockWires", G_CALLBACK(_set_flags_bg_wires), FALSE}, - 
{"flags_cpu_bind", NULL, "CPU Bind", NULL, - "CPU_Bind", G_CALLBACK(_set_flags_cpu_bind), FALSE}, - {"flags_energy", NULL, "Energy", NULL, - "Energy", G_CALLBACK(_set_flags_energy), FALSE}, - {"flags_front_end", NULL, "FrontEnd", NULL, - "FrontEnd", G_CALLBACK(_set_flags_front_end), FALSE}, - {"flags_gang", NULL, "Gang", NULL, - "Gang", G_CALLBACK(_set_flags_gang), FALSE}, - {"flags_gres", NULL, "Gres", NULL, - "Gres", G_CALLBACK(_set_flags_gres), FALSE}, - {"flags_no_conf_hash", NULL, "NO CONF HASH", NULL, - "NO_CONF_HASH", G_CALLBACK(_set_flags_no_conf_hash), FALSE}, - {"flags_no_real_time", NULL, "NoRealTime", NULL, - "NoRealTime", G_CALLBACK(_set_flags_no_real_time), FALSE}, - {"flags_prio", NULL, "Priority", NULL, - "Priority", G_CALLBACK(_set_flags_prio), FALSE}, - {"flags_reservation", NULL, "Reservation", NULL, - "Reservation", G_CALLBACK(_set_flags_reservation), FALSE}, - {"flags_select_type", NULL, "SelectType", NULL, - "SelectType", G_CALLBACK(_set_flags_select_type), FALSE}, - {"flags_steps", NULL, "Steps", NULL, - "Steps", G_CALLBACK(_set_flags_steps), FALSE}, - {"flags_switch", NULL, "Switch", NULL, - "Switch", G_CALLBACK(_set_flags_switch), FALSE}, - {"flags_triggers", NULL, "Triggers", NULL, - "Triggers", G_CALLBACK(_set_flags_triggers), FALSE}, - {"flags_wiki", NULL, "Wiki", NULL, - "Wiki", G_CALLBACK(_set_flags_wiki), FALSE}, - }; + char *all_debug_flags = debug_flags2str(0xFFFFFFFF); + char *last = NULL; + char *tok = strtok_r(all_debug_flags, ",", &last); + + /* set up the global debug_actions */ + debug_actions = xmalloc(sizeof(GtkToggleActionEntry)); + + while (tok) { + xrealloc(debug_actions, + (debug_action_entries + 1) + * sizeof(GtkToggleActionEntry)); + debug_actions[debug_action_entries].name = + debug_actions[debug_action_entries].label = + debug_actions[debug_action_entries].tooltip = + xstrdup(tok); + debug_actions[debug_action_entries].callback = + G_CALLBACK(_set_flags); + debug_action_entries++; + tok = strtok_r(NULL, ",", &last); 
+ } + xfree(all_debug_flags); /* Make an accelerator group (shortcut keys) */ menu_action_group = gtk_action_group_new ("MenuActions"); @@ -1186,8 +958,8 @@ static GtkWidget *_get_menubar_menu(GtkWidget *window, GtkWidget *notebook) G_N_ELEMENTS(radio_entries), working_sview_config.tab_pos, G_CALLBACK(_tab_pos), notebook); - gtk_action_group_add_toggle_actions(menu_action_group, debug_flags, - G_N_ELEMENTS(debug_flags), NULL); + gtk_action_group_add_toggle_actions(menu_action_group, debug_actions, + debug_action_entries, NULL); gtk_action_group_add_radio_actions(menu_action_group, debug_entries, G_N_ELEMENTS(debug_entries), -1, G_CALLBACK(_set_debug), @@ -1208,7 +980,7 @@ static GtkWidget *_get_menubar_menu(GtkWidget *window, GtkWidget *notebook) accel_group = gtk_ui_manager_get_accel_group(g_ui_manager); gtk_window_add_accel_group(GTK_WINDOW(window), accel_group); - + ui_description = _get_ui_description(); if (!(g_menu_id = gtk_ui_manager_add_ui_from_string( g_ui_manager, ui_description, -1, &error))) { xfree(ui_description); diff --git a/src/sview/sview.h b/src/sview/sview.h index b9038d934e8bc1f5710035c1d29aae9da2465ace..21b529d819109688c73a34ef7c5420e99f933b6a 100644 --- a/src/sview/sview.h +++ b/src/sview/sview.h @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -337,6 +337,7 @@ extern GMutex *sview_mutex; extern int global_row_count; extern gint last_event_x; extern gint last_event_y; +extern int sview_max_cpus; extern GdkCursor* in_process_cursor; extern int cpus_per_node; extern int g_node_scaling; diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in index f65be125bb07751c7e7e3ddddc2e78992b9bb6c0..c4ad98b305e0e2801f52925f73855ff1960829c0 100644 --- a/testsuite/Makefile.in +++ b/testsuite/Makefile.in @@ -57,6 +57,7 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -174,6 +178,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = 
@CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -194,6 +200,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -203,6 +212,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -210,6 +221,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -244,6 +264,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -271,6 +294,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 29c9a70b8b3e60d56b5a2cf4ef44dd68a43f39a7..e4c11887a61cd7cca47d36d9e5eb17341c7d5963 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -86,12 +86,20 @@ EXTRA_DIST = \ test1.63.prog.c \ test1.64 \ test1.65 \ + test1.66 \ + 
test1.67 \ + test1.68 \ + test1.69 \ + test1.70 \ + test1.71 \ + test1.72 \ test1.80 \ test1.81 \ test1.82 \ test1.83 \ test1.84 \ test1.84.bash \ + test1.85 \ test1.86 \ test1.87 \ test1.88 \ @@ -278,6 +286,8 @@ EXTRA_DIST = \ inc12.3.2 \ test12.4 \ test12.5 \ + test12.6 \ + test12.6.prog.c \ test13.1 \ test13.2 \ test14.1 \ @@ -425,6 +435,12 @@ EXTRA_DIST = \ test27.3 \ test27.4 \ test27.5 \ + test28.1 \ + test28.2 \ + test28.3 \ + test28.4 \ + test28.5 \ + test28.6 \ usleep distclean-local: diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index 1984e81abd11275b6d81ef9cf18e6e1c397fb3a3..bf1a92d72fb8a1ddcb5fefd12b6d623d0003cc93 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -55,6 +55,7 @@ subdir = testsuite/expect DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ 
$(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -127,6 +131,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -147,6 +153,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -156,6 +165,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -163,6 +174,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -197,6 +217,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -224,6 +247,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -398,12 +424,20 @@ EXTRA_DIST = \ test1.63.prog.c \ test1.64 \ test1.65 \ + test1.66 \ + test1.67 \ + 
test1.68 \ + test1.69 \ + test1.70 \ + test1.71 \ + test1.72 \ test1.80 \ test1.81 \ test1.82 \ test1.83 \ test1.84 \ test1.84.bash \ + test1.85 \ test1.86 \ test1.87 \ test1.88 \ @@ -590,6 +624,8 @@ EXTRA_DIST = \ inc12.3.2 \ test12.4 \ test12.5 \ + test12.6 \ + test12.6.prog.c \ test13.1 \ test13.2 \ test14.1 \ @@ -737,6 +773,12 @@ EXTRA_DIST = \ test27.3 \ test27.4 \ test27.5 \ + test28.1 \ + test28.2 \ + test28.3 \ + test28.4 \ + test28.5 \ + test28.6 \ usleep all: all-am diff --git a/testsuite/expect/README b/testsuite/expect/README index 5874ac26507e1a41c057616facf400517e6b34cd..20de5dcb789601e9aec5905ba8650b1e320db31f 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -1,14 +1,14 @@ ############################################################################ # Copyright (C) 2002-2007 The Regents of the University of California. # Copyright (C) 2008-2011 Lawrence Livermore National Security. -# Copyright (C) 2010-2011 SchedMD LLC +# Copyright (C) 2010-2013 SchedMD LLC # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # Additionals by Joseph Donaghy <donaghy1@llnl.gov> # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -90,20 +90,20 @@ test1.# Testing of srun options. =================================== test1.1 Confirm that a job executes with appropriate user id and group id. test1.2 Confirm that a job executes with the proper task count (--nprocs - and --overcommit options). + and --overcommit options). test1.3 Confirm that srun reports a proper version number (--version option). test1.4 Confirm that srun usage option works (--usage option). 
test1.5 Confirm that srun help option works (--help option). test1.6 Confirm that srun sets appropriate working directory (--chdir - option). + option). test1.7 Confirm that job time limit function works (--time option). test1.8 Confirm that basic srun stdin, stdout, and stderr options work - (--input, --output, and --error option respectively). + (--input, --output, and --error option respectively). test1.9 Test of srun verbose mode (--verbose option). test1.10 Test of srun/slurmd debug mode (--slurmd-debug option). test1.11 Test job name option (--job-name). test1.12 Test of --checkpoint option. This does not validate the - checkpoint file itself. + checkpoint file itself. test1.13 Test of immediate allocation option (--immediate option). test1.14 Test exclusive resource allocation for a step (--exclusive option). test1.15 Test of wait option (--wait option). @@ -111,18 +111,18 @@ test1.16 Confirm that srun buffering can be disabled (--unbuffered option). test1.17 Test of srun --open-mode (truncate or append) option. test1.18 Test of --licenses option test1.19 Test srun stdout/err file name formatting (--output and --error - options with %j, %J, %n, %s and %t specifications). + options with %j, %J, %n, %s and %t specifications). test1.20 Test srun stdout/err disabling (--output and --error options with - argument of "none"). + argument of "none"). test1.21 Test srun stdin/out routing with specific task number (--input - and --output options with numeric argument). + and --output options with numeric argument). test1.22 Confirm that a job executes with various launch thread fanouts - (--threads option). + (--threads option). test1.23 Verify node configuration specification (--mem, --mincpus, and - --tmp options). + --tmp options). test1.24 Verify node configuration specification (--constraint option). test1.25 Submit job to not be killed on node failure (--no-kill option). - NOTE: We need to actually kill slurmd daemons to fully test this. 
+ NOTE: We need to actually kill slurmd daemons to fully test this. test1.26 Submit job directly to slurmd without use of slurmctld scheduler. (--no-allocate option). NOTE: Needs to run as SlurmUser or root. test1.27 Verify the appropriate job environment variables are set. @@ -130,8 +130,8 @@ test1.28 Verify that user environment variables are propagated to the job. test1.29 Verify that user user limits are propagated to the job. test1.30 Test of increasing job sizes. test1.31 Verify that SLURM directed environment variables are processed: - SLURM_DEBUG, SLURM_NNODES, SLURM_NPROCS, SLURM_OVERCOMMIT, - SLURM_STDOUTMODE. + SLURM_DEBUG, SLURM_NNODES, SLURM_NPROCS, SLURM_OVERCOMMIT, + SLURM_STDOUTMODE. test1.32 Test of srun signal forwarding test1.33 Test of srun application exit code reporting test1.34 Test of task signal abort message @@ -139,7 +139,7 @@ test1.35 Test of batch job with multiple concurrent job steps test1.36 Test parallel launch of srun (e.g. "srun srun hostname") test1.37 Test of srun --tasks-per-node option. test1.38 Test srun handling of SIGINT to get task status or kill the job - (--quit-on-interrupt option). + (--quit-on-interrupt option). test1.39 Test of linux light-weight core files. test1.40 Test of job account (--account option). test1.41 Validate SLURM debugger infrastructure (--debugger-test option). @@ -156,9 +156,9 @@ test1.51 Test propagation of umask to spawned tasks. test1.52 Test of hostfile logic. test1.53 Test of --signal option. test1.54 Test of running different executables with different arguments - for each task (--multi-prog option). + for each task (--multi-prog option). test1.55 Make certain that srun behaves when its controlling terminal - disappears. + disappears. test1.56 Test buffered standard IO with really long lines test1.57 Test of srun --jobid for a new job allocation (used by Moab) test1.58 Test of srun --jobid for an existing job allocation @@ -169,25 +169,37 @@ test1.62 Test of gres/gpu plugin (if configured). 
test1.63 Test of srun --disbale-status/-X option, SIGINT to be sent to tasks. test1.64 Test interuption of srun while waiting for resource allocation. test1.65 Test of srun --mpi=list option. +test1.66 Test srun to make sure that the jobs are running on the correct node + within an allocation. +test1.67 Test that an srun program can move from the background to the + foreground. +test1.68 Test that an srun program can move from the foreground to the + background. +test1.69 Test that multiple srun programs execute simultaneously in the + background. +test1.70 Validates that srun standard input and output work with binary files. +test1.71 Validates that srun exit code matches that of a test program. +test1.72 Validate that the JobAcctGatherFrequency value is enforced when + used in srun. **NOTE** The following tests attempt to utilize multiple CPUs or partitions, - The test will print "WARNING" and terminate with an exit code of - zero if the cluster configuration does not permit proper testing. + The test will print "WARNING" and terminate with an exit code of + zero if the cluster configuration does not permit proper testing. test1.80 Confirm that a job executes with the proper task distribution - (--nodes and --distribution options). + (--nodes and --distribution options). test1.81 Confirm that a job executes with the proper node count - (--nodes option). + (--nodes option). test1.82 Confirm that a job executes with the specified nodes - (--nodelist and --exclude options). + (--nodelist and --exclude options). test1.83 Test of contiguous option with multiple nodes (--contiguous option). - Also see test1.14. + Also see test1.14. test1.84 Test of cpus-per-task option on a single node (--cpus-per-task - option). -test1.85 REMOVED + option). +test1.85 Test of node selection from within a job step on existing allocation test1.86 Confirm node selection from within a job step on existing allocation - (--nodelist, --exclude, --nodes and --nprocs options).
+ (--nodelist, --exclude, --nodes and --nprocs options). test1.87 Confirm node selection from within a job step on existing allocation - (--relative, --nodes and --nprocs options). + (--relative, --nodes and --nprocs options). test1.88 Basic MPI functionality tests via srun. test1.89 Test of CPU affinity support. test1.90 Test of memory affinity support for NUMA systems. @@ -206,7 +218,7 @@ test2.2 Validate scontrol help command. test2.3 Validate scontrol ping command. test2.4 Validate scontrol exit, quit, and !! commands. test2.5 Validate scontrol show commands for configuation, daemons, - nodes, frontends, and partitions. + nodes, frontends, and partitions. test2.6 Validate scontrol verbose and quiet options. test2.7 Validate scontrol pidinfo command. test2.8 Validate scontrol show commands for jobs and steps. @@ -242,17 +254,17 @@ test4.# Testing of sinfo options. test4.1 Confirm sinfo usage option works (--usage option). test4.2 Confirm sinfo help option works (--help option). test4.3 Test partition information, both long and short (--long and - --summarize options) and partition filtering (--partition option). + --summarize options) and partition filtering (--partition option). test4.4 Test node information, both regular and long (--Node, --long, - and --exact options). + and --exact options). test4.5 Test sinfo node information filtering (--state and --nodes options). test4.6 Test sinfo iteration (--iterate option). test4.7 Confirm that sinfo verbose option works (--verbose option). test4.8 Check sinfo output without header (--noheader option). test4.9 Check sinfo formating options (--format option and SINFO_FORMAT - environment variable). + environment variable). test4.10 Confirm that sinfo reports a proper version number (--version - option). + option). test4.11 Test down node reason display (--list-reasons option). test4.12 Test cpu total and allocation numbers. @@ -263,13 +275,13 @@ test5.1 Confirm squeue usage option works (--usage option). 
test5.2 Confirm squeue help option works (--help option). test5.3 Test squeue iteration (--iterate option). test5.4 Test squeue formating options (--noheader, --format and --step - options and SQUEUE_FORMAT environment variable). + options and SQUEUE_FORMAT environment variable). test5.5 Test squeue sorting (--sort option). test5.6 Test squeue filtering (--jobs, --nodelist, --states, --steps and - --user options). + --user options). test5.7 Confirm that squeue verbose option works (--verbose option). test5.8 Confirm that squeue reports a proper version number (--version - option). + option). test6.# Testing of scancel options. @@ -284,42 +296,42 @@ test6.7 Validate scancel signal option (--signal and --verbose options). test6.8 Validate scancel state and name filters (--state and --name options). test6.9 Validate scancel of individual job steps (job.step specification). test6.10 Validate scancel user and partition filters, delete all remaining - jobs (--partition and --user options). + jobs (--partition and --user options). test6.11 Validate scancel quiet option, no warning if job gone - (--quiet option). + (--quiet option). test6.12 Test scancel signal to batch script (--batch option) test6.13 Test routing all signals through slurmctld rather than directly - to slurmd (undocumented --ctld option). + to slurmd (undocumented --ctld option). test6.14 Test scancel nodelist option (--nodelist or -w) test7.# Testing of other functionality. ========================================== test7.1 Test priorities slurmctld assigns to jobs. Uses srun --hold and - --batch options. + --batch options. test7.2 Test of PMI functions available via API library. Tests - --pmi-threads option in srun command. + --pmi-threads option in srun command. test7.3 Test of slurm_step_launch API with spawn_io=true - (needed by poe on IBM AIX systems). + (needed by poe on IBM AIX systems). test7.4 Test of TotalView operation with srun, with and without bulk - transfer. + transfer. 
test7.5 Test of TotalView termination logic for srun. test7.6 Test of TotalView operation with sattach test7.7 Test of sched/wiki2 plugin. This is intended to execute in the - place of Moab or Maui and emulate its actions to confirm proper - operation of the plugin. + place of Moab or Maui and emulate its actions to confirm proper + operation of the plugin. test7.8 Test of sched/wiki plugin. This is intended to execute in the - place of Maui and emulate its actions to confirm proper - operation of the plugin. + place of Maui and emulate its actions to confirm proper + operation of the plugin. test7.9 Test that no files are open in spawned tasks (except stdin, - stdout, and stderr) to insure successful checkpoint/restart. + stdout, and stderr) to insure successful checkpoint/restart. test7.10 Test if we can trick SLURM into using the wrong user ID - through an LD_PRELOAD option. + through an LD_PRELOAD option. test7.11 Test of SPANK plugin. test7.12 Test of slurm_job_step_stat() API call. test7.13 Verify the correct setting of a job's ExitCode test7.14 Verify the ability to modify the Derived Exit Code/String fields - of a job record in the database + of a job record in the database test7.15 Verify signal mask of tasks have no ignored signals. test7.16 Verify that auth/munge credential is properly validated. @@ -340,17 +352,17 @@ test8.9 Bluegene/Q only: Test to make sure if a nodeboard goes unavailable test8.10 Bluegene/Q only: Test to make sure that the correct number of nodes and tasks in a job and a step. test8.11 Bluegene/Q only: Test that certain number of tasks will submit to - srun. + srun. 
test8.20 Bluegene/Q only: Test that job step allocations are a valid size - and within the job's allocation + and within the job's allocation test8.21 Bluegene/Q only: Test that multple job step allocations are - properly packed within the job's allocation + properly packed within the job's allocation test8.22 Bluegene/Q only: Stress test of running many job step allocations - within the job's allocation + within the job's allocation test8.23 Bluegene/Q only: Test that multple jobs allocations are properly - packed within a midplane + packed within a midplane test8.24 Bluegene/Q only: Test advanced reservation creation with mutliple - blocks. + blocks. test8.25 Bluegene/P only: Confirm salloc can reserve the HTC partitons. @@ -380,9 +392,9 @@ test10.8 Test job information, in commandline mode (-Dj -c option). test10.9 Test smap iteration (--iterate option). test10.10 Check smap output without header (--noheader option). test10.11 Confirm that smap reports a proper version number - (--version option). + (--version option). test10.12 Test bg base partition XYZ to Rack Midplane and back - resolution (--resolve option). + resolution (--resolve option). test10.13 Test bluegene.conf file creation and validate it (-Dc option). @@ -396,17 +408,17 @@ test11.5 Test of checkpoint logic (direct with srun) test11.6 Test of checkpoint logic (with poe) test11.7 Test of hostfile logic (with poe) test11.8 Test of running different executables with different arguments - for each task (--multi-prog option with poe). + for each task (--multi-prog option with poe). test12.# Testing of sacct command and options =============================================== test12.1 Test sacct --help option. test12.2 Test validity/accuracy of accounting data for exit code, - memory and real-time information along with stating a running job. + memory and real-time information along with stating a running job. test12.3 Test sacct filtering of records by account and job name. 
test12.4 Test sacct --b, g, j, l, n, p, u, v options. test12.5 Test sacct --helpformat option. - +test12.6 Test hdf5 acct_gather_profile (--profile=task) test13.# Testing of switch plugins ==================================== @@ -419,13 +431,13 @@ test14.# Testing of sbcast options. test14.1 Confirm sbcast usage option works (--usage option). test14.2 Confirm sbcast help option works (--help option). test14.3 Confirm that sbcast reports a proper version number - (--version option). + (--version option). test14.4 Test sbcast file overwrite (--force option). test14.5 Test sbcast time preservation (--preserve option). test14.6 Test sbcast logging (--verbose option). test14.7 Test sbcast security issues. test14.8 Test sbcast transmission buffer options (--size and - --fanout options). + --fanout options). test14.9 Verify that an sbcast credential is properly validated. test15.# Testing of salloc options. @@ -433,14 +445,14 @@ test15.# Testing of salloc options. test15.1 Confirm salloc usage option works (--usage option). test15.2 Confirm salloc help option works (--help option). test15.3 Confirm that salloc reports a proper version number - (--version option). + (--version option). test15.4 Confirm that a job executes with appropriate user id and group id. test15.5 Confirm that job time limit function works (--time and - --kill-command options). + --kill-command options). test15.6 Test of salloc verbose mode (--verbose option). test15.7 Test of processors, memory, and temporary disk space - constraints options (--mincpus, --mem, and --tmp options). - Also test that priority zero job is not started (--hold option). + constraints options (--mincpus, --mem, and --tmp options). + Also test that priority zero job is not started (--hold option). test15.8 Test of immediate allocation option (--immediate option). test15.9 Confirm that salloc exit code processing. 
test15.10 Confirm that a job allocates the proper procesor count (--tasks) @@ -450,19 +462,19 @@ test15.13 Verify the appropriate job environment variables are set test15.14 Test of job dependencies (--depedency option). test15.15 Test of user signal upon allocation (--bell and --no-bell options) test15.16 Verify that SLURM directed environment variables are processed: - SALLOC_BELL and SALLOC_NO_BELL (can't really confirm from Expect) + SALLOC_BELL and SALLOC_NO_BELL (can't really confirm from Expect) test15.17 Test the launch of a batch job within an existing job allocation. - This logic is used by LSF + This logic is used by LSF test15.18 Test of running non-existant job, confirm timely termination. test15.19 Confirm that a job executes with the proper node count - (--nodes option). + (--nodes option). test15.20 Confirm that a job executes with the specified nodes - (--nodelist and --exclude options). + (--nodelist and --exclude options). test15.21 Test of contiguous option with multiple nodes (--contiguous option). test15.22 Test of partition specification on job submission (--partition - option). + option). test15.23 Test of environment variables that control salloc actions: - SALLOC_DEBUG and SALLOC_TIMELIMIT + SALLOC_DEBUG and SALLOC_TIMELIMIT test15.24 Test of --overcommit option. test15.25 Test of job account(--account option). test15.26 Test interuption of salloc while waiting for resource allocation. @@ -472,9 +484,9 @@ test16.# Testing of sattach options. test16.1 Confirm sattach usage option works (--usage option). test16.2 Confirm sattach help option works (--help option). test16.3 Confirm that sattach reports a proper version number - (--version option). + (--version option). test16.4 Basic sattach functionality test (--layout, --verbose, --label - and --output-filter options). + and --output-filter options). test17.# Testing of sbatch options. @@ -482,33 +494,33 @@ test17.# Testing of sbatch options. 
test17.1 Confirm sbatch usage option works (--usage option). test17.2 Confirm sbatch help option works (--help option). test17.3 Confirm that sbatch reports a proper version number - (--version option). + (--version option). test17.4 Confirm that as sbatch job executes as the appropriate user and - group. + group. test17.5 Confirm that sbatch stdout, and stderr options work (--output - and --error option respectively, including use of %j specification) + and --error option respectively, including use of %j specification) test17.6 Confirm that a job executes with the proper task count (--tasks - option). + option). test17.7 Confirm that sbatch sets appropriate working directory (--workdir - option) + option) test17.8 Confirm that sbatch sets appropriate time limit (--time - option) + option) test17.9 Confirm that sbatch sets appropriate job name (--job-name option) test17.10 Test of processors, memory, and temporary disk space - constraints options (--mincpus, --mem, and --tmp options). - Also test that priority zero job is not started (--hold - option). + constraints options (--mincpus, --mem, and --tmp options). + Also test that priority zero job is not started (--hold + option). test17.11 Test of shared and contiguous options (--share and --contiguous). - Also uses --hold option. + Also uses --hold option. test17.12 Verify node configuration specification (--constraint option) test17.13 Verify the appropriate job environment variables are set test17.14 Verify that user environment variables are propagated to the job test17.15 Verify that user user limits are propagated to the job test17.16 Verify that command line arguments get forwarded to job script test17.17 Confirm that node sharing flags are respected (--nodelist and - --share options) + --share options) test17.18 Test of job dependencies and deferred begin time (--depedency - and --begin options). + and --begin options). test17.19 Test of job submit to multiple partitions (--partition). 
test17.20 Test of mail options (--mail-type and --mail-user options) test17.21 Tests #SLURM entry functionality in a batch script @@ -516,10 +528,10 @@ test17.22 Test of running non-existant job, confirm timely termination. test17.23 Test of nice value specification (--nice option). test17.24 Test of --partition and --verbose options. test17.25 Verify environment variables controlling sbatch are processed: - SBATCH_ACCOUNT, SBATCH_DEBUG and SBATCH_TIMELIMIT + SBATCH_ACCOUNT, SBATCH_DEBUG and SBATCH_TIMELIMIT test17.26 Test of --input option. test17.27 Test that a job executes with the specified nodes, requires multiple - nodes (--nodes, --nodelist and --exclude options). + nodes (--nodes, --nodelist and --exclude options). test17.28 Tests #SBATCH entry functionality in a batch script. test17.29 Verify that command arguments get forwarded to job script. test17.30 Test of comment field specification (--comment option). @@ -570,7 +582,7 @@ test21.18 sacctmgr modify multiple users test21.19 sacctmgr add and delete coordinator test21.20 sacctmgr add and modify QoS test21.21 sacctmgr add an account to this cluster and try using it with - salloc, sbatch and srun. + salloc, sbatch and srun. test21.22 sacctmgr load file test21.23 sacctmgr wckey test21.24 sacctmgr dump file @@ -583,7 +595,7 @@ test21.30 sacctmgr test if the QoS values are enforced test22.# Testing of sreport commands and options. - These also test the sacctmgr archive dump/load functions. + These also test the sacctmgr archive dump/load functions. ================================================== test22.1 sreport cluster utilization report test22.2 sreport h, n, p, P, t, V options @@ -619,3 +631,19 @@ test27.2 sdiag --help test27.3 sdiag --version test27.4 sdiag --all (default output) test27.5 sdiag --reset + + +test28.# Testing of job array options. +======================================== +test28.1 Confirms sbatch --array and scancel of the job arrays. 
+test28.2 checks that the --array environment variables are correct, and + checks that the --output and --error files were created and + contain the correct information. +test28.3 Validates that the scontrol show job contains the job array + fields +test28.4 Validates scontrol update command for a job array with + a job array index and the whole job array. +test28.5 Validates that scontrol can hold and release a whole job + array or an individual jobid index. +test28.6 Validates that when a job array is submitted to multiple + partitions that the jobs run on them. diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 76bef929c08a639e2d1cd333fe948ce039d2135b..94bb6a10e300fb2aef352cbdadfa19e53afac8f5 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -25,7 +25,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER.
# # SLURM is free software; you can redistribute it and/or modify it under @@ -89,6 +89,7 @@ cset sbcast "${slurm_dir}/bin/sbcast" cset scancel "${slurm_dir}/bin/scancel" cset scontrol "${slurm_dir}/bin/scontrol" cset sdiag "${slurm_dir}/bin/sdiag" +cset sh5util "${slurm_dir}/bin/sh5util" cset sinfo "${slurm_dir}/bin/sinfo" cset smap "${slurm_dir}/bin/smap" cset sprio "${slurm_dir}/bin/sprio" @@ -928,23 +929,23 @@ proc test_topology { } { # ################################################################ proc test_track_wckey { } { - global scontrol number + global scontrol number - log_user 0 - set track_wckey 0 - spawn $scontrol show config - expect { - -re "TrackWCKey *= ($number)" { - set track_wckey $expect_out(1,string) - exp_continue - } - eof { - wait - } - } + log_user 0 + set track_wckey 0 + spawn $scontrol show config + expect { + -re "TrackWCKey *= ($number)" { + set track_wckey $expect_out(1,string) + exp_continue + } + eof { + wait + } + } - log_user 1 - return $track_wckey + log_user 1 + return $track_wckey } ################################################################ @@ -1007,6 +1008,43 @@ proc test_account_storage { } { return $account_storage } +################################################################ +# +# Proc: test_enforce_limits +# +# Purpose: Determine whether resource limits are enforced +# This is based upon +# the value of AccountingStorageEnforce in the slurm.conf.
+# +# Returns 1 if the system is enforcing limits, 0 otherwise +# +################################################################ + +proc test_enforce_limits { } { + global alpha_numeric_comma scontrol + + log_user 0 + set enforce_limits 0 + spawn $scontrol show config + expect { + -re "AccountingStorageEnforce *= ($alpha_numeric_comma)" { + if {[string first "safe" $expect_out(1,string)] != -1 } { + set enforce_limits 1 + } + if {[string first "limits" $expect_out(1,string)] != -1 } { + set enforce_limits 1 + } + exp_continue + } + eof { + wait + } + } + log_user 1 + + return $enforce_limits +} + ################################################################ # # Proc: test_using_slurmdbd @@ -1189,23 +1227,23 @@ proc test_front_end { } { ################################################################ proc test_multiple_slurmd { } { - global scontrol + global scontrol - log_user 0 - set multiple_slurmd 0 - spawn $scontrol show config - expect { - "MULTIPLE_SLURMD" { - set multiple_slurmd 1 - exp_continue - } - eof { - wait - } - } - log_user 1 + log_user 0 + set multiple_slurmd 0 + spawn $scontrol show config + expect { + "MULTIPLE_SLURMD" { + set multiple_slurmd 1 + exp_continue + } + eof { + wait + } + } + log_user 1 - return $multiple_slurmd + return $multiple_slurmd } @@ -1745,7 +1783,7 @@ proc default_partition {} { set name "" set fd [open "|$scontrol --all --oneliner show partition"] while {[gets $fd line] != -1} { - if {[regexp {^PartitionName=(\w+).*Default=YES} $line frag name] + if {[regexp {^PartitionName=([^ ]*).*Default=YES} $line frag name] == 1} { break } @@ -1910,9 +1948,9 @@ proc is_super_user { } { # ################################################################ proc check_acct_associations { } { - global sacctmgr number alpha_numeric_under + global sacctmgr number alpha_numeric_under - set rc 1 + set rc 1 log_user 0 send_user "Testing Associations\n" # @@ -1963,7 +2001,7 @@ proc check_acct_associations { } { eof { wait } - } + } foreach 
cluster [array names c_min] { # Here we are checking for holes in the list from above @@ -1978,6 +2016,46 @@ proc check_acct_associations { } { return $rc } +################################################################ +# +# Proc: get_job_acct_freq +# +# Purpose: gets the value of the job account gather frequency +# +# Returns: job account gather frequency +# +################################################################ +proc get_job_acct_freq { } { + global scontrol number + + log_user 0 + set freq_val 0 + + spawn $scontrol show config + expect { + -re "JobAcctGatherFrequency *= ($number)" { + set freq_val $expect_out(1,string) + if {$freq_val == 0} { + set freq_val 0 + } + } + -re "JobAcctGatherFrequency *= task=($number)" { + set freq_val $expect_out(1,string) + if {$freq_val == 0} { + set freq_val 0 + } + } + eof { + wait + } + } + + log_user 1 + return $freq_val + + +} + ################################################################ # # Proc:check_accounting_admin_level @@ -2028,7 +2106,7 @@ proc check_accounting_admin_level { } { eof { wait } - } + } log_user 1 return $admin_level @@ -2443,3 +2521,117 @@ proc change_subbp_state { node ionodes state } { return $exit_code } +################################################################ +# +# Proc: get_array_config +# +# Purpose: Use scontrol to determine the MaxArraySize +# +# Returns: MaxArraySize value +# +################################################################ + +proc get_array_config { } { + global scontrol number + + log_user 0 + set array_size 1 + spawn $scontrol show config + expect { + -re "MaxArraySize *= ($number)" { + set array_size $expect_out(1,string) + exp_continue + } + eof { + wait + } + } + log_user 1 + return $array_size +} + +################################################# +# +# Proc: scale_to_megs +# +# Purpose: scale the value by the factor T|G|M +# to megabytes +# Returns: the scaled variable +# +################################################# + +proc 
scale_to_megs { value factor } { + + if {[string compare $factor "T"] == 0} { + set value [expr $value * 1024 * 1024] + } elseif {[string compare $factor "G"] == 0} { + set value [expr $value * 1024] + } elseif {[string compare $factor "M"] == 0} { + set value [expr $value * 1] + } else { + set value [expr $value / (1024 * 1024)] + } + + return $value +} + +################################################# +# +# Proc: scale_to_ks +# +# Purpose: scale the value by the factor G|M|K +# to kilobytes +# Returns: the scaled variable +# +################################################# + +proc scale_to_ks { value factor } { + + if {[string compare $factor "G"] == 0} { + set value [expr $value * 1024 * 1024] + } elseif {[string compare $factor "M"] == 0} { + set value [expr $value * 1024] + } elseif {[string compare $factor "K"] == 0} { + set value [expr $value * 1] + } else { + set value [expr $value / 1024] + } + + return $value +} + +############################################################ +# +# Proc: check_node_mem +# +# Purpose: check that the nodes have memory configured +# +# Returns: 1 if the nodes have memory, 0 otherwise +# +############################################################ + +proc check_node_mem { } { + global scontrol number + + log_user 0 + set mem_size 0 + + spawn $scontrol show node + expect { + -re "RealMemory=($number)" { + set mem_size $expect_out(1,string) + exp_continue + } + eof { + wait + } + } + + if {$mem_size == 1} { + return 0 + } else { + return 1 + } + log_user 1 + +} diff --git a/testsuite/expect/globals_accounting b/testsuite/expect/globals_accounting index 1025e95e9a6e7aa4249c8d0e15a679caf7de2b6e..6ab97cb1d8dc9ad030fbe233be6687396d8c10f7 100644 --- a/testsuite/expect/globals_accounting +++ b/testsuite/expect/globals_accounting @@ -20,7 +20,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc12.3.1 b/testsuite/expect/inc12.3.1 index c397d280d6f2db3c097a4eccd94942227f088a48..c0176868070b0dd846b62b21cfbed6bc6e7456ca 100644 --- a/testsuite/expect/inc12.3.1 +++ b/testsuite/expect/inc12.3.1 @@ -6,7 +6,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc12.3.2 b/testsuite/expect/inc12.3.2 index c2f655a56ce43ec3d2cbb861f06d8e37dd8cab34..5750b073ff85894df33fd751eecffeb8432facbf 100755 --- a/testsuite/expect/inc12.3.2 +++ b/testsuite/expect/inc12.3.2 @@ -7,7 +7,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc21.21.1 b/testsuite/expect/inc21.21.1 index 84d81b2c7e05c8dee272808de57c20628f1946e9..a3ba199d71325a857ac6d7a3f419e0a0e3446189 100644 --- a/testsuite/expect/inc21.21.1 +++ b/testsuite/expect/inc21.21.1 @@ -5,7 +5,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc21.21.2 b/testsuite/expect/inc21.21.2 index aa0733b1a1e171d958c129a971dd87cd4f78ae92..900964daafc55b8aac83ca0eb7f89001590679f1 100644 --- a/testsuite/expect/inc21.21.2 +++ b/testsuite/expect/inc21.21.2 @@ -5,7 +5,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc21.21.3 b/testsuite/expect/inc21.21.3 index a43d64d2ce2382e8c76df7e3ebd9ed2f28b14ecd..de5d480c082c3ef95711b77c4ff3a496c98b1fff 100644 --- a/testsuite/expect/inc21.21.3 +++ b/testsuite/expect/inc21.21.3 @@ -5,7 +5,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc21.21.4 b/testsuite/expect/inc21.21.4 index e74b6b72d48de3d2a0916af1ce1c4e4b7e314eaf..482aeb17d8f15605f31ff11187689ce0b7f2e3ce 100644 --- a/testsuite/expect/inc21.21.4 +++ b/testsuite/expect/inc21.21.4 @@ -5,7 +5,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc21.30.1 b/testsuite/expect/inc21.30.1 index cacb0617781dbb2d6059dbe0d594931719a65c38..923450dca48d04728bde9333f6d30e596095f289 100644 --- a/testsuite/expect/inc21.30.1 +++ b/testsuite/expect/inc21.30.1 @@ -2,14 +2,12 @@ # Purpose: Test of SLURM functionality # to be called from test21.30 # Tests if the group nodes limit is enforced -# -# ############################################################################ # Copyright (C) 2012 SchedMD LLC # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -31,16 +29,17 @@ proc inc21_30_1 {} { #test GrpNode limit - global srun salloc acct bin_sleep jobmatch job_id1 job_id2 number + global srun salloc acct bin_sleep number grn_num global exit_code - set jobmatch 0 - spawn $salloc -N1 --account=$acct $srun $bin_sleep 10 + send_user "\nStarting GrpNode limit test\n\n" + set job_id1 0 + set job_id2 0 + spawn $salloc -N$grn_num --account=$acct --exclusive $srun $bin_sleep 10 expect { -re "Granted job allocation ($number)" { set job_id1 $expect_out(1,string) send_user "\njob $job_id1 has been submitted\n" - incr jobmatch } timeout { send_user "\nFAILURE: salloc is not reponding\n" @@ -50,13 +49,21 @@ proc inc21_30_1 {} { wait } } + if {$job_id1 == 0} { + send_user "\nFAILURE: jobs were not submitted\n" + set exit_code 1 + return + } - spawn $salloc -N2 --account=$acct $srun $bin_sleep 10 + spawn $salloc -N1 --account=$acct --exclusive $srun $bin_sleep 10 expect { + -re "Granted job allocation ($number)" { + set job_id2 $expect_out(1,string) + send_user "\njob $job_id2 has been submitted\n" + } -re "job ($number)" { set job_id2 
$expect_out(1,string) send_user "\nJob $job_id2 is waiting for resources. This is expected.\n" - incr jobmatch } timeout { send_user "\nFAILURE: salloc is not reponding\n" @@ -66,16 +73,15 @@ proc inc21_30_1 {} { wait } } - - if {$jobmatch==0} { + if {$job_id2 == 0} { send_user "\nFAILURE: jobs were not submitted\n" set exit_code 1 + } else { + # checks the state of the job + check_state $job_id2 } - # checks the state of the job - check_state $job_id2 - - #cancels remaining jobs + # cancels remaining jobs cancel_job $job_id1 cancel_job $job_id2 } diff --git a/testsuite/expect/inc21.30.2 b/testsuite/expect/inc21.30.2 index 33eabc710a363d66e52d68b23fbb5197a75b80ee..9f3d590b1db36a41abd983ec6772d1b19c29ce13 100644 --- a/testsuite/expect/inc21.30.2 +++ b/testsuite/expect/inc21.30.2 @@ -8,7 +8,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -31,9 +31,11 @@ proc inc21_30_2 {} { #test GrpCpus global srun salloc acct bin_sleep jobmatch job_id1 job_id2 number exit_code + global grcpu_num + send_user "\nStarting GrpCPUs limit test\n\n" set jobmatch 0 - spawn $salloc --account=$acct -n6 $srun $bin_sleep 5 + spawn $salloc --account=$acct -n$grcpu_num $srun $bin_sleep 5 expect { -re "Granted job allocation ($number)" { set job_id1 $expect_out(1,string) @@ -42,14 +44,14 @@ proc inc21_30_2 {} { } timeout { send_user "\nFAILURE: salloc is not reponding\n" - exit 1 + set exit_code 1 } eof { wait } } - spawn $salloc --account=$acct -n6 $srun $bin_sleep 5 + spawn $salloc --account=$acct -n1 $srun $bin_sleep 5 expect { -re "job ($number)" { set job_id2 $expect_out(1,string) @@ -58,7 +60,7 @@ proc inc21_30_2 {} { } timeout { send_user "\nFAILURE: salloc is not reponding\n" - exit 1 + set exit_code 1 } eof { wait @@ -67,7 +69,7 @@ proc inc21_30_2 {} { if {$jobmatch==0} { send_user "\nFAILURE: jobs were not submitted\n" - set exit 1 + set exit_code 1 } # checks the state of the job diff --git a/testsuite/expect/inc21.30.3 b/testsuite/expect/inc21.30.3 index 83422b5b21c16dd72a06bb577b67838d083d0790..843b3a1b704a9e898cc72d99a8aad9d51bd93a95 100644 --- a/testsuite/expect/inc21.30.3 +++ b/testsuite/expect/inc21.30.3 @@ -9,7 +9,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -30,72 +30,64 @@ proc inc21_30_3 {} { # test GrpJob limits + global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code grjobs_num - global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code + send_user "\nStarting GrpJob limit test\n\n" + set check_num $grjobs_num set jobmatch 0 - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - send_user "\njob $job_id1 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait - } - } - - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - send_user "\njob $job_id2 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - - } - eof { - wait - } - } - - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "job ($number)" { - set job_id3 $expect_out(1,string) - send_user "\nJob $job_id3 is waiting for resources. This is expected\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait + for {set inx 0} {$inx <= $check_num} {incr inx} { + spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 + expect { + -re "Granted job allocation ($number)" { + set job_id($inx) $expect_out(1,string) + if {$inx < $check_num} { + send_user "\njob $inx $job_id($inx) was submitted\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) wasn't suppose to work but it did, limit was $check_num?\n" + set exit_code 1 + } + } + -re "job ($number) queued and waiting for resources" { + set job_id($inx) $expect_out(1,string) + if {$inx >= $check_num} { + send_user "\nJob $inx $job_id($inx) is waiting for resources. 
This is expected\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) was suppose to work but it didn't, limit was $check_num?\n" + set exit_code 1 + } + } + timeout { + send_user "\nFAILURE: salloc is not responding\n" + set exit_code 1 + } + eof { + wait + } } } if {$jobmatch==0} { send_user "\nFAILURE: jobs were not submitted\n" - exit 1 + set exit_code 1 } #checks the job state - check_state $job_id3 - - # cancels the remaining jobs - cancel_job $job_id1 - cancel_job $job_id2 - cancel_job $job_id3 + if [info exists job_id($check_num)] { + check_state $job_id($check_num) + } else { + send_user "\nFAILURE: didn't attempt to start enough jobs\n" + set exit_code 1 + } + for {set inx 0} {$inx <= $check_num} {incr inx} { + if [info exists job_id($inx)] { + # cancels the jobs + cancel_job $job_id($inx) + } + } } diff --git a/testsuite/expect/inc21.30.4 b/testsuite/expect/inc21.30.4 index 936bdc992a0aef1ac76c3880861170c416a5d7ec..e4f4b6e535677f9c22ed258739f3968cb4714984 100644 --- a/testsuite/expect/inc21.30.4 +++ b/testsuite/expect/inc21.30.4 @@ -9,7 +9,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -31,70 +31,54 @@ proc inc21_30_4 {} { # test GrpSubmit - global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code + global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code grsub_num + + send_user "\nStarting GrpSubmit limit test\n\n" + set check_num $grsub_num set jobmatch 0 - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - send_user "\njob $job_id1 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait + for {set inx 0} {$inx <= $check_num} {incr inx} { + spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 + expect { + -re "Granted job allocation ($number)" { + set job_id($inx) $expect_out(1,string) + if {$inx < $check_num} { + send_user "\njob $inx $job_id($inx) was submitted\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) wasn't suppose to work but it did, limit was $check_num?\n" + set exit_code 1 + } + } + -re "error" { + if {$inx >= $check_num} { + send_user "\nJob $inx didn't get submitted. 
This is expected\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) was suppose to work but it didn't, limit was $check_num?\n" + set exit_code 1 + } + } + timeout { + send_user "\nFAILURE: salloc is not responding\n" + set exit_code 1 + } + eof { + wait + } } } - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - send_user "\njob $job_id2 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait - } + if {$jobmatch == 0} { + send_user "\nFAILURE: jobs were not submitted\n" + set exit_code 1 } - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "error" { - set job_id3 $expect_out(1,string) - send_user "\nThis error is expected do not worry\n" - incr jobmatch - } - -re "Granted job allocation" { - send_user "\nThis should have failed but did not\n" - exit 1 - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait + for {set inx 0} {$inx <= $check_num} {incr inx} { + if [info exists job_id($inx)] { + # cancels the jobs + cancel_job $job_id($inx) } } - - if {$jobmatch==0} { - send_user "\nFAILURE: jobs were not submitted\n" - exit 1 - } - - # cancels the remaining jobs - cancel_job $job_id1 - cancel_job $job_id2 - cancel_job $job_id3 - } diff --git a/testsuite/expect/inc21.30.5 b/testsuite/expect/inc21.30.5 index dc3b94fd65839c7f06330e1e588e330d0dada998..7dcf189b523c7349ec42dc9470a5e543ebe473b5 100644 --- a/testsuite/expect/inc21.30.5 +++ b/testsuite/expect/inc21.30.5 @@ -9,7 +9,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -31,21 +31,45 @@ proc inc21_30_5 {} { #test MaxCpus limits - global salloc acct number srun job_id1 bin_sleep + global salloc acct number srun bin_sleep maxcpu_num - spawn $salloc --account=$acct -n11 $srun $bin_sleep 2 + send_user "\nStarting MaxCPUs limit test\n\n" + set job_id1 0 + + spawn $salloc --account=$acct -n$maxcpu_num $srun $bin_sleep 2 expect { -re "job ($number)" { set job_id1 $expect_out(1,string) send_user "\nThis is expected do not worry\n" + + } + -re "Granted job allocation ($number)" { + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + # cancels remaining jobs + cancel_job $job_id1 + + spawn $salloc --account=$acct -n[expr $maxcpu_num + 1] $srun $bin_sleep 2 + expect { + -re "Pending job allocation ($number)" { + set job_id1 $expect_out(1,string) + send_user "\nThis is expected do not worry\n" } -re "Granted job allocation ($number)" { - send_user "\nFAILURE: This should have failed but did not\n" - exit 1 + send_user "\nFAILURE: This should have pended but did not\n" + set exit_code 1 } timeout { send_user "\nFAILURE: salloc not responding\n" - exit 1 + set exit_code 1 } eof { wait diff --git a/testsuite/expect/inc21.30.6 b/testsuite/expect/inc21.30.6 index 70e3d897a52765438b5c1b7f427dfd88b836eb16..d5e3793b28500a5d6a0eddf7d1da3e656e3143d2 100644 --- a/testsuite/expect/inc21.30.6 +++ b/testsuite/expect/inc21.30.6 @@ -9,7 +9,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -31,21 +31,46 @@ proc inc21_30_6 {} { #test MaxNode limit - global salloc acct number srun job_id1 bin_sleep + global salloc acct number srun job_id1 bin_sleep maxnode_num - spawn $salloc --account=$acct -N11 $srun $bin_sleep 2 + send_user "\nStarting MaxNode limit test\n\n" + set job_id1 0 + + spawn $salloc --account=$acct -N$maxnode_num --exclusive -t1 $srun $bin_sleep 2 + expect { + -re "Granted job allocation ($number)" { + set job_id1 $expect_out(1,string) + } + -re "Pending job allocation ($number)" { + set job_id1 $expect_out(1,string) + send_user "\nFAILURE: This should have ran but did not\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + # cancels remaining jobs + cancel_job $job_id1 + + spawn $salloc --account=$acct -N[expr $maxnode_num + 1] --exclusive -t1 $srun $bin_sleep 2 expect { - -re "job ($number)" { + -re "Pending job allocation ($number)" { set job_id1 $expect_out(1,string) send_user "\nThis is expected do not worry\n" } -re "Granted job allocation ($number)" { - send_user "\nFAILURE: This should have failed but did not\n" - exit 1 + send_user "\nFAILURE: This should have pended but did not\n" + set exit_code 1 } timeout { send_user "\nFAILURE: salloc not responding\n" - exit 1 + set exit_code 1 } eof { wait diff --git a/testsuite/expect/inc21.30.7 b/testsuite/expect/inc21.30.7 index bf6cdff57580eb9367bbb13016f4ff8077f1f71a..1db4623d27732fae9f6a5eee80f1705321cb3e4c 100644 --- a/testsuite/expect/inc21.30.7 +++ b/testsuite/expect/inc21.30.7 @@ -9,7 +9,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -31,71 +31,62 @@ proc inc21_30_7 {} { #test MaxJobs limit - global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code + global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code maxjobs_num - set jobmatch 0 - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - send_user "\njob $job_id1 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait - } - } - - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - send_user "\njob $job_id2 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait - } - } + send_user "\nStarting MaxJob limit test\n\n" + set check_num $maxjobs_num - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "job ($number)" { - set job_id3 $expect_out(1,string) - send_user "\nThis error is expected do not worry\n" - incr jobmatch - } - -re "Granted job allocation" { - send_user "\nThis should have failed but did not\n" - exit 1 - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait + set jobmatch 0 + for {set inx 0} {$inx <= $check_num} {incr inx} { + spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 + expect { + -re "Granted job allocation ($number)" { + set job_id($inx) $expect_out(1,string) + if {$inx < $check_num} { + send_user "\njob $inx $job_id($inx) was submitted\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) wasn't suppose to work but it did, limit was $check_num?\n" + set exit_code 1 + } + } + -re "job ($number) queued and waiting for resources" { + set job_id($inx) $expect_out(1,string) + if 
{$inx >= $check_num} { + send_user "\nJob $inx $job_id($inx) is waiting for resources. This is expected\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) was suppose to work but it didn't, limit was $check_num?\n" + set exit_code 1 + } + } + timeout { + send_user "\nFAILURE: salloc is not responding\n" + set exit_code 1 + } + eof { + wait + } } } - check_state $job_id3 - if {$jobmatch==0} { send_user "\nFAILURE: jobs were not submitted\n" - exit 1 + set exit_code 1 } - # cancels the remaining jobs - cancel_job $job_id1 - cancel_job $job_id2 - cancel_job $job_id3 + #checks the job state + if [info exists job_id($check_num)] { + check_state $job_id($check_num) + } else { + send_user "\nFAILURE: didn't attempt to start enough jobs\n" + set exit_code 1 + } + for {set inx 0} {$inx <= $check_num} {incr inx} { + if [info exists job_id($inx)] { + # cancels the jobs + cancel_job $job_id($inx) + } + } } diff --git a/testsuite/expect/inc21.30.8 b/testsuite/expect/inc21.30.8 index 69c613f630f11d433c054036bcddb16d2796c58d..8dbf20d783918b28ba6d9f079cc08934a3f65ba8 100644 --- a/testsuite/expect/inc21.30.8 +++ b/testsuite/expect/inc21.30.8 @@ -9,7 +9,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -31,66 +31,53 @@ proc inc21_30_8 {} { #test MaxJobsSubmits limit - global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code + global salloc srun acct bin_sleep jobmatch job_id1 job_id2 job_id3 number exit_code maxjobsub_num + + send_user "\nStarting MaxJobsSubmits limit test\n\n" + set check_num $maxjobsub_num set jobmatch 0 - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - send_user "\njob $job_id1 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait + for {set inx 0} {$inx <= $check_num} {incr inx} { + spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 + expect { + -re "Granted job allocation ($number)" { + set job_id($inx) $expect_out(1,string) + if {$inx < $check_num} { + send_user "\njob $inx $job_id($inx) was submitted\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx $job_id($inx) wasn't suppose to work but it did, limit was $check_num?\n" + set exit_code 1 + } + } + -re "error" { + if {$inx >= $check_num} { + send_user "\nJob $inx didn't get submitted. 
This is expected\n" + incr jobmatch + } else { + send_user "\nFAILURE: Job $inx was suppose to work but it didn't, limit was $check_num?\n" + set exit_code 1 + } + } + timeout { + send_user "\nFAILURE: salloc is not responding\n" + set exit_code 1 + } + eof { + wait + } } } - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - send_user "\njob $job_id2 was submitted\n" - incr jobmatch - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait - } + if {$jobmatch == 0} { + send_user "\nFAILURE: jobs were not submitted\n" + set exit_code 1 } - spawn $salloc --account=$acct -n1 $srun $bin_sleep 100 - expect { - -re "error" { - send_user "\nThis error is expected do not worry\n" - incr jobmatch - } - -re "Granted job allocation" { - send_user "\nThis should have failed but did not\n" - exit 1 - } - timeout { - send_user "\nFAILURE: salloc is not responding\n" - exit 1 - } - eof { - wait + for {set inx 0} {$inx <= $check_num} {incr inx} { + if [info exists job_id($inx)] { + # cancels the jobs + cancel_job $job_id($inx) } } - if {$jobmatch==0} { - send_user "\nFAILURE: jobs were not submitted\n" - exit 1 - } - - # cancels the remaining jobs - cancel_job $job_id1 - cancel_job $job_id2 - } diff --git a/testsuite/expect/inc22.1.1 b/testsuite/expect/inc22.1.1 index fe3f5307566cfe0f4b9d1a2becd6bf2b9737da87..5329c62659146070e7ee428e3dfb4b3a1c622d0f 100644 --- a/testsuite/expect/inc22.1.1 +++ b/testsuite/expect/inc22.1.1 @@ -17,7 +17,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc22.1.2 b/testsuite/expect/inc22.1.2 index 45b3c883fecc52f0998ed4702983588976749e35..12cc40fc7ddd75dbd705c7191ccad5576e7693b3 100644 --- a/testsuite/expect/inc22.1.2 +++ b/testsuite/expect/inc22.1.2 @@ -17,7 +17,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc22.1.3 b/testsuite/expect/inc22.1.3 index 2f19fc4f619940008056b98a2402dc288e6cc2f1..4a4e6f4aff1df73254d2c6060ba1c507ba8905e2 100644 --- a/testsuite/expect/inc22.1.3 +++ b/testsuite/expect/inc22.1.3 @@ -17,7 +17,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc22.1.4 b/testsuite/expect/inc22.1.4 index d3fce87cabbe307b0efd94197e21e12fc654a006..0d86a8f930e29e9a535c91c636bdaf7f68c27151 100644 --- a/testsuite/expect/inc22.1.4 +++ b/testsuite/expect/inc22.1.4 @@ -17,7 +17,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -46,7 +46,7 @@ proc inc22_1_4 {} { #Test for job 1 send_user "\nSearch for job1 on cluster $cluster\n" set usermatch 0 - spawn $sacct -p -C $cluster --state=completed --start=$job1_start_str --end=$job1_end_str --fields=node,jobname,jobid + spawn $sacct -p -C $cluster --state=completed --start=$job1_start_str --end=$job1_end_str --format=node,jobname,jobid expect { -re "$job1_acct" { incr usermatch @@ -68,7 +68,7 @@ proc inc22_1_4 {} { # Test for job2 send_user "\nSearch for job2 on cluster $cluster\n" set usermatch 0 - spawn $sacct -p -C $cluster --state=completed --start=$job2_start_str --end=$job2_end_str --fields=node,jobname,jobid + spawn $sacct -p -C $cluster --state=completed --start=$job2_start_str --end=$job2_end_str --format=node,jobname,jobid expect { -re "$job2_acct" { incr usermatch @@ -90,7 +90,7 @@ proc inc22_1_4 {} { # Test for job3 send_user "\nSearch for job3 on cluster $cluster\n" set usermatch 0 - spawn $sacct -p -C $cluster --state=completed --start=$job3_start_str --end=$job3_end_str --fields=node,jobname,jobid + spawn $sacct -p -C $cluster --state=completed --start=$job3_start_str --end=$job3_end_str --format=node,jobname,jobid expect { -re "$job3_acct" { incr usermatch diff --git a/testsuite/expect/inc3.11.1 b/testsuite/expect/inc3.11.1 index b2698db8c73210aba1b6f40eb85f3b3e941fab48..56d7d64ce3fcfbe17a33fcd357ece0a528a0bde5 100644 --- a/testsuite/expect/inc3.11.1 +++ b/testsuite/expect/inc3.11.1 @@ -13,7 +13,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -33,7 +33,7 @@ proc inc3_11_1 {} { global def_node user_name def_partition exit_code cluster_cpus res_name - global cons_res_actived + global cons_res_actived def_node_name set num_nodes [available_nodes $def_partition] @@ -60,6 +60,7 @@ proc inc3_11_1 {} { {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=badtype,ignore_jobs} {StartTime=now+10minutes EndTime=now Nodes=$def_node User=$user_name Flags=ignore_jobs} {StartTime=now Duration=5 Nodes=$def_node User=$user_name Licenses=DUMMY_FOR_TESTING Flags=ignore_jobs} + {StartTime=now Duration=5 Nodes=$def_node_name\[1\-2\] CoreCnt=1 User=$user_name} " #{StartTime=now Duration=5 Nodes=$def_node Account=badaccountname} if {$cons_res_actived == 1} { diff --git a/testsuite/expect/inc3.11.2 b/testsuite/expect/inc3.11.2 index 84655444b623ec1f1e663544fd568ec08f3bfc59..5abe9758c2729e0635d121b108cabd3dcf1e0295 100644 --- a/testsuite/expect/inc3.11.2 +++ b/testsuite/expect/inc3.11.2 @@ -11,7 +11,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.3 b/testsuite/expect/inc3.11.3 index d33274f4ecd4d0e87b84c88805c8a95f97ba3d56..d6700042893b965f6150088d36e5ea62fe7a345f 100644 --- a/testsuite/expect/inc3.11.3 +++ b/testsuite/expect/inc3.11.3 @@ -12,7 +12,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.4 b/testsuite/expect/inc3.11.4 index a9e4938ade97105c3e0eda9d44326a9c2067f515..41159e6f3de23583b7551f756e548a85d42e7c04 100644 --- a/testsuite/expect/inc3.11.4 +++ b/testsuite/expect/inc3.11.4 @@ -12,7 +12,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.5 b/testsuite/expect/inc3.11.5 index 74338db2ae7ecf17dd7c312b7cf226a02e8755e9..31db17971a709f0d6edeaaef87183574fd232ca8 100644 --- a/testsuite/expect/inc3.11.5 +++ b/testsuite/expect/inc3.11.5 @@ -11,7 +11,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.6 b/testsuite/expect/inc3.11.6 index 6c2e5562da57388f7a24c640b84751b884dd672e..275e9c402bae51b01ee436656d3ca251f2aa216d 100644 --- a/testsuite/expect/inc3.11.6 +++ b/testsuite/expect/inc3.11.6 @@ -11,7 +11,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.7 b/testsuite/expect/inc3.11.7 index 15dc5c3f17cd262f6559a1744ba6dfc13feb2097..432f995ab86feca433073adf63fb2f2cf13c277a 100644 --- a/testsuite/expect/inc3.11.7 +++ b/testsuite/expect/inc3.11.7 @@ -12,7 +12,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.8 b/testsuite/expect/inc3.11.8 index 6606bdf85164b773bfd2251d727e59c09cdc4bc5..7d82f2d9146fef2796c6a27005b04bf977169dbe 100644 --- a/testsuite/expect/inc3.11.8 +++ b/testsuite/expect/inc3.11.8 @@ -12,7 +12,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/inc3.11.9 b/testsuite/expect/inc3.11.9 new file mode 100644 index 0000000000000000000000000000000000000000..3d7add430e33814e24c117ec4c33b130071ea511 --- /dev/null +++ b/testsuite/expect/inc3.11.9 @@ -0,0 +1,561 @@ +############################################################################ +# Purpose: Test of SLURM functionality +# to be called from test3.11 +# Several cases for core based reservations using nodelists +# Pluging select/cons_res needed +# +############################################################################ +# Copyright (C) 2013 Barcelona Supercomputing Center +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ + +proc inc3_11_9 {} { + global user_name exit_code res_name res_nodes res_nodecnt res_corecnt + global bin_rm file_in bin_sleep sbatch number scontrol + global alpha_numeric_under scancel + global cluster_cpus cores_per_node def_partition + global res_nodes res_thread_cnt node_count + global def_node_name def_node_inx_min def_node_inx_max + + send_user "\n+++++ STARTING TEST 9 +++++\n" + + # Make the job script + exec $bin_rm -f $file_in + make_bash_script $file_in "$bin_sleep 100" + + # Make a reservation, just to get node size infomation + set ret_code [create_res "StartTime=now Duration=1 NodeCnt=1 User=$user_name" 0] + if {$ret_code != 0} { + send_user "\n\033\[31mFAILURE: Unable to create a valid reservation\033\[m\n" + exit $ret_code + } + # Delete the reservation + set ret_code [delete_res $res_name] + if {$ret_code != 0} { + exit $ret_code + } + + set num_nodes [available_nodes $def_partition] + set core_res_num [ expr $cores_per_node / 2 ] + set thread_res_num [ expr $core_res_num * $res_thread_cnt ] + + # Submit a batch job using half the threads on the nodes + set sbatch_pid [spawn $sbatch --nodes=1-$num_nodes --time=10:00 --ntasks-per-node=$thread_res_num --output=/dev/null $file_in] + 
expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + exit 1 + } + + if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: job failed to start\n" + cancel_job $job_id + exit 1 + } + send_user "\nJOB is running as expected\n" + + # Make a reservation using 1 core per node in first 5 nodes + if {$num_nodes < 5} { + send_user "\nWARNING: Insufficient node count for remaining test\n"; + cancel_job $job_id + return + } + + set min_node_inx $def_node_inx_min + set max_node_inx [expr $def_node_inx_min + 4] + set ret_code [create_res "StartTime=now Duration=60 Nodes=$def_node_name\[$min_node_inx\-$max_node_inx\] CoreCnt=1,1,1,1,1 User=$user_name" 0] + if {$ret_code != 0} { + send_user "\n\033\[31mFAILURE: Unable to create a valid reservation\033\[m\n" + exit $ret_code + } + + if {$res_nodecnt != 5} { + send_user "\n\033\[31mFAILURE: reservation created with $res_nodecnt nodes when 5 were requested\033\[m\n" + exit 1 + } + + if {$res_corecnt != 5} { + send_user "\n\033\[31mFAILURE: reservation created with $res_corecnt cores when just 5 was requested\033\[m\n" + exit 1 + } + + send_user "\n\033\[32mSUCCESS: reservation was created as expected\033\[m\n" + + # Delete the reservation + set ret_code [delete_res $res_name] + if {$ret_code != 0} { + cancel_job $job_id + exit $ret_code + } + + set core_res_num [expr $core_res_num + 1] + # Make the reservation using more cores then free in a node + set ret_code [create_res "StartTime=now Duration=60 Nodes=$def_node_name\[$min_node_inx\-$max_node_inx\] CoreCnt=1,1,1,1,$core_res_num User=$user_name" 1] + if {$ret_code != 0} { + send_user "\n\033\[32mSUCCESS: Reservation can not be created as expected\033\[m\n" + } else { + send_user 
"\n\033\[31mFAILURE: reservation was created when it should have not\033\[m\n" + set exit_code 1 + + # Delete the reservation + set ret_code [delete_res $res_name] + } + + # Make the reservation using more cores than free in a node (now) + # but those cores being free at reservation start time + set ret_code [create_res "StartTime=now+3600 Duration=60 Nodes=$def_node_name\[$min_node_inx\-$max_node_inx\] CoreCnt=1,1,1,1,$core_res_num User=$user_name" 0] + if {$ret_code != 0} { + send_user "\n\033\[31mFAILURE: Reservation can not be created when it should\033\[m\n" + set exit_code 1 + } else { + send_user "\n\033\[32mSUCCESS: Reservation can be created as expected\033\[m\n" + # Delete the reservation + set ret_code [delete_res $res_name] + } + + # Make the reservation using more cores than free at reservation start time + set ret_code [create_res "StartTime=now+300 Duration=60 Nodes=$def_node_name\[$min_node_inx\-$max_node_inx\] CoreCnt=1,1,1,1,$core_res_num User=$user_name" 1] + if {$ret_code != 0} { + send_user "\n\033\[32mSUCCESS: Reservation can not be created as expected\033\[m\n" + } else { + send_user "\n\033\[31mFAILURE: Reservation can be created when it should not\033\[m\n" + set exit_code 1 + + # Delete the reservation + set ret_code [delete_res $res_name] + } + + cancel_job $job_id + + + send_user "\n\nLet's check overlapping reservations\n\n"; + + set core_res_num [ expr $cores_per_node / 2 ] + set total_core_res [ expr $core_res_num * $node_count ] + # Make a reservation for all nodes using just half the processor in each node + set ret_code [create_res "StartTime=now Duration=60 Nodecnt=$node_count CoreCnt=$total_core_res User=$user_name" 1] + if {$ret_code != 0} { + send_user "\n\033\[31mFAILURE: Unable to create a valid reservation\033\[m\n" + exit $ret_code + } + + send_user "\n\033\[32mSUCCESS: reservation was created as expected\033\[m\n" + + + if {$core_res_num < 2} { + send_user "\n\033\[32mWARNING: not enough cores for testing\033\[m\n" + set 
ret_code [delete_res $res_name] + } + + set res_name1 "$res_name" + + set total_core_res [ expr $core_res_num + 1 ] + # Now creating a reservation using first 5 nodes and more cores per node than available + set ret_code [create_res "StartTime=now Duration=60 Nodes=$def_node_name\[$min_node_inx\-$max_node_inx\] CoreCnt=1,1,1,1,$total_core_res User=$user_name" 1] + if {$ret_code == 0} { + send_user "\n\033\[31mFAILURE: reservation was created when it should not\033\[m\n" + set ret_code [delete_res $res_name1] + exit $ret_code + } + + send_user "\n\033\[32mSUCCESS: reservation was not created as expected\033\[m\n" + + # Now creating a reservation using first 5 nodes and just 1 core per node + set ret_code [create_res "StartTime=now Duration=60 Nodes=$def_node_name\[$min_node_inx\-$max_node_inx\] CoreCnt=1,1,1,1,1 User=$user_name" 1] + if {$ret_code != 0} { + send_user "\n\033\[31mFAILURE: Unable to create a valid reservation\033\[m\n" + set ret_code [delete_res $res_name1] + exit $ret_code + } + + send_user "\n\033\[32mSUCCESS: reservation was created as expected\033\[m\n" + + # Submit a batch job: a job using cores available in first 5 nodes + set core_res_num [ expr $cores_per_node / 2 ] + set core_res_num [ expr $core_res_num - 1 ] + set thread_res_num [ expr $core_res_num * $res_thread_cnt ] + + set sbatch_pid [spawn $sbatch --ntasks-per-node=$thread_res_num --nodelist=$def_node_name\[$min_node_inx\-$max_node_inx\] --output=/dev/null $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + sleep 1 + # Show the job, make sure reservation tag is right + spawn $scontrol show job $job_id + 
expect { + -re "Invalid job id specified" { + send_user "\n\033\[31mFAILURE: Job $job_id not found\033\[m\n" + set exit_code 1 + exp_continue + } + -re "JobState=PENDING" { + send_user "\n\033\[31mFAILURE: Job $job_id is PENDING\033\[m\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: scontrol not responding\033\[m\n" + set exit_code 1 + } + eof { + wait + } + } + + if { $exit_code == 1 } { + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + send_user "\n\033\[32mJOB is running as expected\033\[m\n" + + cancel_job $job_id + + # Submit a batch job: a job using more cores than available in first 5 nodes + set core_res_num [ expr $cores_per_node / 2 ] + set thread_res_num [ expr $core_res_num * $res_thread_cnt ] + set sbatch_pid [spawn $sbatch --ntasks-per-node=$thread_res_num --nodelist=$def_node_name\[$min_node_inx\-$max_node_inx\] --output=/dev/null $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + sleep 1 + # Show the job, make sure reservation tag is right + spawn $scontrol show job $job_id + expect { + -re "Invalid job id specified" { + send_user "\n\033\[31mFAILURE: Job $job_id not found\033\[m\n" + set exit_code 1 + exp_continue + } + -re "JobState=PENDING" { + send_user "\n\033\[32m Job is PENDING as expected\033\[m\n" + exp_continue + } + -re "JobState=RUNNING" { + send_user "\n\033\[31mFAILURE: Job $job_id is RUNNING\033\[m\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: scontrol not responding\033\[m\n" + set exit_code 1 + } + eof { + wait + } + } + if { 
$exit_code == 1 } { + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + cancel_job $job_id + + # Submit a batch job: a job using cores reserved in first reservation + set core_res_num [ expr $cores_per_node / 2 ] + set thread_res_num [ expr $core_res_num * $res_thread_cnt ] + set sbatch_pid [spawn $sbatch --ntasks-per-node=$thread_res_num --nodelist=$def_node_name\[$min_node_inx\-$max_node_inx\] --reservation=$res_name1 --output=/dev/null $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + sleep 1 + # Show the job, make sure reservation tag is right + spawn $scontrol show job $job_id + expect { + -re "Invalid job id specified" { + send_user "\n\033\[31mFAILURE: Job $job_id not found\033\[m\n" + set exit_code 1 + exp_continue + } + -re "JobState=RUNNING" { + send_user "\n\033\[32m Job is RUNNING as expected\033\[m\n" + exp_continue + } + -re "JobState=PENDING" { + send_user "\n\033\[31mFAILURE: Job $job_id is PENDING\033\[m\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: scontrol not responding\033\[m\n" + set exit_code 1 + } + eof { + wait + } + } + if { $exit_code == 1 } { + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + cancel_job $job_id + + # Submit a batch job: a job using more cores than reserved in first reservation + set core_res_num [ expr $cores_per_node / 2 ] + set core_res_num [ expr $core_res_num * 5 ] + set core_res_num [ expr $core_res_num + 1 ] + set thread_res_num [ expr $core_res_num * $res_thread_cnt ] + set sbatch_pid [spawn $sbatch 
--ntasks-per-node=$thread_res_num --nodelist=$def_node_name\[$min_node_inx\-$max_node_inx\] --reservation=$res_name1 --output=/dev/null $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + sleep 1 + # Show the job, make sure reservation tag is right + spawn $scontrol show job $job_id + expect { + -re "Invalid job id specified" { + send_user "\n\033\[31mFAILURE: Job $job_id not found\033\[m\n" + set exit_code 1 + exp_continue + } + -re "JobState=PENDING" { + send_user "\n\033\[32m Job is PENDING as expected\033\[m\n" + exp_continue + } + -re "JobState=RUNNING" { + send_user "\n\033\[31mFAILURE: Job $job_id is RUNNING\033\[m\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: scontrol not responding\033\[m\n" + set exit_code 1 + } + eof { + wait + } + } + if { $exit_code == 1 } { + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + cancel_job $job_id + + # Submit a batch job: a job using cores reserved in second reservation + set thread_res_num [ expr 1 * $res_thread_cnt ] + set sbatch_pid [spawn $sbatch --ntasks-per-node=$thread_res_num --nodelist=$def_node_name\[$min_node_inx\-$max_node_inx\] --reservation=$res_name --output=/dev/null $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + set ret_code [delete_res 
$res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + sleep 1 + # Show the job, make sure reservation tag is right + spawn $scontrol show job $job_id + expect { + -re "Invalid job id specified" { + send_user "\n\033\[31mFAILURE: Job $job_id not found\033\[m\n" + set exit_code 1 + exp_continue + } + -re "JobState=RUNNING" { + send_user "\n\033\[32m Job is RUNNING as expected\033\[m\n" + exp_continue + } + -re "JobState=PENDING" { + send_user "\n\033\[31mFAILURE: Job $job_id is PENDING\033\[m\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: scontrol not responding\033\[m\n" + set exit_code 1 + } + eof { + wait + } + } + if { $exit_code == 1 } { + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + exit 1 + } + + cancel_job $job_id + + # Submit a batch job: a job using more cores than reserved in second reservation + set thread_res_num [ expr 2 * $res_thread_cnt ] + set sbatch_pid [spawn $sbatch --ntasks-per-node=$thread_res_num --nodelist=$def_node_name\[$min_node_inx\-$max_node_inx\] --reservation=$res_name --output=/dev/null $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: sbatch not responding\033\[m\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } + } + if {$job_id == 0} { + send_user "\n\033\[31mFAILURE: batch submit failure\033\[m\n" + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + set exit_code 1 + exit 1 + } + + sleep 1 + # Show the job, make sure reservation tag is right + spawn $scontrol show job $job_id + expect { + -re "Invalid job id specified" { + send_user "\n\033\[31mFAILURE: Job $job_id not found\033\[m\n" + set exit_code 1 + exp_continue + } + -re "JobState=PENDING" { + send_user "\n\033\[32m Job is PENDING as expected\033\[m\n" + exp_continue + } + -re "JobState=RUNNING" { + send_user "\n\033\[31mFAILURE: Job $job_id 
is RUNNING\033\[m\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\n\033\[31mFAILURE: scontrol not responding\033\[m\n" + set exit_code 1 + } + eof { + wait + } + } + if { $exit_code == 1 } { + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] + return + } + + cancel_job $job_id + + set ret_code [delete_res $res_name1] + set ret_code [delete_res $res_name] +} diff --git a/testsuite/expect/pkill b/testsuite/expect/pkill index 22759da765d0da5c67980237220b062eb56c1a37..327aeeaf448a55b9ed4903d576b2433d6c2182ba 100755 --- a/testsuite/expect/pkill +++ b/testsuite/expect/pkill @@ -10,7 +10,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/regression b/testsuite/expect/regression index 986d1be1dbf1a5c777c344451420bc68ba52a10b..fe59bf08401844b0577df442814717a8520c7786 100755 --- a/testsuite/expect/regression +++ b/testsuite/expect/regression @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/regression.py b/testsuite/expect/regression.py index 82b18d86e5c92687c59d17a0f3a4c1bbb089ddcc..62aa463c3cd4a357f97b58c25abb116158d25b63 100755 --- a/testsuite/expect/regression.py +++ b/testsuite/expect/regression.py @@ -6,7 +6,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.1 b/testsuite/expect/test1.1 index f1048728f5451843df633616da8d144eca56da50..83565e1e94ba6a4c3490c7bf1c59f10260111e33 100755 --- a/testsuite/expect/test1.1 +++ b/testsuite/expect/test1.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.10 b/testsuite/expect/test1.10 index 9735481979704b1cfc860e1e66dc07b419f5dec9..ffdd0301d529bdc155821ab16069e59f08373bb4 100755 --- a/testsuite/expect/test1.10 +++ b/testsuite/expect/test1.10 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.11 b/testsuite/expect/test1.11 index bd2c5fa0dcc76982d3b5403306148387f313871e..42a21513a93b5cd571561b9e9e9cd2105cc9b7dc 100755 --- a/testsuite/expect/test1.11 +++ b/testsuite/expect/test1.11 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.12 b/testsuite/expect/test1.12 index 00372ab9b6957743d7410dc551e2d279dbd3cd58..df16a0e67934f0e687629d5f8beba1a8b3246bd6 100755 --- a/testsuite/expect/test1.12 +++ b/testsuite/expect/test1.12 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.13 b/testsuite/expect/test1.13 index 49af4991dcb5501a5fbade744252ce524a0e5b9e..67845aa0148efea9e0c2b89a7078f2fd3ebea7e3 100755 --- a/testsuite/expect/test1.13 +++ b/testsuite/expect/test1.13 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.14 b/testsuite/expect/test1.14 index 01a8f14df95568ebe26d6f3e75b3194ad89e5cbb..c42618676fed02b973d29cd3c646def62170b922 100755 --- a/testsuite/expect/test1.14 +++ b/testsuite/expect/test1.14 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -35,6 +35,7 @@ source ./globals set test_id "1.14" set exit_code 0 set file_in "test$test_id.input" +set file_in2 "test$test_id.input2" set file_out "test$test_id.output" set job_id 0 set sleep_secs 10 @@ -59,19 +60,23 @@ if {[test_launch_poe]} { # Run one more step than allocated CPUs and make sure it waits # The "sleep 2" is meant to insure the earlier job steps start first # -exec $bin_rm -f $file_in $file_out +exec $bin_rm -f $file_in $file_in2 $file_out make_bash_script $file_in " echo tasks_per_node=\$SLURM_TASKS_PER_NODE inx=0 while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \] do - $srun --exclusive -n1 sleep $sleep_secs & + $srun --exclusive -n1 $bin_sleep $sleep_secs & inx=\$((inx+1)) done $bin_sleep 2 - $srun -v --exclusive -n1 $bin_printenv $node_name_env & + $srun -v --exclusive -n1 ./$file_in2 & wait " +make_bash_script $file_in2 " + $bin_sleep 2 + $scontrol show steps +" # # Spawn a job via sbatch @@ -112,16 +117,15 @@ if {[wait_for_file $file_out] != 0} { send_user "\nFAILURE: Output file $file_out is missing\n" exit 1 } -set match1 0 -set match2 0 +set matches 0 +set tasks_per_node 0 spawn $bin_cat $file_out expect { - -re "Job step creation temporarily disabled, retrying" { - incr match1 - exp_continue + -re "tasks_per_node=($number)" { + set tasks_per_node $expect_out(1,string) } - -re "Job step created" { - incr match2 + -re "StepId=$job_id" { + incr matches exp_continue } eof { @@ -129,9 +133,9 @@ expect { } } -if { $match1 != 1 || $match2 != 1 } { +if { $matches > $tasks_per_node } { send_user "\nFAILURE: Problem with exclusive resource allocation " - send_user "for step ($match1, $match2)\n" + send_user "for step ($matches > $tasks_per_node)\n" set exit_code 1 } @@ -152,11 +156,11 @@ make_bash_script $file_in " inx=0 while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \] do - $srun --exclusive -n1 sleep $sleep_secs & + $srun --exclusive -n1 $bin_sleep $sleep_secs & inx=\$((inx+1)) 
done $bin_sleep 2 - $srun --exclusive -n1 --immediate hostname & + $srun -v --exclusive -n1 --immediate $file_in2 & wait " @@ -199,17 +203,17 @@ if {[wait_for_file $file_out] != 0} { send_user "\nFAILURE: Output file $file_out is missing\n" exit 1 } -set match1 0 +set matches 0 spawn $bin_cat $file_out expect { - -re "Job step creation temporarily disabled, retrying" { + -re "StepId=$job_id" { send_user "\nFAILURE: Problem --exclusive and --immediate option for step\n" set exit_code 1 exp_continue } -re "Unable to create job step" { send_user "This error was expected, no worries\n" - incr match1 + incr matches exp_continue } eof { @@ -217,13 +221,13 @@ expect { } } -if { $match1 != 1 } { - send_user "\nFAILURE: Problem --exclusive and --immediate option for step\n" +if { $matches != 1 } { + send_user "\nFAILURE: Problem --exclusive and --immediate option for step ($matches)\n" set exit_code 1 } if {$exit_code == 0} { - exec $bin_rm -f $file_in $file_out + exec $bin_rm -f $file_in $file_in2 $file_out send_user "\nSUCCESS\n" } exit $exit_code diff --git a/testsuite/expect/test1.15 b/testsuite/expect/test1.15 index 93f6fff0f00171b4f930ee5927b2bc3c33bb3c99..b4907e9352dc9cba24433258789e810ccb4eb859 100755 --- a/testsuite/expect/test1.15 +++ b/testsuite/expect/test1.15 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.16 b/testsuite/expect/test1.16 index 48fbae1bb00c364eda3d3ae9f2166d08de06c0d4..bb7ad9206bd3b419269869a12e74e27c9c4b93e0 100755 --- a/testsuite/expect/test1.16 +++ b/testsuite/expect/test1.16 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. 
-# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.17 b/testsuite/expect/test1.17 index 4a6e58286a0da1bb013ec4a6e9459209830e1b98..0ee91c33918f67405f9927b14efd86e5e2148767 100755 --- a/testsuite/expect/test1.17 +++ b/testsuite/expect/test1.17 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.18 b/testsuite/expect/test1.18 index b4a909e6cb52e3ec7d4e9aa03d22e5cc04ef86ec..e84e30866916bd9ae10b75557e0c240fa1ec4a78 100755 --- a/testsuite/expect/test1.18 +++ b/testsuite/expect/test1.18 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.19 b/testsuite/expect/test1.19 index 8edb2851af1de767b9419b585201e9d189b8c106..86d7894c9c342512c1434b3f91c9c61d4e823551 100755 --- a/testsuite/expect/test1.19 +++ b/testsuite/expect/test1.19 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -41,6 +41,9 @@ set file_out_J "test$test_id.J.%J.output" set file_out_n "test$test_id.n.%n.output" set file_out_s "test$test_id.s.%s.output" set file_out_t "test$test_id.t.%t.output" +set file_out_u "test$test_id.u.%u.output" +set file_err_u "test$test_id.u.%u.error" +set user_name "" set job_id 0 print_header $test_id @@ -67,6 +70,19 @@ if {[test_launch_poe]} { set node_name_env SLURMD_NODENAME } +# +# Gets current user +# +spawn $bin_id -un +expect { + -re "($alpha_numeric_under)" { + set user_name $expect_out(1,string) + } + eof { + wait + } +} + # # Spawn a program that generates "task_id" (%t) in stdout file names # and confirm they are created @@ -186,6 +202,87 @@ if {[wait_for_file $file_out_J_glob] != 0} { exec $bin_rm -f $file_out_J_glob } +# +# Spawn a shell via srun with the %u in stdout file name and confirm +# it is created +# +set job_id 0 +set srun_pid [spawn $srun --output=$file_out_u -v sleep 10] +expect { + -re "jobid ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + slow_kill $srun_pid + exit 1 + } + eof { + wait + } +} + +if {$job_id == 0} { + send_user "\nFAILURE: srun did not submit job\n" + exit 1 +} + +# +# Check that the correct files were created +# +set file_out_u_glob "test$test_id.u.$user_name.output" + +if {[wait_for_file $file_out_u_glob]==0} { + exec $bin_rm -f $file_out_u_glob +} else { + send_user "\nFAILURE: file format is not created correctly\n" + set exit_code 1 +} + +# +# Spawn shell via srun with %u in the stderr file name and confirm +# it is created +# +set job_id 0 +set srun_pid [spawn $srun --error=$file_err_u --output=/dev/null -v sleep aaa] +expect { + -re "jobid ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + -re "error" { + send_user "\nDo not worry this error is expected\n" + set srun_exit 1 + exp_continue; + } + timeout { + send_user 
"\nFAILURE: srun is not responding\n" + slow_kill $srun_pid + exit 1 + } + eof { + wait + } +} + +if {$job_id == 0} { + send_user "\nFAILURE: srun did not submit job\n" + exit 1 +} + +# +# Check that the correct files where created +# +set file_err_u_glob "test$test_id.u.$user_name.error" + +if {[wait_for_file $file_err_u_glob]==0} { + exec $bin_rm -f $file_err_u_glob +} else { + send_user "\nFAILURE: file format is not created correctly\n" + set exit_code 1 +} + # # Spawn a shell via srun that includes "node_id" (%n) in stdout # file name and confirm it is created diff --git a/testsuite/expect/test1.2 b/testsuite/expect/test1.2 index eb8c8d5cc0c240564f640301ba28d2f19193bf8d..8d31052e3fcb717afb7e1aaae00140e4f7d9df11 100755 --- a/testsuite/expect/test1.2 +++ b/testsuite/expect/test1.2 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.20 b/testsuite/expect/test1.20 index cc64fd1367faaa77040e95f9e3561bfa96d62188..3dd9fa43dd5681766db65522eccf83e3c8ad0350 100755 --- a/testsuite/expect/test1.20 +++ b/testsuite/expect/test1.20 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.21 b/testsuite/expect/test1.21 index 880dfbddaa2c7ef81985c3e53e3d2fc1cb691218..a7a83f0a3d27bacf092ae59a93439a253b01c8f3 100755 --- a/testsuite/expect/test1.21 +++ b/testsuite/expect/test1.21 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. 
All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.22 b/testsuite/expect/test1.22 index c241417c9ff661a98da61e657514aacbd2cffacd..4e7b5cb366abb735e75d83715591d95afc87b556 100755 --- a/testsuite/expect/test1.22 +++ b/testsuite/expect/test1.22 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23 index 3347b294f8e85cad11fd7ff74cb498a3c02e933a..0b7a61eb72545a6e998a4cd3d0b9e62487d6ec63 100755 --- a/testsuite/expect/test1.23 +++ b/testsuite/expect/test1.23 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.24 b/testsuite/expect/test1.24 index f12d44c8543dc68d2601d97c0b2daeff6b024dfc..5b8384d1333fd2e816412a886f2ea4d8a2b4c44d 100755 --- a/testsuite/expect/test1.24 +++ b/testsuite/expect/test1.24 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.25 b/testsuite/expect/test1.25 index b6ef79ea7ed5489fdc28fd258921658133ddb2f3..3722d80b35b858c21556bd39508e07ae59204458 100755 --- a/testsuite/expect/test1.25 +++ b/testsuite/expect/test1.25 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.26 b/testsuite/expect/test1.26 index 1ab88c0ea7124e747ff842abec28908da20a91b3..81d06bddd3e07e38a978539a541d949d435397ae 100755 --- a/testsuite/expect/test1.26 +++ b/testsuite/expect/test1.26 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.27 b/testsuite/expect/test1.27 index dcc428bb75938522251a8ace8e5fdefacf01ec27..b0fd734b62b7482847f2f8ea32b04464f91b48f0 100755 --- a/testsuite/expect/test1.27 +++ b/testsuite/expect/test1.27 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.28 b/testsuite/expect/test1.28 index 0565c4886066b22bf7b5459e33ea2fbf89191bf5..dc5038a493af4a6420bd491e3fc0bb3299949332 100755 --- a/testsuite/expect/test1.28 +++ b/testsuite/expect/test1.28 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.29 b/testsuite/expect/test1.29 index 0f728281b07427640ab9dcd8333426f7dcf7ec7f..2fc4e3c9f5dd4e5a4cad1b394314ec05eb8bb07a 100755 --- a/testsuite/expect/test1.29 +++ b/testsuite/expect/test1.29 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -242,6 +242,7 @@ if {$matches != $target} { send_user "Check PropagateResourceLimits configuration parameter\n" send_user "Check $file_err for errors\n" send_user "A long running slurmd could cause a file size limit error\n" + send_user "slurmd could have been started with limits lower than user launching the task\n" set exit_code 1 } diff --git a/testsuite/expect/test1.29.prog.c b/testsuite/expect/test1.29.prog.c index 0c06316575ab84cfcfc7c1bfb4d59d4c65444ecd..7f967a59c75950b65fed1306efc757605db8d54a 100644 --- a/testsuite/expect/test1.29.prog.c +++ b/testsuite/expect/test1.29.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.3 b/testsuite/expect/test1.3 index 4fb390a6a5c7a4f5f1de1ae87307190c13f12e50..ef6c301c4026cba5e49c095b3e5a224f0b40a744 100755 --- a/testsuite/expect/test1.3 +++ b/testsuite/expect/test1.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.30 b/testsuite/expect/test1.30 index 59cd689ec5c2ce892747b0a7af81238af3098aa7..66fc27b47a01500cfb789c12c22f0466b57351f6 100755 --- a/testsuite/expect/test1.30 +++ b/testsuite/expect/test1.30 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.31 b/testsuite/expect/test1.31 index 0e1ef0d000f7eb6495eb52a988b15d584eb6809f..43d513f9cb0f8182564ed5db467dd2c758fac688 100755 --- a/testsuite/expect/test1.31 +++ b/testsuite/expect/test1.31 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.32 b/testsuite/expect/test1.32 index 3c2e7fd2b7fda2344e5c8b95837a4abd7bbda6f9..c3ce6e5c129b0650839b1b9b57b939ab0a38c622 100755 --- a/testsuite/expect/test1.32 +++ b/testsuite/expect/test1.32 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.32.prog.c b/testsuite/expect/test1.32.prog.c index e316a34562039125fb21037a5e6b16f704c822db..3e824f6687dc661dba548c71a6fe2c4b382d4c03 100644 --- a/testsuite/expect/test1.32.prog.c +++ b/testsuite/expect/test1.32.prog.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.33 b/testsuite/expect/test1.33 index ba53e215068ca0b044c96d16835adaffd45c9088..ee25e965591a31fc2df9a26962a71841e3515b6f 100755 --- a/testsuite/expect/test1.33 +++ b/testsuite/expect/test1.33 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -64,7 +64,14 @@ if {[test_bluegene]} { # # Delete left-over scripts and build new ones # +# POE BUG: If the application exits immediately then pmdv12 hangs until +# slurm kills it with a timeout, thus the sleep below is required: +# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD +# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12 +# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct> +# make_bash_script $exit_script " + $bin_sleep 2 RC=`expr \$SLURM_PROCID + 10` exit \$RC " diff --git a/testsuite/expect/test1.34 b/testsuite/expect/test1.34 index 1d731e43fb6a183f77118089710a82fa57ea07a7..aa06045059f5d4cae086c0e812b5d5777372f5ab 100755 --- a/testsuite/expect/test1.34 +++ b/testsuite/expect/test1.34 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -64,6 +64,10 @@ expect { set matches 1 exp_continue } + -re "($number)" { + set proc_pid "$expect_out(0,string)" + exp_continue + } timeout { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid @@ -82,7 +86,8 @@ if {$matches != 1} { # Post-processing # if {$exit_code == 0} { - make_bash_script $file_in "$bin_rm -f *${test_prog}*core /tmp/*${test_prog}*core" + make_bash_script $file_in "\ +$bin_rm -f *${test_prog}*core /tmp/*${test_prog}*core core.$proc_pid" # Run with "spawn" rather than "exec" to better handle other user's core # files that we can not delete spawn ./$file_in diff --git a/testsuite/expect/test1.34.prog.c b/testsuite/expect/test1.34.prog.c index 7ccf2770bd7c4c0586b3fb644e055b7a85bd5bf6..ee795cb794d53eb6aed18af1d42f4cd8b50d3753 100644 --- a/testsuite/expect/test1.34.prog.c +++ b/testsuite/expect/test1.34.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -24,10 +24,21 @@ * with SLURM; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> + int main (int argc, char *argv[]) { char *buffer; + /* Print the pid so the test1.34 script + * can rm the core file core.getpid() + * fflush() otherwise stdout loss in + * the core dump. 
+ */ + printf("%d\n", getpid()); + fflush(stdout); buffer = (char *) 0; buffer[1] = 'a'; return; diff --git a/testsuite/expect/test1.35 b/testsuite/expect/test1.35 index d9a8253faeb1c234f563707940ab9f49f1019a31..6633738afa03bca6a8e18e5127a6d3f26e26b6d8 100755 --- a/testsuite/expect/test1.35 +++ b/testsuite/expect/test1.35 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -58,8 +58,8 @@ set step_mem_opt "--comment=no_mem" spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectTypeParameters" expect { -re "MEMORY" { - set job_mem_opt "--mem-per-cpu=32" - set step_mem_opt "--mem-per-cpu=1" + set job_mem_opt "--mem-per-cpu=64" + set step_mem_opt "--mem-per-cpu=2" exp_continue } eof { diff --git a/testsuite/expect/test1.36 b/testsuite/expect/test1.36 index e898e7bfd02b8b03b5c578ec65943d78fd621dc0..18405078caec13654777ba796454c35a07320a94 100755 --- a/testsuite/expect/test1.36 +++ b/testsuite/expect/test1.36 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.37 b/testsuite/expect/test1.37 index 86218abab01edc83fe1a75c808a91a4bb8d1a365..616190fcb31969e00d43043b433845315823513e 100755 --- a/testsuite/expect/test1.37 +++ b/testsuite/expect/test1.37 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.38 b/testsuite/expect/test1.38 index ac3f7355f60f25c4e3cba17bf9a18b81c6a9c674..73ebd23a8298972cfbe24dd6bc9614596be8aba2 100755 --- a/testsuite/expect/test1.38 +++ b/testsuite/expect/test1.38 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.4 b/testsuite/expect/test1.4 index 2bf1d4e7a55e0866a67da82745785c697181128e..59613b78e333291e4e89aec5fae80f520b6b59e3 100755 --- a/testsuite/expect/test1.4 +++ b/testsuite/expect/test1.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.40 b/testsuite/expect/test1.40 index d5670af35169fd005058ded7194dc81e810e8ad5..90645494b287e9a17b3bcd725ec4105f49942ef5 100755 --- a/testsuite/expect/test1.40 +++ b/testsuite/expect/test1.40 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.41 b/testsuite/expect/test1.41 index 261b82876a3523c39496cc6dd2476e90bbfed591..fc2d4fef5cfcac4fdcf93b8bcf825ec2d355f942 100755 --- a/testsuite/expect/test1.41 +++ b/testsuite/expect/test1.41 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.42 b/testsuite/expect/test1.42 index e8546c67962cb67855298326efc2bd3a9ba94920..e5d2667912b39ea280b0a17f5349d1599b4b4c1e 100755 --- a/testsuite/expect/test1.42 +++ b/testsuite/expect/test1.42 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.43 b/testsuite/expect/test1.43 index 4c469041e562d6cde53be177a9950b8aaa76cf87..d8a65d5f0223a83502796b5df45cc529793e919f 100755 --- a/testsuite/expect/test1.43 +++ b/testsuite/expect/test1.43 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.44 b/testsuite/expect/test1.44 index 1c93962ecbd4c04670a263bad99753561473bbf6..8d5a48de1e5e383c852ad45a9ffb2aed8f2b5ba8 100755 --- a/testsuite/expect/test1.44 +++ b/testsuite/expect/test1.44 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.45 b/testsuite/expect/test1.45 index be125dcf8fd39c3cca43693a82fe08f34827a0ee..8756faf6869846d58a33c9d73ddf80b38180bcbb 100755 --- a/testsuite/expect/test1.45 +++ b/testsuite/expect/test1.45 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.46 b/testsuite/expect/test1.46 index 3a94150624f308f8fef32c05bc5dc9715ace1192..dc7904fefae4b4e2382bb0313c324b238bde8650 100755 --- a/testsuite/expect/test1.46 +++ b/testsuite/expect/test1.46 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.47 b/testsuite/expect/test1.47 index ffc03b721e69cb72d10df06ef084ab6f1192dce3..5cac4e88c9b64af1e1424a63169eb8fe3e56919c 100755 --- a/testsuite/expect/test1.47 +++ b/testsuite/expect/test1.47 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.48 b/testsuite/expect/test1.48 index d77cd116198a7ae326f8b64bc38a3abbfcbc1b12..933b019e0a86e2ae002391588de5c0b64334078c 100755 --- a/testsuite/expect/test1.48 +++ b/testsuite/expect/test1.48 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.49 b/testsuite/expect/test1.49 index 8dde80715576ced366c1a486aa18c4cb405f93d7..875a4e91eb1553d84cc975a3fbba00c7ba17ed53 100755 --- a/testsuite/expect/test1.49 +++ b/testsuite/expect/test1.49 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.5 b/testsuite/expect/test1.5 index d47356a138377037c6392c19585cb4a5680d385e..6c556e5eec3e4e293b76aed4662dff780c5dfda0 100755 --- a/testsuite/expect/test1.5 +++ b/testsuite/expect/test1.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.50 b/testsuite/expect/test1.50 index 008f7d1dd70b76667fefa6c4acc5568fae4db6fb..0fa6dbda9b597a5e987ebf0c510c1d1c2603f076 100755 --- a/testsuite/expect/test1.50 +++ b/testsuite/expect/test1.50 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.51 b/testsuite/expect/test1.51 index ccbdc3ff1465e45cc756ab2becec3d14dc949c46..a30c381f0d50ee4c1b9bfeef56566c7fa9377da1 100755 --- a/testsuite/expect/test1.51 +++ b/testsuite/expect/test1.51 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.52 b/testsuite/expect/test1.52 index 80ad97be67907cd504dae8f6cb0b7197d73930fc..2d9ec01b026fc4de9660be0c3923fe523c942133 100755 --- a/testsuite/expect/test1.52 +++ b/testsuite/expect/test1.52 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.53 b/testsuite/expect/test1.53 index 2532c08cd8fed4edb6d170fd2f8a4bae8a266dc5..7a6e930af37129e030e380e562396c79b4668f94 100755 --- a/testsuite/expect/test1.53 +++ b/testsuite/expect/test1.53 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.54 b/testsuite/expect/test1.54 index f5b0819315655ad0bb8ac7f93929466f1c71a6e4..9920521bbb81085b3ca31ecd11b37965b356e962 100755 --- a/testsuite/expect/test1.54 +++ b/testsuite/expect/test1.54 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.55 b/testsuite/expect/test1.55 index 35a5e6167f0200d7a909a66606c20c36f9432ce1..91e6467a0f17fce58063ab4a7589fb0b08895bc9 100755 --- a/testsuite/expect/test1.55 +++ b/testsuite/expect/test1.55 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.56 b/testsuite/expect/test1.56 index 532a7d226f198cbc64a8bd5a7706ab0ae5b5c16a..39919a5219f43ba83a2fce140f8d926db3c705bf 100755 --- a/testsuite/expect/test1.56 +++ b/testsuite/expect/test1.56 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.57 b/testsuite/expect/test1.57 index b1096ac1809c30035ab6630f30decb85ceb84a7b..accd2d7e16bcb2ee55b9dd057f7e4960533afb01 100755 --- a/testsuite/expect/test1.57 +++ b/testsuite/expect/test1.57 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.58 b/testsuite/expect/test1.58 index 09f59f4b93069737d28c25d1e3cf6a65a88ac8e9..0afe389a47a4c0819b6564261eafae1381b74430 100755 --- a/testsuite/expect/test1.58 +++ b/testsuite/expect/test1.58 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.59 b/testsuite/expect/test1.59 index 97d2c8633b6f79952d3dff0c45663f3d2e6079e6..ec80998e91f57ab2958a166de595af40be3bd5af 100755 --- a/testsuite/expect/test1.59 +++ b/testsuite/expect/test1.59 @@ -15,7 +15,7 @@ # UCRL-CODE-217948. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.6 b/testsuite/expect/test1.6 index c25c4141fc22f57f22c0210351675abe3d083dd1..1c679d5d6d3bff91cd0c449516e4ffe21aa48f46 100755 --- a/testsuite/expect/test1.6 +++ b/testsuite/expect/test1.6 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.60 b/testsuite/expect/test1.60 index 8ab6bef6764693ee8e238b5dc3a96d514fde0b98..0a698bbf2eac65e61acac4c7a4516f12d7a5815c 100755 --- a/testsuite/expect/test1.60 +++ b/testsuite/expect/test1.60 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.61 b/testsuite/expect/test1.61 index b33a2591728d5c1c8dacbee6415449f8b17b1ce8..846ccc36eec13cb07a478da6df38148fe71865c2 100755 --- a/testsuite/expect/test1.61 +++ b/testsuite/expect/test1.61 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.62 b/testsuite/expect/test1.62 index 7d8483b88e9a08e7d411ea3f6539f34708a84d2b..d4270fef635c7d13f759ea4aedc4af41311e5740 100755 --- a/testsuite/expect/test1.62 +++ b/testsuite/expect/test1.62 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -43,7 +43,7 @@ proc run_gpu_test { gres_cnt } { set bad_format 0 set devices 0 set invalid 0 - set srun_pid [spawn $srun -N1 -n1 --gres=gpu:$gres_cnt -t1 $file_in] + set srun_pid [spawn $srun -N1 -n1 --gres=gpu:$gres_cnt -t1 ./$file_in] expect { -re "Unable to allocate" { incr invalid @@ -75,6 +75,10 @@ proc run_gpu_test { gres_cnt } { send_user "\nWARNING: This could indicate that gres.conf lacks device files for the GPUs\n" exp_continue } + -re "CUDA_VISIBLE_DEVICES=" { + send_user "\nWARNING: This could indicate that gres.conf lacks device files for the GPUs\n" + exp_continue + } timeout { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid @@ -102,6 +106,9 @@ print_header $test_id if {[test_cray]} { send_user "\nWARNING: This test is incompatible with Cray systems\n" exit $exit_code +} elseif {[test_launch_poe]} { + send_user "\nWARNING: This test is incompatible with POE systems\n" + exit $exit_code } # diff --git a/testsuite/expect/test1.63 b/testsuite/expect/test1.63 index 866b3a90aa36a48cb9784ddf62c2172fd1b65ef4..deac7de0986c4592a9ee89892c116f9d092007bb 100755 --- a/testsuite/expect/test1.63 +++ b/testsuite/expect/test1.63 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.64 b/testsuite/expect/test1.64 index dcfe3a5726a7915df70df086f193314be09233e0..6971b8a7b0abacf003f337a224ecf99347557d0a 100755 --- a/testsuite/expect/test1.64 +++ b/testsuite/expect/test1.64 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.65 b/testsuite/expect/test1.65 index 4342ff8ac439e7203e9ad8226aa2fcf79bc29839..24e1be24a900ec267371b8fdc276d10f2dfc1311 100755 --- a/testsuite/expect/test1.65 +++ b/testsuite/expect/test1.65 @@ -11,7 +11,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.66 b/testsuite/expect/test1.66 new file mode 100755 index 0000000000000000000000000000000000000000..6ebb35722edfab09bf137914a6f8be582a21b83c --- /dev/null +++ b/testsuite/expect/test1.66 @@ -0,0 +1,108 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# srun to make sure that the jobs are running on the correct +# nodes within the allocation. +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "1.66" +set exit_code 0 +set script "test$test_id.bash" +set file_out "test$test_id.out" +set job_id 0 + +print_header $test_id + +set node_cnt [available_nodes [default_partition]] +if {$node_cnt < 2} { + send_user "WARNING: Not enough nodes in default partition ($node_cnt < 2)\n" + exit 0 +} + +exec $bin_rm -f $script $file_out +make_bash_script $script " + $scontrol show hostname \$SLURM_JOB_NODELIST + TEST_NODE=`$scontrol show hostname \$SLURM_JOB_NODELIST | tail -1` + echo SUBMIT_NODE=\$TEST_NODE + $srun -w \$TEST_NODE -N1 env | grep SLURM_STEP +" + +spawn $sbatch -N2 --output=$file_out $script +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch is not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: sbatch did not submit the job\n" + exit 1 +} + +if {[wait_for_file $file_out] == 0} { + set node_name "UNSET" + set matches 0 + spawn $bin_cat $file_out + expect { + -re "SUBMIT_NODE=($alpha_numeric_nodelist)" { + set node_name $expect_out(1,string) + exp_continue + } + -re "SLURM_STEP_NODELIST=($alpha_numeric_nodelist)" { + 
if {![string compare $expect_out(1,string) $node_name]} { + incr matches + exp_continue + } + } + eof { + wait + } + } + if {$matches != 1} { + send_user "\nFAILURE: unexpected output ($matches != 1)\n" + set exit_code 1 + } +} else { + send_user "\nFAILURE: Output file missing\n" + set exit_code 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $script $file_out + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.67 b/testsuite/expect/test1.67 new file mode 100755 index 0000000000000000000000000000000000000000..c91dc748c9e661feae48ccb4709afcfe6c61bc6a --- /dev/null +++ b/testsuite/expect/test1.67 @@ -0,0 +1,127 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test that an srun program can move from the background to the +# foreground. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "1.67" +set file_in "test$test_id.bash" +set test_timer "test$test_id.test_timer" +set test_srun "test$test_id.test_srun" +set job_id 0 +set exit_code 0 + +print_header $test_id + +if {[test_bluegene]} { + send_user "\nWARNING: This test is incompatible with Bluegene systems\n" + exit $exit_code +} elseif {[test_cray]} { + send_user "\nWARNING: This test is incompatible with Cray systems\n" + exit $exit_code +} elseif {[test_launch_poe]} { + send_user "\nWARNING: This test is incompatible with POE systems\n" + exit $exit_code +} + +# Remove any remaining files from a previous run +exec $bin_rm -f $test_timer $test_srun $file_in + +make_bash_script $test_timer " +for i in \{1..10\} +do + echo \"time \$i\" + sleep 5 +done +echo \"Test finished\" +" + +make_bash_script $test_srun " +set -m +$srun -t1 -v ./$test_timer & +$bin_sleep 15 +echo \"sending job to foreground\" +fg +" + +make_bash_script $file_in " +bash -i ./$test_srun +" + +set timeout $max_job_delay +set send_match 0 +set time_match 0 +set fini_match 0 +spawn bash -i $file_in +expect { + -re "sending job to foreground" { + set send_match 1 + if {$time_match == 0} { + send_user "\nFAILURE: srun is not generating output\n" + set exit_code 1 + } + exp_continue + } + -re "time ($number)" { + incr time_match + exp_continue + } + -re "Test finished" { + set fini_match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$send_match != 1} { + send_user "\nFAILURE: srun was not sent to the foreground\n" + set exit_code 1 +} +if {$time_match != 10} { + send_user "\nFAILURE: srun is not generating output ($time_match != 10)\n" + set exit_code
1 +} +if {$fini_match != 1} { + send_user "\nFAILURE: srun did not finish the program submitted\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + exec $bin_rm $test_timer $test_srun $file_in +} +exit $exit_code + diff --git a/testsuite/expect/test1.68 b/testsuite/expect/test1.68 new file mode 100755 index 0000000000000000000000000000000000000000..330d6fd99dd134f5d1b825df8f7ab83423de8a8b --- /dev/null +++ b/testsuite/expect/test1.68 @@ -0,0 +1,154 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test that an srun program can move from the foreground to the +# background. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "1.68" +set file_in "test$test_id.bash" +set test_timer "test$test_id.test_timer" +set test_srun "test$test_id.test_srun" +set stop_srun "test$test_id.stop_srun" +set job_id 0 +set exit_code 0 + +print_header $test_id + +if {[test_bluegene]} { + send_user "\nWARNING: This test is incompatible with Bluegene systems\n" + exit $exit_code +} elseif {[test_cray]} { + send_user "\nWARNING: This test is incompatible with Cray systems\n" + exit $exit_code +} elseif {[test_launch_poe]} { + send_user "\nWARNING: This test is incompatible with POE systems\n" + exit $exit_code +} + +#remove any remaining files +exec $bin_rm -f $test_timer $test_srun $file_in $stop_srun + +make_bash_script $test_timer " +for i in \{1..10\} +do + echo \"time \$i\" + sleep 5 +done +echo \"Test finished\" +" + +make_bash_script $stop_srun " +$bin_sleep 15 +srun_proc=\$(ps -C srun -o pid=) +echo \"sending job to the backgound, pid=\$srun_proc\" +kill -s stop \$srun_proc +" + +make_bash_script $test_srun " +set -m +./$stop_srun & +$srun -v -t1 ./$test_timer +bg +$bin_sleep 25 +echo \"sending job to foreground\" +fg +" + +make_bash_script $file_in " +bash -i ./$test_srun +" + +set timeout $max_job_delay +set fg_match 0 +set bg_match 0 +set bg_time_num 0 +set fini_match 0 +set time_num 0 +spawn bash -i $file_in +expect { + -re "sending job to the backgound" { + if {$time_num == 0} { + send_user "\nFAILURE: srun is not generating output\n" + set exit_code 1 + } + set bg_time_num $time_num + set bg_match 1 + exp_continue + } + -re "sending job to foreground" { + if {$time_num == $bg_time_num} { + send_user "\nFAILURE: srun is not progressing ($time_num == $bg_time_num)\n" + set exit_code 1 + } + set fg_match 1 + exp_continue + } + -re "time ($number)" { + incr time_num + exp_continue + } + -re "Test finished" { + set fini_match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is 
not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$fg_match != 1} { + send_user "\nFAILURE: srun was not sent to the foreground\n" + set exit_code 1 +} +if {$bg_match != 1} { + send_user "\nFAILURE: srun was not sent to the background\n" + set exit_code 1 +} +if {$time_num != 10} { + send_user "\nFAILURE: srun is not generating output ($time_num != 10)\n" + set exit_code 1 +} +if {$fini_match != 1} { + send_user "\nFAILURE: srun did not finish the program submitted\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + exec $bin_rm $test_timer $test_srun $file_in $stop_srun +} +exit $exit_code + + + diff --git a/testsuite/expect/test1.69 b/testsuite/expect/test1.69 new file mode 100755 index 0000000000000000000000000000000000000000..9cf95747a31060d199d7a3b03117c4b350620837 --- /dev/null +++ b/testsuite/expect/test1.69 @@ -0,0 +1,150 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test that multiple srun programs execute simultaneously in the +# background. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. 
+# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "1.69" +set file_in "test$test_id.bash" +set test_timer "test$test_id.test_timer" +set test_srun "test$test_id.test_srun" +set run_timer 2 +set exit_code 0 + +print_header $test_id + +if {[test_bluegene]} { + send_user "\nWARNING: This test is incompatible with Bluegene systems\n" + exit $exit_code +} elseif {[test_cray]} { + send_user "\nWARNING: This test is incompatible with Cray systems\n" + exit $exit_code +} elseif {[test_launch_poe]} { + send_user "\nWARNING: This test is incompatible with POE systems\n" + exit $exit_code +} + +# Remove any remaining files from a previous run +exec $bin_rm -f $test_timer $test_srun $file_in + +make_bash_script $test_timer " +for i in \{1..10\} +do + echo \"timer \$a time \$i\" + sleep 5 +done +echo \"Timer \$a finished\" +" + +make_bash_script $test_srun " +set -m +for a in \{1..$run_timer\} +do +export a +$srun -v -t1 ./$test_timer & +done +" + +make_bash_script $file_in " +bash -i ./$test_srun +" + +# Verify that all timers run concurrently by checking that every timer +# reports "time 3" before any of them reports "time 4", and so on.
+set timeout $max_job_delay +set tmp_time 1 +set time_num 1 +set fini_num 1 +set timer_match 0 +set time_match 0 +set fini_match 0 +spawn bash -i $file_in +expect { + + -re "timer ($number) time ($number)" { + set tr_num $expect_out(1,string) + set t_num $expect_out(2,string) + + for {set i 1} {$run_timer>=$i} {incr i} { + if {$tr_num == $i} { + incr timer_match + } + } + if {$t_num == $time_num} { + incr time_match + } else { + set exit_code 1 + } + if {$tmp_time == $run_timer} { + incr time_num + set tmp_time 0 + } + + incr tmp_time + exp_continue + } + -re "Timer ($number) finished" { + set tmp_fini $expect_out(1,string) + for {set i 1} {$run_timer>=$i} {incr i} { + if {$tmp_fini == $i} { + incr fini_match + } + } + incr fini_num + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + + +if {$timer_match != [expr $run_timer*10]} { + send_user "\nFAILURE: srun did not execute all timers ($timer_match != [expr $run_timer*10])\n" + set exit_code 1 +} +if {$time_match != [expr $run_timer*10]} { + send_user "\nFAILURE: srun is not generating output ($time_match != [expr $run_timer*10])\n" + set exit_code 1 +} +if {$fini_match != $run_timer} { + send_user "\nFAILURE: srun did not finish the program submitted ($fini_match != $run_timer)\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + exec $bin_rm $test_timer $test_srun $file_in +} +exit $exit_code diff --git a/testsuite/expect/test1.7 b/testsuite/expect/test1.7 index 506135d7509d08799be0ebdacd40cd4ecca2e4c2..1c4861fe9dd0115991613623d47780f5091442e0 100755 --- a/testsuite/expect/test1.7 +++ b/testsuite/expect/test1.7 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.70 b/testsuite/expect/test1.70 new file mode 100755 index 0000000000000000000000000000000000000000..195d8484040a5be6cdc12c392999a75810c22df8 --- /dev/null +++ b/testsuite/expect/test1.70 @@ -0,0 +1,93 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# validates that srun standard input and output work +# with binary files. +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################ +source ./globals + +set test_id "1.70" +set file_out "test$test_id.out" +# The srun executable itself serves as the binary input file +set file_in "$srun" +set exit_code 0 + +print_header $test_id + +if {[test_cray]} { + send_user "\nWARNING: This test is incompatible with Cray systems\n" + exit 0 +} elseif {[test_launch_poe]} { + send_user "\nWARNING: This test is incompatible with poe\n" + exit 0 +} + +# Remove any remaining files +exec $bin_rm -f $file_out + +set sub_match 0 +spawn $srun -n1 -v -i $file_in -o $file_out cat +expect { + -re "launching" { + set sub_match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$sub_match != 1} { + send_user "\nFAILURE: srun did not submit job\n" + set exit_code 1 +} + +spawn diff $file_in $file_out +expect { + -re "differ" { + send_user "\nFAILURE: binary file and the output file do not match\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: diff is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + exec $bin_rm -f $file_out +} +exit $exit_code diff --git a/testsuite/expect/test1.71 b/testsuite/expect/test1.71 new file mode 100755 index 0000000000000000000000000000000000000000..9bfa0c8303e24f822e235b166d8a89a9422b6da5 --- /dev/null +++ b/testsuite/expect/test1.71 @@ -0,0 +1,128 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# validates that srun exit code matches that of a +# test program +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated.
+############################################################################ +# Copyright (C) 2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "1.71" +set file_out "test$test_id.testfile" +set file_c "segv.c" +set script "test$test_id.script" +set err_num_c 0 +set err_num_srun 0 +set exit_code 0 + +print_header $test_id + +# Remove any remaining files +exec $bin_rm -f $file_out $script $file_c + +exec $bin_echo " +int main(int argc, char **argv) +{ + char *tmp = 0; /* null base: the bad store must not depend on stack garbage */ + tmp\[1000000\] = 3; +}" > $file_c + +exec $bin_cc -std=c99 -o $file_out $file_c +exec $bin_chmod 700 $file_out + +make_bash_script $script " +./$file_out +echo exit_code = $?
+" + +# Run the script directly to record the reference exit code +set error_match 0 +spawn ./$script +expect { + + -re "Segmentation fault" { + send_user "\nThis error is expected do not worry\n" + incr error_match + exp_continue + } + -re "exit_code = ($number)" { + set err_num_c $expect_out(1,string) + incr error_match + } + timeout { + send_user "\nFAILURE: program is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$error_match != 2} { + send_user "\nFAILURE: program did not return a segmentation fault ($error_match != 2)\n" + set exit_code 1 +} + +# Run the same script under srun and record its exit code +set error_match 0 +spawn $srun -n1 ./$script +expect { + -re "Segmentation fault" { + send_user "\nThis error is expected\n" + incr error_match + exp_continue + } + -re "exit_code = ($number)" { + set err_num_srun $expect_out(1,string) + incr error_match + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$error_match != 2} { + send_user "\nFAILURE: srun did not have a segmentation error when running the program ($error_match != 2)\n" + set exit_code 1 +} + +# The exit code seen under srun must equal the one from the direct run +if {$err_num_c != $err_num_srun} { + send_user "\nFAILURE: exit codes do not match ($err_num_c != $err_num_srun)\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + exec $bin_rm -f $file_out $script $file_c +} +exit $exit_code diff --git a/testsuite/expect/test1.72 b/testsuite/expect/test1.72 new file mode 100755 index 0000000000000000000000000000000000000000..d246f618c16055c571d3a0db923c6973f3ff9ec5 --- /dev/null +++ b/testsuite/expect/test1.72 @@ -0,0 +1,97 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Validates that the JobAcctGatherFrequency value is enforced +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..."
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "1.72" +set freq_val 0 +set exit_code 0 + +if {[get_job_acct_freq] == 0} { + send_user "\nWARNING:this test requires JobAcctGatherFrequency to be set\n" + exit $exit_code +} +if {[check_node_mem] == 0} { + send_user "\nWARNING: this test requires that the nodes have memory\n" + exit $exit_code +} + +print_header $test_id + +# +# set value of freq_val to JobAcctGatherFrequency +# +set freq_val [get_job_acct_freq] + +set sub_match 0 +spawn $srun -v --mem=200 --acctg-freq=[expr $freq_val - 10] sleep 5 +expect { + -re "jobid" { + set sub_match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$sub_match != 1} { + send_user "\nFAILURE: srun did not submit job\n" + set exit_code 1 +} + +set sub_match 0 +spawn $srun -v --mem=200 --acctg-freq=[expr $freq_val + 10] sleep 5 +expect { + -re "Invalid accounting frequency" { + send_user "\nThis error is expect do not worry.\n" + set sub_match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$sub_match != 1} { + send_user "\nFAILURE: srun did not fail when it should have\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.8 b/testsuite/expect/test1.8 index 3ea8426ebc85c2c1a1fe46db445fb0606a8d0fd2..c8951efef86a903c4d8418f7ac4a0cba6685e51c 100755 --- a/testsuite/expect/test1.8 +++ b/testsuite/expect/test1.8 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.80 b/testsuite/expect/test1.80 index 62f778f1a470c54b096f59d811d39889c3f53c0b..f9564cc0fd024112c4295f513f80d40d484d581f 100755 --- a/testsuite/expect/test1.80 +++ b/testsuite/expect/test1.80 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.81 b/testsuite/expect/test1.81 index 5fccd483d841e48358f5f13f4e098bf25ceae26a..a730126c3050dbc727b292ce05cd9af6b653cbd3 100755 --- a/testsuite/expect/test1.81 +++ b/testsuite/expect/test1.81 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.82 b/testsuite/expect/test1.82 index 97352e96af7aab0341f39bd16e7fe88cb17403ed..d815316387c22e482c0ce1b312a862f07a94ea7a 100755 --- a/testsuite/expect/test1.82 +++ b/testsuite/expect/test1.82 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.83 b/testsuite/expect/test1.83 index 1c405004c92c06c11d53b915dc76913155de828b..b89a735340504dbe08e7763d51cfb200ed122fb1 100755 --- a/testsuite/expect/test1.83 +++ b/testsuite/expect/test1.83 @@ -20,7 +20,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.84 b/testsuite/expect/test1.84 index 0e8c835ffdd361b781734d050fe3be97aa535ded..5d1f3c85157949a67576e3536ed1ec647d9c77d0 100755 --- a/testsuite/expect/test1.84 +++ b/testsuite/expect/test1.84 @@ -19,7 +19,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -106,6 +106,9 @@ expect { wait } } + +# Add sleep for any epilog clean up of previous jobs +sleep 2 set fd [open "|$scontrol --oneliner show node $def_hostlist"] exp_internal 1 while {[gets $fd line] != -1} { diff --git a/testsuite/expect/test1.85 b/testsuite/expect/test1.85 new file mode 100755 index 0000000000000000000000000000000000000000..0c7de057b9700f7986d7a57c725898b151205bf3 --- /dev/null +++ b/testsuite/expect/test1.85 @@ -0,0 +1,176 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of node selection from within a job step on existing allocation +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "WARNING: ..." 
with an explanation of why the test can't be made, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +# +# NOTE: This assumes node names are of the form <alpha><number>, where +# the value of <number> indicates the nodes relative location. +# Change tha node name parsing logic as needed for other formats. +############################################################################ +# Copyright (C) 2013 SchedMD LLC +# Written by David Bigagli, SchedMD +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +#############################################################################!/usr/bin/expect + +source ./globals + +set test_id "1.85" +set prompt "PROMPT: " + +print_header $test_id + +if {[test_launch_poe]} { + set node_name_env MP_I_UPMD_HOSTNAME +} else { + set node_name_env SLURMD_NODENAME +} + +# spawn an allocation to run on 2 +# hosts with 4 tasks. 
+# +spawn $salloc -N 2 -n 4 -t 1 +set salloc_id $spawn_id +expect { + -re "Node count specification invalid|More processors requested" { + send_user "\nWARNING: can't test srun task distribution\n" + exit 0 + } + -re "Unable to contact" { + send_user "\nFAILURE: slurm appears to be down\n" + exit 1 + } + -re "Granted job allocation ($number)" { + send_user "Allocation started\n" + send "export PS1=\"$prompt\"\r" + exp_continue + } + -re "\"$prompt" { + # skip this, just echo of setting prompt" + exp_continue + } + -re "$prompt" { + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + slow_kill $salloc_id + exit 1 + } +} + +# srun the nodelist allocated by slurm. +set host_num 0 +set host_list "" +send "$srun -l -n 1 -N 1 $bin_printenv SLURM_NODELIST\r" +expect { + -re "($number): *($alpha_numeric_nodelist)" { + set host_num $expect_out(1,string) + set host_list $expect_out(2,string) + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + -re $prompt { + } +} + +# convert the nodelist into a list of hostnames. +set count 0 +set host1 "" +set host2 "" +send "$srun -l -n1 -N1 $scontrol show hostnames $host_list\r" +expect { + -re "($number): *($alpha_numeric_under)" { + if { $count == 0 } { + set host1 $expect_out(2,string) + incr count + exp_continue + } + if { $count == 1 } { + set host2 $expect_out(2,string) + incr count + exp_continue + } + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + -re $prompt { + } +} +send_user "Got exec hosts $host1 $host2\n" + +# reset the spawn process to be initial allocation +# then srun a first task. 
+set hostname "" +set spawn_id $salloc_id +send "$srun -l -n 1 -N 1 $bin_printenv $node_name_env\r" +expect { + -re "($number): *($alpha_numeric_under)" { + set hostname $expect_out(2,string) + exp_continue + } + -re $prompt { + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } + +} + +# force the second task on the second node, at the end +# make sure the task ran on the specified node. +set hostname "" +send "$srun -l -n 1 -N 1 -w $host2 $bin_printenv $node_name_env\r" +expect { + -re "($number): *($alpha_numeric_under)" { + set hostname $expect_out(2,string) + exp_continue + } + -re $prompt { + send "exit\r" + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {[ string compare $host2 $hostname] != 0} { + send_user "FAILURE\n" + send_user "The execution hostname $hostname != $host2 requested hostname.\n" + exit 1 +} + +send_user "SUCCESS\n" diff --git a/testsuite/expect/test1.86 b/testsuite/expect/test1.86 index 1c427c8d3689be725015ee24a9c2eae6bd82ecfc..34fa8cea01c9745608be5cf80bd3e0ebf267b526 100755 --- a/testsuite/expect/test1.86 +++ b/testsuite/expect/test1.86 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.87 b/testsuite/expect/test1.87 index d0b5bed0cb65bfb6e0afcfd11115f0023f6aadcb..cba6780e541544dc79a8cbc935a781b3a42b7f12 100755 --- a/testsuite/expect/test1.87 +++ b/testsuite/expect/test1.87 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.88 b/testsuite/expect/test1.88 index e4a6a3f8fec461fbce7304f58e740ade602fc8e7..4eb46ab31e4176388f471ad837034a86b34154d8 100755 --- a/testsuite/expect/test1.88 +++ b/testsuite/expect/test1.88 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -109,7 +109,7 @@ make_bash_script $file_in " # set timeout $max_job_delay set no_start 0 -set sbatch_pid [spawn $sbatch -N3 -n6 --output=$file_out --error=$file_err -t1 $file_in] +set sbatch_pid [spawn $sbatch -N1-3 -n6 --output=$file_out --error=$file_err -t1 $file_in] expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) diff --git a/testsuite/expect/test1.88.prog.c b/testsuite/expect/test1.88.prog.c index 2a2a5d8d672e1a756c84bca806c009d2466b527b..7ba694578dc69965ebef6b720c551f5472f7e040 100644 --- a/testsuite/expect/test1.88.prog.c +++ b/testsuite/expect/test1.88.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.89 b/testsuite/expect/test1.89 index 7b9f4d7e8e51f5724b1b05eedf5e4b767b3c378b..13c6c97fdc0e23da356216dc33fd42b1ea1a64bb 100755 --- a/testsuite/expect/test1.89 +++ b/testsuite/expect/test1.89 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. 
All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -40,6 +40,11 @@ set prompt "PROMPT:" print_header $test_id +if {[test_serial]} { + send_user "\nWARNING: This test is incompatible with select/serial systems\n" + exit 0 +} + # # Test if task affinity support is supported. # @@ -99,7 +104,7 @@ exec $bin_chmod 700 $file_prog # Create an allocation # set timeout $max_job_delay -set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash] +set salloc_pid [spawn $salloc -N1 --exclusive -v -t2 $bin_bash] expect { -re "Granted job allocation ($number)" { set job_id $expect_out(1,string) @@ -122,12 +127,15 @@ expect { # # Reading a second prompt is required by some versions of Expect # -if { 0 } { - expect { - -re $prompt { - } +set timeout 1 +expect { + -re $prompt { + exp_continue + } + timeout { } } +set timeout 30 # # Run a job step to get allocated processor count and affinity @@ -181,7 +189,7 @@ expect { } } if {$task_mask != $expected_mask} { - send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n" + send_user "\nFAILURE: affinity mask inconsistency ($task_mask != $expected_mask)\n" set exit_code 1 } @@ -197,7 +205,7 @@ if { $launch_poe != 0 } { file delete $file_prog send_user "\nSUCCESS\n" } else { - send_user "\nFAILURE: See note about reading second prompt in the script (line 123)\n" + send_user "\nFAILURE: See note about reading second prompt in the script (line 128)\n" } exit $exit_code } @@ -225,7 +233,7 @@ expect { -re $prompt } if {$task_mask != $task_cnt} { - send_user "\nFAILURE: affinity mask inconsistent ($task_mask,$task_cnt)\n" + send_user "\nFAILURE: affinity mask inconsistent ($task_mask != $task_cnt)\n" set exit_code 1 } set verbose_cnt 0 diff --git 
a/testsuite/expect/test1.89.prog.c b/testsuite/expect/test1.89.prog.c index 755d31bcc3e51ced81c71c6bb08f61a64af7597f..01cc8fe6da0413a72b1c2615b170768c6cfc2d57 100644 --- a/testsuite/expect/test1.89.prog.c +++ b/testsuite/expect/test1.89.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.9 b/testsuite/expect/test1.9 index 9dc84f5a93cd57b55fe2441914ab25762f9e8eb9..46fa68e1768db443ef212ae74c00496395554769 100755 --- a/testsuite/expect/test1.9 +++ b/testsuite/expect/test1.9 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.90 b/testsuite/expect/test1.90 index 2a0c7fcbfa1e3ec74fc5df3018333f5ac91112ce..bc10c07f57237f62319891142c75178b15c8fd16 100755 --- a/testsuite/expect/test1.90 +++ b/testsuite/expect/test1.90 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -136,6 +136,19 @@ expect { } } +# +# Reading a second prompt is required by some versions of Expect +# +set timeout 1 +expect { + -re $prompt { + exp_continue + } + timeout { + } +} +set timeout 30 + # # Run a job step to get allocated processor count and affinity # @@ -162,7 +175,8 @@ expect { send_user "or failure to recognize prompt\n" set exit_code 1 } - -re $prompt + -re $prompt { + } } # @@ -220,14 +234,15 @@ if {$mem_mask != $full_mask} { } # -# Run a job step with verbosity and all tasks using memory of CPU 0 +# Run all tasks all bound to the same CPU's memory (local CPU) # -set task_mask 0 -set verbose_cnt 0 -send "$srun -n $task_cnt --mem_bind=verbose,map_mem:0 ./$file_prog\r" +send "$srun -n $task_cnt --cpu_bind=rank --mem_bind=local ./$file_prog\r" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { - incr task_mask $expect_out(3,string) + if {$expect_out(2,string) != $expect_out(3,string)} { + send_user "\nFAILURE: failed to use local memory for a task\n" + set exit_code 1 + } exp_continue } -re "error" { @@ -239,18 +254,27 @@ expect { send_user "\nFAILURE: salloc not responding " send_user "or failure to recognize prompt\n" set exit_code 1 + exp_continue } -re $prompt } -if {$task_mask != $task_cnt} { - send_user "\nFAILURE: affinity mask inconsistent ($task_mask,$task_cnt)\n" - set exit_code 1 +if { [test_launch_poe] } { + if {$exit_code == 0} { + exec $bin_rm -f $file_prog + send_user "\nSUCCESS\n" + } + exit $exit_code } + +# +# Run a job step with verbosity and all tasks using memory of CPU 0 +# +set task_mask 0 set verbose_cnt 0 send "$srun -n $task_cnt --mem_bind=verbose,map_mem:0 ./$file_prog\r" expect { - -re "mem_bind=MAP" { - incr verbose_cnt + -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { + incr task_mask $expect_out(3,string) exp_continue } -re "error" { @@ -265,21 +289,15 @@ expect { } -re $prompt } -if {$verbose_cnt != 
$task_cnt} { - send_user "\nFAILURE: verbose messages count inconsisent ($verbose_cnt,$task_cnt)\n" +if {$task_mask != $task_cnt} { + send_user "\nFAILURE: affinity mask inconsistent ($task_mask,$task_cnt)\n" set exit_code 1 } - -# -# Run all tasks all bound to the same CPU's memory (local CPU) -# -send "$srun -n $task_cnt --cpu_bind=rank --mem_bind=local ./$file_prog\r" +set verbose_cnt 0 +send "$srun -n $task_cnt --mem_bind=verbose,map_mem:0 ./$file_prog\r" expect { - -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { - if {$expect_out(2,string) != $expect_out(3,string)} { - send_user "\nFAILURE: failed to use local memory for a task\n" - set exit_code 1 - } + -re "mem_bind=MAP" { + incr verbose_cnt exp_continue } -re "error" { @@ -291,10 +309,13 @@ expect { send_user "\nFAILURE: salloc not responding " send_user "or failure to recognize prompt\n" set exit_code 1 - exp_continue } -re $prompt } +if {$verbose_cnt != $task_cnt} { + send_user "\nFAILURE: verbose messages count inconsisent ($verbose_cnt,$task_cnt)\n" + set exit_code 1 +} # # Run all tasks all bound to the same CPU's memory by specifying a map (for each CPU) diff --git a/testsuite/expect/test1.90.prog.c b/testsuite/expect/test1.90.prog.c index e83da61d7b62f99940cc3325a094c84fce8cc51b..9bd40608a98b5ebfbfdc2510051f65cf8495d1e7 100644 --- a/testsuite/expect/test1.90.prog.c +++ b/testsuite/expect/test1.90.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91 index 31cd0c72d51faf3427dff23223e85cd6f509d716..c71bb296e8e737b89c8d0c9368d89eb5545fe4a8 100755 --- a/testsuite/expect/test1.91 +++ b/testsuite/expect/test1.91 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -44,6 +44,9 @@ print_header $test_id if {[test_launch_poe]} { send_user "\nWARNING: This test is incompatible with launch/poe systems\n" exit 0 +} elseif {[test_serial]} { + send_user "\nWARNING: This test is incompatible with select/serial systems\n" + exit 0 } # diff --git a/testsuite/expect/test1.91.prog.c b/testsuite/expect/test1.91.prog.c index 0ed53b172e16914dbc51c5864120c20504015735..e32e61b748d1fc79d632fff7b533dfad754f3a86 100644 --- a/testsuite/expect/test1.91.prog.c +++ b/testsuite/expect/test1.91.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.92 b/testsuite/expect/test1.92 index 5f42262a80c5b00a2986660cf6ad67c4892e0eb9..18c982e835827b1568311a352bb624dcbab74e1a 100755 --- a/testsuite/expect/test1.92 +++ b/testsuite/expect/test1.92 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. 
# Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.93 b/testsuite/expect/test1.93 index c4af2b0141f5f2f7611515a17e6fdee24de49047..2ba42457b37e3237c4b3a9fe45ec4c05fde44ea8 100755 --- a/testsuite/expect/test1.93 +++ b/testsuite/expect/test1.93 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.94 b/testsuite/expect/test1.94 new file mode 100755 index 0000000000000000000000000000000000000000..91bdad3fbc18bc6d71541e464527c5f2f034851c --- /dev/null +++ b/testsuite/expect/test1.94 @@ -0,0 +1,215 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of MPICH2 task spawn logic +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "WARNING: ..." with an explanation of why the test can't be made, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2002 The Regents of the University of California. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# CODE-OCEC-09-009. All rights reserved. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "1.94" +set exit_code 0 +set file_in "test$test_id.input" +set file_out "test$test_id.output" +set file_err "test$test_id.error" +set master_prog "test$test_id.master" +set slave_prog "test$test_id.slave" +set job_id 0 + +print_header $test_id + +# +# Test for existence of mpi compiler +# +if {[info exists mpicc] == 0} { + send_user "\nWARNING: mpicc not defined, can't perform mpi testing\n" + exit 0 +} +if {[file executable $mpicc] == 0} { + send_user "\nWARNING: $mpicc does not exists\n" + exit 0 +} +# +# Test is only works with mpi/pmi2 plugin. 
+# +set invalid 1 +log_user 0 +spawn $scontrol show config +expect { + -re "pmi2" { + set invalid 0 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +log_user 1 +if {$invalid == 1} { + send_user "\nWARNING: test compatible only with MpiDefault=pmi2\n" + exit $exit_code +} + +# +# Delete left-over program and rebuild it +# +exec $bin_rm -f $file_in $file_out $file_err +exec $bin_rm -f $master_prog ${master_prog}.o +exec $bin_rm -f $slave_prog ${slave_prog}.o +exec $mpicc -o $master_prog ${master_prog}.c +exec $mpicc -o $slave_prog ${slave_prog}.c + +# Delete left-over stdout/err files +file delete $file_out $file_err + +# +# Build input script file +# +make_bash_script $file_in " + $srun -n1 $master_prog $slave_prog +" + +# +# Spawn an sbatch job that uses stdout/err and confirm their contents +# +set timeout $max_job_delay +set no_start 0 +set sbatch_pid [spawn $sbatch -n4 --output=$file_out --error=$file_err -t1 $file_in] +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + -re "Batch job submission failed" { + set no_start 1 + exp_continue + } + -re "Unable to contact" { + send_user "\nFAILURE: slurm appears to be down\n" + exit 1 + } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$no_start != 0} { + send_user "\nWARNING: partition too small for test\n" + if {$job_id != 0} { + cancel_job $job_id + } + exit 0 +} +if {$job_id == 0} { + send_user "\nFAILURE: batch submit failure\n" + exit 1 +} + +# +# Wait for job to complete +# +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: waiting for job to complete\n" + set exit_code 1 +} + +# +# Check for desired output in stdout +# +set expected_msg 4 +set expected_sum 12 +if {[wait_for_file $file_out] == 0} { + set complete 0 + set matches 0 + set rank_sum 0 + spawn $bin_cat $file_out + expect 
{ + -re "Rank.($number). on $alpha_numeric_under just received msg from Rank ($number)" { + incr rank_sum $expect_out(1,string) + incr rank_sum $expect_out(2,string) + incr matches + exp_continue + } + eof { + wait + } + } + if {$matches == 0} { + send_user "\nFAILURE: No MPI communications occurred\n" + send_user " The version of MPI you are using may be incompatible " + send_user "with the configured switch\n" + send_user " Core files may be present from failed MPI tasks\n\n" + set exit_code 1 + } elseif {$matches != $expected_msg} { + send_user "\nFAILURE: unexpected output ($matches of $expected_msg)\n" + set exit_code 1 + } elseif {$rank_sum != $expected_sum} { + send_user "\nFAILURE: Invalid rank values ($rank_sum != $expected_sum)\n" + set exit_code 1 + } +} else { + set exit_code 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in $file_out $file_err + exec $bin_rm -f $master_prog ${master_prog}.o + exec $bin_rm -f $slave_prog ${slave_prog}.o + send_user "\nSUCCESS\n" +} else { + set matches 0 + spawn head $file_err + expect { + -re "Error creating CQ" { + incr matches + exp_continue + } + eof { + wait + } + } + if {$matches != 0} { + send_user "WARNING: If using MVAPICH then\n" + send_user " Configure \"PropagateResourceLimitsExcept=MEMLOCK\"\n" + send_user " Also start slurmd with \"ulimit -l unlimited\"\n" + } else { + send_user "Check contents of $file_err\n" + } +} + +exit $exit_code diff --git a/testsuite/expect/test1.94.master.c b/testsuite/expect/test1.94.master.c index 1d2a466e8440063435facc92ad5e431ebcb4e11d..c8948bd6b93892ed752d5f9ba80cad5b4ec2b26a 100644 --- a/testsuite/expect/test1.94.master.c +++ b/testsuite/expect/test1.94.master.c @@ -2,7 +2,7 @@ * test1.94.master.c - Test of MPICH2 task spawn logic ***************************************************************************** * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. 
+ * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.94.slave.c b/testsuite/expect/test1.94.slave.c index 9fd984159b50ca6ea62ffbecbe838bb53e846af3..f6c0a0f94d667c45c83d5af708e7b72f1a9cb4cd 100644 --- a/testsuite/expect/test1.94.slave.c +++ b/testsuite/expect/test1.94.slave.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.95 b/testsuite/expect/test1.95 index 52532f50706bf6c2826c2e444e6c1ad57c61b548..2b10fde7f6f176e76f3cd2c79155ae0662a5f662 100755 --- a/testsuite/expect/test1.95 +++ b/testsuite/expect/test1.95 @@ -12,7 +12,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.95.prog.upc b/testsuite/expect/test1.95.prog.upc index 7175018d6e3cb97bf725de79a997f6a01eb254a5..179c257b5b5023b8700f06dfd821f7dde01de86f 100644 --- a/testsuite/expect/test1.95.prog.upc +++ b/testsuite/expect/test1.95.prog.upc @@ -5,7 +5,7 @@ * Written by Morris Jette <jette@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test1.96 b/testsuite/expect/test1.96 index 1ba8bbbca17ba15b6112ab90d3e631e2d213335c..cfc2fa7598802a1914cc23e3f4459dc9350a34c1 100755 --- a/testsuite/expect/test1.96 +++ b/testsuite/expect/test1.96 @@ -12,7 +12,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.1 b/testsuite/expect/test10.1 index 18ac76416c44bf91d23886a600a9368a7d54681e..3b90de64ba2cafbcd52b00cb90c989c5682dd244 100755 --- a/testsuite/expect/test10.1 +++ b/testsuite/expect/test10.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.10 b/testsuite/expect/test10.10 index 0bb3bd09f484b13751f74150f13b980d66d2d887..2fe3ff1787ece1df22f4226aa227af1137a3dd75 100755 --- a/testsuite/expect/test10.10 +++ b/testsuite/expect/test10.10 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.11 b/testsuite/expect/test10.11 index 4247ad9e38abd55ee0f9441f83ff7570d4c9e423..ffead530127d648d51a5c80abae2c4f751d9e9f9 100755 --- a/testsuite/expect/test10.11 +++ b/testsuite/expect/test10.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.12 b/testsuite/expect/test10.12 index cf0c3d653f9f0bc241d0af50359657b5f855049b..34a2ba68a640b778ea194c3ec8098d5e49e1e027 100755 --- a/testsuite/expect/test10.12 +++ b/testsuite/expect/test10.12 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.13 b/testsuite/expect/test10.13 index d886edd8ba01d622974a926820dd8755be5ea1f4..d1dda50ca0b662aecc3ca6c56316760f3e979b2c 100755 --- a/testsuite/expect/test10.13 +++ b/testsuite/expect/test10.13 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.2 b/testsuite/expect/test10.2 index 012216627142cbceaec6b722e17ef27332e422f9..7b31598e30e5e123b9859819132aeb04274c5196 100755 --- a/testsuite/expect/test10.2 +++ b/testsuite/expect/test10.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.3 b/testsuite/expect/test10.3 index 3d569877d693c1ac11dfabc07bd3027ad3c2d3c9..215c55fa82ce96b938a19332e2ea5880327e0bdd 100755 --- a/testsuite/expect/test10.3 +++ b/testsuite/expect/test10.3 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.4 b/testsuite/expect/test10.4 index 08c72ffa624c741ccc684c4dbec4dd7f05ad2d85..e194364cd110e5c263bd24b08cc7b6a162c333d5 100755 --- a/testsuite/expect/test10.4 +++ b/testsuite/expect/test10.4 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.5 b/testsuite/expect/test10.5 index c0adfb6042e639b4da9f7067a5332a8686603902..4043b2ce21804698b0bb1eb971f52493b8310cea 100755 --- a/testsuite/expect/test10.5 +++ b/testsuite/expect/test10.5 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.6 b/testsuite/expect/test10.6 index 3cdc7fe04f854d4b0ac51053e31f500e1c5e4fa4..8cadfc4e39c1b3ee29d2f863a6801bda45aec418 100755 --- a/testsuite/expect/test10.6 +++ b/testsuite/expect/test10.6 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.7 b/testsuite/expect/test10.7 index fb5d160eb0d2ab35aa915a4018d4a03706de492e..ce1947b84b0574857100b429f576e8e1acb2489a 100755 --- a/testsuite/expect/test10.7 +++ b/testsuite/expect/test10.7 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.8 b/testsuite/expect/test10.8 index 6cc26eba1077fd545d097382116793f60f93221f..4eda6812c14ae7df02996ad813fa442806866453 100755 --- a/testsuite/expect/test10.8 +++ b/testsuite/expect/test10.8 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test10.9 b/testsuite/expect/test10.9 index 7e854b4f9391a625ec1a2ab8c4dc1c0d9ab262a1..cb65ba156e69957e22cfbf9572612e5a36eb2024 100755 --- a/testsuite/expect/test10.9 +++ b/testsuite/expect/test10.9 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.1 b/testsuite/expect/test11.1 index 03410834e99802addb9748d8942d5582f5e95d94..b044e5ef5cc4a1e451f0c9d1777892c6e34b6560 100755 --- a/testsuite/expect/test11.1 +++ b/testsuite/expect/test11.1 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.2 b/testsuite/expect/test11.2 index 89db338e85442a3aa7af232180b2ff516a17878e..b8f6039543a0c2ffd9cf53bd4c1999b10466fc0a 100755 --- a/testsuite/expect/test11.2 +++ b/testsuite/expect/test11.2 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -112,7 +112,8 @@ if { $got_output != 1 } { set got_output 0 spawn $poe printenv MP_EUIDEVICE -labelio yes -rmpool $partition -euilib ip -euidevice eth0 expect { - -re "($number): *eth0" { + # Might return "ethernet" or "eth0" on various systems + -re "($number): *eth" { set got_output 1 exp_continue } diff --git a/testsuite/expect/test11.3 b/testsuite/expect/test11.3 index a6374eb2b9e2cf88b86ebb82b4c60e4fb38c4dfd..840fd182d4a88213aad1de9be84480c598e9fbb2 100755 --- a/testsuite/expect/test11.3 +++ b/testsuite/expect/test11.3 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.4 b/testsuite/expect/test11.4 index 6aa47689ea1b90ada6d5e88da8dd9ecf7c5c5367..344a9dd6d33118018bb20c460eae4c15a98458f7 100755 --- a/testsuite/expect/test11.4 +++ b/testsuite/expect/test11.4 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. 
# Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.5 b/testsuite/expect/test11.5 index 3e74342707cd941796dcda453560a7c62cb81aa7..a186f0110399d5ebb72e0810d07b61d4f1bfb826 100755 --- a/testsuite/expect/test11.5 +++ b/testsuite/expect/test11.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.6 b/testsuite/expect/test11.6 index 4d1db48da5dfa2713cbfca0a9193c5f6b308ba50..4350a2b05de7b877fbe155ab18573c47b6464e6d 100755 --- a/testsuite/expect/test11.6 +++ b/testsuite/expect/test11.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.7 b/testsuite/expect/test11.7 index 74b379a9ba1f3a1f0d69d7083c2082da5f6f3d75..33d7d574f030b398c4ea39e10c3b3cf87950d55e 100755 --- a/testsuite/expect/test11.7 +++ b/testsuite/expect/test11.7 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test11.8 b/testsuite/expect/test11.8 index 4b58d74917ef59f7abf429e14c9e8c9d92b50a18..337b652e4e3349d2be2159df0137c68f08208117 100755 --- a/testsuite/expect/test11.8 +++ b/testsuite/expect/test11.8 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test12.1 b/testsuite/expect/test12.1 index 1ad791f856f8b393d82f8e7ec1b840cc7b580bd3..eeb1600e726ada409f0f1ad4e9ada64e05f85818 100755 --- a/testsuite/expect/test12.1 +++ b/testsuite/expect/test12.1 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test12.2 b/testsuite/expect/test12.2 index cc0b78ef9a3844647adb54654aba4e880db1286d..706990a11b9e4b0b84b01b29e10363c53fb1e977 100755 --- a/testsuite/expect/test12.2 +++ b/testsuite/expect/test12.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -38,6 +38,7 @@ set file_in "test$test_id.input" set file_out "test$test_id.output" set file_err "test$test_id.error" set file_prog "test$test_id.prog" +set file_name "test$test_id.filename" set job_id 0 set matches 0 set aix 0 @@ -46,7 +47,7 @@ set aix 0 set mem_size 1024000 set sleep_time 21 set ret_code 42 - +set file_size 10485760 print_header $test_id if {[test_front_end]} { @@ -75,7 +76,7 @@ proc _get_mem {prog} { set ave_used -1 set exit_code 0 - spawn $prog --noheader -p --job=$job_id.$step_id --fields maxvmsize,maxvmsizetask,avevmsize + spawn $prog --noheader -p --job=$job_id.$step_id --format maxvmsize,maxvmsizetask,avevmsize expect { -re "($float)(\[KMG\]*).($number).($float)(\[KMG\]*)" { set mem_used $expect_out(1,string) @@ -83,24 +84,10 @@ proc _get_mem {prog} { set mem_task $expect_out(3,string) set ave_used $expect_out(4,string) set scale2 $expect_out(5,string) - if {[string compare $scale1 "G"] == 0} { - set mem_used [expr $mem_used * 1024 * 1024] - } elseif {[string compare $scale1 "M"] == 0} { - set mem_used [expr $mem_used * 1024] - } elseif {[string compare $scale1 "K"] == 0} { - set mem_used [expr $mem_used * 1] - } else { - set mem_used [expr $mem_used / 1024] - } - if {[string compare $scale2 "G"] == 0} { - set ave_used [expr $ave_used * 1024 * 1024] - } elseif {[string compare $scale2 "M"] == 0} { - set ave_used [expr $ave_used * 1024] - } elseif {[string compare $scale2 "K"] == 0} { - set ave_used [expr $ave_used * 1] - } else { - set ave_used [expr $ave_used / 1024] - } + + set mem_used [scale_to_ks $mem_used $scale1] + set ave_used [scale_to_ks $ave_used $scale2] + exp_continue } timeout { @@ -139,6 +126,83 @@ proc _get_mem {prog} { return 0 } +# Check the job written and read file size. 
+proc _get_file_size {prog} { + global sstat number float job_id step_id file_size + + set max_disk_write -1 + set ave_disk_write -1 + set max_disk_read -1 + set ave_disk_read -1 + + spawn $prog --noheader -p --job=$job_id.$step_id --format MaxDiskWrite,AveDiskWrite,MaxDiskRead,AveDiskRead + expect { + -re "($float)(\[MGT\]*).($float)(\[MGT\]*).($float)(\[MGT\]*).($float)(\[MGT\]*)" { + set max_disk_write $expect_out(1,string) + set scale1 $expect_out(2,string) + set ave_disk_write $expect_out(3,string) + set scale2 $expect_out(4,string) + set max_disk_read $expect_out(5,string) + set scale3 $expect_out(6,string) + set ave_disk_read $expect_out(7,string) + set scale4 $expect_out(8,string) + + set max_disk_write [scale_to_megs $max_disk_write $scale1] + set ave_disk_write [scale_to_megs $ave_disk_write $scale2] + set max_disk_read [scale_to_megs $max_disk_read $scale3] + set ave_disk_read [scale_to_megs $ave_disk_read $scale4] + } + timeout { + send_user "\nFAILURE: $prog not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + set mb_file_size [ expr ($file_size/(1024 * 1024)) ] + if { $max_disk_write == -1 } { + send_user "\nFAILURE: MaxDiskWrite not reported\n" + return 1 + } + if { $ave_disk_write == -1 } { + send_user "\nFAILURE: AveDiskWrite not reported\n" + return 1 + } + if { $max_disk_read == -1 } { + send_user "\nFAILURE: MaxDiskRead not reported\n" + return 1 + } + if { $ave_disk_read == -1 } { + send_user "\nFAILURE: AveDiskRead not reported\n" + return 1 + } + + if { $max_disk_write != $max_disk_read } { + send_user "\nFAILURE: written file size does not match read size " + send_user "file_size:$mb_file_size MB " + send_user "max_disk_write:$max_disk_write MB " + send_user "max_disk_read:$max_disk_read MB\n" + return 1 + } + + if { $ave_disk_write != $ave_disk_read } { + send_user "\nFAILURE: average written file size " + send_user "does not match average read size " + send_user "file_size:$mb_file_size MB " + send_user 
"ave_disk_write:$ave_disk_write MB" + send_user "ave_disk_read:$ave_disk_read MB\n" + return 1 + } + + send_user "\SUCCESS: $prog reported correct written and read file size " + send_user "file_size:$mb_file_size MB max_disk_write:$max_disk_write MB " + send_user "max_disk_read:$max_disk_read MB\n" + + return 0 +} + # # Check if accounting is enabled # @@ -188,16 +252,17 @@ exec $bin_cc -O -o $file_prog ${file_prog}.c exec $bin_chmod 700 $file_prog make_bash_script $file_in " - $srun ./$file_prog $ret_code $sleep_time $mem_size + $srun ./$file_prog $ret_code $sleep_time $mem_size $file_size $file_name " # # Run a simple job # Usage: test12.2.prog <exit_code> <sleep_secs> <mem_kb> +# <file_size> <file_name> # set config_prob 0 set timeout [expr $max_job_delay + $sleep_time] -set job_mem_limit [expr ($mem_size + $max_mem_error) / 1024 + 1] +set job_mem_limit [expr ($mem_size + $max_mem_error) / 1024 + 4] set sbatch_pid [spawn $sbatch --mem-per-cpu=$job_mem_limit --output=$file_out --error=$file_err -t2 $file_in] expect { -re "Requested node configuration is not available" { @@ -241,6 +306,10 @@ if {[_get_mem $sstat] != 0} { set exit_code 1 } +if {[_get_file_size $sstat] != 0} { + set exit_code 1 +} + # # Wait for job to complete # @@ -253,7 +322,7 @@ if {[wait_for_job $job_id "DONE"] != 0} { # Report basic sacct info # -spawn $sacct --noheader -P --job=$job_id.$step_id --fields jobid,jobname,state,exitcode +spawn $sacct --noheader -P --job=$job_id.$step_id --format jobid,jobname,state,exitcode expect { -re "$job_id\.$step_id" { incr matches @@ -295,7 +364,7 @@ if {$matches < $match_target} { # expected integer but got "08" (looks like invalid octal number) # set elapsed_time 0 -spawn $sacct --noheader -P --job=$job_id.$step_id --fields elapsed +spawn $sacct --noheader -P --job=$job_id.$step_id --format elapsed expect { -re "($number):($number):(\[0-9\])(\[0-9\])" { set hours $expect_out(1,string) @@ -328,6 +397,10 @@ if {[_get_mem $sacct] != 0} { set exit_code 1 } 
+if {[_get_file_size $sacct] != 0} { + set exit_code 1 +} + if {$aix == 1 && $exit_code == 1} { send_user "\nIf there were failures in the memory parts of this\n" send_user "test, they are expected. AIX doesn't do a great job\n" @@ -338,7 +411,7 @@ if {$aix == 1 && $exit_code == 1} { } if {$exit_code == 0} { - exec rm -f $file_prog $file_in $file_out $file_err + exec rm -f $file_prog $file_in $file_out $file_err $file_name send_user "\nSUCCESS\n" } exit $exit_code diff --git a/testsuite/expect/test12.2.prog.c b/testsuite/expect/test12.2.prog.c index 515c256ae61835553f05e8d8d7bf3019d74ffdf3..02c600aca1ddbba1960e58589ee2fe92722139ce 100644 --- a/testsuite/expect/test12.2.prog.c +++ b/testsuite/expect/test12.2.prog.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under @@ -29,15 +29,24 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +int main (int argc, char **argv) { int exit_code, sleep_time, mem_kb; + int i; + long long int n; + long long int file_size; + int fd; char *mem; + char *file_name; - if (argc != 4) { + if (argc != 6) { fprintf(stderr, - "Usage: %s <exit_code> <sleep_time> <mem_kb>\n", + "Usage: %s <exit_code> <sleep_time> <mem_kb> <file_size>\n", argv[0]); exit(1); } @@ -45,13 +54,42 @@ main (int argc, char **argv) exit_code = atoi(argv[1]); sleep_time = atoi(argv[2]); mem_kb = atoi(argv[3]); - + file_size = atoll(argv[4]); + file_name = argv[5]; mem = malloc(mem_kb * 1024); /* need to do a memset on the memory or AIX will not count * the memory in the job step's Resident Set Size */ memset(mem, 0, (mem_kb * 1024)); + + /* Don't use malloc() to write() and read() a blob + * of memory as it will interfere with the memory + * test, don't use stdio for the same reason, it + * allocates memory. + */ + fd = open(file_name, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); + n = file_size/sizeof(int); + + for (i = 0; i < n; i++) { + if (write(fd, &i, sizeof(int)) != sizeof(int)) { + fprintf(stderr, "FAILURE: write error\n"); + exit(1); + } + } + fsync(fd); + close(fd); + + fd = open(file_name, O_RDONLY, S_IRUSR|S_IWUSR); + for (i = 0; i < n; i++) { + if (read(fd, &i, sizeof(int)) != sizeof(int)) { + fprintf(stderr, "FAILURE: read error\n"); + exit(1); + } + } + close(fd); + sleep(sleep_time); free(mem); + exit(exit_code); } diff --git a/testsuite/expect/test12.3 b/testsuite/expect/test12.3 index b71db5330600b4a01404a1126ba8634acffd6653..06063320b9f5f03077325cecea67b40067b10f53 100755 --- a/testsuite/expect/test12.3 +++ b/testsuite/expect/test12.3 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. 
-# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -58,6 +58,12 @@ if {[test_bluegene]} { exit $exit_code } +if {[test_launch_poe]} { + set step_id 1 +} else { + set step_id 0 +} + # # Check accounting config and bail if not found. # @@ -167,7 +173,7 @@ set job_1_node_inx 0 set timeout $max_job_delay set srun_pid [spawn $srun -N2 -t1 -A $test_acct -J $job_name_1 -v $bin_id] expect { - -re "launching ($number).0 on host ($alpha_numeric_under)," { + -re "launching ($number).$step_id on host ($alpha_numeric_under)," { set job_id_1 $expect_out(1,string) set job_1_node($job_1_node_inx) $expect_out(2,string) incr job_1_node_inx @@ -195,7 +201,7 @@ set job_2_node_inx 0 set srun_pid [spawn $srun -N2 -x $job_1_node(0) -t1 -A $test_acct -J $job_name_2 -v $bin_id] expect { - -re "launching ($number).0 on host ($alpha_numeric_under)," { + -re "launching ($number).$step_id on host ($alpha_numeric_under)," { set job_id_2 $expect_out(1,string) set job_2_node($job_2_node_inx) $expect_out(2,string) incr job_2_node_inx @@ -223,7 +229,7 @@ set job_3_node_inx 0 set srun_pid [spawn $srun -w$job_1_node(0),$job_2_node(1) -t1 -A $test_acct -J $job_name_3 -v $bin_id] expect { - -re "launching ($number).0 on host ($alpha_numeric_under)," { + -re "launching ($number).$step_id on host ($alpha_numeric_under)," { set job_id_3 $expect_out(1,string) set job_3_node($job_3_node_inx) $expect_out(2,string) incr job_3_node_inx diff --git a/testsuite/expect/test12.4 b/testsuite/expect/test12.4 index 28adae94e205a6431ee04a47803f03dbcf00287b..ce934d48f0968d395c2c4ccb6a852996164183e2 100755 --- a/testsuite/expect/test12.4 +++ b/testsuite/expect/test12.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. 
-# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -558,109 +558,109 @@ proc sacct_vargs { soption vargs job_id} { set matches [sacct_job b $job_id] if {$matches != 2} { - send_user "\nFAILURE: sacct -b failed ($matches)\n" + send_user "\nFAILURE: sacct -b failed ($matches != 2)\n" set exit_code 1 } set matches [sacct_job -brief $job_id] if {$matches != 2} { - send_user "\nFAILURE: sacct --brief failed ($matches)\n" + send_user "\nFAILURE: sacct --brief failed ($matches != 2)\n" set exit_code 1 } set matches [sacct_vargs g $user_gid $job_id] if {$matches != 3} { - send_user "\nFAILURE: sacct -g failed ($matches)\n" + send_user "\nFAILURE: sacct -g failed ($matches != 3)\n" set exit_code 1 } set matches [sacct_vargs -gid $user_gid $job_id] if {$matches != 3} { - send_user "\nFAILURE: sacct --gid failed ($matches)\n" + send_user "\nFAILURE: sacct --gid failed ($matches != 3)\n" set exit_code 1 } set matches [sacct_vargs -group $user_gid $job_id] if {$matches != 3} { - send_user "\nFAILURE: sacct --group failed ($matches)\n" + send_user "\nFAILURE: sacct --group failed ($matches != 3)\n" set exit_code 1 } set matches [sacct_job l $job_id] if {$matches != 7} { - send_user "\nFAILURE: sacct --allusers failed ($matches)\n" + send_user "\nFAILURE: sacct --allusers failed ($matches != 7)\n" set exit_code 1 } set matches [sacct_job -long $job_id] if {$matches != 7} { - send_user "\nFAILURE: sacct -l failed ($matches)\n" + send_user "\nFAILURE: sacct -l failed ($matches != 7)\n" set exit_code 1 } set matches [sacct_job n $job_id] if {$matches != 1} { - send_user "\nFAILURE: sacct -n failed ($matches)\n" + send_user "\nFAILURE: sacct -n failed ($matches != 1)\n" set exit_code 1 } set matches [sacct_job -noheader $job_id] if {$matches != 1} { - send_user "\nFAILURE: sacct -n failed ($matches)\n" + 
send_user "\nFAILURE: sacct -n failed ($matches != 1)\n" set exit_code 1 } set matches [sacct_job p $job_id] if {$matches != 2} { - send_user "\nFAILURE: sacct -p failed ($matches)\n" + send_user "\nFAILURE: sacct -p failed ($matches != 2)\n" set exit_code 1 } set matches [sacct_job -parsable $job_id] if {$matches != 2} { - send_user "\nFAILURE: sacct --parsable failed ($matches)\n" + send_user "\nFAILURE: sacct --parsable failed ($matches != 2)\n" set exit_code 1 } set matches [sacct_job P $job_id] if {$matches != 2} { - send_user "\nFAILURE: sacct -P failed ($matches)\n" + send_user "\nFAILURE: sacct -P failed ($matches != 2)\n" set exit_code 1 } set matches [sacct_job -parsable2 $job_id] if {$matches != 2} { - send_user "\nFAILURE: sacct --parsable2 failed ($matches)\n" + send_user "\nFAILURE: sacct --parsable2 failed ($matches != 2)\n" set exit_code 1 } set matches [sacct_vargs u $user_name $job_id] if {$matches != 3} { - send_user "\nFAILURE: sacct -g failed ($matches)\n" + send_user "\nFAILURE: sacct -g failed ($matches != 3)\n" set exit_code 1 } set matches [sacct_vargs -uid $user_name $job_id] if {$matches != 3} { - send_user "\nFAILURE: sacct --gid failed ($matches)\n" + send_user "\nFAILURE: sacct --gid failed ($matches != 3)\n" set exit_code 1 } set matches [sacct_vargs -user $user_name $job_id] if {$matches != 3} { - send_user "\nFAILURE: sacct --group failed ($matches)\n" + send_user "\nFAILURE: sacct --group failed ($matches != 3)\n" set exit_code 1 } set matches [sacct_job v $job_id] if {$matches != 4} { - send_user "\nFAILURE: sacct -v failed ($matches)\n" + send_user "\nFAILURE: sacct -v failed ($matches != 4)\n" set exit_code 1 } set matches [sacct_job -verbose $job_id] if {$matches != 4} { - send_user "\nFAILURE: sacct --verbosee failed ($matches)\n" + send_user "\nFAILURE: sacct --verbosee failed ($matches != 4)\n" set exit_code 1 } diff --git a/testsuite/expect/test12.5 b/testsuite/expect/test12.5 index 
cf319ca4b8bb1047a9590e2eff69767412c50ef6..d0b4bbb2e1a097c822882a3798ac93fc137dc292 100755 --- a/testsuite/expect/test12.5 +++ b/testsuite/expect/test12.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -70,7 +70,7 @@ proc sacct_job { soption } { incr matches exp_continue } - -re "AveCPUFreq *AvePages *AveRSS *AveVMSize" { + -re "AveCPUFreq *AveDiskRead *AveDiskWrite *AvePages" { if {$debug} {send_user "\nmatch2\n"} incr matches exp_continue diff --git a/testsuite/expect/test12.6 b/testsuite/expect/test12.6 new file mode 100755 index 0000000000000000000000000000000000000000..fe6cc0092fd2da482ba88f4ccf3a9954e1ccd851 --- /dev/null +++ b/testsuite/expect/test12.6 @@ -0,0 +1,249 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test hdf5 acct_gather_profile (--profile=task) +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2013 Bull S. A. S. +# Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. +# +# Written by Rod Schultz <rod.schultz@bull.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "12.6" +set exit_code 0 +set file_out "test$test_id.output" +set file_prog "test$test_id.prog" +set job_id 0 + +proc get_jobacct_freq {} { + global scontrol + set task_freq 30 + log_user 0 + match_max -d 100000 + spawn $scontrol show config + expect { + timeout { + send_user "\nCan't find JobAcctGatherFrequency in slurm.conf. Using $task_freq" + return $task_freq + } + } + wait + log_user 1 + set opt "" + set lines [split $expect_out(buffer) "\n"] + set nl [llength $lines] + for {set lx 0} {$lx < $nl} {incr lx} { + set line [lindex $lines $lx] + if {[string first "JobAcctGatherFrequency" $line] != -1 } { + set poseq [string first "=" $line] + set opt [string range $line $poseq+2 end] + set opt [string tolower $opt] + break + } + } + if {$opt == ""} { + send_user "\nJobAcctGatherFrequency in slurm.conf. 
Using $task_freq" + } + + set tokens [split $opt ","] + set nt [llength $tokens] + if {$nt == 1} { + if {[string first "task=" $opt] == -1} { + return $opt + } + } + for {set tx 0} {$tx < $nt} {incr tx} { + set opt [lindex $tokens $tx] + if {[string first "task=" $opt] != -1} { + return [string range $opt 5 end] + } + } + return task_freq +} + +print_header $test_id + +if {[test_front_end]} { + send_user "\nWARNING: This test is incompatible with front-end systems\n" + exit $exit_code +} + +# Check if acct_gather_profile/hdf5 is installed +set profile 0 +log_user 0 +spawn $scontrol show config +expect { + -re "acct_gather_profile/hdf5" { + set profile 1 + } + eof { + wait + } +} +log_user 1 +if {$profile == 0} { + send_user "\nWARNING: acct_gather_profile/hdf5 not installed on this system\n" + exit 0 +} +send_user "\nacct_gather_profile/hdf5 plugin installed\n" +send_user "Note: this test takes 3 minutes to run\n" + +set task_freq [get_jobacct_freq] + +# +# Build a test program to put a known load on the system +# +exec $bin_rm -f $file_prog +exec $bin_cc -I$build_dir $file_prog.c -lm -o $file_prog +exec $bin_chmod 700 $file_prog + +set timeout [expr $max_job_delay + 200] + +# Override task polling supplied by slurm.conf +# I am not sure why we are doing this (but we are) +# The original task_freq had a + 5 here. That will always fail with a bad +# allocation if you are enforcing memory limits, so we changed it to - 5 +# instead. I don't think it really matters though. 
+set task_freq [expr $task_freq - 5] +set srun_pid [spawn $srun --acctg-freq=$task_freq --profile=task -t5 ./$file_prog] +expect { + -re "SLURM_JobId=($number)" { + set job_id $expect_out(1,string) + exp_continue + } + -re "error:" { + send_user "\nFAILURE: something happened on start of job\n" + exit 1 + } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + exit 1 + } + eof { + wait + } +} + +set timeout 10 +set srun_pid [spawn $sh5util -j $job_id] +expect { + timeout { + send_user "\nFAILURE: sh5util merge not responding\n" + slow_kill $srun_pid + exit 1 + } + eof { + wait + } +} + +set hdf5_file "job_$job_id.h5" +set srun_pid [spawn $sh5util -j $job_id -l Node:TimeSeries -s Tasks -o $file_out] +expect { + timeout { + send_user "\nFAILURE: sh5util extract not responding\n" + slow_kill $srun_pid + exit 1 + } + eof { + wait + } +} + +set line "" +set nerr 0 +set lno 0 +set fd 0 +set last_et 0 +set fd [open $file_out "r"] +while {$fd > 0 && [gets $fd line] != -1} { + incr lno + if {$lno > 2} { + set tokens [split $line ","] + if {[llength $tokens] < 14} { + send_user "\nFAILURE: too few items on line $lno" + set exit_code 1 + break; + } + set et [lindex $tokens 5] + set cur_et [expr $et - $last_et] + set last_et $et + set low_rd [expr 0.975 * 10 * $cur_et] + set hi_rd [expr 1.025 * 10 * $cur_et] + if {$lno == 2 && $et < 30} { + send_user "\nWarning: jobacct_gather_freq < 30, results are unreliable\n" + } + if {$lno == 2 && $et < $task_freq} { + send_user "\nFAILURE: sample $et is not --acctg-freq=$task_freq\n" + set exit_code 1 + } + set cputil [lindex $tokens 8] + # The range on cpu utilization is pretty wide + # Linux accounting resolution is only to one second, so in a + # typical 30 interval an extra second is 3%. The burn loop + # consumes a bit more that asked for. There is additional type + # managing the I/O portion. Slurm and linux also consume some + # cpu. 
+ if {$cputil < 38.0} { + send_user "\nWarning: CPU Busy $cputil not near 40% on line $lno\n" + incr nerr + } + if {$cputil > 47.0} { + send_user "\nWarning: CPU Busy $cputil not near 40% on line $lno\n" + incr nerr + } + set rdmb [lindex $tokens 12] + if {$rdmb < $low_rd} { + send_user "\nWarning: Read Megabytes $rdmb not near 100.0 on line $lno\n" + incr nerr + } + if {$rdmb > $hi_rd} { + send_user "\nWarning: Read Megabytes $rdmb not near 100.0 on line $lno\n" + incr nerr + } + } +} +close $fd + +if {$lno == 0} { + send_user "\nFAILURE: $file_out is empty\n" + set exit_code 1 +} + +if {$nerr > 3} { + send_user "\nFAILURE: $nerr is too many values out of range\n" + set exit_code 1 +} + +if {$exit_code == 0} { + exec rm -f $file_prog $file_out $hdf5_file + send_user "\nSUCCESS\n" +} else { + exec mv $hdf5_file test$test_id.hdf5 +} + +exit $exit_code diff --git a/testsuite/expect/test12.6.prog.c b/testsuite/expect/test12.6.prog.c new file mode 100644 index 0000000000000000000000000000000000000000..ebfa5e8aa9e1daa8412fe6a300b53994792b7055 --- /dev/null +++ b/testsuite/expect/test12.6.prog.c @@ -0,0 +1,166 @@ +/*****************************************************************************\ + * Consume CPU and I/O resources. + * + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * Written by Rod Schultz <rod.schultz@bull.com> + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation version 2 of the License. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. 
+ * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * + *****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/time.h> +#include <math.h> + +#define SECOND2RUN 180 +#define BURNBUFSIZ 1024*1024 +#define CPUBUSY_PCT 40 +#define READLOOP 10 +#define WRITELOOP 20 +// WRITELOOP must be greater than READLOOP + +int busyloop(int burn, int nxny, double *m1, double *m2, double *m1m2) +{ + int i, j, k; + int iters = 0; + struct timeval tv; + gettimeofday(&tv, NULL); + int loopstart = tv.tv_usec; + int curtime = tv.tv_usec; + int busy = 0; + int nxny_x, ioff, joff; + + while (busy < burn) { + iters++; + nxny_x = 0; + for (i=0; i<nxny; i++) { + ioff=i*nxny; + for (j=0; j<nxny; j++) { + joff=j*nxny; + m1m2[nxny_x]=0; + for (k=0; k<nxny; k++) { + m1m2[nxny_x] += m1[ioff+k]*m2[k+joff]; + } + nxny_x++; + } + } + gettimeofday(&tv, NULL); + curtime = tv.tv_usec; + if (curtime < loopstart) + curtime += 1000000; // second rollover + busy = (curtime-loopstart); + } + return busy; +} + +void do_io(char *burnpath, char *burnbuf, int iosize, int nread, int nwrite) +{ + int ix; + FILE *fd = NULL; + fd = fopen (burnpath, "w"); + if (!fd) { + perror ("fopen for write"); + } + for (ix=0; ix<nwrite; ix++) { + if (!fwrite (burnbuf, iosize, 1, fd)) { + perror ("fwrite"); + } + } + fclose(fd); + fd = fopen (burnpath, "r"); + if (!fd) { + perror ("fopen for read"); + } + for (ix=0; ix<nread; ix++) { + if (!fread (burnbuf, iosize, 1, fd)) { + perror ("fread"); + } + } + fclose(fd); +} + +int main (int argc, char **argv) +{ + int nxny = 100; // Dimension of matrices + int writes = 0, reads = 0; + int actualbusy = 0, desiredbusy = 0; + int job_id, step_id, task_id; + int dobusy, loopstart, 
curtime, ms_busy, nap, irx, i,j; + double *m1, *m2, *m1m2; + float realpctbusy; + char *env_str, *burnbuf; + char burnpath[64]; + struct timeval tv; + if (READLOOP > WRITELOOP) { + printf("\nFATAL: Compile parameter READLOOP > WRITELOOP"); + exit(1); + } + if ((env_str = getenv("SLURM_JOB_ID")) == NULL) { + fprintf(stderr, "info: getenv(SLURM_JOB_ID) failed. " + "(Not running SLURM?)\n"); + } else { + job_id = atoi(env_str); + if ((env_str = getenv("SLURM_STEPID")) != NULL) { + step_id = atoi(env_str); + } + if ((env_str = getenv("SLURM_PROCID")) != NULL) { + task_id = atoi(env_str); + } + } + + burnbuf = malloc(sizeof(char)*BURNBUFSIZ); + memset(burnbuf,'\0',BURNBUFSIZ); + sprintf(burnpath,"/tmp/ioburn_%d_%d_%d",job_id,step_id,task_id); + ms_busy = CPUBUSY_PCT*10000; + m1 = malloc(sizeof(double)*nxny*nxny); + m2 = malloc(sizeof(double)*nxny*nxny); + m1m2 = malloc(sizeof(double)*nxny*nxny); + for (i=0; i<nxny; i++) { + for (j=0;j<nxny;j++) { + m1[i*nxny+j]= ((double) random()); + m2[i*nxny+j]= ((double) random()); + } + } + // 1 second load (%busy, io burn, sleep) + for (irx=0; irx<SECOND2RUN; irx++) { + gettimeofday(&tv, NULL); + desiredbusy += ms_busy; + loopstart = tv.tv_usec; + dobusy = desiredbusy - actualbusy; + if (dobusy > 0) + actualbusy += busyloop(dobusy, nxny, m1, m2, m1m2); + do_io(burnpath, burnbuf, BURNBUFSIZ, READLOOP, WRITELOOP); + reads += READLOOP; + writes += WRITELOOP; + gettimeofday(&tv, NULL); + curtime = tv.tv_usec; + if (curtime < loopstart) + curtime += 1000000; + nap = 1000000 - (curtime - loopstart); + usleep(nap); + } + realpctbusy = ((float) actualbusy)*100.0/((float)(SECOND2RUN*1000000)); + printf("\ntest12.6.prog finished after %d seconds. 
busy=%3.1f%% " + "Reads=%d Writes=%d SLURM_JobId=%d StepId=%d TaskId=%d\n", + SECOND2RUN,realpctbusy,reads, writes,job_id,step_id,task_id); + free(m1); + free(m2); + free(m1m2); + free(burnbuf); + remove(burnpath); + return 0; +} diff --git a/testsuite/expect/test13.1 b/testsuite/expect/test13.1 index 86c7eb088d44297caf820008798877a315f9eb62..c5502c23a61682f65d5859a3832533f37e13f4c8 100755 --- a/testsuite/expect/test13.1 +++ b/testsuite/expect/test13.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test13.2 b/testsuite/expect/test13.2 index 8bd68d4d9662577b75f124b236de3ffda48db9dd..f847ac9f29a73949004b1b86bed391afefabe064 100755 --- a/testsuite/expect/test13.2 +++ b/testsuite/expect/test13.2 @@ -10,7 +10,7 @@ # Copyright (C) 2012 SchedMD LLC # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.1 b/testsuite/expect/test14.1 index 821355e19ac837c59208941f996942aafa6643ca..00117b57043b1def12b23fa766bf7ee94dc496a8 100755 --- a/testsuite/expect/test14.1 +++ b/testsuite/expect/test14.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.2 b/testsuite/expect/test14.2 index aa0bc59e6ae2169428e46b5a434552c7082ff26f..1e14c71dbc07a7569883908cc05f60f795819e75 100755 --- a/testsuite/expect/test14.2 +++ b/testsuite/expect/test14.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.3 b/testsuite/expect/test14.3 index e3a9343d7aea73f9996d2b0decd868e308d525b4..48e4d3eaaaa29c37db00e989df78bd4b708f67d2 100755 --- a/testsuite/expect/test14.3 +++ b/testsuite/expect/test14.3 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.4 b/testsuite/expect/test14.4 index ecb950b2a6a37ed7adcfde19fe6c6a5fac55845e..e160b7977c8b44d531d341821d0a9dcfe52f9b1a 100755 --- a/testsuite/expect/test14.4 +++ b/testsuite/expect/test14.4 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.5 b/testsuite/expect/test14.5 index 00a699c00d2f026b7a43ba8acc41b4d2d34a30c9..1abe0024df91587b842e0af89f1ccf307b0a1709 100755 --- a/testsuite/expect/test14.5 +++ b/testsuite/expect/test14.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.6 b/testsuite/expect/test14.6 index 7610742fd3cf88098dcd72bedc64bbabe83d7c29..545038f5c6721f732468c18592829d5cceeddcc1 100755 --- a/testsuite/expect/test14.6 +++ b/testsuite/expect/test14.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.7 b/testsuite/expect/test14.7 index e385d78481ab7ad905847f7fcbfd751cdc88cf32..0daabec270f2cbb4237480ae6e4cd0816c3d29d6 100755 --- a/testsuite/expect/test14.7 +++ b/testsuite/expect/test14.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.8 b/testsuite/expect/test14.8 index 5ecdb1c6f70d517fd65ce4518bc05d8f84620842..be63ff808b4492e6d1447dc8265d6e50537432f8 100755 --- a/testsuite/expect/test14.8 +++ b/testsuite/expect/test14.8 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test14.9 b/testsuite/expect/test14.9 index 48d529fc1be64e416b0b52082e53f07f6415eca1..1edc59cf4c46d62d2bb74b34fc42a818123af707 100755 --- a/testsuite/expect/test14.9 +++ b/testsuite/expect/test14.9 @@ -11,7 +11,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.1 b/testsuite/expect/test15.1 index e35b3248335403e787f62d49913fcaadfcf77e69..89e0557364d13f7f0ff7e118283d0f89665b3e48 100755 --- a/testsuite/expect/test15.1 +++ b/testsuite/expect/test15.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.10 b/testsuite/expect/test15.10 index 0d939d2aaaab351ea6ce57cb136c28b043234605..c4e4e7fc8a2b582491320007bbcf95e48e057845 100755 --- a/testsuite/expect/test15.10 +++ b/testsuite/expect/test15.10 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.11 b/testsuite/expect/test15.11 index 75f4a7d22a4187b05648bf13849d974052dde3c2..27779b9e8dc4e5b0e96cafe64f0d6134e5ffd21e 100755 --- a/testsuite/expect/test15.11 +++ b/testsuite/expect/test15.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.12 b/testsuite/expect/test15.12 index 831ff5b6945aab3871ee3bb651a1307bf529d63e..66dbe70fa3e3ebc47272511dcbcb879eeec2273f 100755 --- a/testsuite/expect/test15.12 +++ b/testsuite/expect/test15.12 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.13 b/testsuite/expect/test15.13 index a83265326c45a8ca7721fa318b1566fe291103dc..f83cd9f2ed84d5bc1e29e8a5c4d2e27b6925eb13 100755 --- a/testsuite/expect/test15.13 +++ b/testsuite/expect/test15.13 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.14 b/testsuite/expect/test15.14 index d570aadcf9af7e5fb205ae32fb20defd96a5b29f..9e345a9f762f705d2b1e249662c19f267c60ebd7 100755 --- a/testsuite/expect/test15.14 +++ b/testsuite/expect/test15.14 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.15 b/testsuite/expect/test15.15 index 29c8a2b7aeeadf7414c7a1a08fa5b12e5acc0586..f90dd6176ac713a5cdca914d4c9d9d618492cca5 100755 --- a/testsuite/expect/test15.15 +++ b/testsuite/expect/test15.15 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.16 b/testsuite/expect/test15.16 index 59bae5bc89046f3d76d667b2077177259a3c7e7d..7eec23c96e1af33067af0dc9b4e5470d599622f6 100755 --- a/testsuite/expect/test15.16 +++ b/testsuite/expect/test15.16 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.17 b/testsuite/expect/test15.17 index f4330e976d3524aa557877ca18383e5ea4f73443..3d3036acbc5af8c6d2f14cb0ffb1730ab8c82430 100755 --- a/testsuite/expect/test15.17 +++ b/testsuite/expect/test15.17 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.18 b/testsuite/expect/test15.18 index 83397b46dbe5f626bab84f7a14f5dc4b42900683..dd4cdeb5c9129d9f1dafbb73ed340fa031f21597 100755 --- a/testsuite/expect/test15.18 +++ b/testsuite/expect/test15.18 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.19 b/testsuite/expect/test15.19 index 156b8b81b50e570e804537dee10eca417c01537f..2db5fede9399ba068aef9e6eab4fd073aad4c144 100755 --- a/testsuite/expect/test15.19 +++ b/testsuite/expect/test15.19 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.2 b/testsuite/expect/test15.2 index a8a9886e7da1692273512c1e6208adb042621662..54a8d79dab032656a4e3b5975d8c35fe0aefc946 100755 --- a/testsuite/expect/test15.2 +++ b/testsuite/expect/test15.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.20 b/testsuite/expect/test15.20 index 66083341e6e4a42c75d272944c19c4339ece05c1..917321fdaa92c4175f804d9d992376ab4da43d11 100755 --- a/testsuite/expect/test15.20 +++ b/testsuite/expect/test15.20 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.21 b/testsuite/expect/test15.21 index 36de01ac9ff90f3c11df563f11d78e403a56c094..6ee9239382a28e428b0edc2e5bb684e4108e7080 100755 --- a/testsuite/expect/test15.21 +++ b/testsuite/expect/test15.21 @@ -19,7 +19,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -50,7 +50,7 @@ if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatible topology configured systems\n" exit 0 } elseif {[test_serial]} { - send_user "\nWARNING: This test is not compatible with serial system\n" + send_user "\nWARNING: This test is incompatible with select/serial systems\n" exit 0 } diff --git a/testsuite/expect/test15.22 b/testsuite/expect/test15.22 index 74726d7c64de952ee57fd0becf0e1636ddd2aa97..8d572219e79e4420f37a4dd2ce0d4243085b9979 100755 --- a/testsuite/expect/test15.22 +++ b/testsuite/expect/test15.22 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.23 b/testsuite/expect/test15.23 index e366e4a0bfc095228665c2845946cd65f795eafb..28a8f6cedd32ce5895fd9253a33895dcdf91316e 100755 --- a/testsuite/expect/test15.23 +++ b/testsuite/expect/test15.23 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.24 b/testsuite/expect/test15.24 index f8b68a7c33e2f0d4cdb6f137af18e6f9070befd1..6111bdb19516c3132a86b1f5d2e9b109f955d8d2 100755 --- a/testsuite/expect/test15.24 +++ b/testsuite/expect/test15.24 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.25 b/testsuite/expect/test15.25 index 63d8aa5c70d8a88c82f666dd8ec67a3ba68ad193..f587dd4bfb123fc60e40a62cbe301227ea6f9701 100755 --- a/testsuite/expect/test15.25 +++ b/testsuite/expect/test15.25 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.26 b/testsuite/expect/test15.26 index 23883a448cdd6da5197117cb668243f2450a98f1..6bbaacbf0c41c2841234a216259b6e98e9a256ce 100755 --- a/testsuite/expect/test15.26 +++ b/testsuite/expect/test15.26 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.3 b/testsuite/expect/test15.3 index 860264a238636a69efbc7a3975c844e93122d239..6bde1c5657d8b90549309967e3842550979bb04e 100755 --- a/testsuite/expect/test15.3 +++ b/testsuite/expect/test15.3 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.4 b/testsuite/expect/test15.4 index 1e0e040540b0c9c578469cb6451248f8e382f56e..4708d7b5d62a8a8b05a28fbcc11dcbb81daaedc1 100755 --- a/testsuite/expect/test15.4 +++ b/testsuite/expect/test15.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.5 b/testsuite/expect/test15.5 index 3bea6a9a142984fa800ffc66f7cbd0428febae8e..1e8e0ddd35319ccb83c6be34da6dbade60212332 100755 --- a/testsuite/expect/test15.5 +++ b/testsuite/expect/test15.5 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.6 b/testsuite/expect/test15.6 index 3022a660948166b396da4a94b1551b3151952e66..ac37a7da7080a5fc97e8aa90d9b62d49b0cd336b 100755 --- a/testsuite/expect/test15.6 +++ b/testsuite/expect/test15.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.7 b/testsuite/expect/test15.7 index 48215c11f46bc49014de3f4fb4c1b872dcad0788..a7748828b2718ff53de62e98e52212098553d943 100755 --- a/testsuite/expect/test15.7 +++ b/testsuite/expect/test15.7 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.8 b/testsuite/expect/test15.8 index 4e99afade77a5e85a01fb7f6eb04580d72127d3d..2a005f2d3415799d1f4b344240790b57ee5cc126 100755 --- a/testsuite/expect/test15.8 +++ b/testsuite/expect/test15.8 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test15.9 b/testsuite/expect/test15.9 index 24569f19fc956a441bd6f15110e3f04e1a3d24d5..aaa8fe9290dc3a1de9cbbffe30cfac358f900e9c 100755 --- a/testsuite/expect/test15.9 +++ b/testsuite/expect/test15.9 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test16.1 b/testsuite/expect/test16.1 index 6652a12b720efa057a0deaea9664f4eecdd4bd9c..6ed621e42e870e40888e4608659e4525212d7a6a 100755 --- a/testsuite/expect/test16.1 +++ b/testsuite/expect/test16.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test16.2 b/testsuite/expect/test16.2 index b9f55f5810082799ecdbaf8016503743c4eb51bf..02b154cd6885044299320fc9f4f3772cc58e5bbe 100755 --- a/testsuite/expect/test16.2 +++ b/testsuite/expect/test16.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test16.3 b/testsuite/expect/test16.3 index bb0dbca4cf882704f8cdfd3471353092b0d8e3ee..e8900045e09c2bef11c74f7ce8a7ad72ea6d2fe8 100755 --- a/testsuite/expect/test16.3 +++ b/testsuite/expect/test16.3 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test16.4 b/testsuite/expect/test16.4 index 43ef44ddb74a367c307297627d81d942961b80cd..8f091b8f651b67079dad261d9d67286f141d7ef1 100755 --- a/testsuite/expect/test16.4 +++ b/testsuite/expect/test16.4 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test16.4.prog.c b/testsuite/expect/test16.4.prog.c index e66e84a8ed19c42102499be2be27249990c243a2..cc9ef53b8b47bf0bd8f8b5cae99482adb2737b36 100644 --- a/testsuite/expect/test16.4.prog.c +++ b/testsuite/expect/test16.4.prog.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.1 b/testsuite/expect/test17.1 index a67f65398d93df599899412df1204b63da597c14..6a5a7e10550c74a7e89ea39b38fc73489363a07d 100755 --- a/testsuite/expect/test17.1 +++ b/testsuite/expect/test17.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.10 b/testsuite/expect/test17.10 index 965a92f3a96037dcfe433d7187ca6c1aea287045..5a7fbd7b86281852e51cb6306afa1fb9e53821ea 100755 --- a/testsuite/expect/test17.10 +++ b/testsuite/expect/test17.10 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.11 b/testsuite/expect/test17.11 index 9cd0972c0eb83f944c667bb71ce9526b3ca2a105..9f425332325f562050d22725956e48eca90a4825 100755 --- a/testsuite/expect/test17.11 +++ b/testsuite/expect/test17.11 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.12 b/testsuite/expect/test17.12 index c48c8a47724c1dcd3dcc58e35a61e7334adeb3d2..33134646e1acf44f6e309f46ddfc41b6919f683d 100755 --- a/testsuite/expect/test17.12 +++ b/testsuite/expect/test17.12 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.13 b/testsuite/expect/test17.13 index e8a4e7d63256530d14799daa8b5a381caec81c02..aced31aad40a044eb842212f9e95cae96ae2eee8 100755 --- a/testsuite/expect/test17.13 +++ b/testsuite/expect/test17.13 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.14 b/testsuite/expect/test17.14 index c9954834089413621967f559600650cd355e1eee..b0131159ec0b7ce426d36f250e6fa5728f0f7398 100755 --- a/testsuite/expect/test17.14 +++ b/testsuite/expect/test17.14 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.15 b/testsuite/expect/test17.15 index 3b2fb61fc92d2573d675a389cac5c6d75f0dea28..b2bf8cab9135b5bd01597af0845e1bbefe8d8c0b 100755 --- a/testsuite/expect/test17.15 +++ b/testsuite/expect/test17.15 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.15.prog.c b/testsuite/expect/test17.15.prog.c index f1a6ac5f26cb05698bf1bf21f04105d949df1ea4..6fe323d961d5ff91bea8cc2a63ed6800a14e1346 100644 --- a/testsuite/expect/test17.15.prog.c +++ b/testsuite/expect/test17.15.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.16 b/testsuite/expect/test17.16 index f883cf56f64f115f81966d38c4cbd300b554b61a..8ecbbc0e8fe7997b52ee9138124f578e09a4ad86 100755 --- a/testsuite/expect/test17.16 +++ b/testsuite/expect/test17.16 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.17 b/testsuite/expect/test17.17 index 9f01987c5fe1332b239eab051182c108548c68ec..bc8bbdea2f4e151d20d52998682ca4edd5fbcbe2 100755 --- a/testsuite/expect/test17.17 +++ b/testsuite/expect/test17.17 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.18 b/testsuite/expect/test17.18 index afe8e5d11015d4588e8fdaf7e59dbda01e8ff9b1..da76695299fd8b3178d1b2e422545dd62ef542ce 100755 --- a/testsuite/expect/test17.18 +++ b/testsuite/expect/test17.18 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.19 b/testsuite/expect/test17.19 index b88696f7ffc0b7f9507965beab08b1eb595e16cf..c2a8c63ae2ec23f67c0b12fe2c53a7efcc2a2b29 100755 --- a/testsuite/expect/test17.19 +++ b/testsuite/expect/test17.19 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -52,7 +52,7 @@ make_bash_script $file_in "$bin_id" set part_cnt 0 set fd [open "|$scontrol --all --oneliner show partition"] while {[gets $fd line] != -1} { - if {[regexp {^PartitionName=(\w+).*State=UP} $line frag part($part_cnt)] == 1} { + if {[regexp {^PartitionName=([^ ]*).*State=UP} $line frag part($part_cnt)] == 1} { incr part_cnt } } diff --git a/testsuite/expect/test17.2 b/testsuite/expect/test17.2 index 85ccef65abc512c0e9d5242b4bc15af4f3990320..5f2ae8ce18bad6f9d00b774e2537ef21c2a56fe6 100755 --- a/testsuite/expect/test17.2 +++ b/testsuite/expect/test17.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.20 b/testsuite/expect/test17.20 index d48a0344d9c6f7c9f286825154d71f5e84581b93..5ec8a7d4f035fbbd9b3528dd2119c00e1cb7dc8f 100755 --- a/testsuite/expect/test17.20 +++ b/testsuite/expect/test17.20 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.21 b/testsuite/expect/test17.21 index 5fc495f31959d84833d21a441c89fc85f7bcd9f8..e645948a90d7d2f5164440cca59843d5cd4bf0df 100755 --- a/testsuite/expect/test17.21 +++ b/testsuite/expect/test17.21 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.22 b/testsuite/expect/test17.22 index 91eb5b4d68e6235721c56356bd5d890b419f9aba..cbddca70105fecaf25004efeb9ac17b3379e2820 100755 --- a/testsuite/expect/test17.22 +++ b/testsuite/expect/test17.22 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.23 b/testsuite/expect/test17.23 index 0c59a5f7d846ffad3cb373b6354f4c467ed57e44..a804393ee203b63df69e51c87da277fb577928d6 100755 --- a/testsuite/expect/test17.23 +++ b/testsuite/expect/test17.23 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.24 b/testsuite/expect/test17.24 index e59cf166a5a39875b3777610a746376017567a1b..340a542995978b89e0bdbe0e7a973c14a1ab9bba 100755 --- a/testsuite/expect/test17.24 +++ b/testsuite/expect/test17.24 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.25 b/testsuite/expect/test17.25 index 093059cce4872f92f481bb5f64fd2abca14e935e..07c25141575367da6e89aed605dc1a41b13ffe5a 100755 --- a/testsuite/expect/test17.25 +++ b/testsuite/expect/test17.25 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.26 b/testsuite/expect/test17.26 index 81b9d26b1ffb32939ccbdff9de245faa1adbcb2a..0ca15ecf3749a3118e0097f24dbb6097b74b617f 100755 --- a/testsuite/expect/test17.26 +++ b/testsuite/expect/test17.26 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.27 b/testsuite/expect/test17.27 index 0a28217e30d2af25b62453f11c0da776181bfeec..24d072ba16f23557c19224f016179bfd3caa138c 100755 --- a/testsuite/expect/test17.27 +++ b/testsuite/expect/test17.27 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.28 b/testsuite/expect/test17.28 index bd987018e79d4a204127c2d7ce996801abadac37..609e00117119c73a6dd5b186dcead79b3ad83692 100755 --- a/testsuite/expect/test17.28 +++ b/testsuite/expect/test17.28 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.29 b/testsuite/expect/test17.29 index a9c65c2389f63d622396ebed78384e3230292ab1..2c8be01f37f111f3ba3dee294cb3643fbde9d7bc 100755 --- a/testsuite/expect/test17.29 +++ b/testsuite/expect/test17.29 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.3 b/testsuite/expect/test17.3 index 8c2109515c69104ed8bfcdccbe0ba6e1f2571c34..ada7a052ebd96bb85446e054397dc65e377854ab 100755 --- a/testsuite/expect/test17.3 +++ b/testsuite/expect/test17.3 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.31 b/testsuite/expect/test17.31 index 6c24d5095044186fcc0a7e1b01a20e7fbfdc6f2c..8aeda18602cb9b60b8a1dbad55775f9d0ec21438 100755 --- a/testsuite/expect/test17.31 +++ b/testsuite/expect/test17.31 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.32 b/testsuite/expect/test17.32 index 8b5d5d192483e81a0e0e86c1bf1f743180fa75f9..ef982503da16cbe5fb0c6df1d774e624b992069e 100755 --- a/testsuite/expect/test17.32 +++ b/testsuite/expect/test17.32 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.33 b/testsuite/expect/test17.33 index 775541d5f057bfdad2be7df2b7f90dfd5e76a230..330d11a7ad98f2720076438fe448d5aa70a6ca62 100755 --- a/testsuite/expect/test17.33 +++ b/testsuite/expect/test17.33 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.4 b/testsuite/expect/test17.4 index fbea18ac6ee86d986f90b466d670ae630f804b5e..2152defe32d0b22fe7380d904b7f1cd162d3df05 100755 --- a/testsuite/expect/test17.4 +++ b/testsuite/expect/test17.4 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.5 b/testsuite/expect/test17.5 index 745fb006e5c1955dfa7fd2e12f306880227e4895..3bb257c514b9917f6b8c2113bbcdf418cf15b324 100755 --- a/testsuite/expect/test17.5 +++ b/testsuite/expect/test17.5 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -35,12 +35,15 @@ source ./globals set test_id "17.5" +set user_name "" set file_in "test$test_id.input" set file_script "test$test_id.bash" set file_out "test$test_id.output" set file_err "test$test_id.error" set file_out_j "test$test_id.j.%j.output" set file_err_j "test$test_id.j.%j.error" +set file_out_u "test$test_id.%u.output" +set file_err_u "test$test_id.%u.error" set exit_code 0 print_header $test_id @@ -63,6 +66,19 @@ make_bash_script $file_script " exit 0 " +# +# Get the name of the current user +# +spawn $bin_id -un +expect { + -re "($alpha_numeric_under)" { + set user_name $expect_out(1,string) + } + eof { + wait + } +} + # # Spawn a shell via sbatch that submits from a different directory and uses stdout/err # and confirm their contents @@ -234,6 +250,56 @@ if {[wait_for_file $file_err_j_glob] == 0} { set exit_code 1 } +# +# Spawn a shell via sbatch that uses stdout/err with %u in their names +# +set job_id 0 +spawn $sbatch -N1 --output=$file_out_u --error=$file_err_u -t1 $file_in +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if { $job_id == 0 } { + send_user "\nFAILURE: failed to submit job\n" + exit 1 +} + +# +# Wait for job to complete +# +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: waiting for job to complete\n" + set exit_code 1 +} + +# +# Check for desired output +# +set file_out_u_glob "test$test_id.$user_name.output" +set file_err_u_glob "test$test_id.$user_name.error" +if {[wait_for_file $file_out_u_glob] == 0} { + exec $bin_rm -f $file_out_u_glob +} else { + send_user "\nFAILURE: file format of %u in stdout failed\n" + set exit_code 1 +} +if {[wait_for_file $file_err_u_glob] == 0} { + exec $bin_rm -f $file_err_u_glob +} else { + send_user "\nFAILURE: file format of %u 
in stderr failed\n" + set exit_code 1 +} + # # Spawn a program to run for a while with no output or error # diff --git a/testsuite/expect/test17.6 b/testsuite/expect/test17.6 index a65ccad69aeb79d29a7161b42ebcfdb9e0ffc8f4..7bd1fb4178e0e09df69430df033f88cce7501d01 100755 --- a/testsuite/expect/test17.6 +++ b/testsuite/expect/test17.6 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.7 b/testsuite/expect/test17.7 index ae0952566b1fa761a0f5223b34e2a8481af6b128..7033f673eccae7a7924bdf403170f1c3343afb17 100755 --- a/testsuite/expect/test17.7 +++ b/testsuite/expect/test17.7 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.8 b/testsuite/expect/test17.8 index 0bac5ba07a24ac5aa79e6564a4d38bd579320ce6..959bb9e5be6957e42e35c1e2e8374b85a0b3aa0a 100755 --- a/testsuite/expect/test17.8 +++ b/testsuite/expect/test17.8 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test17.9 b/testsuite/expect/test17.9 index dfa824183f45723f9feaca2db73d237c5c12cdcb..c57b05f4abdcce14c02be558a433fadd55f6be2f 100755 --- a/testsuite/expect/test17.9 +++ b/testsuite/expect/test17.9 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.1 b/testsuite/expect/test19.1 index 095aabbe62279adcff3f580597c5cba32c2f061b..ee66fdce2392cb20a571ac65b74e9150b5516bd9 100755 --- a/testsuite/expect/test19.1 +++ b/testsuite/expect/test19.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.2 b/testsuite/expect/test19.2 index f2f468598fa52afe9cadcd7a1a09f42b7478db18..f734de1455990eeb89abf7ef1f08896d2d144df9 100755 --- a/testsuite/expect/test19.2 +++ b/testsuite/expect/test19.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.3 b/testsuite/expect/test19.3 index e214ada41cacfe9cc3ee97e34db6199ec15270bd..286201ebbebccb2a15fa52bc85b24e5d20a864b6 100755 --- a/testsuite/expect/test19.3 +++ b/testsuite/expect/test19.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.4 b/testsuite/expect/test19.4 index 4a8be54b746633e1e4a50024ae8a02854c69a9ef..7c6892dff3da9f197f62c51edfda2afb50c1b5da 100755 --- a/testsuite/expect/test19.4 +++ b/testsuite/expect/test19.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.5 b/testsuite/expect/test19.5 index b6f8366ddb2b6c5e4ffbea792444494357a9d01b..0974276b64251e71316423af92e27f1df8daa28c 100755 --- a/testsuite/expect/test19.5 +++ b/testsuite/expect/test19.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.6 b/testsuite/expect/test19.6 index 498e40d28e28470b7ced62702694f5def7a7b4c9..53cb2a4489e50440681c69f345006c7032e1508b 100755 --- a/testsuite/expect/test19.6 +++ b/testsuite/expect/test19.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test19.7 b/testsuite/expect/test19.7 index f79fb881c9767984ce8414421c401581c2fb4211..8a936cc9575644b2308fde896950e5644b301c32 100755 --- a/testsuite/expect/test19.7 +++ b/testsuite/expect/test19.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.1 b/testsuite/expect/test2.1 index bc1dee2b3ead03475d62a99b4bef3de7ec242d96..bc2267d21f56a23dda167300a4ca915c6be538a3 100755 --- a/testsuite/expect/test2.1 +++ b/testsuite/expect/test2.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.10 b/testsuite/expect/test2.10 index 00f06482b477051c7d055f86134ef5eb2397c8a1..d0d4e03f9eca60f395ea27f7fd7ac4cf86f67f29 100755 --- a/testsuite/expect/test2.10 +++ b/testsuite/expect/test2.10 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.11 b/testsuite/expect/test2.11 index 48fd1bdf3aa48ac0a8f9173cae052795fe89a107..a6f396c5934150c00080e9b78ac2cb5efc998d59 100755 --- a/testsuite/expect/test2.11 +++ b/testsuite/expect/test2.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.12 b/testsuite/expect/test2.12 index 59dc03454db6b7923aa51a19b9775fc6a32b0afa..990afc7f52fc80e0c28e1bc5a5f5cf3f94de2900 100755 --- a/testsuite/expect/test2.12 +++ b/testsuite/expect/test2.12 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.13 b/testsuite/expect/test2.13 index d35f7d7f762f9aba9670a30afd03971a39937f94..61c82baded4aa979221bd679ea7c016d34f14912 100755 --- a/testsuite/expect/test2.13 +++ b/testsuite/expect/test2.13 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.14 b/testsuite/expect/test2.14 index c2385b9e2a1dbf30ce54bc359a9ec7a1e2e39ad4..da1b8c6b97043604bb2765e5113d397408882cc1 100755 --- a/testsuite/expect/test2.14 +++ b/testsuite/expect/test2.14 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.15 b/testsuite/expect/test2.15 index 849c4797dbe72111e434d2c8ca7826b554dfbce4..06ee4a166ff28ca82ed63d94a00a98b2fd3c06aa 100755 --- a/testsuite/expect/test2.15 +++ b/testsuite/expect/test2.15 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.2 b/testsuite/expect/test2.2 index 637644cc5300e8263a90a8293cbc4820f483db9c..e95543b237bbe4b21aae9f7c453663f9b262d947 100755 --- a/testsuite/expect/test2.2 +++ b/testsuite/expect/test2.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.3 b/testsuite/expect/test2.3 index 6393e7d57e4ad061375ed8729052441911b39c98..0745bfb35ac024976655e6bfe7a5992c23f5a941 100755 --- a/testsuite/expect/test2.3 +++ b/testsuite/expect/test2.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.4 b/testsuite/expect/test2.4 index 8e6a17352292f097991897a0ab4a450981446b58..f9b7dfde17830b61b7b42463c5b5d4d9f6e0cea3 100755 --- a/testsuite/expect/test2.4 +++ b/testsuite/expect/test2.4 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.5 b/testsuite/expect/test2.5 index 4da5630ec49fee50b5c35e5f7fb4332074fb8f16..e31a7d1d23edde680f76706a8f9b93cf056e5845 100755 --- a/testsuite/expect/test2.5 +++ b/testsuite/expect/test2.5 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.6 b/testsuite/expect/test2.6 index 5a995a612f1c15aab75e71074a5b3323990427f7..95e4da92feab4d188daacc0272e256b7cf5d6ded 100755 --- a/testsuite/expect/test2.6 +++ b/testsuite/expect/test2.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.7 b/testsuite/expect/test2.7 index 655ce38e04b5e4182a15293653eb8dd28f1a01d7..26ad1d9acef91e39f476438c614aeab5649fe61d 100755 --- a/testsuite/expect/test2.7 +++ b/testsuite/expect/test2.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.8 b/testsuite/expect/test2.8 index 0f747287fdddb8bf30f696d62fa3dd413d6e8e17..7c271412dc2f42e2c2c714b4bb23e199938145f5 100755 --- a/testsuite/expect/test2.8 +++ b/testsuite/expect/test2.8 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test2.9 b/testsuite/expect/test2.9 index 2151e3129c71332421e611c86527c53e5389557d..6228cb877415c75f62b258d6c787eacc7c53c29a 100755 --- a/testsuite/expect/test2.9 +++ b/testsuite/expect/test2.9 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test20.1 b/testsuite/expect/test20.1 index 160efa085e046afbafdc0764aba3fb45a859a402..a01e367913fcdb036c1fa0d690e173dd883a06b8 100755 --- a/testsuite/expect/test20.1 +++ b/testsuite/expect/test20.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test20.2 b/testsuite/expect/test20.2 index 531075276e140a1f4a3dfc3f7c1a39087b7e89a5..877ab06b4544d4858e6059ee6e615a8427ee7ec2 100755 --- a/testsuite/expect/test20.2 +++ b/testsuite/expect/test20.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test20.3 b/testsuite/expect/test20.3 index 52e77e0fdfcfa68d67215bf3e47c907afb6862d8..ada186d6098ce9300efb74c584760f623731d2dc 100755 --- a/testsuite/expect/test20.3 +++ b/testsuite/expect/test20.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test20.4 b/testsuite/expect/test20.4 index c875050e4cb8780585f0c61a5874de879bba11e6..4c80a0a4049797df56526b2a0b3fc1ac84d09d76 100755 --- a/testsuite/expect/test20.4 +++ b/testsuite/expect/test20.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.1 b/testsuite/expect/test21.1 index 63e3975fc3e8f3ea853b124e229583c21b7d92df..da94779b880b862753307c93f0dc56c0124101f4 100755 --- a/testsuite/expect/test21.1 +++ b/testsuite/expect/test21.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.10 b/testsuite/expect/test21.10 index 66567ba450cfdccfde7aaf70694cd6523014a8a1..5da8000b39b1a0bb5447ac389df5321a4270e667 100755 --- a/testsuite/expect/test21.10 +++ b/testsuite/expect/test21.10 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.11 b/testsuite/expect/test21.11 index b9a06d761f0e7553d93606049a2b3fd2ef2f2417..e71ae621838eace6a3e8c17302a9ef2afa33fc8f 100755 --- a/testsuite/expect/test21.11 +++ b/testsuite/expect/test21.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.12 b/testsuite/expect/test21.12 index 171b60dc8972562028875778c7129d115387dbdb..1dc415391b01f3f6d77fe475f297b5c4fd8effaa 100755 --- a/testsuite/expect/test21.12 +++ b/testsuite/expect/test21.12 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.13 b/testsuite/expect/test21.13 index 369920c6d8f02427554cc4ee5f9fd59152fee7ca..7fda7c585664362cb05525ef137c6c039db72058 100755 --- a/testsuite/expect/test21.13 +++ b/testsuite/expect/test21.13 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.14 b/testsuite/expect/test21.14 index 70c1d01a900b2520edb7c8e55cda480ab6daf6f2..02c0a1a316b774b2d087ca50ac4a953a6f7ae241 100755 --- a/testsuite/expect/test21.14 +++ b/testsuite/expect/test21.14 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.15 b/testsuite/expect/test21.15 index 1e199c7a9af03389f616450bd1f358cf37aa5bb1..33193dc2404c34494a935126ef9195ac4cbcb79d 100755 --- a/testsuite/expect/test21.15 +++ b/testsuite/expect/test21.15 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.16 b/testsuite/expect/test21.16 index 2521f56c537d08b19ab02d49d8eb33e246f510f9..976c80b39130543697cb334464af5fda9e580350 100755 --- a/testsuite/expect/test21.16 +++ b/testsuite/expect/test21.16 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.17 b/testsuite/expect/test21.17 index 7430109bf9f91985320ffa670418b6a4920e60f2..8bafba10ee36d0b6d64417bbff2fb693b4cbe8dd 100755 --- a/testsuite/expect/test21.17 +++ b/testsuite/expect/test21.17 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.18 b/testsuite/expect/test21.18 index 6bd5b886bc314a75954af4d6f784ffb239b3ed94..7740f3864af1f48025da114af5699f5a0e2ebe0f 100755 --- a/testsuite/expect/test21.18 +++ b/testsuite/expect/test21.18 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.19 b/testsuite/expect/test21.19 index 264b10862366f2e3b83b98f8f9fd6b75df9698b9..9614f3936da593568ff46739d781a38227152d73 100755 --- a/testsuite/expect/test21.19 +++ b/testsuite/expect/test21.19 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.2 b/testsuite/expect/test21.2 index a7f0d02eeb1f8edb8a51c81510a1b1a5f39d5808..adb1fcdb9426831fa646545019fdb4c01b77b36c 100755 --- a/testsuite/expect/test21.2 +++ b/testsuite/expect/test21.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.20 b/testsuite/expect/test21.20 index 5b9941404171002589ea71de2da5c227829c943d..0cda30b9c70cccc7bcdc3a944251fa1128577103 100755 --- a/testsuite/expect/test21.20 +++ b/testsuite/expect/test21.20 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.21 b/testsuite/expect/test21.21 index cd93224b09b0e442e53a852661d9c92b5b6c8424..4c83a2b578d2a40dda3f5d9fd576b5db60588858 100755 --- a/testsuite/expect/test21.21 +++ b/testsuite/expect/test21.21 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.22 b/testsuite/expect/test21.22 index 1d58b4da6a33856593fce68f9e2157b050e83490..028e3f2f9597008e71573572cb9313fb13f35100 100755 --- a/testsuite/expect/test21.22 +++ b/testsuite/expect/test21.22 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.23 b/testsuite/expect/test21.23 index c123fe00a5d13cd524df2843a4fe24d5db7a6206..3094005796001e004a054d3f95c1c0f07c809404 100755 --- a/testsuite/expect/test21.23 +++ b/testsuite/expect/test21.23 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -168,7 +168,7 @@ if {$using_slurmdbd} { sleep 5 } -spawn $sacct -p -j $job_id --fields=wckey +spawn $sacct -p -j $job_id --format=wckey expect { -re "$wk" { incr matches @@ -232,7 +232,7 @@ if {$job_id == 0} { sleep 5 } - spawn $sacct -p -j $job_id --fields=wckey + spawn $sacct -p -j $job_id --format=wckey expect { -re "$wk" { incr matches @@ -294,7 +294,7 @@ if {$job_id == 0} { sleep 5 } - spawn $sacct -p -j $job_id --fields=wckey + spawn $sacct -p -j $job_id --format=wckey expect { -re "$wk" { incr matches diff --git a/testsuite/expect/test21.24 b/testsuite/expect/test21.24 index bd74fb82277e0b4c995c221f06e5da99781d05d6..0622b17578853e55ff16ddf479d32c17e3ad22a7 100755 --- a/testsuite/expect/test21.24 +++ b/testsuite/expect/test21.24 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.25 b/testsuite/expect/test21.25 index a9d4fc364916cfe7d7b3318c0f1d1f81695556d4..95afa19608a90ab946292825448f933d71b62cc2 100755 --- a/testsuite/expect/test21.25 +++ b/testsuite/expect/test21.25 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.26 b/testsuite/expect/test21.26 index 1e6a6ff01bea9f6da1a778edbf606cba3639ca5d..e6167b2930f488c60083bb147734280d12301f33 100755 --- a/testsuite/expect/test21.26 +++ b/testsuite/expect/test21.26 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.27 b/testsuite/expect/test21.27 index aae3be5955e8d12ed8814bb842d50fc3ead79947..1bfb0632742774e9e36acb409c0d0d9135b597d9 100755 --- a/testsuite/expect/test21.27 +++ b/testsuite/expect/test21.27 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.28 b/testsuite/expect/test21.28 index 7ebf451040aaee719a9fb78dd311da665851ee75..89194979ded44eff776451e06439f1e910a56698 100755 --- a/testsuite/expect/test21.28 +++ b/testsuite/expect/test21.28 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.29 b/testsuite/expect/test21.29 index 9c6ca4654ce908def3a1155714355063c64c3626..05a7a70dc20f33e8329f5c97479db63c156f950b 100755 --- a/testsuite/expect/test21.29 +++ b/testsuite/expect/test21.29 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.3 b/testsuite/expect/test21.3 index 83763e17889488cd04a4094ba2b7371919255965..111c8e075c503822572333cd755eb698f2dfbdc9 100755 --- a/testsuite/expect/test21.3 +++ b/testsuite/expect/test21.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.30 b/testsuite/expect/test21.30 index 021803bb834b388ff5deb36334322e6d1cac3bb8..2c5ac95d19d1be67179f4621bc068ef1bdaac5d4 100755 --- a/testsuite/expect/test21.30 +++ b/testsuite/expect/test21.30 @@ -11,7 +11,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -42,9 +42,6 @@ source ./inc21.30.8 set test_id "21.30" set exit_code 0 set acct test_acct -set job_id1 "" -set job_id2 "" -set job_id3 "" set user_name "" set qosname name set qostest [format "%s %s" $test_id "qosTest"] @@ -88,8 +85,8 @@ proc check_state { job } { } if {$state_match != 1} { - send_user "\nFAILURE job is running when it should be pending\n" - exit 1 + send_user "\nFAILURE job should be pending, but is not\n" + set exit_code 1 } } @@ -165,6 +162,17 @@ proc endit { } { exit $exit_code } +# +# Check accounting config and bail if not found. 
+# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} elseif { [test_enforce_limits] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountingStorageEnforce\n" + exit 0 +} + #gets user spawn $bin_id -u -n expect { @@ -247,12 +255,18 @@ mod_qos -1 $grcpu_num -1 -1 -1 -1 -1 -1 sleep 5 #test GrpCpus inc21_30_2 +if {$exit_code != 0} { + endit +} mod_qos -1 -1 $grjobs_num -1 -1 -1 -1 -1 sleep 5 # test GrpJob limits inc21_30_3 +if {$exit_code != 0} { + endit +} mod_qos -1 -1 -1 $grsub_num -1 -1 -1 -1 @@ -260,6 +274,9 @@ mod_qos -1 -1 -1 $grsub_num -1 -1 -1 -1 sleep 5 # test GrpSubmit inc21_30_4 +if {$exit_code != 0} { + endit +} mod_qos -1 -1 -1 -1 $maxcpu_num -1 -1 -1 @@ -267,23 +284,35 @@ mod_qos -1 -1 -1 -1 $maxcpu_num -1 -1 -1 sleep 5 #test MaxCpus limits inc21_30_5 +if {$exit_code != 0} { + endit +} mod_qos -1 -1 -1 -1 -1 $maxnode_num -1 -1 sleep 5 #test MaxNode limit inc21_30_6 +if {$exit_code != 0} { + endit +} mod_qos -1 -1 -1 -1 -1 -1 $maxjobs_num -1 sleep 5 #test MaxJobs limit inc21_30_7 +if {$exit_code != 0} { + endit +} mod_qos -1 -1 -1 -1 -1 -1 -1 $maxjobsub_num sleep 5 #test MaxJobsSubmits limit inc21_30_8 +if {$exit_code != 0} { + endit +} endit diff --git a/testsuite/expect/test21.4 b/testsuite/expect/test21.4 index 92defa194c8742432ad2710a3e37d171c4ea2407..91aa4c68e9c5c7aee5080d85f023124c437faacf 100755 --- a/testsuite/expect/test21.4 +++ b/testsuite/expect/test21.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5 index 6d97febe8abf9d60d0b6d6e15e888de0e9ae4ee4..c50f41daf43a8534c08e0b907255187a9b8e0ee8 100755 --- a/testsuite/expect/test21.5 +++ b/testsuite/expect/test21.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.6 b/testsuite/expect/test21.6 index 1c9f3d5f0e49f7bec384a563caefc3a30fe714eb..d64be2f7f03b8cc77d25fe3edc672d2eb02596b9 100755 --- a/testsuite/expect/test21.6 +++ b/testsuite/expect/test21.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.7 b/testsuite/expect/test21.7 index 722abec58d5d661ac19d7f58126ee4c512e5a71f..4312851cf3e05083e787fe91912e068ae6017b42 100755 --- a/testsuite/expect/test21.7 +++ b/testsuite/expect/test21.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.8 b/testsuite/expect/test21.8 index f27abb2c057dc1bf9efc2ff6595798261a2d1d2e..e3ddc0a5bffde83eb48e95338ea5fe4bc4e6a677 100755 --- a/testsuite/expect/test21.8 +++ b/testsuite/expect/test21.8 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9 index 7caf580df5ae12512f58ed846761cfcaab943cd2..434c4342889896b38b9ce5925623ceb0d0b78ebd 100755 --- a/testsuite/expect/test21.9 +++ b/testsuite/expect/test21.9 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test22.1 b/testsuite/expect/test22.1 index 6ecf0330338b2462627fda7ba99d7e10af254a6e..da5b9c6c7a754edd2ed0f5833bbac5f9904cbf48 100755 --- a/testsuite/expect/test22.1 +++ b/testsuite/expect/test22.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -346,7 +346,7 @@ if { $exit_code } { # Use sacct to see if the job loaded # set matches 0 -set my_pid [eval spawn $sacct -p -C $cluster --fields=cluster,account,associd,wckey,wckeyid,start,end,elapsed --noheader --start=$start_str --end=$end_str] +set my_pid [eval spawn $sacct -p -C $cluster --format=cluster,account,associd,wckey,wckeyid,start,end,elapsed --noheader --start=$start_str --end=$end_str] expect { -re "There was a problem" { send_user "FAILURE: there was a problem with the sacctmgr command\n" diff --git a/testsuite/expect/test22.2 b/testsuite/expect/test22.2 index a477da3d3daab0422a2fe05bf17546cb6caf7242..242ad261039b818e144ea0df3a288ff4751f86dc 100755 --- a/testsuite/expect/test22.2 +++ b/testsuite/expect/test22.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test23.1 b/testsuite/expect/test23.1 index 712efa5a26a70e7f3015035ba537842fde1ad71a..fbd2a943e0572bfa8b3be9e3cb308d6c40afe80c 100755 --- a/testsuite/expect/test23.1 +++ b/testsuite/expect/test23.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test23.2 b/testsuite/expect/test23.2 index 7054ffcd7b2e19b5bca5b9b43d9497a69e5ed9a3..7ad66b0aa08f812cdab43c9364e4cbb73ffd4328 100755 --- a/testsuite/expect/test23.2 +++ b/testsuite/expect/test23.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -192,23 +192,10 @@ if {$real_memory < 100} { # Build input script file1 # make_bash_script $file_in1 " - $srun $step_mem_opt $file_in2 + $srun $step_mem_opt sleep 60 & + $srun $step_mem_opt sleep 60 & + $srun $step_mem_opt sleep 60 " - -# -# Build input script file2 -# -make_bash_script $file_in2 " - $srun $step_mem_opt $file_in3 -" - -# -# Build input script file3 -# -make_bash_script $file_in3 " - $srun $step_mem_opt sleep 10 -" - # # Spawn a job via srun using this account # NOTE: --mem option here and in scripts above to permit running more than one @@ -256,12 +243,13 @@ if {$job_id3 == 0} { } # -# Wait for job to start running, then signal it +# Wait for job to start running, then status it # if {[wait_for_job $job_id3 "RUNNING"] != 0} { send_user "\nFAILURE: waiting for job to start running\n" exit 1 } +sleep 5 ################################################################ # # Proc: sstat_job @@ -274,8 +262,8 @@ if {[wait_for_job $job_id3 "RUNNING"] != 0} { # ################################################################ -proc sstat_job { soption job_id} { - global sstat +proc sstat_job { soption job_id } { + global sstat number set debug 0 set exit_code 0 set matches 0 @@ -283,234 +271,192 @@ proc sstat_job { soption job_id} { send_user "sstat -$soption -p -j $job_id\n" if { $soption == "-allsteps" || 
$soption == "a" } { - - spawn $sstat -$soption -p -j $job_id - expect { - -re "SLURM accounting storage is disabled" { - set not_support 1 - exp_continue - } - -re "$job_id.0" { - if {$debug} {send_user "\nmatch1\n"} - incr matches - exp_continue - } - -re "$job_id.1" { - if {$debug} {send_user "\nmatch2\n"} - incr matches - exp_continue - } - -re "$job_id.2" { - if {$debug} {send_user "\nmatch3\n"} - incr matches - exp_continue - } - timeout { - send_user "\nFAILURE: sstat not responding\n" - set exit_code 1 - } - eof { - wait + spawn $sstat -$soption -p -j $job_id + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "$job_id.($number)" { + if {$debug} {send_user "\nmatch $expect_out(1,string)\n"} + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sstat not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 } + return $matches } - if {$not_support != 0} { - send_user "\nWARNING: can not test without accounting enabled\n" - exit 0 - } - if {$matches != 3} { - send_user "\nFAILURE: sstat -$soption failed ($matches)\n" - set exit_code 1 - } - return $matches -} - if { $soption == "-noheader" || $soption == "n" } { - - spawn $sstat -$soption -p -j $job_id - expect { - -re "SLURM accounting storage is disabled" { - set not_support 1 - exp_continue - } - -re "JobID|MaxVMSize|MaxVMSizeNode|MaxVMSizeTask| \ - AveVMSize|MaxRSS|MaxRSSNode|MaxRSSTask|AveRSS| \ - MaxPages|MaxPagesNode|MaxPagesTask|AvePages| \ - MinCPU|MinCPUNode|MinCPUTask|AveCPU|NTasks" { - if {$debug} {send_user "\nmatch4\n"} - incr matches - exp_continue - } - -re "$job_id" { - if {$debug} {send_user "\nmatch5\n"} - incr matches - exp_continue + spawn $sstat -$soption -p -j $job_id + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "JobID|MaxVMSize|MaxVMSizeNode|MaxVMSizeTask| \ 
+ AveVMSize|MaxRSS|MaxRSSNode|MaxRSSTask|AveRSS| \ + MaxPages|MaxPagesNode|MaxPagesTask|AvePages| \ + MinCPU|MinCPUNode|MinCPUTask|AveCPU|NTasks" { + if {$debug} {send_user "\nmatch4\n"} + incr matches + exp_continue + } + -re "$job_id" { + if {$debug} {send_user "\nmatch5\n"} + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sstat not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 } - timeout { - send_user "\nFAILURE: sstat not responding\n" - set exit_code 1 - } - eof { - wait - } - } - - if {$not_support != 0} { - send_user "\nWARNING: can not test without accounting enabled\n" - exit 0 - } - if {$matches != 1} { - send_user "\nFAILURE: sstat -$soption failed ($matches)\n" - set exit_code 1 + return $matches } - return $matches -} if { $soption == "-parsable" || $soption == "p" } { - - spawn $sstat -$soption -p -j $job_id - expect { - -re "SLURM accounting storage is disabled" { - set not_support 1 - exp_continue - } - -re "JobID\\|MaxVMSize\\|MaxVMSizeNode\\|MaxVMSizeTask\\|AveVMSize\\|MaxRSS\\|" { - if {$debug} {send_user "\nmatch6\n"} - incr matches - exp_continue - } - -re "MaxRSSNode\\|MaxRSSTask\\|AveRSS\\|MaxPages\\|MaxPagesNode\\|MaxPagesTask\\|" { - if {$debug} {send_user "\nmatch7\n"} - incr matches - exp_continue - } - -re "AvePages\\|MinCPU\\|MinCPUNode\\|MinCPUTask\\|AveCPU\\|NTasks\\|" { - if {$debug} {send_user "\nmatch8\n"} - incr matches - exp_continue + spawn $sstat -$soption -p -j $job_id + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "JobID\\|MaxVMSize\\|MaxVMSizeNode\\|MaxVMSizeTask\\|AveVMSize\\|MaxRSS\\|" { + if {$debug} {send_user "\nmatch6\n"} + incr matches + exp_continue + } + -re "MaxRSSNode\\|MaxRSSTask\\|AveRSS\\|MaxPages\\|MaxPagesNode\\|MaxPagesTask\\|" { + if {$debug} {send_user "\nmatch7\n"} + incr matches + exp_continue + } + -re 
"AvePages\\|MinCPU\\|MinCPUNode\\|MinCPUTask\\|AveCPU\\|NTasks\\|" { + if {$debug} {send_user "\nmatch8\n"} + incr matches + exp_continue + } + -re "$job_id..\\|" { + if {$debug} {send_user "\nmatch9\n"} + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sstat not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 } - -re "$job_id..\\|" { - if {$debug} {send_user "\nmatch9\n"} - incr matches - exp_continue - } - timeout { - send_user "\nFAILURE: sstat not responding\n" - set exit_code 1 - } - eof { - wait - } - } - - if {$not_support != 0} { - send_user "\nWARNING: can not test without accounting enabled\n" - exit 0 - } - if {$matches != 4} { - send_user "\nFAILURE: sstat -$soption failed ($matches)\n" - set exit_code 1 + return $matches } - return $matches -} if { $soption == "-parsable2" || $soption == "P" } { - - spawn $sstat -$soption -j $job_id - expect { - -re "SLURM accounting storage is disabled" { - set not_support 1 - exp_continue - } - -re "JobID\\|MaxVMSize\\|MaxVMSizeNode\\|MaxVMSizeTask\\|AveVMSize\\|MaxRSS\\|" { - if {$debug} {send_user "\nmatch10\n"} - incr matches - exp_continue + spawn $sstat -$soption -j $job_id + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "JobID\\|MaxVMSize\\|MaxVMSizeNode\\|MaxVMSizeTask\\|AveVMSize\\|MaxRSS\\|" { + if {$debug} {send_user "\nmatch10\n"} + incr matches + exp_continue + } + -re "MaxRSSNode\\|MaxRSSTask\\|AveRSS\\|MaxPages\\|MaxPagesNode\\|MaxPagesTask\\|" { + if {$debug} {send_user "\nmatch11\n"} + incr matches + exp_continue + } + -re "AvePages\\|MinCPU\\|MinCPUNode\\|MinCPUTask\\|AveCPU\\|NTasks" { + if {$debug} {send_user "\nmatch12\n"} + incr matches + exp_continue + } + -re "$job_id..\\|" { + if {$debug} {send_user "\nmatch13\n"} + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sstat not responding\n" + 
set exit_code 1 + } + eof { + wait + } + } + if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 } - -re "MaxRSSNode\\|MaxRSSTask\\|AveRSS\\|MaxPages\\|MaxPagesNode\\|MaxPagesTask\\|" { - if {$debug} {send_user "\nmatch11\n"} - incr matches - exp_continue - } - -re "AvePages\\|MinCPU\\|MinCPUNode\\|MinCPUTask\\|AveCPU\\|NTasks" { - if {$debug} {send_user "\nmatch12\n"} - incr matches - exp_continue - } - -re "$job_id..\\|" { - if {$debug} {send_user "\nmatch13\n"} - incr matches - exp_continue - } - timeout { - send_user "\nFAILURE: sstat not responding\n" - set exit_code 1 - } - eof { - wait - } - } - - if {$not_support != 0} { - send_user "\nWARNING: can not test without accounting enabled\n" - exit 0 - } - if {$matches != 4} { - send_user "\nFAILURE: sstat -$soption failed ($matches)\n" - set exit_code 1 - } return $matches } if { $soption == "-verbose" || $soption == "v" } { - - spawn $sstat -$soption -p -j $job_id - expect { - -re "SLURM accounting storage is disabled" { - set not_support 1 - exp_continue - } - -re "JobID.MaxVMSize.MaxVMSizeNode.MaxVMSizeTask.AveVMSize.MaxRSS" { - if {$debug} {send_user "\nmatch14\n"} - incr matches - exp_continue - } - -re "MaxRSSNode.MaxRSSTask.AveRSS.MaxPages.MaxPagesNode.MaxPagesTask" { - if {$debug} {send_user "\nmatch15\n"} - incr matches - exp_continue - } - -re "AvePages.MinCPU.MinCPUNode.MinCPUTask.AveCPU.NTasks" { - if {$debug} {send_user "\nmatch16\n"} - incr matches - exp_continue - } - -re "$job_id" { - if {$debug} {send_user "\nmatch17\n"} - incr matches - exp_continue - } - timeout { - send_user "\nFAILURE: sstat not responding\n" - set exit_code 1 + spawn $sstat -$soption -p -j $job_id + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "JobID.MaxVMSize.MaxVMSizeNode.MaxVMSizeTask.AveVMSize.MaxRSS" { + if {$debug} {send_user "\nmatch14\n"} + incr matches + exp_continue + } + -re 
"MaxRSSNode.MaxRSSTask.AveRSS.MaxPages.MaxPagesNode.MaxPagesTask" { + if {$debug} {send_user "\nmatch15\n"} + incr matches + exp_continue + } + -re "AvePages.MinCPU.MinCPUNode.MinCPUTask.AveCPU.NTasks" { + if {$debug} {send_user "\nmatch16\n"} + incr matches + exp_continue + } + -re "$job_id" { + if {$debug} {send_user "\nmatch17\n"} + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sstat not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 } - eof { - wait - } - } - - if {$not_support != 0} { - send_user "\nWARNING: can not test without accounting enabled\n" - exit 0 - } - if {$matches != 4} { - send_user "\nFAILURE: sstat -$soption failed ($matches)\n" - set exit_code 1 - } return $matches } - - } ################################################################ @@ -534,65 +480,58 @@ proc sstat_vargs { soption vargs job_id} { send_user "sstat -$soption $vargs -p -j $job_id\n" if { $soption == "o" || $soption == "-format" } { - - spawn $sstat -$soption $vargs -p -j $job_id - expect { - -re "SLURM accounting storage is disabled" { - set not_support 1 - exp_continue - } - -re "AveCPU.AvePages.AveRSS.AveVMSize" { - if {$debug} {send_user "\nmatch18\n"} - incr matches - exp_continue - } - -re "JobID.MaxPages.MaxPagesNode.MaxPagesTask" { - if {$debug} {send_user "\nmatch19\n"} - incr matches - exp_continue - } - -re "MaxRSS.MaxRSSNode.MaxRSSTask.MaxVMSize" { - if {$debug} {send_user "\nmatch20\n"} - incr matches - exp_continue - } - -re "MaxVMSizeNode.MaxVMSizeTask.MinCPU.MinCPUNode" { - if {$debug} {send_user "\nmatch21\n"} - incr matches - exp_continue + spawn $sstat -$soption $vargs -p -j $job_id + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "AveCPU.AvePages.AveRSS.AveVMSize" { + if {$debug} {send_user "\nmatch18\n"} + incr matches + exp_continue + } + -re 
"JobID.MaxPages.MaxPagesNode.MaxPagesTask" { + if {$debug} {send_user "\nmatch19\n"} + incr matches + exp_continue + } + -re "MaxRSS.MaxRSSNode.MaxRSSTask.MaxVMSize" { + if {$debug} {send_user "\nmatch20\n"} + incr matches + exp_continue + } + -re "MaxVMSizeNode.MaxVMSizeTask.MinCPU.MinCPUNode" { + if {$debug} {send_user "\nmatch21\n"} + incr matches + exp_continue + } + -re "MinCPUTask.NTasks" { + if {$debug} {send_user "\nmatch22\n"} + incr matches + exp_continue + } + -re "$job_id" { + if {$debug} {send_user "\nmatch23\n"} + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sstat not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 } - -re "MinCPUTask.NTasks" { - if {$debug} {send_user "\nmatch22\n"} - incr matches - exp_continue - } - -re "$job_id" { - if {$debug} {send_user "\nmatch23\n"} - incr matches - exp_continue - } - timeout { - send_user "\nFAILURE: sstat not responding\n" - set exit_code 1 - } - eof { - wait - } - } - - if {$not_support != 0} { - send_user "\nWARNING: can not test without accounting enabled\n" - exit 0 - } - if {$matches != 6} { - send_user "\nFAILURE: sstat -$soption failed ($matches)\n" - set exit_code 1 - } return $matches } } ################################################################ -sleep 1 set matches [sstat_job a $job_id3] if {$matches != 3} { send_user "\nFAILURE: sstat -a failed ($matches)\n" @@ -601,67 +540,67 @@ if {$matches != 3} { set matches [sstat_job -allsteps $job_id3] if {$matches != 3} { - send_user "\nFAILURE: sstat --allsteps failed ($matches)\n" + send_user "\nFAILURE: sstat --allsteps failed ($matches != 3)\n" set exit_code 1 } set matches [sstat_job n $job_id3] if {$matches != 1} { - send_user "\nFAILURE: sstat -n failed ($matches)\n" + send_user "\nFAILURE: sstat -n failed ($matches != 1)\n" set exit_code 1 } set matches [sstat_job -noheader $job_id3] if {$matches != 1} { - 
send_user "\nFAILURE: sstat --noheader failed ($matches)\n" + send_user "\nFAILURE: sstat --noheader failed ($matches != 1)\n" set exit_code 1 } set matches [sstat_job p $job_id3] if {$matches != 4} { - send_user "\nFAILURE: sstat -p failed ($matches)\n" + send_user "\nFAILURE: sstat -p failed ($matches != 4)\n" set exit_code 1 } set matches [sstat_job -parsable $job_id3] if {$matches != 4} { - send_user "\nFAILURE: sstat --parsable failed ($matches)\n" + send_user "\nFAILURE: sstat --parsable failed ($matches != 4)\n" set exit_code 1 } set matches [sstat_job P $job_id3] if {$matches != 4} { - send_user "\nFAILURE: sstat -P failed ($matches)\n" + send_user "\nFAILURE: sstat -P failed ($matches != 4)\n" set exit_code 1 } set matches [sstat_job -parsable2 $job_id3] if {$matches != 4} { - send_user "\nFAILURE: sstat --parsable2 failed ($matches)\n" + send_user "\nFAILURE: sstat --parsable2 failed ($matches != 4)\n" set exit_code 1 } set matches [sstat_job v $job_id3] if {$matches != 4} { - send_user "\nFAILURE: sstat -v failed ($matches)\n" + send_user "\nFAILURE: sstat -v failed ($matches != 4)\n" set exit_code 1 } set matches [sstat_job -verbose $job_id3] if {$matches != 4} { - send_user "\nFAILURE: sstat --verbose failed ($matches)\n" + send_user "\nFAILURE: sstat --verbose failed ($matches != 4)\n" set exit_code 1 } set matches [sstat_vargs o $ac,$ap,$ar,$av,$ji,$mp,$mpn,$mpt,$mr,$mrn,$mrt,$mvs,$mvn,$mvt,$mc,$mn,$mt,$nt $job_id3] if {$matches != 6} { - send_user "\nFAILURE: sstat -o failed ($matches)\n" + send_user "\nFAILURE: sstat -o failed ($matches != 6)\n" set exit_code 1 } set matches [sstat_vargs -format $ac,$ap,$ar,$av,$ji,$mp,$mpn,$mpt,$mr,$mrn,$mrt,$mvs,$mvn,$mvt,$mc,$mn,$mt,$nt $job_id3] if {$matches != 6} { - send_user "\nFAILURE: sstat --format failed ($matches)\n" + send_user "\nFAILURE: sstat --format failed ($matches != 6)\n" set exit_code 1 } diff --git a/testsuite/expect/test24.1 b/testsuite/expect/test24.1 index 
e7f09bfdecf9278df8875837fc6f356a15adcd36..0498de1fc2a74b9fc682eeebfe8ee8bc11ab3b4c 100755 --- a/testsuite/expect/test24.1 +++ b/testsuite/expect/test24.1 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -135,7 +135,7 @@ expect { } if {$matches != 13} { - send_user "\nFAILURE: we didn't get the correct priorities from the plugin $matches\n" + send_user "\nFAILURE: we didn't get the correct priorities from the plugin ($matches != 13)\n" set exit_code 1 } diff --git a/testsuite/expect/test24.1.prog.c b/testsuite/expect/test24.1.prog.c index 5a3e26290b1c1f1f0a23a5fdb18fbab1a0e8369f..64e7db0738b477fcdadf48edbd9c9ad492e8eb76 100644 --- a/testsuite/expect/test24.1.prog.c +++ b/testsuite/expect/test24.1.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under @@ -54,6 +54,7 @@ void *acct_db_conn = NULL; uint32_t cluster_cpus = 50; int long_flag = 1; int exit_code = 0; +uint16_t part_max_priority = 1; sshare_time_format_t time_format = SSHARE_TIME_MINS; char *time_format_string = "Minutes"; time_t last_job_update = (time_t) 0; @@ -96,11 +97,11 @@ int _setup_assoc_list(void) update.objects = list_create(slurmdb_destroy_association_rec); /* Just so we don't have to worry about lft's and rgt's we - * will just push these on in order. + * will just append these on in order. * Note: the commented out lfts and rgts as of 10-29-10 are - * correct. 
We do a push instead of append so they go on + * correct. By doing an append they go on * sorted in hierarchy order. The sort that happens inside - * the internal slurm code will sort alpha automatically, (We + * the internal slurm code will sort alpha automatically, (You can * test this by putting AccountF before AccountE. */ @@ -112,7 +113,7 @@ int _setup_assoc_list(void) /* assoc->lft = 1; */ /* assoc->rgt = 28; */ assoc->acct = xstrdup("root"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -123,7 +124,7 @@ int _setup_assoc_list(void) /* assoc->lft = 2; */ /* assoc->rgt = 13; */ assoc->acct = xstrdup("AccountA"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountA id 2 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -134,7 +135,7 @@ int _setup_assoc_list(void) assoc->parent_id = 2; assoc->shares_raw = 30; assoc->acct = xstrdup("AccountB"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountB id 21 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -147,7 +148,7 @@ int _setup_assoc_list(void) assoc->usage->usage_raw = 20; assoc->acct = xstrdup("AccountB"); assoc->user = xstrdup("User1"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountA id 2 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -158,7 +159,7 @@ int _setup_assoc_list(void) assoc->parent_id = 2; assoc->shares_raw = 10; assoc->acct = xstrdup("AccountC"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountC id 22 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -171,7 +172,7 @@ int _setup_assoc_list(void) assoc->usage->usage_raw = 25; assoc->acct = xstrdup("AccountC"); assoc->user = xstrdup("User2"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); assoc = 
xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); @@ -183,7 +184,7 @@ int _setup_assoc_list(void) assoc->usage->usage_raw = 0; assoc->acct = xstrdup("AccountC"); assoc->user = xstrdup("User3"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -194,7 +195,7 @@ int _setup_assoc_list(void) assoc->parent_id = 1; assoc->shares_raw = 60; assoc->acct = xstrdup("AccountD"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountD id 3 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -205,7 +206,7 @@ int _setup_assoc_list(void) assoc->parent_id = 3; assoc->shares_raw = 25; assoc->acct = xstrdup("AccountE"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountE id 31 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -218,7 +219,7 @@ int _setup_assoc_list(void) assoc->usage->usage_raw = 25; assoc->acct = xstrdup("AccountE"); assoc->user = xstrdup("User4"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountD id 3 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -229,7 +230,7 @@ int _setup_assoc_list(void) assoc->parent_id = 3; assoc->shares_raw = 35; assoc->acct = xstrdup("AccountF"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of AccountF id 32 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -242,7 +243,7 @@ int _setup_assoc_list(void) assoc->usage->usage_raw = 0; assoc->acct = xstrdup("AccountF"); assoc->user = xstrdup("User5"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -253,7 +254,7 @@ int _setup_assoc_list(void) assoc->parent_id = 1; assoc->shares_raw = 0; assoc->acct = xstrdup("AccountG"); - list_push(update.objects, assoc); + 
list_append(update.objects, assoc); /* sub of AccountG id 4 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); @@ -266,7 +267,7 @@ int _setup_assoc_list(void) assoc->usage->usage_raw = 30; assoc->acct = xstrdup("AccountG"); assoc->user = xstrdup("User6"); - list_push(update.objects, assoc); + list_append(update.objects, assoc); assoc_mgr_update_assocs(&update); list_destroy(update.objects); @@ -298,9 +299,8 @@ int main (int argc, char **argv) xfree(conf->accounting_storage_type); conf->accounting_storage_type = xstrdup("accounting_storage/slurmdbd"); /* set up a known environment to test against. Since we are - only concerned about the fairshare we won't look at the other - factors here. - */ + * only concerned about the fairshare we won't look at the other + * factors here. */ conf->priority_decay_hl = 1; conf->priority_favor_small = 0; conf->priority_max_age = conf->priority_decay_hl; @@ -325,7 +325,7 @@ int main (int argc, char **argv) if (slurm_priority_init() != SLURM_SUCCESS) fatal("failed to initialize priority plugin"); /* on some systems that don't have multiple cores we need to - sleep to make sure the thread gets started. */ + * sleep to make sure the thread gets started. 
*/ sleep(1); memset(&resp, 0, sizeof(shares_response_msg_t)); resp.assoc_shares_list = assoc_mgr_get_shares(NULL, 0, NULL, NULL); @@ -334,13 +334,13 @@ int main (int argc, char **argv) /* free memory */ if (slurm_priority_fini() != SLURM_SUCCESS) fatal("failed to finalize priority plugin"); - if(job_list) + if (job_list) list_destroy(job_list); - if(resp.assoc_shares_list) + if (resp.assoc_shares_list) list_destroy(resp.assoc_shares_list); - if(assoc_mgr_association_list) + if (assoc_mgr_association_list) list_destroy(assoc_mgr_association_list); - if(assoc_mgr_qos_list) + if (assoc_mgr_qos_list) list_destroy(assoc_mgr_qos_list); return 0; } diff --git a/testsuite/expect/test24.2 b/testsuite/expect/test24.2 index ac140d05d43b422b362555706be8cb2bd10a6e86..69352db8ac16f767ad3e2a0e86df6f034537ef2f 100755 --- a/testsuite/expect/test24.2 +++ b/testsuite/expect/test24.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test25.1 b/testsuite/expect/test25.1 index b823f9e786e704bd7a38aaf29c4d882dde8c8734..105075214db96fdff2e80f812dea4afabc3c7360 100755 --- a/testsuite/expect/test25.1 +++ b/testsuite/expect/test25.1 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test26.1 b/testsuite/expect/test26.1 index 2490034ca5c95c965706bc50fe06e058f6e41738..ba10e0fe47f4f2352727eaa0d0003c0c4f48065e 100755 --- a/testsuite/expect/test26.1 +++ b/testsuite/expect/test26.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test26.2 b/testsuite/expect/test26.2 index c336fb6c0d050ea7b2e783b36f40eb0e39d05416..6ddd7a65b4c61e3f5bd398ca2a11e6c8a85d5145 100755 --- a/testsuite/expect/test26.2 +++ b/testsuite/expect/test26.2 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test27.1 b/testsuite/expect/test27.1 index b0614ebac4905ddc04932408916a1e8213cbf5ff..c56f9d096c50f35c908134456be7d6e8cc50f26d 100755 --- a/testsuite/expect/test27.1 +++ b/testsuite/expect/test27.1 @@ -10,7 +10,7 @@ # Copyright (C) 2012 SchedMD LLC # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test27.2 b/testsuite/expect/test27.2 index c7a4f106092f93c8d61cd3984af9fb87448967d5..155af9fdb50bb205a53369512855c9f096514b42 100755 --- a/testsuite/expect/test27.2 +++ b/testsuite/expect/test27.2 @@ -10,7 +10,7 @@ # Copyright (C) 2012 SchedMD LLC # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test27.3 b/testsuite/expect/test27.3 index 166ab7b77d93953b851f2faec42e8512bb7d5a01..ccdd9dbd34cf79b336d69b2b07fb04665f26a675 100755 --- a/testsuite/expect/test27.3 +++ b/testsuite/expect/test27.3 @@ -10,7 +10,7 @@ # Copyright (C) 2012 SchedMD LLC # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test27.4 b/testsuite/expect/test27.4 index d086095742794eca093c68eb3d3db8937c62dd37..e37ab938d65dbc9a887b6efb8aa4dd1e12de1a33 100755 --- a/testsuite/expect/test27.4 +++ b/testsuite/expect/test27.4 @@ -10,7 +10,7 @@ # Copyright (C) 2012 SchedMD LLC # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test27.5 b/testsuite/expect/test27.5 index 11089fdcf73a6784651221c172d21e0471731a32..e37e46084dae170c92eba622f5010cb1688031ff 100755 --- a/testsuite/expect/test27.5 +++ b/testsuite/expect/test27.5 @@ -10,7 +10,7 @@ # Copyright (C) 2012 SchedMD LLC # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test28.1 b/testsuite/expect/test28.1 new file mode 100755 index 0000000000000000000000000000000000000000..2fdb6a3c4fe681a62b4e134458a671006e56467b --- /dev/null +++ b/testsuite/expect/test28.1 @@ -0,0 +1,208 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Confirms that the sbatch --array option is submitted and +# scancel cancels the job array. +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. 
+# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "28.1" +set exit_code 0 +set file_script "test$test_id.sh" +set file_in "test$test_id.input" +set file_out "test$test_id.output" +set array_id "" +set job_id "" +set array_size 4 + +print_header $test_id + +if {[get_array_config] < [expr $array_size + 1]} { + send_user "\nWARNING: MaxArraySize is too small\n" + exit 0 +} + +proc scontrol_check { job_id } { + global scontrol alpha_numeric_under array_id exit_code + + spawn $scontrol show job $job_id + expect { + -re "JobState=($alpha_numeric_under)" { + set tmp $expect_out(1,string) + if {[string compare $tmp "CANCELLED"]} { + send_user "\nFAILURE: Job was not cancelled\n" + set exit_code 1 + } + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } +} + +proc multi_squeue_check {job_id} { + + global squeue number array_id exit_code + + set array_in 0 + spawn $squeue -r + expect { + -re "$job_id\_($number\)" { + set array_id $expect_out(1,string) + if {$array_id != $array_in} { + send_user "\nFAILURE: array IDs do not match $array_id != $array_in\n" + set exit_code 1 + } + incr array_in + exp_continue + } + timeout { + send_user "\nFAILURE: squeue is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + return $array_in +} + +proc multi_scontrol_check { job_id } { + + global scontrol number array_id array_cnt exit_code + + set array_cnt 0 + spawn $scontrol show job $job_id + expect { + 
-re "ArrayTaskId=($number)" { + set array_id $expect_out(1,string) + if {$array_id != $array_cnt} { + send_user "\nFAILURE: incorrect number of jobs $array_id = $array_cnt\n" + set exit_code 1 + } + incr array_cnt + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + return $array_cnt +} + +############Test Starts Here########### +make_bash_script $file_script "sleep 10" + +# submit a batch job with an array of $array_size +spawn $sbatch -N1 --array=0-[expr $array_size - 1] --begin=midnight --input=$file_in --output=$file_out $file_script +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + send_user "\njob $job_id was submitted\n" + } + -re "error" { + send_user "\nFAILURE: sbatch did not submit jobs\n" + exit 1 + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# checks all the job array indexes +set job_cnt [multi_scontrol_check $job_id] +if {$job_cnt != $array_size} { + send_user "\nFAILURE: job count found by scontrol bad ($job_cnt != $array_size)\n" + set exit_code 1 +} + +# uses squeue to check for the jobs +set job_cnt [multi_squeue_check $job_id] +if {$job_cnt != $array_size} { + send_user "\nFAILURE: job count found by squeue bad ($job_cnt != $array_size)\n" + set exit_code 1 +} + +# cancel a job with a specific job array index +set idmatch 0 +spawn $scancel -v $job_id\_$array_id +expect { + -re "Terminating job" { + incr idmatch + exp_continue + } + timeout { + send_user "\nFAILURE: scancel not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# checks to see the job was cancelled +scontrol_check $job_id\_$array_id + +# cancels the entire job array +spawn $scancel -v $job_id +expect { + -re "Terminating job" { + exp_continue + } + timeout { + send_user "\nFAILURE: scancel not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# checks that all the job indexes were
cancelled +scontrol_check $job_id + +if {$exit_code == 0} { + file delete $file_in $file_out $file_script + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test28.2 b/testsuite/expect/test28.2 new file mode 100755 index 0000000000000000000000000000000000000000..1971abc0affa04701aa06d9939f0e1a3becda28a --- /dev/null +++ b/testsuite/expect/test28.2 @@ -0,0 +1,163 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# checks that the --array environment varibles are correct, and +# checks that the --output and --error files were created and +# contain the correct information. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "28.2" +set file_script "test$test_id.sh" +set file_out "test$test_id-%A_%a.output" +set file_error "test$test_id-%A_%a.error" +set job_id 0 +set array_begin 0 +set array_end 4 +set array_id "" +set array_in "" +set array_var "" +set exit_code 0 + +if {[get_array_config] < [expr $array_end + 1]} { + send_user "\nWARNING: MaxArraySize is too small\n" + exit 0 +} + +proc delete_file {} { + + global test_id job_id bin_rm array_end file_script + + set cnt 0 + for {set cnt 0} {$cnt<$array_end} {incr cnt} { + exec $bin_rm -f test$test_id-$job_id\_$cnt\.output + exec $bin_rm -f test$test_id-$job_id\_$cnt\.error + + } + exec $bin_rm -f $file_script +} + +# delete any remaining output files +delete_file + +make_bash_script $file_script " +$bin_echo array_id=\$SLURM_ARRAY_JOB_ID +$bin_echo task_id=\$SLURM_ARRAY_TASK_ID +$bin_sleep aaaa +exit 0 +" + +# submit a batch with an array from 0 to 3; array size 4 +set job_id 0 +spawn $sbatch --array=$array_begin-[expr $array_end -1] --output=$file_out --error=$file_error -t1 $file_script +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } + +} +if {$job_id == 0} { + send_user "\nFAILURE: sbatch did not submit jobs\n" + exit 1 +} + +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: waiting for job to complete\n" + set exit_code 1 +} + +# Checks that the correct error and output files were created with the correct format +for {set cnt 0} {$cnt<$array_end} {incr cnt} { + if {[wait_for_file test$test_id-$job_id\_$cnt\.output] != 0} { + send_user "\nFAILURE: outout file was not created or not the correct format. 
test$test_id-$job_id\_$cnt\.output not found\n" + set exit_code 1 + } + if {[wait_for_file test$test_id-$job_id\_$cnt\.error] !=0} { + send_user "\nFAILURE: error file was not created or not the correct format. test$test_id-$job_id\_$cnt\.error not found\n" + set exit_code 1 + } +} +send_user "\nchecking environment variables\n" +# Checks that the array job ids are correct +for {set index 0} {$index < $array_end} {incr index} { + set env_cnt 0 + spawn $bin_cat test$test_id-$job_id\_$index\.output + expect { + -re "array_id=$job_id" { + incr env_cnt + exp_continue + } + -re "task_id=$index" { + incr env_cnt + exp_continue + } + eof { + wait + } + } + if {$env_cnt != 2} { + send_user "\nFAILURE: Missing environment variables in file test$test_id-$job_id\_$index\.output\n" + set exit_code 1 + } +} + +# checks the contents of of the error file +for {set index 0} {$index < $array_end} {incr index} { + set err_match 0 + spawn $bin_cat test$test_id-$job_id\_$index\.error + expect { + -re "invalid time" { + send_user "\nDo not worry this error is expected\n" + incr err_match + } + eof { + wait + } + } + if {$err_match != 1} { + send_user "\nFAILURE: Bad contents in the error file test$test_id-$job_id\_$index\.error\n" + set exit_code 1 + } +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" +} +delete_file +exit $exit_code diff --git a/testsuite/expect/test28.3 b/testsuite/expect/test28.3 new file mode 100755 index 0000000000000000000000000000000000000000..948ecfba94e87008cca5d16c15329fe654e2c64a --- /dev/null +++ b/testsuite/expect/test28.3 @@ -0,0 +1,105 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Validates that the scontrol show job option has the job +# array fields +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." 
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "28.3" +set exit_code 0 +set array_size 4 +set file_script "test$test_id.sh" +set job_id 0 + +print_header $test_id + +if {[get_array_config] < [expr $array_size + 1]} { + send_user "\nWARNING: MaxArraySize is too small\n" + exit 0 +} + +proc check_ids { job } { + + global scontrol array_size exit_code number + + for {set index 0} {$index<$array_size} {incr index} { + set match 0 + spawn $scontrol show job $job\_$index + expect { + -re "JobId=($number) ArrayJobId=$job ArrayTaskId=($number) " { + if {$job == [expr $expect_out(1,string) - $expect_out(2,string)]} { + incr match + } + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$match != 1} { + send_user "\nFAILURE: Array IDs for $job\_$index not found\n" + set exit_code 1 + } + } +} + +make_bash_script $file_script "sleep 10" + +spawn $sbatch -N1 --array=0-[expr $array_size - 1] --begin=midnight --output=/dev/null $file_script +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + send_user "\njob $job_id was submitted\n" + } + -re "error" { + send_user "\nFAILURE: sbatch did not submit jobs\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id != 0} { + check_ids $job_id + cancel_job $job_id +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_script + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test28.4 b/testsuite/expect/test28.4 new file mode 100755 index 0000000000000000000000000000000000000000..35901033bbf1ec9eb9dcdfa966e7303c8e22a132 --- /dev/null +++ b/testsuite/expect/test28.4 @@ -0,0 +1,132 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Validates that scontrol update will update job
using +# the jobid and jobid with array index +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "28.4" +set exit_code 0 +set array_size 4 +set file_script "test$test_id.sh" +set job_id 0 +set array_in 2 + +print_header $test_id + +if {[get_array_config] < [expr $array_size + 1]} { + send_user "\nWARNING: MaxArraySize is to small\n" + exit 0 +} + +proc check_update { job job_time } { + + global scontrol number exit_code + + set time_match 0 + spawn $scontrol show job $job + expect { + -re "TimeLimit=00:($number):00" { + if {$expect_out(1,string) == $job_time } { + incr time_match + } + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$time_match != 1} { + send_user "\nFAILURE: job $job was never updated\n" + set exit_code 1 + } +} + +proc update_job { job job_time } { + + global scontrol exit_code + + spawn $scontrol update jobid=$job TimeLimit=$job_time + expect { + timeout { + send_user "FAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } +} + +#####################Test starts here###################### +# Submit a job array with 4 elements +make_bash_script $file_script "sleep 10" + +spawn $sbatch -N1 --array=0-[expr $array_size -1] --begin=midnight --output=/dev/null -t1 $file_script +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + -re "error" { + send_user "\nFAILURE: sbatch did not submit jobs\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: sbatch did not submit jobs\n" + exit 1 +} + +# Update a specific job using job array syntax +update_job ${job_id}_$array_in 2 +check_update ${job_id}_$array_in 2 + +# Update a specific job using job ID syntax +update_job [expr $job_id + 1] 4 +check_update ${job_id}_1 4 + +cancel_job $job_id + +if {$exit_code == 0}
{ + exec $bin_rm -f $file_script + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test28.5 b/testsuite/expect/test28.5 new file mode 100755 index 0000000000000000000000000000000000000000..500907ac59250c659d9e92c9ae6e2bf6db1806b8 --- /dev/null +++ b/testsuite/expect/test28.5 @@ -0,0 +1,194 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# test that scontrol can hold and release a whole job array +# or an individual job index. +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################ +source ./globals + +set test_id "28.5" +set exit_code 0 +set job_id 0 +set array_in 2 +set array_size 4 +set file_script "test$test_id.sh" + +print_header $test_id + +if {[get_array_config] < [expr $array_size + 1]} { + send_user "\nWARNING: MaxArraySize is to small for this test\n" + exit 0 +} + +proc hold_job {job} { + + global scontrol exit_code + + spawn $scontrol hold $job + expect { + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } +} + +proc release_job {job} { + + global scontrol exit_code + + spawn $scontrol release $job + expect { + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } +} + +#change this so it checks through all the jobs in the array +proc check_hold_job {job} { + + global scontrol exit_code number + + set pmatch 0 + spawn $scontrol show job $job + expect { + -re "Priority=($number)" { + if {$expect_out(1,string) == 0} { + incr pmatch + } + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$pmatch!=1} { + send_user "\nFAILURE: job $job did not hold\n" + set exit_code 1 + } +} + +proc check_release_job { job } { + + global scontrol exit_code number + + set pmatch 0 + spawn $scontrol show job $job + expect { + -re "Priority=($number)" { + if {$expect_out(1,string) != 0} { + incr pmatch + } + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$pmatch!=1} { + send_user "\nFAILURE: job $job was not released\n" + set exit_code 1 + } +} + +################Test Starts Here################ +make_bash_script $file_script "sleep 10" + +spawn $sbatch -N1 --array=0-[expr $array_size -1] --begin=midnight --output=/dev/null -t1 $file_script +expect { + -re "Submitted batch job ($number)" { + set job_id 
$expect_out(1,string) + send_user "\njob $job_id was submitted\n" + exp_continue + } + -re "error" { + send_user "\nFAILURE: sbatch did not submit jobs\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} + + +# hold the whole array then release individual jobs in the array +if {$job_id != 0} { + + send_user "\nHolding entire job array\n" + hold_job $job_id + check_hold_job ${job_id}_0 + check_hold_job ${job_id}_1 + + send_user "\nReleasing job index $array_in in the array\n" + release_job ${job_id}_$array_in + check_hold_job ${job_id}_0 + check_release_job ${job_id}_$array_in + + send_user "\nReleasing the entire job array\n" + release_job $job_id + check_release_job ${job_id}_0 + check_release_job ${job_id}_1 + + send_user "\nHolding job index $array_in in the array\n" + hold_job ${job_id}_$array_in + check_hold_job ${job_id}_$array_in + + send_user "\nReleasing job index $array_in in the array\n" + release_job ${job_id}_$array_in + check_release_job ${job_id}_$array_in + + cancel_job $job_id +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_script + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test28.6 b/testsuite/expect/test28.6 new file mode 100755 index 0000000000000000000000000000000000000000..0ee5f95c4db0482ec980dce90a645e75244e6420 --- /dev/null +++ b/testsuite/expect/test28.6 @@ -0,0 +1,183 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# test that when a job array is submitted to multiple +# partitions that the jobs run on all the assigned partitions +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated.
+############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "test28.6" +set job_id 0 +set def_part "" +set test_part "$test_id\_part" +set script "$test_id\_script" +set array_size 2 +set exit_code 0 + +print_header $test_id + +if {[get_array_config] < [expr $array_size + 1]} { + send_user "\nWARNING: MaxArraySize is to small for this test\n" + exit 0 +} + +make_bash_script $script " +sleep 100 +" + +proc check_job { job_id } { + global scontrol array_size number exit_code + + for {set index 0} {$index<$array_size} {incr index} { + + set matches 0 + spawn $scontrol show job $job_id\_$index + expect { + -re "JobState=RUNNING" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$matches != 1} { + send_user "\nFAILURE: job $job_id\_$index was not found\n" + set exit_code 1 + } + } +} + +# +# Get the default partition +# +set def_part [default_partition] +if {[string compare $def_part ""] == 0} { + send_user "\nFAILURE: default partition was not found\n" + set exit_code 1 +} + +# +# Get the available nodes from the partition +# +set node_list "" +spawn $sinfo -h -o "%N" -p $def_part +expect { + -re "($alpha_numeric_nodelist)" { + set node_list $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sinfo is not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {[string compare $node_list ""] == 0} { + send_user "\nFAILURE: node list was not found\n" + set exit_code 1 +} + +# +# Create partition +# +spawn $scontrol create PartitionName=$test_part Nodes=$node_list +expect { + -re "error" { + send_user "\nFAILURE: scontrol did not create partition\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# +# Submit array job on default partition +# +spawn $sbatch -N1 -t1 -o /dev/null -e /dev/null
--array=0-[expr $array_size -1] --partition=$def_part,$test_part $script +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch is not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: sbatch did not submit job\n" + set exit_code 1 +} + +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: Job never started\n" + set exit_code 1 +} +cancel_job $job_id + +# +# Delete the new partition +# +spawn $scontrol delete partition=$test_part +expect { + -re "error" { + send_user "\nFAILURE: partition was not deleted\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$exit_code == 0} { + exec $bin_rm -f $script + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test3.1 b/testsuite/expect/test3.1 index d31e01938f158868b513369fa9d6a754788f94b1..e89ff5371acfe8428b6bbfcae57625e8ee361336 100755 --- a/testsuite/expect/test3.1 +++ b/testsuite/expect/test3.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.10 b/testsuite/expect/test3.10 index de5e01354f86b794eeeb2247a6b7c4c6f96df8f3..d612ac48d55a135f2eb9e54493f671e3b59cbe85 100755 --- a/testsuite/expect/test3.10 +++ b/testsuite/expect/test3.10 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.11 b/testsuite/expect/test3.11 index 824c93675bf939566c31025434e2288d850d2713..72cfd41c1c0b5e892f0fbe8efd2e81ebd232f268 100755 --- a/testsuite/expect/test3.11 +++ b/testsuite/expect/test3.11 @@ -14,7 +14,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -40,6 +40,7 @@ source ./inc3.11.5 source ./inc3.11.6 source ./inc3.11.7 source ./inc3.11.8 +source ./inc3.11.9 @@ -53,6 +54,10 @@ set res_thread_cnt 0 set user_name "" set def_partition "" set def_node "" +set def_share_force 0 +set def_node_name "" +set def_node_inx_min -1 +set def_node_inx_max -1 set ii 0 print_header $test_id @@ -65,7 +70,7 @@ print_header $test_id proc create_res { res_params failure_expected } { #exp_internal 1 global scontrol - global alpha_numeric_under number + global alpha_numeric_under number alpha_numeric_nodelist global res_name global res_nodes global res_nodecnt @@ -144,7 +149,7 @@ proc create_res { res_params failure_expected } { set ret_code 1 exp_continue } - -re "Nodes=($alpha_numeric_under)" { + -re "Nodes=($alpha_numeric_nodelist)" { set res_nodes $expect_out(1,string) exp_continue } @@ -272,6 +277,17 @@ if {[string compare $def_partition ""] == 0} { send_user "\nFAILURE: failed to find default partition\n" exit 1 } + +spawn $scontrol show partition $def_partition +expect { + -re " Shared=FORCE" { + set def_share_force 1 + exp_continue + } + eof { + wait + } +} spawn $sinfo -h -o "=%N=" -p $def_partition expect { -re "=(.+)=" { @@ -282,6 +298,18 @@ expect { wait } } +spawn $sinfo -h -o "=%N=" -p $def_partition +expect { + -re "=($alpha_numeric).($number)-($number).=" { + set def_node_name $expect_out(1,string) + set 
def_node_inx_min $expect_out(2,string) + set def_node_inx_max $expect_out(3,string) + exp_continue + } + eof { + wait + } +} if {[string compare $def_node ""] == 0} { send_user "\nFAILURE:default partition seems to have no nodes\n" exit 1 @@ -329,22 +357,22 @@ expect { } } -log_user 0 set cons_res_actived 0 -spawn $scontrol show config -expect { - -re "select/cons_res" { - set cons_res_actived 1 - } - timeout { - send_user "\nFAILURE: scontrol not responding\n" - set exit_code 1 - } - eof { - wait +if {$def_share_force == 0} { + spawn $scontrol show config + expect { + -re "select/cons_res" { + set cons_res_actived 1 + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } } } -log_user 1 inc3_11_1 inc3_11_2 @@ -355,9 +383,12 @@ inc3_11_6 if {$cons_res_actived == 1} { inc3_11_7 inc3_11_8 + inc3_11_9 } if {$exit_code == 0} { send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" } exit $exit_code diff --git a/testsuite/expect/test3.12 b/testsuite/expect/test3.12 index 8cafc04263aa1871b025ee7ee7dfef52f108af07..9847bbc93984751281d64d7296b644e526c46f16 100755 --- a/testsuite/expect/test3.12 +++ b/testsuite/expect/test3.12 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.2 b/testsuite/expect/test3.2 index efa7d3daad2ef6ea915177c02635e008bd2da3a3..c2cffb5046b8af5837efd3cc4172c0121bc8ac2e 100755 --- a/testsuite/expect/test3.2 +++ b/testsuite/expect/test3.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. 
# Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.3 b/testsuite/expect/test3.3 index a25f129d7a14de0a4f95c5aacacf7cecb3fe19d5..84aad28767196a0566f45992f6d52cc7180d5c75 100755 --- a/testsuite/expect/test3.3 +++ b/testsuite/expect/test3.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.4 b/testsuite/expect/test3.4 index e2d69508c4b332df8fc24a9a050d0bf850ec593c..367771ca4fde056af76f4ca03589604748228758 100755 --- a/testsuite/expect/test3.4 +++ b/testsuite/expect/test3.4 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.5 b/testsuite/expect/test3.5 index 7621e5b72c5781b9e2f3d4367ad70fc348570ab6..247c3d0966c37380b8b275ec6cf2b684d2b7d059 100755 --- a/testsuite/expect/test3.5 +++ b/testsuite/expect/test3.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.6 b/testsuite/expect/test3.6 index 124a3f00c29b52cb39214af3a17424d0234734df..6b58d1605130698037e47286bd990861f2d30d8c 100755 --- a/testsuite/expect/test3.6 +++ b/testsuite/expect/test3.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.7 b/testsuite/expect/test3.7 index d19fb36fc16116c43a0f01ff435b16fbf9640352..1018a4bd4dcb03910172c0ed5806348908ccb2db 100755 --- a/testsuite/expect/test3.7 +++ b/testsuite/expect/test3.7 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -126,7 +126,8 @@ exec $bin_cc -o $file_prog ${file_prog}.c exec $bin_chmod 700 $file_prog # -# Submit two jobs to the same node +# Submit two jobs to the same node,. 
+# The first job includes srun, second only the application # set srun_pid [spawn $sbatch -N1 -t2 --output=$file_out1 $file_prog_sh1] expect { @@ -177,7 +178,6 @@ if {[string compare $host_name ""] == 0} { exit 1 } # Submit another job to that same node -wait_for_job $job_id2 RUNNING set srun_pid [spawn $sbatch -N1 -t2 --output=$file_out2 $file_prog_sh2] expect { -re "Submitted batch job ($number)" { @@ -250,6 +250,7 @@ if {$exit_code == 0} { exec $bin_rm -f $file_out1 $file_out2 $file_prog $file_prog_sh1 $file_prog_sh2 send_user "\nSUCCESS\n" } else { - send_user "\nFAILURE: May be due to use of gang scheduler, a race conditions, or the ProcTrack plugin not identifying the application as part of the job\n" + send_user "\nFAILURE: May be due to use of gang scheduler, a race conditions, or the ProcTrack plugin not identifying the application as part of the job.\n" + send_user "\nFAILURE: launch/poe and proctrack/pgid are incompatible.\n" } exit $exit_code diff --git a/testsuite/expect/test3.7.prog.c b/testsuite/expect/test3.7.prog.c index ec0fac96c358c8ac33126f2d41922378f618a719..3f92689419b55a1bd9234a1dd141d01624b2d19d 100644 --- a/testsuite/expect/test3.7.prog.c +++ b/testsuite/expect/test3.7.prog.c @@ -10,7 +10,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.8 b/testsuite/expect/test3.8 index b1a3e1ade9cde63b762b5e1f92e7f3e51db355a6..123817ad19a7a9715014f2943ecad810ae998baf 100755 --- a/testsuite/expect/test3.8 +++ b/testsuite/expect/test3.8 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. 
+# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test3.9 b/testsuite/expect/test3.9 index ac9a1398ee961bdee91e47cc4d9903abf74b7699..7d2892915d0eb25a09dcf5529ff037d682ad5c68 100755 --- a/testsuite/expect/test3.9 +++ b/testsuite/expect/test3.9 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.1 b/testsuite/expect/test4.1 index fb5f4c632bc306e48480d4d855b2837211ad3408..fedcaa54be09a437de530ee46ae07b10487de862 100755 --- a/testsuite/expect/test4.1 +++ b/testsuite/expect/test4.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.10 b/testsuite/expect/test4.10 index 3cf86cb575347dd86be517e8717288f443535f57..95fbd43b2e3842fc484515448308a30659b5758e 100755 --- a/testsuite/expect/test4.10 +++ b/testsuite/expect/test4.10 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.11 b/testsuite/expect/test4.11 index 66d611196e280af283ca776b26dbabbcc6567803..798973907417a7a1a84c7b3187488a7512dfca1e 100755 --- a/testsuite/expect/test4.11 +++ b/testsuite/expect/test4.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.12 b/testsuite/expect/test4.12 index 7aabf3369811d1449ebb35f47a1d528afe413f7d..66ea76c67afe775bde88d5fb48c3940006342468 100755 --- a/testsuite/expect/test4.12 +++ b/testsuite/expect/test4.12 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.2 b/testsuite/expect/test4.2 index b33ef82c3c0b4d8fc93a857c193f60b9309fb267..005ce7666c0aab56581c88fd29b1623b00ba6193 100755 --- a/testsuite/expect/test4.2 +++ b/testsuite/expect/test4.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.3 b/testsuite/expect/test4.3 index 289626012f572ce5ad29408f904b07b1f868992e..8280c1563d42095af317fafdbc544c7947a64881 100755 --- a/testsuite/expect/test4.3 +++ b/testsuite/expect/test4.3 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.4 b/testsuite/expect/test4.4 index 97d029464228dc308a9cf31c4543679d1e5ccae6..577df157c4e3e5c963fb37f6a649e4139a3edb6f 100755 --- a/testsuite/expect/test4.4 +++ b/testsuite/expect/test4.4 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.5 b/testsuite/expect/test4.5 index 5e2a3723f4e4ef7d227b6aefb486749eea9d1191..a663a54a831764081e525577fa6f1d0af9f9e356 100755 --- a/testsuite/expect/test4.5 +++ b/testsuite/expect/test4.5 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.6 b/testsuite/expect/test4.6 index ba64dd55e2f4d97df47dd78db181481bb70c7b3c..b1f1e31049c78459265f3c16a566c5bf6b759601 100755 --- a/testsuite/expect/test4.6 +++ b/testsuite/expect/test4.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.7 b/testsuite/expect/test4.7 index 51a807f8100874e54cfd2de5eb7f31a17c1282e1..101f0c990bd404fe80edb448fe91047347a34107 100755 --- a/testsuite/expect/test4.7 +++ b/testsuite/expect/test4.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.8 b/testsuite/expect/test4.8 index 1c778e8f8014b35a6225159e251aaa53bcb2d473..3fcc57c295fc7741fcda6195d958c7c0bbc3464f 100755 --- a/testsuite/expect/test4.8 +++ b/testsuite/expect/test4.8 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test4.9 b/testsuite/expect/test4.9 index d291c04bb1edb6011ee244efd6bc798508b8e8a6..8f35faa54a20efa3ddacc5bb2673bf9114f7ce13 100755 --- a/testsuite/expect/test4.9 +++ b/testsuite/expect/test4.9 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.1 b/testsuite/expect/test5.1 index 3d5cec8ec24655669a924d3d4a368197c28f0c81..c20486e516b870f71a19f9ef237a8b9380d9ac19 100755 --- a/testsuite/expect/test5.1 +++ b/testsuite/expect/test5.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.2 b/testsuite/expect/test5.2 index b34d42bfb5a986c4943e3a5c5f7e45c4a761409f..34a00a722df1de9523ec533c4986b34828db0c45 100755 --- a/testsuite/expect/test5.2 +++ b/testsuite/expect/test5.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.3 b/testsuite/expect/test5.3 index a69349fd8ab795b8b0423e959bfb701a26b85361..30285119246a0bedde0c934e0bc239b58fae1a95 100755 --- a/testsuite/expect/test5.3 +++ b/testsuite/expect/test5.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.4 b/testsuite/expect/test5.4 index e1ed5f08e1d1cd743d7457a9a1cb84b1eb089c84..ddb35c4c27fb1527f4a5e79a5c873953100fa50b 100755 --- a/testsuite/expect/test5.4 +++ b/testsuite/expect/test5.4 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -212,7 +212,7 @@ spawn $squeue --iterate=5 --steps set iteration 0 set step_found 0 expect { - -re "($end_of_line)($job_id1.$step_id) " { + -re "($end_of_line) *($job_id1.$step_id) " { set step_found 1 set squeue_pid [exp_pid] exec $bin_kill -TERM $squeue_pid diff --git a/testsuite/expect/test5.5 b/testsuite/expect/test5.5 index b46716270bc135feea5e9b574cf0e19f06752bd4..f5ad720ac9c1bda48f649012f45df550dab35daf 100755 --- a/testsuite/expect/test5.5 +++ b/testsuite/expect/test5.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.6 b/testsuite/expect/test5.6 index 503d95688623ffa0fcd8b995eab2a1ebc67d6160..1fbd2a43c01299481af0da664c0f736e9bff42af 100755 --- a/testsuite/expect/test5.6 +++ b/testsuite/expect/test5.6 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.7 b/testsuite/expect/test5.7 index bb10c261b3e3fb17bc2728cb83cc67fa98418ac4..967549c79bdc83c89de7884b7ae024578490c91b 100755 --- a/testsuite/expect/test5.7 +++ b/testsuite/expect/test5.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test5.8 b/testsuite/expect/test5.8 index 9c1c83f2768e425e9aaf36e877465fbafc9c18a1..2382410009ecd0d27b61e65b1c79d9b06aa8a41e 100755 --- a/testsuite/expect/test5.8 +++ b/testsuite/expect/test5.8 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.1 b/testsuite/expect/test6.1 index 1014f2455c8652e8aa652c4c9cd9e5208b26b600..f40841ac80452cb1c73ba384457c529c3194ba2a 100755 --- a/testsuite/expect/test6.1 +++ b/testsuite/expect/test6.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.10 b/testsuite/expect/test6.10 index 8949c986d52de01b1ec6ade51311a7a0244d8025..b6bec205c25675e8057cb7c7ab65d4602b04f2d9 100755 --- a/testsuite/expect/test6.10 +++ b/testsuite/expect/test6.10 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.11 b/testsuite/expect/test6.11 index b1ae1b0566e3c6e546ad5956b0a3d713c09907d6..1028dbf31e263d3b5d17b636471ac15a1e0049f4 100755 --- a/testsuite/expect/test6.11 +++ b/testsuite/expect/test6.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.12 b/testsuite/expect/test6.12 index d4ec2410e8829cadf9a7108b460567e533f7693f..53555d056e61dd3230b055fc5209cbe20650bb72 100755 --- a/testsuite/expect/test6.12 +++ b/testsuite/expect/test6.12 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.13 b/testsuite/expect/test6.13 index edfe57afe31ce4879ee8009d62bcd9daa8baf2f0..0935daa644641b277b03b07ad67b0dabddde665f 100755 --- a/testsuite/expect/test6.13 +++ b/testsuite/expect/test6.13 @@ -14,7 +14,7 @@ # UCRL-CODE-217948. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.14 b/testsuite/expect/test6.14 index 1fdedaaf34162f42e667b56aef8005a20cb5b0d8..89b9445f9bd08b7acd3549fc69ea8e612daea6e9 100755 --- a/testsuite/expect/test6.14 +++ b/testsuite/expect/test6.14 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.2 b/testsuite/expect/test6.2 index 432c25d335887073fd5a445ce1fd6d1f31d24c69..251da2b2ca44770faec4eef9747cd6c2aa7c5fb5 100755 --- a/testsuite/expect/test6.2 +++ b/testsuite/expect/test6.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.3 b/testsuite/expect/test6.3 index 497dde7c21f78382bd0a967d5c941a9eb47555ac..18d7f869330af1cf269f59090c719c7c38620972 100755 --- a/testsuite/expect/test6.3 +++ b/testsuite/expect/test6.3 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.4 b/testsuite/expect/test6.4 index 4bf5df3142373cf20614f58882583b6c2b12d89d..9866fa70580c4f1f9d6d4835945f89d411204e01 100755 --- a/testsuite/expect/test6.4 +++ b/testsuite/expect/test6.4 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.5 b/testsuite/expect/test6.5 index d5c1480232b36e29d3124df4ad925e8295e3849d..0010a4be1649f4214c040a58b89acf7b42c4cb52 100755 --- a/testsuite/expect/test6.5 +++ b/testsuite/expect/test6.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.6 b/testsuite/expect/test6.6 index 2fbfc550a5eac3291f5909be94777f24375b6426..e99693568423617f69af0f5e17ec2bbc067c276d 100755 --- a/testsuite/expect/test6.6 +++ b/testsuite/expect/test6.6 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.7 b/testsuite/expect/test6.7 index 99e0b58ece815c215c92c97458a7a3934da2e2cb..0ebf4e3540ff6ae960f52d0353b4cea8106a020d 100755 --- a/testsuite/expect/test6.7 +++ b/testsuite/expect/test6.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.8 b/testsuite/expect/test6.8 index 5624818e34e1e9b1a51f9fc4c1dbddd14d3afd1f..25e2b6451fce996b175a38651e4dcacdda91e616 100755 --- a/testsuite/expect/test6.8 +++ b/testsuite/expect/test6.8 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test6.9 b/testsuite/expect/test6.9 index f892c92e376d85f7d4af14e0f5969800a3fd001b..68cc46fa6964f31d68c8429cedbbe15f6b297a0a 100755 --- a/testsuite/expect/test6.9 +++ b/testsuite/expect/test6.9 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.1 b/testsuite/expect/test7.1 index 13fb11d4ef16e5806a9dbf25183d09bcc98bb236..caee5d52afeb6e1048b016e7ca1fd996d43ba7a9 100755 --- a/testsuite/expect/test7.1 +++ b/testsuite/expect/test7.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.10 b/testsuite/expect/test7.10 index 038a5ee8cb3b6c4dc7914474b49c212a480c8318..ad60c9c1878bdb325837ae42505d05fc10d74051 100755 --- a/testsuite/expect/test7.10 +++ b/testsuite/expect/test7.10 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.11 b/testsuite/expect/test7.11 index 331a714c32f1593c91fc68374fc5a8b115ebe37c..ed3adfb254a762c23d8b55127a44319e0668ba4e 100755 --- a/testsuite/expect/test7.11 +++ b/testsuite/expect/test7.11 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.11.prog.c b/testsuite/expect/test7.11.prog.c index 721777c38ea43c2be93ad7670d31756414b8a524..eeb5a0577796ef59c8caa1069e3b49a3c7cb6406 100644 --- a/testsuite/expect/test7.11.prog.c +++ b/testsuite/expect/test7.11.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.12 b/testsuite/expect/test7.12 index 9ba6d7203d0ec66cec7962f21892a10604408794..28635dc05a7c35b50e3a123fa5b523b05ba94ef5 100755 --- a/testsuite/expect/test7.12 +++ b/testsuite/expect/test7.12 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.12.prog.c b/testsuite/expect/test7.12.prog.c index 00b800a779736c45d6db255cc185a2eba20ce7ca..4d50af149c89fbf2f1a8c99c91a6827d38f0d693 100644 --- a/testsuite/expect/test7.12.prog.c +++ b/testsuite/expect/test7.12.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.13 b/testsuite/expect/test7.13 index 9707714bd88c4c89e4a9b1eb4c89436bfd2eb182..051d4e684fc08c54af419f3875e6bf4958af1178 100755 --- a/testsuite/expect/test7.13 +++ b/testsuite/expect/test7.13 @@ -12,7 +12,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.14 b/testsuite/expect/test7.14 index 401adada76f37e8bbcd9850a9e750bf457266c0d..223e1ef22dba66a8a9de81d54eb60a81827efc69 100755 --- a/testsuite/expect/test7.14 +++ b/testsuite/expect/test7.14 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.15 b/testsuite/expect/test7.15 index bd5138b695c001193660fb33cb769459db59eab4..0a367593304666bc094e5b9a842afb595d00ea28 100755 --- a/testsuite/expect/test7.15 +++ b/testsuite/expect/test7.15 @@ -11,7 +11,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.16 b/testsuite/expect/test7.16 index 6bb85a8adf4f47b1881276ec20c84e8cc0175698..580807fb2e80d767d57ea50b395329f314c44ce6 100755 --- a/testsuite/expect/test7.16 +++ b/testsuite/expect/test7.16 @@ -11,7 +11,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under @@ -90,8 +90,8 @@ expect { } } if {$job_id != 0} { - send_user "\FAILURE: allocation grated with bad Munge credential\n" - exit $exit_code + send_user "\FAILURE: allocation granted with bad Munge credential\n" + exit 1 } if { $exit_code == 0 } { diff --git a/testsuite/expect/test7.2 b/testsuite/expect/test7.2 index 25bbd643f54b023a787ba0744b70f5118bde8830..f76d852e0700ff7788eb7a8a77a59a280fd3e2f4 100755 --- a/testsuite/expect/test7.2 +++ b/testsuite/expect/test7.2 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.2.prog.c b/testsuite/expect/test7.2.prog.c index fd4b8ffe62e44dedcfa24467032fa4b59140d843..bf1fe858c98e076a42075bc8f8b60031b9ea5938 100644 --- a/testsuite/expect/test7.2.prog.c +++ b/testsuite/expect/test7.2.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.3 b/testsuite/expect/test7.3 index b341be5b15b89488724d779e95e2d0df52cfb4cb..c42f7f6d26512d6d7b3e795494212013db1e79ba 100755 --- a/testsuite/expect/test7.3 +++ b/testsuite/expect/test7.3 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. 
# Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.3.io.c b/testsuite/expect/test7.3.io.c index 0492aa90f5ab34ed2a31d83b6c67ecaed870a616..7eccd5c476db4f2ae587a0323ea043f213904433 100644 --- a/testsuite/expect/test7.3.io.c +++ b/testsuite/expect/test7.3.io.c @@ -12,7 +12,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.3.prog.c b/testsuite/expect/test7.3.prog.c index b9892595e6873fce08a3b778b2bac0b0bd8e2ee9..522cc98dd915893123c2da0357f2564f1e6aa2ef 100644 --- a/testsuite/expect/test7.3.prog.c +++ b/testsuite/expect/test7.3.prog.c @@ -11,7 +11,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.4 b/testsuite/expect/test7.4 index b6957e37bba58723bc5ab1d4ee76a96107e594a0..6f9bd0265d9a11cd6d34a622832bfe51372ded93 100755 --- a/testsuite/expect/test7.4 +++ b/testsuite/expect/test7.4 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.4.prog.c b/testsuite/expect/test7.4.prog.c index a5cc2c7ce050512829daa223cd516e418be9eca7..d05c7107e4f28f701b82f1d5b32d56808ea1fd70 100644 --- a/testsuite/expect/test7.4.prog.c +++ b/testsuite/expect/test7.4.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.5 b/testsuite/expect/test7.5 index 469198a5ca4d2fae611c94935a177e7018ea1788..270038405604a15ed42cafe3657da46d05c2e860 100755 --- a/testsuite/expect/test7.5 +++ b/testsuite/expect/test7.5 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.5.prog.c b/testsuite/expect/test7.5.prog.c index 80f4e57104a74202cd1d40ea8a07e187ca08b8b5..621172dd49a4c2324ad5dd320537c298baba9762 100644 --- a/testsuite/expect/test7.5.prog.c +++ b/testsuite/expect/test7.5.prog.c @@ -9,7 +9,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.6 b/testsuite/expect/test7.6 index 0c095d6116862a095ea4a45e32d0b2ebccf8ef79..aca96dec72d30a2926d05bbc81bf31cfc179e04d 100755 --- a/testsuite/expect/test7.6 +++ b/testsuite/expect/test7.6 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.6.prog.c b/testsuite/expect/test7.6.prog.c index 878dc9ef23a8667e7e941b8a1f5e4dedae6ae238..3f593657d625ecacf80a26b3190750bc950e63d0 100644 --- a/testsuite/expect/test7.6.prog.c +++ b/testsuite/expect/test7.6.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.7 b/testsuite/expect/test7.7 index ca0637520594531bfcc2ea2ad0734b68ee5cd02a..3021ef8cb347058612bb972207e3c4b517c0ed3b 100755 --- a/testsuite/expect/test7.7 +++ b/testsuite/expect/test7.7 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.7.prog.c b/testsuite/expect/test7.7.prog.c index 0a2bb55b388d8f797296c552447fd5c85a8fd06c..94f1300f247ea3ca26417745fb8e57dd176e086b 100644 --- a/testsuite/expect/test7.7.prog.c +++ b/testsuite/expect/test7.7.prog.c @@ -8,7 +8,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.8 b/testsuite/expect/test7.8 index e7cf13054132629056b5b1ae5b9a2d210a85f665..4039866d32b6b61f1ace8950c85462128a540692 100755 --- a/testsuite/expect/test7.8 +++ b/testsuite/expect/test7.8 @@ -15,7 +15,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.8.prog.c b/testsuite/expect/test7.8.prog.c index 173485eb3bfa56e75824ea20b11f853b374bba79..10b61d07a9ac2691cc0dd73dea5cc2b76fa18d95 100644 --- a/testsuite/expect/test7.8.prog.c +++ b/testsuite/expect/test7.8.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.9 b/testsuite/expect/test7.9 index 276d83fd454f8c059308cc6a6f0222189db5c9af..bd1f8c5379e8bd82bafd39acb185275201798a51 100755 --- a/testsuite/expect/test7.9 +++ b/testsuite/expect/test7.9 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test7.9.prog.c b/testsuite/expect/test7.9.prog.c index 3bf90eab0fb65bdd9450be255373f873db609d43..5d99cb4e5389b8a722f2d55cb2d6583da1708cab 100644 --- a/testsuite/expect/test7.9.prog.c +++ b/testsuite/expect/test7.9.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.1 b/testsuite/expect/test8.1 index b83cb770a14dc5e747082799607493439a458dc7..5f652caa6ff30a9cbd0e3d7b134fc0d51a352cf2 100755 --- a/testsuite/expect/test8.1 +++ b/testsuite/expect/test8.1 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.10 b/testsuite/expect/test8.10 index 5eda6138db0b8ddf2f72c4e7acf73433a307a8b2..4eaa771667f27638bee9c8ee1163e8a4256e81b9 100755 --- a/testsuite/expect/test8.10 +++ b/testsuite/expect/test8.10 @@ -12,7 +12,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.11 b/testsuite/expect/test8.11 index 4a10fd9150acd8b52c2877f89d56f62c4ed9ce11..dfd5dc6b72f82d42e98d3a5b2b52664c16c6da5a 100755 --- a/testsuite/expect/test8.11 +++ b/testsuite/expect/test8.11 @@ -12,7 +12,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under @@ -35,20 +35,27 @@ set test_id "8.11" set exit_code 0 set job_id 0 set ret_code 0 +set launch_string "launching" if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} { send_user "\nWARNING: This test is only compatible with bluegene systems\n" exit 0 } +if {![string compare [test_launch_type] "runjob"]} { + # On a real bluegene/Q the output is different so we have to + # account for that. 
+ set launch_string "srun: auth" +} + #########salloc test########### # test salloc without overcommit proc salloc_submit {num_task} { - global exit_code salloc srun job_id number bin_sleep ret_code + global exit_code salloc srun job_id number bin_sleep ret_code launch_string spawn $salloc -N1 -n$num_task --ntasks-per-node=$num_task $srun -v $bin_sleep 50 expect { - -re "launching" { + -re $launch_string { set ret_code 0 } -re "You requested" { @@ -67,11 +74,11 @@ proc salloc_submit {num_task} { # test salloc with overcommit proc salloc_overcommit {num_task} { - global exit_code salloc srun job_id number bin_sleep ret_code + global exit_code salloc srun job_id number bin_sleep ret_code launch_string spawn $salloc -N1 -n$num_task -O --ntasks-per-node=$num_task $srun -v $bin_sleep 50 expect { - -re "launching" { + -re $launch_string { set ret_code 0 } -re "You requested" { diff --git a/testsuite/expect/test8.2 b/testsuite/expect/test8.2 index 9d567601b70fdec48ebb944ee5c04edb670cd2c3..312b8a7715d04da6f28b13cf30da29a545f0288f 100755 --- a/testsuite/expect/test8.2 +++ b/testsuite/expect/test8.2 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.20 b/testsuite/expect/test8.20 index a2cef6ff88d0e745f032e43086019d4a061d2aad..b7327cf0a4605eca18b24d301185714cf125e78b 100755 --- a/testsuite/expect/test8.20 +++ b/testsuite/expect/test8.20 @@ -12,7 +12,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.21 b/testsuite/expect/test8.21 index b59c0cc0b5dd3a940602b88a916cfe52e16ec0ee..234ffdeef8ee2588468af71d53aa28d3cf8c1ee5 100755 --- a/testsuite/expect/test8.21 +++ b/testsuite/expect/test8.21 @@ -12,7 +12,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.22 b/testsuite/expect/test8.22 index ea029d02856d0ab4357675f5a5a609da65a016b1..78e51354180944a799688edbd3b54dc379bd3d11 100755 --- a/testsuite/expect/test8.22 +++ b/testsuite/expect/test8.22 @@ -12,7 +12,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.23 b/testsuite/expect/test8.23 index b7c2b8b68a1d258d58f368b0f66a3e57d9c271d1..d4e3c9bba2d926f935d8b5afbad9985f1d63dbbc 100755 --- a/testsuite/expect/test8.23 +++ b/testsuite/expect/test8.23 @@ -12,7 +12,7 @@ # Written by Morris Jette <jette@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.24 b/testsuite/expect/test8.24 index ef2379ff5fcde3bcbacdd3661c14b4a78e5016d1..58e76998a20edbc8f2c792990ec14b02f92997fe 100755 --- a/testsuite/expect/test8.24 +++ b/testsuite/expect/test8.24 @@ -16,7 +16,7 @@ # # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.25 b/testsuite/expect/test8.25 index 39443356d2f3b30bfeaa213eff6e4b4a901caac2..0250b6118d5680e257c6f0c0a61a192ea185f52c 100755 --- a/testsuite/expect/test8.25 +++ b/testsuite/expect/test8.25 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.3 b/testsuite/expect/test8.3 index a39b6d07f418e068ab6620377ac8d6bff8d434d1..1f24651582eb0b2f806a125582829e18773fe524 100755 --- a/testsuite/expect/test8.3 +++ b/testsuite/expect/test8.3 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.4 b/testsuite/expect/test8.4 index a9aae6f741d0fdb7d16675f93af16cc2003cdb2c..fadefc82c941e9d8c261dd86eb8ecd02ddddeb0c 100755 --- a/testsuite/expect/test8.4 +++ b/testsuite/expect/test8.4 @@ -17,7 +17,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.4.prog.c b/testsuite/expect/test8.4.prog.c index 8342d7b0562469bb8858e59926a4635c78ba3493..8c63bdcf761a2731f2c550a936aa99bf691bc61f 100644 --- a/testsuite/expect/test8.4.prog.c +++ b/testsuite/expect/test8.4.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.5 b/testsuite/expect/test8.5 index 6ea9867dbb2deb2622c396a046a878b075bf3a92..d5a959630f36748eab2830f506b5413f4d66dc46 100755 --- a/testsuite/expect/test8.5 +++ b/testsuite/expect/test8.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.6 b/testsuite/expect/test8.6 index f921fb40bb2e9b3f2914480ce622f351ef91e794..059bd42f2c94bed57b8bdd9950722d7f256c71d9 100755 --- a/testsuite/expect/test8.6 +++ b/testsuite/expect/test8.6 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.7 b/testsuite/expect/test8.7 index bc1835f0045b1f1cd425cb82c8906f72a2649b19..061a96bc80366054a33c715e9314c1e6c9ff8999 100755 --- a/testsuite/expect/test8.7 +++ b/testsuite/expect/test8.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.7.prog.c b/testsuite/expect/test8.7.prog.c index 225e8909d47a49fca15a38aa35518b2a08df5e94..58005019eefd680be27f8f0b358da4d11f99148a 100644 --- a/testsuite/expect/test8.7.prog.c +++ b/testsuite/expect/test8.7.prog.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.8 b/testsuite/expect/test8.8 index f8bdb1f1ef1971ed21dcd43076b018fc3bd8e821..f78383340a0d53bf2ca2c59914b1cf1992bd6312 100755 --- a/testsuite/expect/test8.8 +++ b/testsuite/expect/test8.8 @@ -14,7 +14,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test8.9 b/testsuite/expect/test8.9 index 2fd0525e36a841d26dd9fc9d9d565afb0f8f36de..28069a3eba5efbf05084b276ab5c5f84e3811e65 100755 --- a/testsuite/expect/test8.9 +++ b/testsuite/expect/test8.9 @@ -14,7 +14,7 @@ # Written by Nathan Yee <nyee32@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.1 b/testsuite/expect/test9.1 index aaa56576b67f19b6c1c09a457e17380c37945ecc..5370a215a6611a1b1966968ea257d3af94e14537 100755 --- a/testsuite/expect/test9.1 +++ b/testsuite/expect/test9.1 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.2 b/testsuite/expect/test9.2 index a605ff0668335edcbe2d4612c16a92950e8f75b5..ce90079cf904b2f5094a3cfe162ece5c65c97412 100755 --- a/testsuite/expect/test9.2 +++ b/testsuite/expect/test9.2 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.3 b/testsuite/expect/test9.3 index 015c3df00091f1a363b1fa13cb3a6be45686a1a5..2d627640e23a97117afc1ae7e637758fe920455b 100755 --- a/testsuite/expect/test9.3 +++ b/testsuite/expect/test9.3 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.4 b/testsuite/expect/test9.4 index 52607aaf8f2a325fc2f5dcd19ebfb75da4d7b4b9..2d2ccbfdbc8d2e7c23b844f2221feeec799d98e5 100755 --- a/testsuite/expect/test9.4 +++ b/testsuite/expect/test9.4 @@ -21,7 +21,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.5 b/testsuite/expect/test9.5 index b3db6c2e3d9039f0896ea61030ed2149494b9c9c..e27d1d9bbd71d1f7e651ac6b9351e1351769b6eb 100755 --- a/testsuite/expect/test9.5 +++ b/testsuite/expect/test9.5 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.6 b/testsuite/expect/test9.6 index 83c22d31bf7527750bd5b1c4811685b6f988ff44..d87c0554672f2071d1cdde05679bb411b54c91cb 100755 --- a/testsuite/expect/test9.6 +++ b/testsuite/expect/test9.6 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.7 b/testsuite/expect/test9.7 index 64a0df0f764efcd9ddf357549ef9fd0bde65a064..1f7b41780a520e3816b8ee8a8663cbd239f99560 100755 --- a/testsuite/expect/test9.7 +++ b/testsuite/expect/test9.7 @@ -13,7 +13,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.7.bash b/testsuite/expect/test9.7.bash index 9a99808bc8069074241da004ad8408328d60490f..a6bf6304689054b52aee451621583a8c2253b844 100755 --- a/testsuite/expect/test9.7.bash +++ b/testsuite/expect/test9.7.bash @@ -10,7 +10,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.8 b/testsuite/expect/test9.8 index 9f41d90f775b9ec05c92d85018ebe537131395b7..525940ccd03c687fb8e99c625d6a07e952a07cb9 100755 --- a/testsuite/expect/test9.8 +++ b/testsuite/expect/test9.8 @@ -16,7 +16,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. # # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.9 b/testsuite/expect/test9.9 index bbd2d30f691db31f5c09e106c276717a8d335769..857c181ab0a3380b05fe8bff769e9648bc3d2925 100755 --- a/testsuite/expect/test9.9 +++ b/testsuite/expect/test9.9 @@ -10,7 +10,7 @@ # Written by Danny Auble <da@schedmd.com> # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the included file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/test9.9.prog.c b/testsuite/expect/test9.9.prog.c index 95a68e9368f3701636b07c14ec2cfb0a81b3986a..862d79bbf51a43ad41af9a702fb1cb44518d7bfb 100644 --- a/testsuite/expect/test9.9.prog.c +++ b/testsuite/expect/test9.9.prog.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * test9.9.prog.c - link and test algo of the multifactor plugin. + * test9.9.prog.c - Timing test for 5000 jobs. * * Usage: test9.9.prog <sbatch_path> <exec_prog> <prog_name> <iterations> * @@ -11,7 +11,7 @@ * Written by Morris Jette <jette@schedmd.com> * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/expect/usleep b/testsuite/expect/usleep index 1cce5733023d5c2dbe465c1493db3c1726fbe073..9d23052369c76398df09dfecbe3829e3abfbe21f 100755 --- a/testsuite/expect/usleep +++ b/testsuite/expect/usleep @@ -10,7 +10,7 @@ # CODE-OCEC-09-009. All rights reserved. # # This file is part of SLURM, a resource management program. -# For details, see <http://www.schedmd.com/slurmdocs/>. +# For details, see <http://slurm.schedmd.com/>. # Please also read the supplied file: DISCLAIMER. 
# # SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/Makefile.in b/testsuite/slurm_unit/Makefile.in index 3ce9df0dca43e1a937047eaae110d1f6fec4f1b9..5c05bcc4f9eb4bb057e9c039f3046be5f9e857c2 100644 --- a/testsuite/slurm_unit/Makefile.in +++ b/testsuite/slurm_unit/Makefile.in @@ -52,9 +52,10 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = testsuite/slurm_unit -DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -72,6 +73,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -80,11 +82,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -167,6 +171,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -187,6 +193,9 
@@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -196,6 +205,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -203,6 +214,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -237,6 +257,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -264,6 +287,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/testsuite/slurm_unit/README b/testsuite/slurm_unit/README new file mode 100644 index 0000000000000000000000000000000000000000..963ab53d46f42d0d8d00a8a4e9fbde3444c054a7 --- /dev/null +++ b/testsuite/slurm_unit/README @@ -0,0 +1,10 @@ +Deja Gnu Unit Tests + +1. 
These tests execute only when Slurm is built directly in the source + directory, not when a separate build directory is used. +2. Ensure the "expect", "dejagnu" and "check" packages are installed. +3. From the top level source directory, execute "make check", which builds + the executable files including various src/common/*.o files that will be + unit tested. +4. Change working directory to "testsuite/slurm_unit". +5. Execute "make check" to execute the unit tests. diff --git a/testsuite/slurm_unit/api/Makefile.in b/testsuite/slurm_unit/api/Makefile.in index 117cf1099c7c3195c12794dd7ae3bcdfd135ef54..8f1bd1669a22e8873f9b798bdd35d30774195f22 100644 --- a/testsuite/slurm_unit/api/Makefile.in +++ b/testsuite/slurm_unit/api/Makefile.in @@ -57,6 +57,7 @@ subdir = testsuite/slurm_unit/api DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +75,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +84,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
$(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -190,6 +194,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -210,6 +216,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -219,6 +228,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -226,6 +237,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -260,6 +280,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -287,6 +310,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/testsuite/slurm_unit/api/manual/Makefile.in b/testsuite/slurm_unit/api/manual/Makefile.in index 
1ede39a63e65b3e603a160a8b733e6dcf1686b2f..cb3c46e78f7724e2452cec0574ba3d01715e3e5b 100644 --- a/testsuite/slurm_unit/api/manual/Makefile.in +++ b/testsuite/slurm_unit/api/manual/Makefile.in @@ -59,6 +59,7 @@ subdir = testsuite/slurm_unit/api/manual DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -76,6 +77,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -84,11 +86,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -182,6 +186,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -202,6 +208,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ 
GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -211,6 +220,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -218,6 +229,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -252,6 +272,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -279,6 +302,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ diff --git a/testsuite/slurm_unit/api/manual/cancel-tst.c b/testsuite/slurm_unit/api/manual/cancel-tst.c index 9dc4b9ecf00b1403d911a826592a352f2050a2e5..3fbfe9b318887ce2309335cbc53a900a4c3f991a 100644 --- a/testsuite/slurm_unit/api/manual/cancel-tst.c +++ b/testsuite/slurm_unit/api/manual/cancel-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/complete-tst.c b/testsuite/slurm_unit/api/manual/complete-tst.c index a9e359ad0a6275965ba276eb1ad004c40b8fe0eb..07b8c6934d19e0750b455ec200c3a363943207e2 100644 --- a/testsuite/slurm_unit/api/manual/complete-tst.c +++ b/testsuite/slurm_unit/api/manual/complete-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/job_info-tst.c b/testsuite/slurm_unit/api/manual/job_info-tst.c index a5cebc8c1e898ab81eb6294d5b48dce43c93ee75..a6b24790c92a3fcdeb8cc4d54b2a45351b334913 100644 --- a/testsuite/slurm_unit/api/manual/job_info-tst.c +++ b/testsuite/slurm_unit/api/manual/job_info-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/node_info-tst.c b/testsuite/slurm_unit/api/manual/node_info-tst.c index 386b14c1666bc8e9af76ad32aac8dea6f8da72ef..dba75f6f9b9dbe28ee597b5ec67338e2dcf28ffb 100644 --- a/testsuite/slurm_unit/api/manual/node_info-tst.c +++ b/testsuite/slurm_unit/api/manual/node_info-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. 
* * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/partition_info-tst.c b/testsuite/slurm_unit/api/manual/partition_info-tst.c index 6a29748fc2b8f47e657a7fc912b61c990f91ea0c..e203e0c8d22868e9cae50b5c3d7fc4c5d559c0bf 100644 --- a/testsuite/slurm_unit/api/manual/partition_info-tst.c +++ b/testsuite/slurm_unit/api/manual/partition_info-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/reconfigure-tst.c b/testsuite/slurm_unit/api/manual/reconfigure-tst.c index 24caab9498b8f154bf06551aa19b8f9f4a549db1..292801987efceea9fc86c1a5e83aee1fdb92200a 100644 --- a/testsuite/slurm_unit/api/manual/reconfigure-tst.c +++ b/testsuite/slurm_unit/api/manual/reconfigure-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/submit-tst.c b/testsuite/slurm_unit/api/manual/submit-tst.c index 512ba1faca1128fb05cf64ac7851949875530d81..1cee4b2ef07b8631171841d986071bdb50d8af90 100644 --- a/testsuite/slurm_unit/api/manual/submit-tst.c +++ b/testsuite/slurm_unit/api/manual/submit-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. 
* Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/api/manual/update_config-tst.c b/testsuite/slurm_unit/api/manual/update_config-tst.c index 65555746d9469d9cdaba7ff36ea2a40eee57cdb3..696ad2ef7a34b22e38cf3bd659905d9ed5797fe8 100644 --- a/testsuite/slurm_unit/api/manual/update_config-tst.c +++ b/testsuite/slurm_unit/api/manual/update_config-tst.c @@ -7,7 +7,7 @@ * CODE-OCEC-09-009. All rights reserved. * * This file is part of SLURM, a resource management program. - * For details, see <http://www.schedmd.com/slurmdocs/>. + * For details, see <http://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * SLURM is free software; you can redistribute it and/or modify it under diff --git a/testsuite/slurm_unit/common/Makefile.am b/testsuite/slurm_unit/common/Makefile.am index 93d81fe884a86cc6a285f2dc7723d7b0a89dd274..6316190d9a7f50ad33f39b24789ae904c83454cf 100644 --- a/testsuite/slurm_unit/common/Makefile.am +++ b/testsuite/slurm_unit/common/Makefile.am @@ -1,7 +1,7 @@ AUTOMAKE_OPTIONS = foreign -INCLUDES = -I$(top_srcdir) -LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) +INCLUDES = -I$(top_srcdir) $(HWLOC_CPPFLAGS) +LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) $(HWLOC_LIBS) check_PROGRAMS = \ $(TESTS) @@ -11,3 +11,15 @@ TESTS = \ log-test \ bitstring-test +if HAVE_CHECK +MYCFLAGS = @CHECK_CFLAGS@ -Wall -ansi -pedantic -std=c99 +MYCFLAGS += -D_ISO99_SOURCE -Wunused-but-set-variable +MYCFLAGS += $(top_builddir)/src/common/libcommon.la +TESTS += xtree-test \ + xhash-test +xtree_test_CFLAGS = $(MYCFLAGS) +xtree_test_LDADD = @CHECK_LIBS@ +xhash_test_CFLAGS = $(MYCFLAGS) +xhash_test_LDADD = @CHECK_LIBS@ +endif + diff --git a/testsuite/slurm_unit/common/Makefile.in b/testsuite/slurm_unit/common/Makefile.in index a4d491aeafaf167f9cd07e522b0eaec2c3e0de04..fd434fc9c948d188cad5da7d0486496945a0a988 100644 --- 
a/testsuite/slurm_unit/common/Makefile.in +++ b/testsuite/slurm_unit/common/Makefile.in @@ -51,12 +51,17 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -check_PROGRAMS = $(am__EXEEXT_1) -TESTS = pack-test$(EXEEXT) log-test$(EXEEXT) bitstring-test$(EXEEXT) +check_PROGRAMS = $(am__EXEEXT_2) +TESTS = pack-test$(EXEEXT) log-test$(EXEEXT) bitstring-test$(EXEEXT) \ + $(am__EXEEXT_1) +@HAVE_CHECK_TRUE@am__append_1 = xtree-test \ +@HAVE_CHECK_TRUE@ xhash-test + subdir = testsuite/slurm_unit/common DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ $(top_srcdir)/auxdir/libtool.m4 \ $(top_srcdir)/auxdir/ltoptions.m4 \ $(top_srcdir)/auxdir/ltsugar.m4 \ @@ -74,6 +79,7 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_debug.m4 \ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ $(top_srcdir)/auxdir/x_ac_iso.m4 \ @@ -82,11 +88,13 @@ am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ $(top_srcdir)/auxdir/x_ac_munge.m4 \ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ $(top_srcdir)/auxdir/x_ac_pam.m4 \ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ $(top_srcdir)/auxdir/x_ac_readline.m4 \ $(top_srcdir)/auxdir/x_ac_rfc5424_time.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ @@ -99,24 +107,38 @@ mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = -am__EXEEXT_1 = pack-test$(EXEEXT) 
log-test$(EXEEXT) \ - bitstring-test$(EXEEXT) +@HAVE_CHECK_TRUE@am__EXEEXT_1 = xtree-test$(EXEEXT) \ +@HAVE_CHECK_TRUE@ xhash-test$(EXEEXT) +am__EXEEXT_2 = pack-test$(EXEEXT) log-test$(EXEEXT) \ + bitstring-test$(EXEEXT) $(am__EXEEXT_1) bitstring_test_SOURCES = bitstring-test.c bitstring_test_OBJECTS = bitstring-test.$(OBJEXT) bitstring_test_LDADD = $(LDADD) am__DEPENDENCIES_1 = bitstring_test_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \ - $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) log_test_SOURCES = log-test.c log_test_OBJECTS = log-test.$(OBJEXT) log_test_LDADD = $(LDADD) log_test_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \ - $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) pack_test_SOURCES = pack-test.c pack_test_OBJECTS = pack-test.$(OBJEXT) pack_test_LDADD = $(LDADD) pack_test_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \ - $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +xhash_test_SOURCES = xhash-test.c +xhash_test_OBJECTS = xhash_test-xhash-test.$(OBJEXT) +xhash_test_DEPENDENCIES = +xhash_test_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(xhash_test_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +xtree_test_SOURCES = xtree-test.c +xtree_test_OBJECTS = xtree_test-xtree-test.$(OBJEXT) +xtree_test_DEPENDENCIES = +xtree_test_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(xtree_test_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -130,8 +152,10 @@ CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ -SOURCES = bitstring-test.c log-test.c pack-test.c -DIST_SOURCES = bitstring-test.c log-test.c pack-test.c +SOURCES = bitstring-test.c 
log-test.c pack-test.c xhash-test.c \ + xtree-test.c +DIST_SOURCES = bitstring-test.c log-test.c pack-test.c xhash-test.c \ + xtree-test.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -164,6 +188,8 @@ BLUEGENE_LOADED = @BLUEGENE_LOADED@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ CMD_LDFLAGS = @CMD_LDFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ @@ -184,6 +210,9 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ GLIB_CFLAGS = @GLIB_CFLAGS@ GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ @@ -193,6 +222,8 @@ GOBJECT_QUERY = @GOBJECT_QUERY@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ HAVEPGCONFIG = @HAVEPGCONFIG@ HAVE_AIX = @HAVE_AIX@ @@ -200,6 +231,15 @@ HAVE_MAN2HTML = @HAVE_MAN2HTML@ HAVE_NRT = @HAVE_NRT@ HAVE_OPENSSL = @HAVE_OPENSSL@ HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = @HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ @@ -234,6 +274,9 @@ NRT_CPPFLAGS = @NRT_CPPFLAGS@ NUMA_LIBS = @NUMA_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ @@ -261,6 +304,9 @@ READLINE_LIBS = @READLINE_LIBS@ REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ 
RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ SED = @SED@ SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ @@ -350,8 +396,16 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign -INCLUDES = -I$(top_srcdir) -LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) +INCLUDES = -I$(top_srcdir) $(HWLOC_CPPFLAGS) +LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) $(HWLOC_LIBS) +@HAVE_CHECK_TRUE@MYCFLAGS = @CHECK_CFLAGS@ -Wall -ansi -pedantic \ +@HAVE_CHECK_TRUE@ -std=c99 -D_ISO99_SOURCE \ +@HAVE_CHECK_TRUE@ -Wunused-but-set-variable \ +@HAVE_CHECK_TRUE@ $(top_builddir)/src/common/libcommon.la +@HAVE_CHECK_TRUE@xtree_test_CFLAGS = $(MYCFLAGS) +@HAVE_CHECK_TRUE@xtree_test_LDADD = @CHECK_LIBS@ +@HAVE_CHECK_TRUE@xhash_test_CFLAGS = $(MYCFLAGS) +@HAVE_CHECK_TRUE@xhash_test_LDADD = @CHECK_LIBS@ all: all-am .SUFFIXES: @@ -404,6 +458,12 @@ log-test$(EXEEXT): $(log_test_OBJECTS) $(log_test_DEPENDENCIES) $(EXTRA_log_test pack-test$(EXEEXT): $(pack_test_OBJECTS) $(pack_test_DEPENDENCIES) $(EXTRA_pack_test_DEPENDENCIES) @rm -f pack-test$(EXEEXT) $(LINK) $(pack_test_OBJECTS) $(pack_test_LDADD) $(LIBS) +xhash-test$(EXEEXT): $(xhash_test_OBJECTS) $(xhash_test_DEPENDENCIES) $(EXTRA_xhash_test_DEPENDENCIES) + @rm -f xhash-test$(EXEEXT) + $(xhash_test_LINK) $(xhash_test_OBJECTS) $(xhash_test_LDADD) $(LIBS) +xtree-test$(EXEEXT): $(xtree_test_OBJECTS) $(xtree_test_DEPENDENCIES) $(EXTRA_xtree_test_DEPENDENCIES) + @rm -f xtree-test$(EXEEXT) + $(xtree_test_LINK) $(xtree_test_OBJECTS) $(xtree_test_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -414,6 +474,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitstring-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/log-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xhash_test-xhash-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/xtree_test-xtree-test.Po@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @@ -436,6 +498,34 @@ distclean-compile: @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< +xhash_test-xhash-test.o: xhash-test.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xhash_test_CFLAGS) $(CFLAGS) -MT xhash_test-xhash-test.o -MD -MP -MF $(DEPDIR)/xhash_test-xhash-test.Tpo -c -o xhash_test-xhash-test.o `test -f 'xhash-test.c' || echo '$(srcdir)/'`xhash-test.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/xhash_test-xhash-test.Tpo $(DEPDIR)/xhash_test-xhash-test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xhash-test.c' object='xhash_test-xhash-test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xhash_test_CFLAGS) $(CFLAGS) -c -o xhash_test-xhash-test.o `test -f 'xhash-test.c' || echo '$(srcdir)/'`xhash-test.c + +xhash_test-xhash-test.obj: xhash-test.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xhash_test_CFLAGS) $(CFLAGS) -MT xhash_test-xhash-test.obj -MD -MP -MF $(DEPDIR)/xhash_test-xhash-test.Tpo -c -o xhash_test-xhash-test.obj `if test -f 'xhash-test.c'; then $(CYGPATH_W) 'xhash-test.c'; else $(CYGPATH_W) '$(srcdir)/xhash-test.c'; fi` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/xhash_test-xhash-test.Tpo $(DEPDIR)/xhash_test-xhash-test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xhash-test.c' object='xhash_test-xhash-test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(xhash_test_CFLAGS) $(CFLAGS) -c -o xhash_test-xhash-test.obj `if test -f 'xhash-test.c'; then $(CYGPATH_W) 'xhash-test.c'; else $(CYGPATH_W) '$(srcdir)/xhash-test.c'; fi` + +xtree_test-xtree-test.o: xtree-test.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xtree_test_CFLAGS) $(CFLAGS) -MT xtree_test-xtree-test.o -MD -MP -MF $(DEPDIR)/xtree_test-xtree-test.Tpo -c -o xtree_test-xtree-test.o `test -f 'xtree-test.c' || echo '$(srcdir)/'`xtree-test.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/xtree_test-xtree-test.Tpo $(DEPDIR)/xtree_test-xtree-test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xtree-test.c' object='xtree_test-xtree-test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xtree_test_CFLAGS) $(CFLAGS) -c -o xtree_test-xtree-test.o `test -f 'xtree-test.c' || echo '$(srcdir)/'`xtree-test.c + +xtree_test-xtree-test.obj: xtree-test.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xtree_test_CFLAGS) $(CFLAGS) -MT xtree_test-xtree-test.obj -MD -MP -MF $(DEPDIR)/xtree_test-xtree-test.Tpo -c -o xtree_test-xtree-test.obj `if test -f 'xtree-test.c'; then $(CYGPATH_W) 'xtree-test.c'; else $(CYGPATH_W) '$(srcdir)/xtree-test.c'; fi` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/xtree_test-xtree-test.Tpo $(DEPDIR)/xtree_test-xtree-test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xtree-test.c' object='xtree_test-xtree-test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(xtree_test_CFLAGS) $(CFLAGS) -c -o xtree_test-xtree-test.obj `if test -f 'xtree-test.c'; then $(CYGPATH_W) 'xtree-test.c'; else $(CYGPATH_W) 
'$(srcdir)/xtree-test.c'; fi` + mostlyclean-libtool: -rm -f *.lo diff --git a/testsuite/slurm_unit/common/xhash-test.c b/testsuite/slurm_unit/common/xhash-test.c new file mode 100644 index 0000000000000000000000000000000000000000..73639c6794026d7637f909b57fac8fdc7e053e6c --- /dev/null +++ b/testsuite/slurm_unit/common/xhash-test.c @@ -0,0 +1,272 @@ +/*****************************************************************************\ + * Copyright (C) 2012 CEA/DAM/DIF + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+#include <check.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "src/common/xhash.h"
+#include "src/common/xmalloc.h"
+
+/* FIXME: how to check memory leaks with valgrind ? (to check if xhash_free
+ * does free all structures correctly). */
+
+/*****************************************************************************
+ *  DEFINITIONS
+ *****************************************************************************/
+
+/* Item stored in the tables under test: `idn` is its numeric identity and
+ * `id` its string key (empty until hashable_identify() formats it). */
+typedef struct hashable_st {
+	char id[255];
+	uint32_t idn;
+} hashable_t;
+
+/* Identifying callback handed to xhash_init(): returns the string key of
+ * `voiditem` (a hashable_t), formatting it from `idn` into `id` the first
+ * time it is asked for (i.e. while `id` is still empty). */
+const char* hashable_identify(void* voiditem)
+{
+	hashable_t* item = (hashable_t*)voiditem;
+	if (!item->id[0]) snprintf(item->id, 255, "%lu",
+				   (unsigned long)item->idn);
+	return item->id;
+}
+
+/*****************************************************************************
+ *  FIXTURE                                                                  *
+ *****************************************************************************/
+
+/* Shared fixture: table built by setup(), the items it is filled with, and
+ * the number of those items. */
+xhash_t* g_ht = NULL;
+hashable_t g_hashables[200];
+uint32_t g_hashableslen = sizeof(g_hashables)/sizeof(g_hashables[0]);
+
+/* Creates g_ht and adds every element of g_hashables to it, with idn set to
+ * its index and an empty id. Stops silently on the first failure. */
+static void setup(void)
+{
+	int i;
+	g_ht = xhash_init(hashable_identify, NULL, 0);
+	if (!g_ht) return; /* fatal error, will be detected by test cases */
+	for (i = 0; i < g_hashableslen; ++i) {
+		g_hashables[i].id[0] = 0;
+		g_hashables[i].idn = i;
+		/* it is an error if xhash_add returns null but it will be
+		 * detected by test cases */
+		if (!xhash_add(g_ht, g_hashables + i)) return;
+	}
+}
+
+/* Releases the fixture table created by setup(). */
+static void teardown(void)
+{
+	xhash_free(g_ht);
+}
+
+/*****************************************************************************
+ *  UNIT TESTS                                                               *
+ ****************************************************************************/
+
+/* xhash_init() must refuse a NULL identifying function and must return a
+ * table (which is then freed) when given a valid one. */
+START_TEST(test_init_free)
+{
+	xhash_t* ht = NULL;
+
+	mark_point();
+
+	/* invalid case */
+	ht = xhash_init(NULL, NULL, 0);
+	fail_unless(ht == NULL, "allocated table without identifying function");
+
+	/* alloc and free */
+	ht = xhash_init(hashable_identify, NULL, 0);
+	fail_unless(ht != NULL, "hash table was not allocated");
+	xhash_free(ht);
+}
+END_TEST
+
+/* xhash_add() must return NULL for a NULL table or a NULL item; the four
+ * items added to a fresh table must then be retrievable through the keys
+ * "0" .. "3". */
+START_TEST(test_add)
+{
+	xhash_t* ht = NULL;
+	hashable_t a[4] = {{"0", 0}, {"1", 1}, {"2", 2}, {"3", 3}};
+	int i, len = sizeof(a)/sizeof(a[0]);
+	char buffer[255];
+	ht = xhash_init(hashable_identify, NULL, 0);
+	fail_unless(xhash_add(NULL, a) == NULL, "invalid cases not null");
+	fail_unless(xhash_add(ht, NULL) == NULL, "invalid cases not null");
+	fail_unless(xhash_add(ht, a) != NULL, "xhash_add failed");
+	fail_unless(xhash_add(ht, a+1) != NULL, "xhash_add failed");
+	fail_unless(xhash_add(ht, a+2) != NULL, "xhash_add failed");
+	fail_unless(xhash_add(ht, a+3) != NULL, "xhash_add failed");
+	for (i = 0; i < len; ++i) {
+		snprintf(buffer, sizeof(buffer), "%d", i);
+		fail_unless(xhash_get(ht, buffer) == (a + i),
+			    "bad hashable item returned");
+	}
+	xhash_free(ht);
+}
+END_TEST
+
+/* xhash_get() on the fixture table: unknown keys must return NULL and each
+ * index formatted as a decimal string must return the matching element of
+ * g_hashables. */
+START_TEST(test_find)
+{
+	xhash_t* ht = g_ht;
+	char buffer[255];
+	int i;
+
+	/* test bad match */
+	fail_unless(xhash_get(ht, "bad") == NULL  , "invalid case not null");
+	fail_unless(xhash_get(ht, "-1") == NULL   , "invalid case not null");
+	fail_unless(xhash_get(ht, "10000") == NULL, "invalid case not null");
+
+	/* test all good indexes */
+	for (i = 0; i < g_hashableslen; ++i) {
+		snprintf(buffer, sizeof(buffer), "%d", i);
+		fail_unless(xhash_get(ht, buffer) == (g_hashables + i),
+			    "bad hashable item returned");
+	}
+}
+END_TEST
+
+/* Returns the number of items deleted from the fixture table, counted as the
+ * fixture indexes whose lookup in g_ht no longer yields the matching element
+ * of g_hashables. */
+static int test_delete_helper()
+{
+	xhash_t* ht = g_ht;
+	int ret = 0;
+	int i;
+	char buffer[255];
+	for (i = 0; i < g_hashableslen; ++i) {
+		snprintf(buffer, sizeof(buffer), "%d", i);
+		if (xhash_get(ht, buffer) != (g_hashables + i)) {
+			++ret;
+		}
+	}
+	return ret;
+}
+
+/* xhash_delete() on the fixture table: invalid arguments must leave the
+ * table untouched, then deleting a middle key, the first key and the last
+ * key must each remove exactly that item and decrement the count. */
+START_TEST(test_delete)
+{
+	xhash_t* ht = g_ht;
+	int result;
+	char buffer[255];
+
+	/* invalid cases */
+	xhash_delete(NULL, "1");
+	fail_unless(xhash_get(ht, "1") != NULL, "invalid case null");
+	/* Deleting a nonexistent item should do nothing. */
+	xhash_delete(ht, NULL);
+	fail_unless(xhash_count(ht) == g_hashableslen,
+		    "invalid delete has been done");
+	result = test_delete_helper();
+	fail_unless(result == 0,
+		    "no item should have been deleted, but %d were deleted",
+		    result);
+
+	/* test correct deletion */
+	xhash_delete(ht, "10");
+	fail_unless(xhash_get(ht, "10") == NULL, "item not deleted");
+	fail_unless(xhash_count(ht) == (g_hashableslen-1), "bad count");
+	/* left edge */
+	xhash_delete(ht, "0");
+	fail_unless(xhash_get(ht, "0") == NULL, "item not deleted");
+	fail_unless(xhash_count(ht) == (g_hashableslen-2), "bad count");
+	/* right edge: the last fixture key. The lookup must use the key that
+	 * was just deleted; checking "0" again would always pass because that
+	 * key was already removed by the previous step. */
+	snprintf(buffer, sizeof(buffer), "%u", (g_hashableslen-1));
+	xhash_delete(ht, buffer);
+	fail_unless(xhash_get(ht, buffer) == NULL, "item not deleted");
+	fail_unless(xhash_count(ht) == (g_hashableslen-3), "bad count");
+
+	result = test_delete_helper();
+	fail_unless(result == 3, "bad number of items were deleted: %d",
+		    result);
+}
+END_TEST
+
+/* xhash_count() must report every fixture item, and exactly 4 on a fresh
+ * table to which four items were added. */
+START_TEST(test_count)
+{
+	xhash_t* ht = g_ht;
+	hashable_t a[4] = {{"0", 0}, {"1", 1}, {"2", 2}, {"3", 3}};
+	fail_unless(xhash_count(ht) == g_hashableslen,
+		    "invalid count (fixture table)");
+	ht = xhash_init(hashable_identify, NULL, 0);
+	/* do not feed a failed allocation to xhash_add()/xhash_count() */
+	fail_unless(ht != NULL, "hash table was not allocated");
+	xhash_add(ht, a);
+	xhash_add(ht, a+1);
+	xhash_add(ht, a+2);
+	xhash_add(ht, a+3);
+	fail_unless(xhash_count(ht) == 4, "invalid count (fresh table)");
+	xhash_free(ht);
+}
+END_TEST
+
+/* xhash_walk() callback: marks the visited item by overwriting its idn with
+ * UINT32_MAX. `arg` is unused. */
+static void test_walk_helper_callback(void* item, void* arg)
+{
+	hashable_t* hashable = (hashable_t*)item;
+	hashable->idn = UINT32_MAX;
+}
+
+/* xhash_walk() must call the callback on every item of the fixture table:
+ * afterwards every element of g_hashables carries the UINT32_MAX mark. */
+START_TEST(test_walk)
+{
+	xhash_t* ht = g_ht;
+	int i;
+	xhash_walk(ht, test_walk_helper_callback, NULL);
+	for (i = 0; i < g_hashableslen; ++i) {
+		fail_unless(g_hashables[i].idn == UINT32_MAX,
+			    "hashable item was not walked over");
+	}
+}
+END_TEST
+
+/*****************************************************************************
+ *  TEST SUITE                                                               *
+ ****************************************************************************/
+
+/* Builds the "xhash" suite: one "Core" test case running every test above
+ * between the checked setup()/teardown() fixture. */
+Suite* xhash_suite(void)
+{
+	Suite* s = suite_create("xhash");
+	TCase* tc_core = tcase_create("Core");
+	tcase_add_checked_fixture(tc_core, setup, teardown);
+	tcase_add_test(tc_core, test_init_free);
+	tcase_add_test(tc_core, test_add);
+	tcase_add_test(tc_core, test_find);
+	tcase_add_test(tc_core, test_delete);
+	tcase_add_test(tc_core, test_count);
+	tcase_add_test(tc_core, test_walk);
+	suite_add_tcase(s, tc_core);
+	return s;
+}
+
+/*****************************************************************************
+ *  TEST RUNNER                                                              *
+ ****************************************************************************/
+
+/* Runs the suite and exits with EXIT_SUCCESS only if no test failed. */
+int main(void)
+{
+	int number_failed;
+	SRunner* sr = srunner_create(xhash_suite());
+
+	srunner_run_all(sr, CK_NORMAL);
+	number_failed = srunner_ntests_failed(sr);
+	srunner_free(sr);
+
+	return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
diff --git a/testsuite/slurm_unit/common/xtree-test.c b/testsuite/slurm_unit/common/xtree-test.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab05f7e0b5905af566113a111456bc2b4478a1c3
--- /dev/null
+++ b/testsuite/slurm_unit/common/xtree-test.c
@@ -0,0 +1,902 @@
+/*****************************************************************************\
+ * Copyright (C) 2012 CEA/DAM/DIF
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://slurm.schedmd.com/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+/* TODO: find out how to check for memory leaks with valgrind when using this
+ * framework (in case examples already exist).
+ */
+
+#include <check.h>
+#include <stdlib.h>
+
+#include "src/common/xmalloc.h"
+#include "src/common/xtree.h"
+
+/*****************************************************************************
+ *  FIXTURE                                                                  *
+ *****************************************************************************/
+
+/* Trees shared by the tests: one left empty by the fixture, one pre-filled
+ * with the 7-node layout drawn below. */
+xtree_t mytree_empty;
+xtree_t mytree_by_addchild;
+
+/* here we construct a tree in the following form :
+ *            1
+ *        / /   \ \
+ *       6 2     3 5
+ *        / \
+ *       7   4
+ * numbers are chronological adding order.
+ */
+/* Fills mytree_by_addchild; node data are the fake addresses 1..7, assigned
+ * in insertion order. */
+static void init_by_addchild(void)
+{
+	xtree_t* tree = &mytree_by_addchild;
+	char* fake_addr = (char*)1;
+
+	xtree_add_child(tree, NULL, fake_addr, XTREE_APPEND);
+	++fake_addr;
+	xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND);
+	++fake_addr;
+	xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND);
+	++fake_addr;
+	xtree_add_child(tree, tree->root->start, fake_addr, XTREE_APPEND);
+	++fake_addr;
+	xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND);
+	++fake_addr;
+	xtree_add_child(tree, tree->root, fake_addr, XTREE_PREPEND);
+	++fake_addr;
+	xtree_add_child(tree, tree->root->start->next, fake_addr, XTREE_PREPEND);
+}
+
+/* Fixture setup: initializes the empty tree and builds the pre-filled one.
+ * NOTE(review): only mytree_empty goes through xtree_init(); the pre-filled
+ * tree appears to rely on its static zero-initialization -- confirm this is
+ * intended. */
+static void setup(void)
+{
+	xtree_init(&mytree_empty, NULL);
+	init_by_addchild();
+}
+
+/* Fixture teardown: frees both fixture trees. */
+static void teardown(void)
+{
+	xtree_free(&mytree_empty);
+	xtree_free(&mytree_by_addchild);
+}
+
+/*****************************************************************************
+ *  UNIT TESTS                                                               *
+ ****************************************************************************/
+
+/* A freshly initialized tree has no root, no node, a depth of 0 and its
+ * depth marked as cached. */
+START_TEST(test_xtree_creation_unmanaged)
+{
+	xtree_t* tree = &mytree_empty;
+
+	fail_unless(tree->root == NULL,
+		"tree has a root on creation");
+	fail_unless(tree->count == 0,
+		"tree has nodes on creation");
+	fail_unless(tree->depth == 0,
+		"tree has a depth on creation");
+	fail_unless(xtree_depth_const(tree) == 0,
+		"tree depth is not 0 on creation");
+	fail_unless(tree->state == XTREE_STATE_DEPTHCACHED,
+		"tree is not cached on creation");
+}
+END_TEST
+
+/* Adds a root then two children to the empty tree (no free function set),
+ * checking after each step the node count, the depth, the cached-depth state
+ * and every parent/child/sibling link. */
+START_TEST(test_xtree_add_root_node_unmanaged)
+{
+	xtree_t* tree = &mytree_empty;
+	char* fake_addr = (char*)1;
+
+	/* step 1: root node */
+	fail_unless(xtree_add_child(tree, NULL, fake_addr, XTREE_APPEND) != NULL,
+		"unable to add root node");
+	fail_unless(tree->root != NULL,
+		"root node has not been allocated");
+	fail_unless(tree->free == NULL,
+		"bad free function in the tree");
+	fail_unless(tree->count == 1,
+		"there should be at least one node and only one in node count");
+	fail_unless(xtree_depth_const(tree) == 1,
+		"tree should have a depth of one (depth %d)",
+		xtree_depth_const(tree));
+	fail_unless(tree->root->data == (void*)1,
+		"node data is incorrect");
+	fail_unless(tree->root->parent == NULL,
+		"root node has a parent");
+	fail_unless(tree->root->start == NULL && tree->root->end == NULL,
+		"root node should not already have child in it");
+	fail_unless(tree->root->next == NULL && tree->root->previous == NULL,
+		"root node have invalid siblings");
+
+	xtree_refresh_depth(tree);
+	fail_unless(tree->depth == 1,
+		"root node refreshed should have one depth (root level)");
+	fail_unless(tree->state == XTREE_STATE_DEPTHCACHED,
+		"root node should now have its depth been cached");
+
+	/* step 2: a second root must be refused and change nothing */
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, NULL, fake_addr, XTREE_APPEND) == NULL,
+		"xtree_add_child with NULL parent and root node in tree should "
+		"return a NULL pointer");
+	fail_unless(tree->root->data == (void*)1,
+		"xtree_add_child generated an operation and should not in context");
+	fail_unless(tree->root->start == NULL,
+		"xtree_add_child had added an invalid child");
+	fail_unless(tree->root->start == tree->root->end,
+		"xtree_add_child invalidated root node child list");
+
+	/* step 3: first child of the root */
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND)
+		!= NULL,
+		"unable to add child node to root node");
+	fail_unless(tree->count == 2,
+		"bad tree node count");
+	fail_unless(xtree_depth_const(tree) == 2,
+		"bad depth after root's first child");
+	fail_unless(tree->state != XTREE_STATE_DEPTHCACHED,
+		"tree should not have already cached level count");
+
+	fail_unless(tree->root &&
+		tree->root->data == (void*)1 &&
+		tree->root->parent == NULL &&
+		tree->root->next == NULL && tree->root->previous == NULL,
+		"root node has badly been modified");
+	fail_unless(!!tree->root->start,
+		"root has no child, but should have one");
+	fail_unless(tree->root->start == tree->root->end,
+		"root child list is inconsistent");
+
+	fail_unless(tree->root->start->data == (void*)2,
+		"bad child data");
+	fail_unless(tree->root->start->parent == tree->root,
+		"child parent does not point to root node");
+	fail_unless(!tree->root->start->start,
+		"child should be unique for now");
+	fail_unless(tree->root->start->start == tree->root->start->end,
+		"child children list is inconsistent");
+	fail_unless(!tree->root->start->next && !tree->root->start->previous,
+		"child should not have siblings");
+
+	/* step 4: second child of the root */
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND)
+		!= NULL,
+		"unable to add second child");
+
+	fail_unless(tree->root->start != tree->root->end,
+		"root should have more children");
+	fail_unless(tree->root->start->next == tree->root->end &&
+		tree->root->end->previous == tree->root->start &&
+		tree->root->end->next == NULL &&
+		tree->root->start->previous == NULL,
+		"root children list is inconsistent");
+	fail_unless(tree->root->end->data == (void*)3,
+		"root second child has bad data");
+}
+END_TEST
+
+/* test_table[i] is 1 while element i is expected to be alive; myfree()
+ * clears the slot of the element it is given. */
+char test_table[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+/* Free function installed on the tree: checks that the int pointed to by `x`
+ * is a valid, still-marked index of test_table, clears its mark, then
+ * releases `x` with xfree(). */
+static void myfree(void* x)
+{
+	int* item = (int*)x;
+	fail_unless(*item < 10 && *item >= 0,
+		"bad data passed to freeing function");
+	fail_unless(test_table[*item] == 1,
+		"item was duplicated/corrupted");
+	test_table[*item] = 0;
+	xfree(x);
+}
+
+/* here we construct a tree in the following form :
+ *            R
+ *           / \
+ *          /\
+ *         /\
+ *        /\
+ *       /
+ * Then free it (in teardown).
+ */
+/* Builds a 10-node tree of xmalloc'ed ints (values 0..9) with myfree as free
+ * function, descending into the first child after every even index, frees
+ * the tree and checks that every test_table slot was cleared. */
+START_TEST(test_xtree_freeing_elements)
+{
+	xtree_t* tree = &mytree_empty;
+	xtree_node_t* node = NULL;
+	int* x = NULL;
+	int i = 0;
+
+	xtree_set_freefunc(tree, myfree);
+
+	x = (int*)xmalloc(sizeof(int));
+	fail_unless(x != NULL,
+		"unable to allocate memory for test");
+	*x = i;
+	test_table[i] = 1;
+	xtree_add_child(tree, NULL, x, XTREE_APPEND);
+	node = tree->root;
+
+	for(i = 1; i < 10; ++i) {
+		x = (int*)xmalloc(sizeof(int));
+		fail_unless(x != NULL,
+			"unable to allocate memory for test");
+		*x = i;
+		test_table[i] = 1;
+		xtree_add_child(tree, node, x, XTREE_APPEND);
+		/* go one level down after each pair of children */
+		if ((i % 2) == 0) {
+			node = node->start;
+		}
+	}
+
+	xtree_free(tree);
+
+	for(i = 0; i < 10; ++i) {
+		fail_unless(test_table[i] == 0,
+			"one element has not been freed in the table (num %d)",
+			i);
+	}
+}
+END_TEST
+
+/* here we construct a tree in the following form :
+ *            1
+ *        / /   \ \
+ *       6 2     3 5
+ *        / \
+ *       7   4
+ * numbers are chronological adding order.
+ */
+/* Builds the tree above with xtree_add_child() only (append and prepend) and
+ * checks the resulting sibling order in both directions and the children of
+ * node 2. */
+START_TEST(test_xtree_with_add_child)
+{
+	xtree_t* tree = &mytree_empty;
+	xtree_node_t* level1_2 = NULL;
+	char* fake_addr = (char*)1;
+
+	fail_unless(xtree_add_child(tree, NULL, fake_addr, XTREE_APPEND) != NULL,
+		NULL);
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND)
+		!= NULL, NULL);
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND)
+		!= NULL, NULL);
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root->start, fake_addr, XTREE_APPEND)
+		!= NULL, NULL);
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND)
+		!= NULL, NULL);
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_PREPEND)
+		!= NULL, NULL);
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root->start->next,
+		fake_addr, XTREE_PREPEND)
+		!= NULL, NULL);
+
+	fail_unless(tree->root->start->data == (void*)6 &&
+		tree->root->start->next->data == (void*)2 &&
+		tree->root->start->next->next->data == (void*)3 &&
+		tree->root->start->next->next->next->data == (void*)5,
+		"bad tree for children level 1 browsing the tree forward");
+	fail_unless(tree->root->end->data == (void*)5 &&
+		tree->root->end->previous->data == (void*)3 &&
+		tree->root->end->previous->previous->data == (void*)2 &&
+		tree->root->end->previous->previous->previous->data == (void*)6,
+		"bad tree for children level 1 browsing backward");
+	fail_unless(tree->root->start->previous == NULL &&
+		tree->root->end->next == NULL,
+		"bad tree edges");
+	fail_unless(tree->root->start->start == NULL &&             /* 6 */
+		tree->root->start->next->start != NULL &&           /* 2 */
+		tree->root->start->next->end != NULL &&             /* 2 */
+		tree->root->start->next->start !=                   /* 2 */
+		tree->root->start->next->end &&                     /* 2 */
+		tree->root->start->next->next->start == NULL &&     /* 3 */
+		tree->root->start->next->next->next->start == NULL, /* 5 */
+		"bad tree structure for children of child list level 1");
+	level1_2 = tree->root->start->next;
+	fail_unless(level1_2->start->data == (void*)7 &&
+		level1_2->start->start == NULL &&
+		level1_2->start->previous == NULL &&
+		level1_2->start->next ==
+		level1_2->end &&
+		level1_2->end->data == (void*)4 &&
+		level1_2->end->next == NULL &&
+		level1_2->end->start == NULL,
+		"bad tree structure for children level 2");
+}
+END_TEST
+
+/* here we construct a tree in the following form :
+ *              1
+ *        / / /   \ \ \
+ *       7 2 6     4 3 5
+ * numbers are chronological adding order.
+ */
+/* Builds the tree above mostly with xtree_add_sibling() (which must refuse
+ * to add a sibling to the root) and checks the resulting order of the root's
+ * children in both directions. */
+START_TEST(test_xtree_with_add_sibling)
+{
+	xtree_t* tree = &mytree_empty;
+	char* fake_addr = (char*)1;
+
+	fail_unless(xtree_add_sibling(tree, NULL, fake_addr, XTREE_APPEND) != NULL,
+		NULL); /* 1 */
+	++fake_addr;
+	fail_unless(xtree_add_child(tree, tree->root, fake_addr, XTREE_APPEND)
+		!= NULL, NULL); /* 2 */
+	fail_unless(xtree_add_sibling(tree, tree->root, fake_addr, XTREE_APPEND)
+		== NULL, "add_sibling should return null when used with root node");
+	++fake_addr;
+	fail_unless(xtree_add_sibling(tree, tree->root->start, fake_addr, XTREE_APPEND)
+		!= NULL, NULL); /* 3 */
+	++fake_addr;
+	fail_unless(xtree_add_sibling(tree, tree->root->end, fake_addr, XTREE_PREPEND)
+		!= NULL, NULL); /* 4 */
+	++fake_addr;
+	fail_unless(xtree_add_sibling(tree, tree->root->end, fake_addr, XTREE_APPEND)
+		!= NULL, NULL); /* 5 */
+	++fake_addr;
+	fail_unless(xtree_add_sibling(tree, tree->root->start, fake_addr, XTREE_APPEND)
+		!= NULL, NULL); /* 6 */
+	++fake_addr;
+	fail_unless(xtree_add_sibling(tree, tree->root->start, fake_addr, XTREE_PREPEND)
+		!= NULL, NULL); /* 7 */
+
+	fail_unless(tree->root->data == (void*)1,
+		"bad root node");
+
+	fail_unless(tree->root->start->data == (void*)7 &&
+		tree->root->start->next->data == (void*)2 &&
+		tree->root->start->next->next->data == (void*)6 &&
+		tree->root->start->next->next->next->data == (void*)4,
+		"bad tree structure browsing forward");
+	fail_unless(tree->root->end->data == (void*)5 &&
+		tree->root->end->previous->data == (void*)3 &&
+		tree->root->end->previous->previous->data == (void*)4 &&
+		tree->root->end->previous->previous->previous->data == (void*)6,
+		"bad tree structure browsing backward");
+	fail_unless(tree->root->start->previous == NULL &&
+		tree->root->end->next == NULL,
+		"bad tree edges");
+	fail_unless(tree->root->start->start == NULL &&             /* 7 */
+		tree->root->start->next->start == NULL &&           /* 2 */
+		tree->root->start->next->next->start == NULL &&     /* 6 */
+		tree->root->end->start == NULL &&                   /* 5 */
+		tree->root->end->previous->start == NULL &&         /* 3 */
+		tree->root->end->previous->previous->start == NULL, /* 4 */
+		"bad tree structure level 1 should not have children");
+}
+END_TEST
+
+/* The pre-filled fixture tree must start with an uncached depth, report a
+ * depth of 3, and report the same value through the cached, const and
+ * per-node variants when asked from the root. */
+START_TEST(test_xtree_depth)
+{
+	xtree_t* tree = &mytree_by_addchild;
+	uint32_t size;
+
+	fail_unless(~tree->state & XTREE_STATE_DEPTHCACHED,
+		"state is cached, should not be");
+	size = xtree_depth(tree);
+	/* size is a uint32_t: convert it to match the %lu conversion */
+	fail_unless(size == 3, "bad depth, returned: %lu", (unsigned long)size);
+	fail_unless(xtree_depth(tree) == size, "error refreshing the cached depth");
+	fail_unless(xtree_depth_const(tree) == size, NULL);
+	fail_unless(xtree_depth_const_node(tree, tree->root) == size, NULL);
+	/* NOTE(review): the three checks below only require a non-zero depth
+	 * for the subtrees; confirm whether exact values should be asserted. */
+	fail_unless(xtree_depth_const_node(tree, tree->root->start),
+		"bad subtree level depth");
+	fail_unless(xtree_depth_const_node(tree, tree->root->start->next),
+		"bad subtree level depth");
+	fail_unless(xtree_depth_const_node(tree, tree->root->start->next->start),
+		"bad subtree level depth");
+}
+END_TEST
+
+/* One expected (or observed) call of the walk action: the data of the node
+ * visited, the visit kind and the level passed to the action. */
+typedef struct {
+	void* node_data;
+	uint8_t which;
+	uint32_t level;
+} walk_couples_t;
+
+/* State shared with action_test(): next expected call, number of mismatches,
+ * whether the action ran at all, and the first mismatching call observed. */
+typedef struct walk_st {
+	walk_couples_t* table_pos;
+	uint8_t error;
+	uint8_t executed;
+	walk_couples_t got;
+} walk_test_t;
+
+
+/* Walk action: compares each call with the entry at table_pos. On a match it
+ * advances table_pos and returns 1; on a mismatch it records the call in
+ * `got`, increments `error` and returns 0. Returns 1 when `arg` is NULL. */
+static uint8_t action_test(xtree_node_t* node,
+			   uint8_t which,
+			   uint32_t level,
+			   void* arg)
+{
+	walk_test_t* data = (walk_test_t*)arg;
+	if (data) {
+		data->executed = 1;
+		if (data->table_pos->node_data == node->data &&
+		    data->table_pos->which == which &&
+		    data->table_pos->level == level) {
+			++data->table_pos;
+		} else {
+			++data->error;
+			data->got.node_data = node->data;
+			data->got.which = which;
+			data->got.level = level;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/* Adds node 8 under node 4 of the fixture tree, checks that invalid
+ * xtree_walk() calls return NULL without running the action, then walks the
+ * whole tree and checks the exact sequence of action calls against `table`. */
+START_TEST(test_xtree_walk)
+{
+	xtree_t* tree = &mytree_by_addchild;
+	xtree_node_t* node = NULL;
+	walk_couples_t table[] = {
+		/*  0 */ {(void*)1, XTREE_PREORDER, 0},
+		/*  1 */ {(void*)6, XTREE_LEAF    , 1},
+		/*  2 */ {(void*)1, XTREE_INORDER , 0},
+		/*  3 */ {(void*)2, XTREE_PREORDER, 1},
+		/*  4 */ {(void*)7, XTREE_LEAF    , 2},
+		/*  5 */ {(void*)2, XTREE_INORDER , 1},
+		/*  6 */ {(void*)4, XTREE_PREORDER, 2},
+		/*  7 */ {(void*)8, XTREE_LEAF    , 3},
+		/*  8 */ {(void*)4, XTREE_ENDORDER, 2},
+		/*  9 */ {(void*)2, XTREE_ENDORDER, 1},
+		/* 10 */ {(void*)1, XTREE_INORDER , 0},
+		/* 11 */ {(void*)3, XTREE_LEAF    , 1},
+		/* 12 */ {(void*)1, XTREE_INORDER , 0},
+		/* 13 */ {(void*)5, XTREE_LEAF    , 1},
+		/* 14 */ {(void*)1, XTREE_ENDORDER, 0}
+	};
+	/* one past the last entry: where table_pos ends after a full walk */
+	walk_couples_t* table_end = table + (sizeof(table)/sizeof(table[0]));
+	walk_couples_t expected = {NULL, 0, 0};
+	walk_test_t walk_data = {NULL, 0, 0}; /* standard: init stay static */
+	walk_data.table_pos = table;
+
+	node = xtree_add_child(tree, tree->root->start->next->end, (void*)8,
+		XTREE_APPEND);
+	fail_unless(node == tree->root->start->next->end->start,
+		"fail to add required node for tests");
+
+	/* invalid cases */
+	node = xtree_walk(tree, NULL, UINT32_MAX, 0, NULL, NULL);
+	fail_unless(node == NULL, "invalid case, however returned not null");
+	node = xtree_walk(NULL, tree->root, UINT32_MAX, 0, NULL, NULL);
+	fail_unless(node == NULL, "invalid case, however returned not null");
+	node = xtree_walk(tree, tree->root, UINT32_MAX, 0, NULL, NULL);
+	fail_unless(node == NULL, "invalid case, however returned not null");
+
+	/* should not execute function */
+	node = xtree_walk(tree, tree->root, UINT32_MAX, 0,
+		action_test, &walk_data);
+	fail_unless(node == NULL, "invalid case, however returned not null");
+	fail_unless(walk_data.executed == 0,
+		"invalid case (min > max) but got executed");
+	fail_unless(walk_data.error == 0,
+		"invalid case, error detected but should not have been executed");
+	fail_unless(walk_data.table_pos == table,
+		"invalid case table_pos advanced but should not");
+
+	/* test tree walk through */
+	node = xtree_walk(tree, NULL, 0, UINT32_MAX, action_test, &walk_data);
+	fail_unless(walk_data.executed == 1,
+		"should have executed at least one time");
+	fail_unless(walk_data.table_pos != NULL,
+		"invalid pointer value for table_pos");
+	/* The message arguments are evaluated even when the check passes (see
+	 * the guard needed in test_xtree_find), and after a complete walk
+	 * table_pos is one past the end of `table`: only read the expected
+	 * entry while table_pos still points inside the table. */
+	if (walk_data.table_pos >= table && walk_data.table_pos < table_end)
+		expected = *walk_data.table_pos;
+	/* %p for the node data pointers; integer arguments are converted to
+	 * the exact types named by their conversions. */
+	fail_unless(walk_data.table_pos == table_end,
+		"unexpected stop (data, which, level, couple index)"
+		" expected: %p: %u: %lu: %d,"
+		" got %p: %u: %lu",
+		expected.node_data,
+		(unsigned)expected.which,
+		(unsigned long)expected.level,
+		(int)(walk_data.table_pos - table),
+		/* got */
+		walk_data.got.node_data,
+		(unsigned)walk_data.got.which,
+		(unsigned long)walk_data.got.level);
+	fail_unless(node == NULL, "returned value indicates unexpected stop");
+	fail_unless(walk_data.error == 0, "error counter was incremented");
+}
+END_TEST
+
+/* xtree_find() comparison callback: returns 0 when the node data pointer is
+ * identical to `arg`, non-zero otherwise. */
+uint8_t compare_test(const void* node_data, const void* arg)
+{
+	return !(node_data == arg);
+}
+
+START_TEST(test_xtree_find)
+{
+	xtree_t* tree = &mytree_by_addchild;
+	xtree_node_t* node = NULL;
+
+	/* test not found result or bad params */
+	node = xtree_find(tree, compare_test, NULL);
+	fail_unless(node == NULL,
+		"bad result (should be NULL): %x",
+		(node)?node->data:NULL);
+	/* the test ^^^^ is necessary since this is a macro/function, the node is
+	 * deferred at the same time it is being tested */
+
+	node = xtree_find(tree, NULL, (void*)4);
+	fail_unless(node == NULL,
+		"bad result (should be NULL): %x",
+		(node)?node->data:NULL);
+
+	node = xtree_find(tree, compare_test, (void*)10);
+	fail_unless(node == NULL,
+		"bad result (should be NULL): %x",
+		(node)?node->data:NULL);
+
+	/* test different node depth */
+	node = xtree_find(tree, compare_test, (void*)1);
+	fail_unless(node != NULL,
+		"result is null however it should have been found");
+	fail_unless(node == tree->root,
+		"root node should have been found, but found : %x",
+		(node)?node->data:NULL);
+
+	node = xtree_find(tree, compare_test, (void*)4);
+	fail_unless(node != NULL,
+		"result is null however it should have been found");
+	fail_unless(tree->root->start->next->end == node,
+		"bad result (search 4): %x",
+		(node)?node->data:NULL);
+
+	node = xtree_find(tree, compare_test, (void*)5);
+	fail_unless(node != NULL,
+		"result is null however it should have been
found"); + fail_unless(tree->root->end == node, + "bad result (search 5): %x", + (node)?node->data:NULL); + + /* test node with parent and with childs */ + node = xtree_find(tree, compare_test, (void*)2); + fail_unless(node != NULL, + "result is null however it should have been found"); + fail_unless(tree->root->start->next == node, + "bad result (search 2): %x", + (node)?node->data:NULL); + +} +END_TEST + +START_TEST(test_xtree_delete) +{ + xtree_t* tree = &mytree_by_addchild; + + /* bad args */ + fail_unless(xtree_depth(tree) == 3, NULL); + fail_unless(xtree_delete(NULL, tree->root) == NULL, "bad return"); + fail_unless(xtree_get_count(tree) == 7, "bad count update"); + fail_unless(tree->state & XTREE_STATE_DEPTHCACHED, + "level should still be cached"); + fail_unless(xtree_delete(tree, NULL) == NULL, "bad return"); + fail_unless(xtree_get_count(tree) == 7, "bad count update"); + fail_unless(tree->state & XTREE_STATE_DEPTHCACHED, + "level should still be cached"); + fail_unless(xtree_depth(tree) == 3, NULL); + + /* tree structure */ + fail_unless(xtree_delete(tree, tree->root->start) == tree->root, + "parent of 6 should have been root node"); + fail_unless(xtree_depth(tree) == 3, NULL); + fail_unless(tree->root->start->data == (void*)2 && + tree->root->start->next->data == (void*)3 && + tree->root->start->next->next->data == (void*)5, + "children should be now 2 -> 3 -> 5"); + fail_unless(tree->root->start->previous == NULL, + "bad children list edges"); + fail_unless(xtree_get_count(tree) == 6, "bad count update"); + fail_unless(tree->state & XTREE_STATE_DEPTHCACHED, + "level should still be cached"); + fail_unless(tree->depth == 3 && xtree_depth(tree) == 3, + "depth should not have changed"); + + /* structure and depth changing */ + fail_unless(xtree_delete(tree, tree->root->start->start) == + tree->root->start, + "parent of 7 should have been node 2"); + fail_unless(xtree_depth(tree) == 3, NULL); + fail_unless(tree->state & XTREE_STATE_DEPTHCACHED, + "level 
should still be cached"); + fail_unless(tree->depth == 3, "depth should not have changed"); + fail_unless(xtree_get_count(tree) == 5, "bad count update"); + + fail_unless(xtree_delete(tree, tree->root->start->start) == + tree->root->start, + "parent of 4 should have been node 2"); + fail_unless(tree->root->start->start == NULL && + tree->root->start->end == NULL, + "bad edges for node 2"); + fail_unless(tree->root->start->data == (void*)2 && + tree->root->start->next->data == (void*)3 && + tree->root->start->next->next->data == (void*)5, + "tree deconstruction"); + fail_unless(tree->root->start->previous == NULL && + tree->root->end->next == NULL, + "tree edges deconstruction"); + fail_unless(~tree->state & XTREE_STATE_DEPTHCACHED, + "level should not be cached"); + fail_unless(xtree_depth(tree) == 2, + "the last removal should have reduced depth"); + + /* root node delete test */ + fail_unless(xtree_delete(tree, tree->root) == NULL, "bad return"); +} +END_TEST + +START_TEST(test_xtree_get_parents) +{ + xtree_t* tree = &mytree_by_addchild; + xtree_node_t** parents = NULL; + uint32_t size = 0; + + /* stress~ */ + fail_unless(xtree_get_parents(NULL, NULL, NULL) == NULL, "bad behavior"); + fail_unless(xtree_get_parents(tree, NULL, NULL) == NULL, "bad behavior"); + fail_unless(xtree_get_parents(NULL, tree->root->start, NULL) == NULL, + "bad behavior"); + fail_unless(xtree_get_parents(NULL, NULL, &size) == NULL, "bad behavior"); + fail_unless(xtree_get_parents(tree, NULL, &size) == NULL, "bad behavior"); + fail_unless(xtree_get_parents(tree, tree->root->start, NULL) == NULL, + "bad behavior"); + fail_unless(xtree_get_parents(tree, tree->root, &size) == NULL, + "bad behavior"); + + /* node 6 */ + parents = xtree_get_parents(tree, tree->root->start, &size); + fail_unless(parents != NULL, "should have a parent here"); + fail_unless(size == 1, "should have parents' list size == 1"); + fail_unless(parents[0] == tree->root, + "parents list of 6 should be root node"); + 
xfree(parents); + + /* node 1 */ + parents = xtree_get_parents(tree, tree->root, &size); + fail_unless(parents == NULL, "root node should not have a parent list"); + + /* node 2 */ + parents = xtree_get_parents(tree, tree->root->start->next, &size); + fail_unless(parents != NULL, "should have a parent here"); + fail_unless(size == 1, "should have parents' list size == 1"); + fail_unless(parents[0] == tree->root, + "parents list of 2 should be root node"); + xfree(parents); + + /* node 3 */ + parents = xtree_get_parents(tree, tree->root->start->next->next, &size); + fail_unless(parents != NULL, "should have a parent here"); + fail_unless(size == 1, "should have parents' list size == 1"); + fail_unless(parents[0] == tree->root, + "parents list of 3 should be root node"); + xfree(parents); + + /* node 5 */ + parents = xtree_get_parents(tree, tree->root->end, &size); + fail_unless(parents != NULL, "should have a parent here"); + fail_unless(size == 1, "should have parents' list size == 1"); + fail_unless(parents[0] == tree->root, + "parents list of 5 should be root node"); + xfree(parents); + + /* node 7 */ + parents = xtree_get_parents(tree, tree->root->start->next->start, &size); + fail_unless(parents != NULL, "should have parents here"); + fail_unless(size == 2, "should have parents' list size == 2"); + fail_unless(parents[0] == tree->root->start->next, + "parents[0] of 7 should be node 2 (actually %x)", + (parents[0])?parents[0]->data:NULL); + fail_unless(parents[1] == tree->root, + "parents[1] of 7 should be root node"); + xfree(parents); + + /* node 4 */ + parents = xtree_get_parents(tree, tree->root->start->next->end, &size); + fail_unless(parents != NULL, "should have parents here"); + fail_unless(size == 2, "should have parents' list size == 2"); + fail_unless(parents[0] == tree->root->start->next, + "parents[0] of 4 should be node 2 (actually %x)", + (parents[0])?parents[0]->data:NULL); + fail_unless(parents[1] == tree->root, + "parents[1] of 7 should be root 
node"); + xfree(parents); +} +END_TEST + +START_TEST(test_xtree_common) +{ + xtree_t* tree = &mytree_by_addchild; + xtree_node_t* node = NULL; + const xtree_node_t* node_list[7]; + + /* invalid cases */ + node = xtree_common(NULL, NULL, 10); + fail_unless(node == NULL, "invalid case, however returned not null"); + node = xtree_common(tree, NULL, 10); + fail_unless(node == NULL, "invalid case, however returned not null"); + node_list[0] = NULL; + node_list[1] = tree->root->end; + node_list[2] = tree->root->start; + node = xtree_common(tree, node_list, 3); + fail_unless(node == NULL, "invalid case, however returned not null"); + node_list[0] = tree->root; + node = xtree_common(tree, node_list, 1); + fail_unless(node == NULL, "invalid case, however returned not null"); + node_list[0] = tree->root->start; + node_list[1] = tree->root->end; + node = xtree_common(NULL, node_list, 2); + fail_unless(node == NULL, "invalid case, however returned not null"); + node = xtree_common(tree, node_list, 0); + fail_unless(node == NULL, "invalid case, however returned not null"); + + /* test for good common ancestor */ + + /* 7, 5 -> 1 */ + node_list[0] = tree->root->start->next->start; + node_list[1] = tree->root->end; + node = xtree_common(tree, node_list, 2); + fail_unless(node == tree->root, "bad returned node : %x", + (node)?node->data:NULL); + + /* 2, 7 -> 1 */ + node_list[0] = tree->root->start->next; + node_list[1] = tree->root->start->next->start; + node = xtree_common(tree, node_list, 2); + fail_unless(node == tree->root, "bad returned node"); + + /* 4, 7 -> 2 */ + node_list[0] = tree->root->start->next->end; + node = xtree_common(tree, node_list, 2); + fail_unless(node == tree->root->start->next, "bad returned node"); + + /* 4, 7, 2 -> 1 */ + node_list[2] = tree->root->start->next; + node = xtree_common(tree, node_list, 3); + fail_unless(node == tree->root, "bad returned node"); + + /* 6, 7 -> 1 */ + node_list[0] = tree->root->start; + node = xtree_common(tree, node_list, 
2); + fail_unless(node == tree->root, "bad returned node"); + + /* 2, 7 -> 1 */ + node_list[0] = tree->root->start->next; + node = xtree_common(tree, node_list, 2); + fail_unless(node == tree->root, "bad returned node"); + + /* 2, 1 -> NULL */ + node_list[1] = tree->root; + node = xtree_common(tree, node_list, 2); + fail_unless(node == NULL, "bad returned node"); + + /* 2, 3, 5, 6 -> 1 */ + node_list[1] = tree->root->end->previous; + node_list[2] = tree->root->end; + node_list[3] = tree->root->start; + node = xtree_common(tree, node_list, 4); + fail_unless(node == tree->root, "bad returned node"); + + /* 2, 3, 5, 6, 7, 4 -> 1 */ + node_list[4] = tree->root->start->next->start; + node_list[5] = tree->root->start->next->end; + node = xtree_common(tree, node_list, 6); + fail_unless(node == tree->root, "bad returned node"); + + /* 2, 3, 5, 6, 7, 4, 1 -> NULL */ + node_list[6] = tree->root; + node = xtree_common(tree, node_list, 7); + fail_unless(node == NULL, "bad returned node"); +} +END_TEST + +START_TEST(test_xtree_get_leaves) +{ + xtree_t* tree = &mytree_by_addchild; + xtree_node_t** nodes = NULL; + uint32_t size = 0; + + /* invalid cases */ + nodes = xtree_get_leaves(NULL, NULL, NULL); + fail_unless(nodes == NULL, "invalid case, however returned not null"); + nodes = xtree_get_leaves(tree, NULL, NULL); + fail_unless(nodes == NULL, "invalid case, however returned not null"); + nodes = xtree_get_leaves(tree, tree->root, NULL); + fail_unless(nodes == NULL, "invalid case, however returned not null"); + nodes = xtree_get_leaves(tree, NULL, &size); + fail_unless(nodes == NULL, "invalid case, however returned not null"); + nodes = xtree_get_leaves(NULL, tree->root, &size); + fail_unless(nodes == NULL, "invalid case, however returned not null"); + nodes = xtree_get_leaves(NULL, tree->root, NULL); + fail_unless(nodes == NULL, "invalid case, however returned not null"); + nodes = xtree_get_leaves(tree, NULL, &size); + fail_unless(nodes == NULL, "invalid case, however 
returned not null"); + + /* get real leaves */ + nodes = xtree_get_leaves(tree, tree->root->start, &size); + fail_unless(nodes == NULL, "should have no leaves descending 6"); + + nodes = xtree_get_leaves(tree, tree->root->start->next, &size); + fail_unless(size == 2, "should have 2 leaves from 2"); + fail_unless(nodes[0] == tree->root->start->next->start, + "nodes[0] != nodes 7"); + fail_unless(nodes[1] == tree->root->start->next->end, + "nodes[1] != nodes 4"); + xfree(nodes); + + nodes = xtree_get_leaves(tree, tree->root, &size); + fail_unless(size != 6, "should have 6 leaves from root node"); + fail_unless(nodes[0] == tree->root->start, "bad leaves result"); + fail_unless(nodes[1] == tree->root->start->next->start, "bad leaves result"); + fail_unless(nodes[2] == tree->root->start->next->end, + "bad leaves result"); + fail_unless(nodes[3] == tree->root->start->next->next, "bad leaves result"); + fail_unless(nodes[4] == tree->root->end, + "bad leaves result"); + xfree(nodes); +} +END_TEST + +/***************************************************************************** + * TEST SUITE * + ****************************************************************************/ + +Suite* xtree_suite(void) +{ + Suite* s = suite_create("xtree"); + TCase* tc_core = tcase_create("Core"); + + tcase_add_checked_fixture(tc_core, setup, teardown); + tcase_add_test(tc_core, test_xtree_creation_unmanaged); + tcase_add_test(tc_core, test_xtree_add_root_node_unmanaged); + tcase_add_test(tc_core, test_xtree_freeing_elements); + tcase_add_test(tc_core, test_xtree_with_add_child); + tcase_add_test(tc_core, test_xtree_with_add_sibling); + tcase_add_test(tc_core, test_xtree_depth); + tcase_add_test(tc_core, test_xtree_walk); + tcase_add_test(tc_core, test_xtree_find); + tcase_add_test(tc_core, test_xtree_delete); + tcase_add_test(tc_core, test_xtree_get_parents); + tcase_add_test(tc_core, test_xtree_common); + tcase_add_test(tc_core, test_xtree_get_leaves); + suite_add_tcase(s, tc_core); + + 
return s; +} + +/***************************************************************************** + * TEST RUNNER * + ****************************************************************************/ + +int main(void) +{ + int number_failed; + SRunner* sr = srunner_create(xtree_suite()); + + srunner_run_all(sr, CK_NORMAL); + number_failed = srunner_ntests_failed(sr); + srunner_free(sr); + + return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} +