diff --git a/META b/META index 61dc5fc5d5909805e316bca70d704aa4f3f086e0..5fe143bd8e0b25a63e6057d8c4328de19cfd46dc 100644 --- a/META +++ b/META @@ -9,8 +9,8 @@ Name: slurm Major: 2 Minor: 6 - Micro: 8 - Version: 2.6.8 + Micro: 9 + Version: 2.6.9 Release: 1 ## diff --git a/NEWS b/NEWS index 51e2c712dd6a731f40c73df1398dfb70d0cd48d4..5dc1fcc3da9a488c124ffe51311f2c7ae1b7036b 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,13 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and admins. +* Changes in Slurm 2.6.9 +======================== + -- Fix sinfo to work correctly with draining/mixed nodes as well as filtering + on Mixed state. + -- Fix sacctmgr update user with no "where" condition. + -- Fix logic bugs for SchedulerParameters option of max_rpc_cnt. + * Changes in Slurm 2.6.8 ======================== -- Add support for Torque/PBS job array options and environment variables. diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 294ed10fa9b07c4cb5be51d58a64868c04d354b7..fdf353b65e25731293848e5c842e775dbd97ce6b 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -2199,10 +2199,9 @@ slurm_fd_t slurm_open_controller_conn(slurm_addr_t *addr) myproto->primary_controller.sin_port; } - msg_timeout *= 10; /* Do iteration every 0.1 secs */ for (retry = 0; retry < msg_timeout; retry++) { if (retry) - usleep(100000); + sleep(1); if (working_cluster_rec) { if (working_cluster_rec->control_addr.sin_port == 0) { slurm_set_addr( @@ -3683,12 +3682,11 @@ List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout) if (conn_timeout == (uint16_t) NO_VAL) conn_timeout = MIN(slurm_get_msg_timeout(), 10); - conn_timeout *= 10; /* Do iteration every 0.1 secs */ /* This connect retry logic permits Slurm hierarchical communications * to better survive slurmd restarts */ for (i = 0; i <= conn_timeout; i++) { - if (i > 0) - usleep(100000); + if (i) 
+ sleep(1); fd = slurm_open_msg_conn(&msg->address); if ((fd >= 0) || (errno != ECONNREFUSED)) break; diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index a989df1610b82b90117bf4a78f59f357d0272a27..12836ecd2f474c05687d43c7de48dd20e5caeb8f 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -244,7 +244,8 @@ static bool _job_is_completing(void) static bool _many_pending_rpcs(void) { //info("thread_count = %u", slurmctld_config.server_thread_count); - if (slurmctld_config.server_thread_count >= defer_rpc_cnt) + if ((defer_rpc_cnt > 0) && + (slurmctld_config.server_thread_count >= defer_rpc_cnt)) return true; return false; } diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index 198e3b4bf807f7886cd72756f77ad6928a7df3d0..55ea73bef30fbf564a18567bfba9adfdccf5a466 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -1697,6 +1697,13 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) user_cond->assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); + /* We need this to make sure we only change users, not + * accounts; if this list didn't exist it would change + * accounts. Having it blank is fine, it just needs to + * exist. This also happens in _set_cond, but that doesn't + * always happen. 
+ */ + user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); diff --git a/src/sinfo/print.c b/src/sinfo/print.c index 2b31371b40e3a17f0686226bc071f2233142c26a..dda08139266de507474b4f26c03e8dbea62fc45a 100644 --- a/src/sinfo/print.c +++ b/src/sinfo/print.c @@ -995,16 +995,6 @@ int _print_state_compact(sinfo_data_t * sinfo_data, int width, if (sinfo_data && sinfo_data->nodes_total) { my_state = sinfo_data->node_state; - if (IS_NODE_DRAIN(sinfo_data)) { - /* don't worry about mixed since the - * whole node is being drained. */ - } else if ((sinfo_data->cpus_alloc && sinfo_data->cpus_other) - || (sinfo_data->cpus_idle - && (sinfo_data->cpus_idle - != sinfo_data->cpus_total))) { - my_state &= NODE_STATE_FLAGS; - my_state |= NODE_STATE_MIXED; - } upper_state = node_state_string_compact(my_state); lower_state = _str_tolower(upper_state); diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c index a19716ed0fb21361083aa67218db75c0e2616226..3aa5c7a8a3b2f63ffe2aa6695c3d92b302c38ce7 100644 --- a/src/sinfo/sinfo.c +++ b/src/sinfo/sinfo.c @@ -245,6 +245,8 @@ _query_server(partition_info_msg_t ** part_pptr, static reserve_info_msg_t *old_resv_ptr = NULL, *new_resv_ptr; int error_code; uint16_t show_flags = 0; + int cc; + node_info_t *node_ptr; if (params.all_flag) show_flags |= SHOW_ALL; @@ -304,6 +306,46 @@ _query_server(partition_info_msg_t ** part_pptr, old_node_ptr = new_node_ptr; *node_pptr = new_node_ptr; + /* Set the node state as NODE_STATE_MIXED. */ + for (cc = 0; cc < new_node_ptr->record_count; cc++) { + node_ptr = &(new_node_ptr->node_array[cc]); + if (IS_NODE_DRAIN(node_ptr)) { + /* don't worry about mixed since the + * whole node is being drained. 
*/ + } else { + uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus; + int single_node_cpus = + (node_ptr->cpus / g_node_scaling); + + select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, + SELECT_NODEDATA_SUBCNT, + NODE_STATE_ALLOCATED, + &alloc_cpus); + if (params.cluster_flags & CLUSTER_FLAG_BG) { + if (!alloc_cpus && + (IS_NODE_ALLOCATED(node_ptr) || + IS_NODE_COMPLETING(node_ptr))) + alloc_cpus = node_ptr->cpus; + else + alloc_cpus *= single_node_cpus; + } + idle_cpus = node_ptr->cpus - alloc_cpus; + select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, + SELECT_NODEDATA_SUBCNT, + NODE_STATE_ERROR, + &err_cpus); + if (params.cluster_flags & CLUSTER_FLAG_BG) + err_cpus *= single_node_cpus; + idle_cpus -= err_cpus; + + if ((alloc_cpus && err_cpus) || + (idle_cpus && (idle_cpus != node_ptr->cpus))) { + node_ptr->node_state &= NODE_STATE_FLAGS; + node_ptr->node_state |= NODE_STATE_MIXED; + } + } + } + if (old_resv_ptr) { if (clear_old) old_resv_ptr->last_update = 0; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index c598437aeab794ffe2e3e4713056c41d92cd786a..a2b2a71be7f8c214d216fed1ec3ee1494799b68a 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -2819,7 +2819,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) if (error_code == SLURM_SUCCESS) { error_code = validate_job_create_req(job_desc_msg); if (job_desc_msg->array_bitmap) - schedule_cnt = 0; /* Do full schedule cycle */ + schedule_cnt = 100; } dump_job_desc(job_desc_msg); if (error_code == SLURM_SUCCESS) {