From 56d0b9f3675beada72e93a32c0bf65f52d50b931 Mon Sep 17 00:00:00 2001 From: yury <yury> Date: Tue, 12 Aug 2008 10:51:32 +0000 Subject: [PATCH] b=16367 r=nikita,adilger - fixes pools thread often wake up and eat CPU; - cleanups and comments. Read more details in the bug. --- lustre/include/lustre_dlm.h | 30 ++++-- lustre/ldlm/ldlm_pool.c | 176 +++++++++++++++++++++--------------- lustre/ldlm/ldlm_request.c | 20 ---- 3 files changed, 121 insertions(+), 105 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index fe555dcfae..17f8fc4a2a 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -259,14 +259,25 @@ struct ldlm_pool_ops { int (*po_setup)(struct ldlm_pool *pl, int limit); }; -/* One second for pools thread check interval. */ +/** + * One second for pools thread check interval. Each pool has own period. + */ #define LDLM_POOLS_THREAD_PERIOD (1) -/* 5% margin for modest pools. See ldlm_pool.c for details. */ +/** + * 5% margin for modest pools. See ldlm_pool.c for details. + */ #define LDLM_POOLS_MODEST_MARGIN (5) -/* A change to SLV in % after which we want to wake up pools thread asap. */ -#define LDLM_POOLS_FAST_SLV_CHANGE (50) +/** + * Default recalc period for server side pools in sec. + */ +#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1) + +/** + * Default recalc period for client side pools in sec. + */ +#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10) struct ldlm_pool { /** @@ -318,18 +329,18 @@ struct ldlm_pool { * Time when last slv from server was obtained. */ time_t pl_recalc_time; + /** + * Recalc period for pool. + */ + time_t pl_recalc_period; /** * Recalc and shrink ops. */ struct ldlm_pool_ops *pl_ops; /** - * Planned number of granted locks for next T. + * Number of planned locks for next period. */ int pl_grant_plan; - /** - * Grant plan step for next T. - */ - int pl_grant_step; /** * Pool statistics. 
*/ @@ -980,7 +991,6 @@ void unlock_res_and_lock(struct ldlm_lock *lock); void ldlm_pools_recalc(ldlm_side_t client); int ldlm_pools_init(void); void ldlm_pools_fini(void); -void ldlm_pools_wakeup(void); int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, int idx, ldlm_side_t client); diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index c5f303a0cb..c870218273 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -86,10 +86,6 @@ * pl_cancel_rate - Number of canceled locks for last T (calculated); * pl_grant_speed - Grant speed (GR - CR) for last T (calculated); * pl_grant_plan - Planned number of granted locks for next T (calculated); - * - * pl_grant_step - Grant plan step, that is how ->pl_grant_plan - * will change in next T (tunable); - * * pl_server_lock_volume - Current server lock volume (calculated); * * As it may be seen from list above, we have few possible tunables which may @@ -119,14 +115,25 @@ #define LDLM_POOL_HOST_L ((num_physpages >> (20 - CFS_PAGE_SHIFT)) * 50) /* - * Default step in % for grant plan. + * Maximal possible grant step plan in %. + */ +#define LDLM_POOL_MAX_GSP (30) + +/* + * Minimal possible grant step plan in %. + */ +#define LDLM_POOL_MIN_GSP (1) + +/* + * This controls the speed of reaching LDLM_POOL_MAX_GSP + * with increasing thread period. */ -#define LDLM_POOL_GSP (10) +#define LDLM_POOL_GSP_STEP (4) /* * LDLM_POOL_GSP% of all locks is default GP. */ -#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_GSP) / 100) +#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_MAX_GSP) / 100) /* * Max age for locks on clients. @@ -183,6 +190,37 @@ static inline struct ldlm_namespace *ldlm_pl2ns(struct ldlm_pool *pl) return container_of(pl, struct ldlm_namespace, ns_pool); } +/** + * Calculates suggested grant_step in % of available locks for passed + * \a period. This is later used in grant_plan calculations. 
+ */ +static inline int ldlm_pool_t2gsp(int t) +{ + /* + * This yields 1% grant step for anything below LDLM_POOL_GSP_STEP + * and up to 30% for anything higher than LDLM_POOL_GSP_STEP. + * + * How this will affect execution is the following: + * + * - for thread period 1s we will have grant_step 1% which is good from + * pov of taking some load off from server and push it out to clients. + * This is like that because 1% for grant_step means that server will + * not allow clients to get lots of locks in short period of time and + * keep all old locks in their caches. Clients will always have to + * get some locks back if they want to take some new; + * + * - for thread period 10s (which is default) we will have 23% which + * means that clients will have enough room to take some new locks + * without getting some back. All locks from this 23% which were not + * taken by clients in current period will contribute in SLV growing. + * SLV growing means more locks cached on clients until limit or grant + * plan is reached. + */ + return LDLM_POOL_MAX_GSP - + (LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) / + (1 << (t / LDLM_POOL_GSP_STEP)); +} + /** * Recalculates next grant limit on passed \a pl. * @@ -195,7 +233,8 @@ static inline void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl) limit = ldlm_pool_get_limit(pl); granted = atomic_read(&pl->pl_granted); - grant_step = ((limit - granted) * pl->pl_grant_step) / 100; + grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period); + grant_step = ((limit - granted) * grant_step) / 100; pl->pl_grant_plan = granted + grant_step; } @@ -302,38 +341,28 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) spin_lock(&pl->pl_lock); recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; - if (recalc_interval_sec > 0) { - /* - * Update statistics. - */ - ldlm_pool_recalc_stats(pl); - - /* + if (recalc_interval_sec >= pl->pl_recalc_period) { + /* + * Recalc SLV after last period. 
This should be done * _before_ recalculating new grant plan. */ ldlm_pool_recalc_slv(pl); - /* + /* * Make sure that pool informed obd of last SLV changes. */ ldlm_srv_pool_push_slv(pl); - /* + /* * Update grant_plan for new period. */ ldlm_pool_recalc_grant_plan(pl); - /* - * Zero out all rates and speed for the last period. - */ - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - atomic_set(&pl->pl_grant_speed, 0); pl->pl_recalc_time = cfs_time_current_sec(); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, recalc_interval_sec); } + spin_unlock(&pl->pl_lock); RETURN(0); } @@ -446,29 +475,23 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) ENTRY; spin_lock(&pl->pl_lock); + /* + * Check if we need to recalc lists now. + */ + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; + if (recalc_interval_sec < pl->pl_recalc_period) { + spin_unlock(&pl->pl_lock); + RETURN(0); + } /* * Make sure that pool knows last SLV and Limit from obd. */ ldlm_cli_pool_pop_slv(pl); - recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; - if (recalc_interval_sec > 0) { - /* - * Update statistics only every T. - */ - ldlm_pool_recalc_stats(pl); - - /* - * Zero out grant/cancel rates and speed for last period. 
- */ - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - atomic_set(&pl->pl_grant_speed, 0); - pl->pl_recalc_time = cfs_time_current_sec(); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, - recalc_interval_sec); - } + pl->pl_recalc_time = cfs_time_current_sec(); + lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, + recalc_interval_sec); spin_unlock(&pl->pl_lock); /* @@ -540,14 +563,33 @@ struct ldlm_pool_ops ldlm_cli_pool_ops = { */ int ldlm_pool_recalc(struct ldlm_pool *pl) { + time_t recalc_interval_sec; int count; + spin_lock(&pl->pl_lock); + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; + if (recalc_interval_sec > 0) { + /* + * Update pool statistics every 1s. + */ + ldlm_pool_recalc_stats(pl); + + /* + * Zero out all rates and speed for the last period. + */ + atomic_set(&pl->pl_grant_rate, 0); + atomic_set(&pl->pl_cancel_rate, 0); + atomic_set(&pl->pl_grant_speed, 0); + } + spin_unlock(&pl->pl_lock); + if (pl->pl_ops->po_recalc != NULL) { count = pl->pl_ops->po_recalc(pl); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, count); return count; } + return 0; } EXPORT_SYMBOL(ldlm_pool_recalc); @@ -598,7 +640,7 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, int count, int *eof, void *data) { int granted, grant_rate, cancel_rate, grant_step; - int nr = 0, grant_speed, grant_plan; + int nr = 0, grant_speed, grant_plan, lvf; struct ldlm_pool *pl = data; __u64 slv, clv; __u32 limit; @@ -608,25 +650,26 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, clv = pl->pl_client_lock_volume; limit = ldlm_pool_get_limit(pl); grant_plan = pl->pl_grant_plan; - grant_step = pl->pl_grant_step; granted = atomic_read(&pl->pl_granted); grant_rate = atomic_read(&pl->pl_grant_rate); + lvf = atomic_read(&pl->pl_lock_volume_factor); grant_speed = atomic_read(&pl->pl_grant_speed); cancel_rate = atomic_read(&pl->pl_cancel_rate); + grant_step = 
ldlm_pool_t2gsp(pl->pl_recalc_period); spin_unlock(&pl->pl_lock); nr += snprintf(page + nr, count - nr, "LDLM pool state (%s):\n", pl->pl_name); nr += snprintf(page + nr, count - nr, " SLV: "LPU64"\n", slv); nr += snprintf(page + nr, count - nr, " CLV: "LPU64"\n", clv); + nr += snprintf(page + nr, count - nr, " LVF: %d\n", lvf); - nr += snprintf(page + nr, count - nr, " LVF: %d\n", - atomic_read(&pl->pl_lock_volume_factor)); - - nr += snprintf(page + nr, count - nr, " GSP: %d%%\n", - grant_step); - nr += snprintf(page + nr, count - nr, " GP: %d\n", - grant_plan); + if (ns_is_server(ldlm_pl2ns(pl))) { + nr += snprintf(page + nr, count - nr, " GSP: %d%%\n", + grant_step); + nr += snprintf(page + nr, count - nr, " GP: %d\n", + grant_plan); + } nr += snprintf(page + nr, count - nr, " GR: %d\n", grant_rate); nr += snprintf(page + nr, count - nr, " CR: %d\n", @@ -641,8 +684,8 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, } LDLM_POOL_PROC_READER(grant_plan, int); -LDLM_POOL_PROC_READER(grant_step, int); -LDLM_POOL_PROC_WRITER(grant_step, int); +LDLM_POOL_PROC_READER(recalc_period, int); +LDLM_POOL_PROC_WRITER(recalc_period, int); static int ldlm_pool_proc_init(struct ldlm_pool *pl) { @@ -711,11 +754,10 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) pool_vars[0].read_fptr = lprocfs_rd_grant_plan; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - snprintf(var_name, MAX_STRING_SIZE, "grant_step"); + snprintf(var_name, MAX_STRING_SIZE, "recalc_period"); pool_vars[0].data = pl; - pool_vars[0].read_fptr = lprocfs_rd_grant_step; - if (ns_is_server(ns)) - pool_vars[0].write_fptr = lprocfs_wr_grant_step; + pool_vars[0].read_fptr = lprocfs_rd_recalc_period; + pool_vars[0].write_fptr = lprocfs_wr_recalc_period; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); @@ -805,7 +847,6 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, atomic_set(&pl->pl_grant_rate, 0); 
atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); - pl->pl_grant_step = LDLM_POOL_GSP; pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L); snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d", @@ -814,11 +855,13 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, if (client == LDLM_NAMESPACE_SERVER) { pl->pl_ops = &ldlm_srv_pool_ops; ldlm_pool_set_limit(pl, LDLM_POOL_HOST_L); + pl->pl_recalc_period = LDLM_POOL_SRV_DEF_RECALC_PERIOD; pl->pl_server_lock_volume = ldlm_pool_slv_max(LDLM_POOL_HOST_L); } else { - pl->pl_server_lock_volume = 1; ldlm_pool_set_limit(pl, 1); + pl->pl_server_lock_volume = 1; pl->pl_ops = &ldlm_cli_pool_ops; + pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD; } pl->pl_client_lock_volume = 0; rc = ldlm_pool_proc_init(pl); @@ -859,7 +902,6 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) */ if (lock->l_resource->lr_type == LDLM_FLOCK) return; - ENTRY; atomic_inc(&pl->pl_granted); @@ -891,6 +933,7 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) if (lock->l_resource->lr_type == LDLM_FLOCK) return; ENTRY; + LASSERT(atomic_read(&pl->pl_granted) > 0); atomic_dec(&pl->pl_granted); atomic_inc(&pl->pl_cancel_rate); @@ -998,17 +1041,6 @@ static struct shrinker *ldlm_pools_srv_shrinker; static struct shrinker *ldlm_pools_cli_shrinker; static struct completion ldlm_pools_comp; -void ldlm_pools_wakeup(void) -{ - ENTRY; - if (ldlm_pools_thread == NULL) - return; - ldlm_pools_thread->t_flags |= SVC_EVENT; - cfs_waitq_signal(&ldlm_pools_thread->t_ctl_waitq); - EXIT; -} -EXPORT_SYMBOL(ldlm_pools_wakeup); - /* * Cancel \a nr locks from all namespaces (if possible). Returns number of * cached locks after shrink is finished. 
All namespaces are asked to @@ -1437,12 +1469,6 @@ void ldlm_pools_fini(void) } EXPORT_SYMBOL(ldlm_pools_fini); -void ldlm_pools_wakeup(void) -{ - return; -} -EXPORT_SYMBOL(ldlm_pools_wakeup); - void ldlm_pools_recalc(ldlm_side_t client) { return; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 5011d59de3..a75ef68b7d 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1101,26 +1101,6 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req) obd->obd_pool_limit = new_limit; write_unlock(&obd->obd_pool_lock); - /* - * Check if we need to wakeup pools thread for fast SLV change. - * This is only done when threads period is noticably long like - * 10s or more. - */ -#if defined(__KERNEL__) && (LDLM_POOLS_THREAD_PERIOD >= 10) - if (old_slv > 0) { - __u64 fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE; - do_div(fast_change, 100); - - /* - * Wake up pools thread only if SLV has changed more than - * 50% since last update. In this case we want to react asap. - * Otherwise it is no sense to wake up pools as they are - * re-calculated every LDLM_POOLS_THREAD_PERIOD anyways. - */ - if (old_slv > new_slv && old_slv - new_slv > fast_change) - ldlm_pools_wakeup(); - } -#endif RETURN(0); } EXPORT_SYMBOL(ldlm_cli_update_pool); -- GitLab