From 4c97c0e5ed6c631d9be052a67752a96c15ffc4a7 Mon Sep 17 00:00:00 2001 From: yury <yury> Date: Tue, 12 Aug 2008 12:26:12 +0000 Subject: [PATCH] b=16367 r=adilger,nikita - fixes pools thread often wake up; - cleanups, comments. --- lustre/include/lustre_dlm.h | 114 ++++++++++++++++------- lustre/ldlm/ldlm_pool.c | 178 +++++++++++++++++++++--------------- lustre/ldlm/ldlm_request.c | 20 ---- 3 files changed, 183 insertions(+), 129 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index b10d8b27e0..65f2a58bf8 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -255,45 +255,92 @@ struct ldlm_pool_ops { int (*po_setup)(struct ldlm_pool *pl, int limit); }; -/* One second for pools thread check interval. */ +/** + * One second for pools thread check interval. Each pool has own period. + */ #define LDLM_POOLS_THREAD_PERIOD (1) -/* 5% margin for modest pools. See ldlm_pool.c for details. */ +/** + * 5% margin for modest pools. See ldlm_pool.c for details. + */ #define LDLM_POOLS_MODEST_MARGIN (5) -/* A change to SLV in % after which we want to wake up pools thread asap. */ -#define LDLM_POOLS_FAST_SLV_CHANGE (50) +/** + * Default recalc period for server side pools in sec. + */ +#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1) + +/** + * Default recalc period for client side pools in sec. + */ +#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10) struct ldlm_pool { - /* Common pool fields */ - cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */ - char pl_name[100]; /* Pool name, should be long - * enough to contain complex - * proc entry name. */ - spinlock_t pl_lock; /* Lock for protecting slv/clv - * updates. */ - atomic_t pl_limit; /* Number of allowed locks in - * in pool, both, client and - * server side. */ - atomic_t pl_granted; /* Number of granted locks. */ - atomic_t pl_grant_rate; /* Grant rate per T. */ - atomic_t pl_cancel_rate; /* Cancel rate per T. 
*/ - atomic_t pl_grant_speed; /* Grant speed (GR-CR) per T. */ - __u64 pl_server_lock_volume; /* Server lock volume. - * Protected by pl_lock */ - __u64 pl_client_lock_volume; /* Client lock volue. */ - atomic_t pl_lock_volume_factor; /* Lock volume factor. */ - - time_t pl_recalc_time; /* Time when last slv from - * server was obtained. */ - struct ldlm_pool_ops *pl_ops; /* Recalc and shrink ops. */ - - int pl_grant_plan; /* Planned number of granted - * locks for next T. */ - int pl_grant_step; /* Grant plan step for next - * T. */ - - struct lprocfs_stats *pl_stats; /* Pool statistics. */ + /** + * Pool proc directory. + */ + cfs_proc_dir_entry_t *pl_proc_dir; + /** + * Pool name, should be long enough to contain compound proc entry name. + */ + char pl_name[100]; + /** + * Lock for protecting slv/clv updates. + */ + spinlock_t pl_lock; + /** + * Number of allowed locks in pool, both, client and server side. + */ + atomic_t pl_limit; + /** + * Number of granted locks. + */ + atomic_t pl_granted; + /** + * Grant rate per T. + */ + atomic_t pl_grant_rate; + /** + * Cancel rate per T. + */ + atomic_t pl_cancel_rate; + /** + * Grant speed (GR-CR) per T. + */ + atomic_t pl_grant_speed; + /** + * Server lock volume. Protected by pl_lock. + */ + __u64 pl_server_lock_volume; + /** + * Current biggest client lock volume. Protected by pl_lock. + */ + __u64 pl_client_lock_volume; + /** + * Lock volume factor. SLV on client is calculated as following: + * server_slv * lock_volume_factor. + */ + atomic_t pl_lock_volume_factor; + /** + * Time when last slv from server was obtained. + */ + time_t pl_recalc_time; + /** + * Recalc period for pool. + */ + time_t pl_recalc_period; + /** + * Recalc and shrink ops. + */ + struct ldlm_pool_ops *pl_ops; + /** + * Number of planned locks for next period. + */ + int pl_grant_plan; + /** + * Pool statistics. 
+ */ + struct lprocfs_stats *pl_stats; }; typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, @@ -856,7 +903,6 @@ void unlock_res_and_lock(struct ldlm_lock *lock); void ldlm_pools_recalc(ldlm_side_t client); int ldlm_pools_init(void); void ldlm_pools_fini(void); -void ldlm_pools_wakeup(void); int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, int idx, ldlm_side_t client); diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index a884ca10d4..da67a4cd3b 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -86,10 +86,6 @@ * pl_cancel_rate - Number of canceled locks for last T (calculated); * pl_grant_speed - Grant speed (GR - CR) for last T (calculated); * pl_grant_plan - Planned number of granted locks for next T (calculated); - * - * pl_grant_step - Grant plan step, that is how ->pl_grant_plan - * will change in next T (tunable); - * * pl_server_lock_volume - Current server lock volume (calculated); * * As it may be seen from list above, we have few possible tunables which may @@ -120,14 +116,27 @@ #define LDLM_POOL_HOST_L ((num_physpages >> (20 - CFS_PAGE_SHIFT)) * 50) /* - * Default step in % for grant plan. + * Maximal possible grant step plan in %. + */ +#define LDLM_POOL_MAX_GSP (30) + +/* + * Minimal possible grant step plan in %. + */ +#define LDLM_POOL_MIN_GSP (1) + +/* + * This controls the speed of reaching LDLM_POOL_MAX_GSP + * with increasing thread period. This is 4s which means + * that for 10s thread period we will have 2 steps by 4s + * each. */ -#define LDLM_POOL_GSP (10) +#define LDLM_POOL_GSP_STEP (4) /* * LDLM_POOL_GSP% of all locks is default GP. */ -#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_GSP) / 100) +#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_MAX_GSP) / 100) /* * Max age for locks on clients. 
@@ -184,6 +193,37 @@ static inline struct ldlm_namespace *ldlm_pl2ns(struct ldlm_pool *pl) return container_of(pl, struct ldlm_namespace, ns_pool); } +/** + * Calculates suggested grant_step in % of available locks for passed + * \a period. This is later used in grant_plan calculations. + */ +static inline int ldlm_pool_t2gsp(int t) +{ + /* + * This yields 1% grant step for anything below LDLM_POOL_GSP_STEP + * and up to 30% for anything higher than LDLM_POOL_GSP_STEP. + * + * How this will affect execution is the following: + * + * - for thread period 1s we will have grant_step 1% which is good from + * pov of taking some load off from server and push it out to clients. + * This is like that because 1% for grant_step means that server will + * not allow clients to get lots of locks in short period of time and + * keep all old locks in their caches. Clients will always have to + * get some locks back if they want to take some new; + * + * - for thread period 10s (which is default) we will have 23% which + * means that clients will have enough of room to take some new locks + * without getting some back. All locks from this 23% which were not + * taken by clients in current period will contribute in SLV growing. + * SLV growing means more locks cached on clients until limit or grant + * plan is reached. + */ + return LDLM_POOL_MAX_GSP - + (LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) / + (1 << (t / LDLM_POOL_GSP_STEP)); +} + +/** + * Recalculates next grant limit on passed \a pl. 
* @@ -196,7 +236,8 @@ static inline void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl) limit = ldlm_pool_get_limit(pl); granted = atomic_read(&pl->pl_granted); - grant_step = ((limit - granted) * pl->pl_grant_step) / 100; + grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period); + grant_step = ((limit - granted) * grant_step) / 100; pl->pl_grant_plan = granted + grant_step; } @@ -303,38 +344,28 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) spin_lock(&pl->pl_lock); recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; - if (recalc_interval_sec > 0) { - /* - * Update statistics. - */ - ldlm_pool_recalc_stats(pl); - - /* + if (recalc_interval_sec >= pl->pl_recalc_period) { + /* * Recalc SLV after last period. This should be done * _before_ recalculating new grant plan. */ ldlm_pool_recalc_slv(pl); - /* + /* * Make sure that pool informed obd of last SLV changes. */ ldlm_srv_pool_push_slv(pl); - /* + /* * Update grant_plan for new period. */ ldlm_pool_recalc_grant_plan(pl); - /* - * Zero out all rates and speed for the last period. - */ - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - atomic_set(&pl->pl_grant_speed, 0); pl->pl_recalc_time = cfs_time_current_sec(); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, recalc_interval_sec); } + spin_unlock(&pl->pl_lock); RETURN(0); } @@ -447,29 +478,23 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) ENTRY; spin_lock(&pl->pl_lock); + /* + * Check if we need to recalc lists now. + */ + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; + if (recalc_interval_sec < pl->pl_recalc_period) { + spin_unlock(&pl->pl_lock); + RETURN(0); + } /* * Make sure that pool knows last SLV and Limit from obd. */ ldlm_cli_pool_pop_slv(pl); - recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; - if (recalc_interval_sec > 0) { - /* - * Update statistics only every T. 
- */ - ldlm_pool_recalc_stats(pl); - - /* - * Zero out grant/cancel rates and speed for last period. - */ - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - atomic_set(&pl->pl_grant_speed, 0); - pl->pl_recalc_time = cfs_time_current_sec(); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, - recalc_interval_sec); - } + pl->pl_recalc_time = cfs_time_current_sec(); + lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, + recalc_interval_sec); spin_unlock(&pl->pl_lock); /* @@ -541,14 +566,33 @@ struct ldlm_pool_ops ldlm_cli_pool_ops = { */ int ldlm_pool_recalc(struct ldlm_pool *pl) { + time_t recalc_interval_sec; int count; + spin_lock(&pl->pl_lock); + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; + if (recalc_interval_sec > 0) { + /* + * Update pool statistics every 1s. + */ + ldlm_pool_recalc_stats(pl); + + /* + * Zero out all rates and speed for the last period. + */ + atomic_set(&pl->pl_grant_rate, 0); + atomic_set(&pl->pl_cancel_rate, 0); + atomic_set(&pl->pl_grant_speed, 0); + } + spin_unlock(&pl->pl_lock); + if (pl->pl_ops->po_recalc != NULL) { count = pl->pl_ops->po_recalc(pl); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, count); return count; } + return 0; } EXPORT_SYMBOL(ldlm_pool_recalc); @@ -599,7 +643,7 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, int count, int *eof, void *data) { int granted, grant_rate, cancel_rate, grant_step; - int nr = 0, grant_speed, grant_plan; + int nr = 0, grant_speed, grant_plan, lvf; struct ldlm_pool *pl = data; __u64 slv, clv; __u32 limit; @@ -609,25 +653,26 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, clv = pl->pl_client_lock_volume; limit = ldlm_pool_get_limit(pl); grant_plan = pl->pl_grant_plan; - grant_step = pl->pl_grant_step; granted = atomic_read(&pl->pl_granted); grant_rate = atomic_read(&pl->pl_grant_rate); + lvf = atomic_read(&pl->pl_lock_volume_factor); grant_speed = 
atomic_read(&pl->pl_grant_speed); cancel_rate = atomic_read(&pl->pl_cancel_rate); + grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period); spin_unlock(&pl->pl_lock); nr += snprintf(page + nr, count - nr, "LDLM pool state (%s):\n", pl->pl_name); nr += snprintf(page + nr, count - nr, " SLV: "LPU64"\n", slv); nr += snprintf(page + nr, count - nr, " CLV: "LPU64"\n", clv); + nr += snprintf(page + nr, count - nr, " LVF: %d\n", lvf); - nr += snprintf(page + nr, count - nr, " LVF: %d\n", - atomic_read(&pl->pl_lock_volume_factor)); - - nr += snprintf(page + nr, count - nr, " GSP: %d%%\n", - grant_step); - nr += snprintf(page + nr, count - nr, " GP: %d\n", - grant_plan); + if (ns_is_server(ldlm_pl2ns(pl))) { + nr += snprintf(page + nr, count - nr, " GSP: %d%%\n", + grant_step); + nr += snprintf(page + nr, count - nr, " GP: %d\n", + grant_plan); + } nr += snprintf(page + nr, count - nr, " GR: %d\n", grant_rate); nr += snprintf(page + nr, count - nr, " CR: %d\n", @@ -642,8 +687,8 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, } LDLM_POOL_PROC_READER(grant_plan, int); -LDLM_POOL_PROC_READER(grant_step, int); -LDLM_POOL_PROC_WRITER(grant_step, int); +LDLM_POOL_PROC_READER(recalc_period, int); +LDLM_POOL_PROC_WRITER(recalc_period, int); static int ldlm_pool_proc_init(struct ldlm_pool *pl) { @@ -712,11 +757,10 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) pool_vars[0].read_fptr = lprocfs_rd_grant_plan; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - snprintf(var_name, MAX_STRING_SIZE, "grant_step"); + snprintf(var_name, MAX_STRING_SIZE, "recalc_period"); pool_vars[0].data = pl; - pool_vars[0].read_fptr = lprocfs_rd_grant_step; - if (ns_is_server(ns)) - pool_vars[0].write_fptr = lprocfs_wr_grant_step; + pool_vars[0].read_fptr = lprocfs_rd_recalc_period; + pool_vars[0].write_fptr = lprocfs_wr_recalc_period; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); @@ -806,7 +850,6 @@ int 
ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); - pl->pl_grant_step = LDLM_POOL_GSP; pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L); snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d", @@ -815,11 +858,13 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, if (client == LDLM_NAMESPACE_SERVER) { pl->pl_ops = &ldlm_srv_pool_ops; ldlm_pool_set_limit(pl, LDLM_POOL_HOST_L); + pl->pl_recalc_period = LDLM_POOL_SRV_DEF_RECALC_PERIOD; pl->pl_server_lock_volume = ldlm_pool_slv_max(LDLM_POOL_HOST_L); } else { - pl->pl_server_lock_volume = 1; ldlm_pool_set_limit(pl, 1); + pl->pl_server_lock_volume = 1; pl->pl_ops = &ldlm_cli_pool_ops; + pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD; } pl->pl_client_lock_volume = 0; rc = ldlm_pool_proc_init(pl); @@ -860,7 +905,6 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) */ if (lock->l_resource->lr_type == LDLM_FLOCK) return; - ENTRY; atomic_inc(&pl->pl_granted); @@ -892,6 +936,7 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) if (lock->l_resource->lr_type == LDLM_FLOCK) return; ENTRY; + LASSERT(atomic_read(&pl->pl_granted) > 0); atomic_dec(&pl->pl_granted); atomic_inc(&pl->pl_cancel_rate); @@ -999,17 +1044,6 @@ static struct shrinker *ldlm_pools_srv_shrinker; static struct shrinker *ldlm_pools_cli_shrinker; static struct completion ldlm_pools_comp; -void ldlm_pools_wakeup(void) -{ - ENTRY; - if (ldlm_pools_thread == NULL) - return; - ldlm_pools_thread->t_flags |= SVC_EVENT; - cfs_waitq_signal(&ldlm_pools_thread->t_ctl_waitq); - EXIT; -} -EXPORT_SYMBOL(ldlm_pools_wakeup); - /* * Cancel \a nr locks from all namespaces (if possible). Returns number of * cached locks after shrink is finished. 
All namespaces are asked to @@ -1438,12 +1472,6 @@ void ldlm_pools_fini(void) } EXPORT_SYMBOL(ldlm_pools_fini); -void ldlm_pools_wakeup(void) -{ - return; -} -EXPORT_SYMBOL(ldlm_pools_wakeup); - void ldlm_pools_recalc(ldlm_side_t client) { return; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index c002494502..68c24dce3b 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1061,26 +1061,6 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req) obd->obd_pool_limit = new_limit; write_unlock(&obd->obd_pool_lock); - /* - * Check if we need to wakeup pools thread for fast SLV change. - * This is only done when threads period is noticably long like - * 10s or more. - */ -#if defined(__KERNEL__) && (LDLM_POOLS_THREAD_PERIOD >= 10) - if (old_slv > 0) { - __u64 fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE; - do_div(fast_change, 100); - - /* - * Wake up pools thread only if SLV has changed more than - * 50% since last update. In this case we want to react asap. - * Otherwise it is no sense to wake up pools as they are - * re-calculated every LDLM_POOLS_THREAD_PERIOD anyways. - */ - if (old_slv > new_slv && old_slv - new_slv > fast_change) - ldlm_pools_wakeup(); - } -#endif RETURN(0); } EXPORT_SYMBOL(ldlm_cli_update_pool); -- GitLab