diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 7938c855dce16a125b7b6f8ba54984d6d81debe9..a9f03ed00d5464bc150790c6458e6099b7393c43 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -217,10 +217,12 @@ struct ldlm_lock; struct ldlm_resource; struct ldlm_namespace; -typedef int (*ldlm_pool_recalc_t)(struct ldlm_pool *pl); - -typedef int (*ldlm_pool_shrink_t)(struct ldlm_pool *pl, - int nr, unsigned int gfp_mask); +struct ldlm_pool_ops { + int (*po_recalc)(struct ldlm_pool *pl); + int (*po_shrink)(struct ldlm_pool *pl, int nr, + unsigned int gfp_mask); + int (*po_setup)(struct ldlm_pool *pl, int limit); +}; enum { LDLM_POOL_CTL_RECALC = 1 << 0, /* Pool recalc is enabled */ @@ -235,39 +237,39 @@ enum { #define LDLM_POOLS_MODEST_MARGIN (5) /* A change to SLV in % after which we want to wake up pools thread asap. */ -#define LDLM_POOLS_FAST_SLV_CHANGE (5) +#define LDLM_POOLS_FAST_SLV_CHANGE (50) struct ldlm_pool { /* Common pool fields */ - cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */ - char pl_name[100]; /* Pool name, should be long - * enough to contain complex - * proc entry name. */ - spinlock_t pl_lock; /* Lock for protecting slv/clv - * updates. */ - atomic_t pl_limit; /* Number of allowed locks in - * in pool, both, client and - * server side. */ - atomic_t pl_granted; /* Number of granted locks. */ - atomic_t pl_grant_rate; /* Grant rate per T. */ - atomic_t pl_cancel_rate; /* Cancel rate per T. */ - atomic_t pl_grant_speed; /* Grant speed (GR - CR) per T. */ - __u64 pl_server_lock_volume; /* Server lock volume. Protected - * by pl_lock. */ - cfs_time_t pl_update_time; /* Time when last slv from server - * was obtained. */ - ldlm_pool_recalc_t pl_recalc; /* Recalc callback func pointer. */ - ldlm_pool_shrink_t pl_shrink; /* Shrink callback func pointer. */ - int pl_control; /* Pool features mask */ + cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */ + char pl_name[100]; /* Pool name, should be long + * enough to contain complex + * proc entry name. */ + spinlock_t pl_lock; /* Lock for protecting slv/clv + * updates. */ + atomic_t pl_limit; /* Number of allowed locks in + * in pool, both, client and + * server side. */ + atomic_t pl_granted; /* Number of granted locks. */ + atomic_t pl_grant_rate; /* Grant rate per T. */ + atomic_t pl_cancel_rate; /* Cancel rate per T. */ + atomic_t pl_grant_speed; /* Grant speed (GR-CR) per T. */ + __u64 pl_server_lock_volume; /* Server lock volume. + * Protected by pl_lock */ + atomic_t pl_lock_volume_factor; /* Lock volume factor. */ + + time_t pl_recalc_time; /* Time when last slv from + * server was obtained. */ + struct ldlm_pool_ops *pl_ops; /* Recalc and shrink ops. */ + + int pl_control; /* Pool features mask */ - /* Server side pool fields */ - atomic_t pl_grant_plan; /* Planned number of granted - * locks for next T. */ - atomic_t pl_grant_step; /* Grant plan step for next T. */ + atomic_t pl_grant_plan; /* Planned number of granted + * locks for next T. */ + atomic_t pl_grant_step; /* Grant plan step for next + * T. */ - /* Client side pool related fields */ - atomic_t pl_lock_volume_factor; /* Lock volume factor. */ - struct lprocfs_stats *pl_stats; /* Pool statistics. */ + struct lprocfs_stats *pl_stats; /* Pool statistics. */ }; static inline int pool_recalc_enabled(struct ldlm_pool *pl) @@ -301,6 +303,12 @@ typedef enum { #define NS_DEFAULT_CONTENTION_SECONDS 2 #define NS_DEFAULT_CONTENDED_LOCKS 32 +/* Default value for ->ns_shrink_thumb. 
If lock is not extent one its cost + * is one page. Here we have 256 pages which is 1M on i386. Thus by default + * all extent locks which have more than 1M long extent will be kept in lru, + * others (including ibits locks) will be canceled on memory pressure event. */ +#define LDLM_LOCK_SHRINK_THUMB 256 + struct ldlm_namespace { char *ns_name; ldlm_side_t ns_client; /* is this a client-side lock tree? */ @@ -321,6 +329,9 @@ struct ldlm_namespace { unsigned int ns_max_unused; unsigned int ns_max_age; + + /* Lower limit to number of pages in lock to keep it in cache */ + unsigned int ns_shrink_thumb; cfs_time_t ns_next_dump; /* next debug dump, jiffies */ atomic_t ns_locks; @@ -804,7 +815,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask); void ldlm_pool_fini(struct ldlm_pool *pl); -int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit); +int ldlm_pool_setup(struct ldlm_pool *pl, int limit); int ldlm_pool_recalc(struct ldlm_pool *pl); __u64 ldlm_pool_get_slv(struct ldlm_pool *pl); __u32 ldlm_pool_get_limit(struct ldlm_pool *pl); diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 35eedf841af87dd360c82a61ca8d74e64e7cafc7..5966d4ca26bba47bbed86dba480291fd166d591c 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -36,9 +36,15 @@ typedef enum { } ldlm_sync_t; /* Cancel lru flag, it indicates we cancel aged locks. */ -#define LDLM_CANCEL_AGED 0x00000001 +enum { + LDLM_CANCEL_AGED = 1 << 0, /* Cancel aged locks (non lru resize). */ + LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */ + LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */ + LDLM_CANCEL_LRUR = 1 << 3 /* Cancel locks from lru resize. */ +}; -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync); +int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync, + int flags); int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, int flags); extern int ldlm_enqueue_min; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 57a5cee30fcdebf687cac811df5d46eb34e140ae..e58752865794a41bcd7e4ae2deb7eaac19f016c7 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1428,12 +1428,16 @@ int target_pack_pool_reply(struct ptlrpc_request *req) struct ldlm_pool *pl; ENTRY; - if (!exp_connect_lru_resize(req->rq_export)) + if (!exp_connect_lru_resize(req->rq_export)) { + lustre_msg_set_slv(req->rq_repmsg, 0); + lustre_msg_set_limit(req->rq_repmsg, 0); RETURN(0); + } pl = ldlm_exp2pl(req->rq_export); spin_lock(&pl->pl_lock); + LASSERT(ldlm_pool_get_slv(pl) != 0 && ldlm_pool_get_limit(pl) != 0); lustre_msg_set_slv(req->rq_repmsg, ldlm_pool_get_slv(pl)); lustre_msg_set_limit(req->rq_repmsg, ldlm_pool_get_limit(pl)); spin_unlock(&pl->pl_lock); @@ -1458,7 +1462,6 @@ target_send_reply_msg (struct ptlrpc_request *req, int rc, int fail_id) DEBUG_REQ(D_NET, req, "sending reply"); } - target_pack_pool_reply(req); return (ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT)); } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 62603023c1206f8f459a97a2654447e8b7b77bc7..198b5aafac27509b8716d17ba9e12527dee3c19f 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -648,7 +648,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) * enqueue. 
*/ if (!exp_connect_cancelset(lock->l_conn_export) && !ns_connect_lru_resize(ns)) - ldlm_cancel_lru(ns, 0, LDLM_ASYNC); + ldlm_cancel_lru(ns, 0, LDLM_ASYNC, 0); } else { unlock_res_and_lock(lock); } diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index 704b4cd87a12034f9471719677af7b2daaa832ff..979de4773a805d954698801325193f8ab7d0d81b 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -138,11 +138,17 @@ static inline __u64 ldlm_pool_slv_min(__u32 L) } enum { - LDLM_POOL_GRANTED_STAT = 0, + LDLM_POOL_FIRST_STAT = 0, + LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT, + LDLM_POOL_GRANT_STAT, + LDLM_POOL_CANCEL_STAT, LDLM_POOL_GRANT_RATE_STAT, LDLM_POOL_CANCEL_RATE_STAT, LDLM_POOL_GRANT_PLAN_STAT, LDLM_POOL_SLV_STAT, + LDLM_POOL_SHRINK_REQTD_STAT, + LDLM_POOL_SHRINK_FREED_STAT, + LDLM_POOL_RECALC_STAT, LDLM_POOL_LAST_STAT }; @@ -218,8 +224,7 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) ENTRY; spin_lock(&pl->pl_lock); - recalc_interval_sec = cfs_duration_sec(cfs_time_current() - - pl->pl_update_time); + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* Update statistics */ ldlm_pool_recalc_stats(pl); @@ -230,12 +235,12 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) /* Update grant_plan for new period. */ ldlm_pool_recalc_grant_plan(pl); - pl->pl_update_time = cfs_time_current(); /* Zero out all rates and speed for the last period. */ atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); + pl->pl_recalc_time = cfs_time_current_sec(); } spin_unlock(&pl->pl_lock); RETURN(0); @@ -246,30 +251,36 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) static int ldlm_srv_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { - __u32 granted, limit; - __u64 slv_delta; + __u32 limit; ENTRY; - /* Client already canceled locks but server is already in shrinker and - * can't cancel anything. Let's catch this race. */ - if ((granted = atomic_read(&pl->pl_granted)) == 0) + /* VM is asking how many entries may be potentially freed. */ + if (nr == 0) + RETURN(atomic_read(&pl->pl_granted)); + + /* Client already canceled locks but server is already in shrinker + * and can't cancel anything. Let's catch this race. */ + if (atomic_read(&pl->pl_granted) == 0) RETURN(0); spin_lock(&pl->pl_lock); - /* Simple proportion but it gives impression on how much should be - * SLV changed for request @nr of locks to be canceled.*/ - slv_delta = nr * ldlm_pool_get_slv(pl); - limit = ldlm_pool_get_limit(pl); - do_div(slv_delta, granted); - - /* As SLV has some dependence on historical data, that is new value - * is based on old one, this decreasing will make clients get some - * locks back to the server and after some time it will stabilize.*/ - if (slv_delta < ldlm_pool_get_slv(pl)) - ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - slv_delta); - else + /* We want shrinker to possibly cause cancelation of @nr locks from + * clients or grant approximately @nr locks smaller next intervals. + * + * This is why we decresed SLV by @nr. This effect will only be as + * long as one re-calc interval (1s these days) and this should be + * enough to pass this decreased SLV to all clients. On next recalc + * interval pool will either increase SLV if locks load is not high + * or will keep on same level or even decrease again, thus, shrinker + * decreased SLV will affect next recalc intervals and this way will + * make locking load lower. 
*/ + if (nr < ldlm_pool_get_slv(pl)) { + ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - nr); + } else { + limit = ldlm_pool_get_limit(pl); ldlm_pool_set_slv(pl, ldlm_pool_slv_min(limit)); + } spin_unlock(&pl->pl_lock); /* We did not really free any memory here so far, it only will be @@ -277,6 +288,13 @@ static int ldlm_srv_pool_shrink(struct ldlm_pool *pl, RETURN(0); } +static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit) +{ + ENTRY; + ldlm_pool_set_limit(pl, limit); + RETURN(0); +} + static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) { time_t recalc_interval_sec; @@ -284,8 +302,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) spin_lock(&pl->pl_lock); - recalc_interval_sec = cfs_duration_sec(cfs_time_current() - - pl->pl_update_time); + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* Update statistics only every T */ ldlm_pool_recalc_stats(pl); @@ -294,28 +311,63 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); + pl->pl_recalc_time = cfs_time_current_sec(); } spin_unlock(&pl->pl_lock); - /* Recalc client pool is done without taking into account pl_update_time - * as this may be called voluntary in the case of emergency. Client - * recalc does not calculate anything, we do not risk to have skew - * of some pool param. */ - ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC); - RETURN(0); + /* Do not cancel locks in case lru resize is disabled for this ns */ + if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) + RETURN(0); + + /* In the time of canceling locks on client we do not need to maintain + * sharp timing, we only want to cancel locks asap according to new SLV. + * This may be called when SLV has changed much, this is why we do not + * take into account pl->pl_recalc_time here. */ + RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC, + LDLM_CANCEL_LRUR)); } static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { ENTRY; - RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC)); + + /* Do not cancel locks in case lru resize is disabled for this ns */ + if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) + RETURN(0); + + /* Find out how many locks may be released according to shrink + * policy. 
*/ + if (nr == 0) + RETURN(ldlm_cancel_lru_local(ldlm_pl2ns(pl), NULL, 0, + 0, LDLM_CANCEL_SHRINK)); + + /* Cancel @nr locks accoding to shrink policy */ + RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC, + LDLM_CANCEL_SHRINK)); } +struct ldlm_pool_ops ldlm_srv_pool_ops = { + .po_recalc = ldlm_srv_pool_recalc, + .po_shrink = ldlm_srv_pool_shrink, + .po_setup = ldlm_srv_pool_setup +}; + +struct ldlm_pool_ops ldlm_cli_pool_ops = { + .po_recalc = ldlm_cli_pool_recalc, + .po_shrink = ldlm_cli_pool_shrink +}; + int ldlm_pool_recalc(struct ldlm_pool *pl) { - if (pl->pl_recalc != NULL && pool_recalc_enabled(pl)) - return pl->pl_recalc(pl); + int count; + + if (pl->pl_ops->po_recalc != NULL && pool_recalc_enabled(pl)) { + count = pl->pl_ops->po_recalc(pl); + lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, + count); + return count; + } return 0; } EXPORT_SYMBOL(ldlm_pool_recalc); @@ -323,22 +375,32 @@ EXPORT_SYMBOL(ldlm_pool_recalc); int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { - if (pl->pl_shrink != NULL && pool_shrink_enabled(pl)) { - CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks\n", - pl->pl_name, nr); - return pl->pl_shrink(pl, nr, gfp_mask); + int cancel = 0; + + if (pl->pl_ops->po_shrink != NULL && pool_shrink_enabled(pl)) { + cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask); + if (nr > 0) { + lprocfs_counter_add(pl->pl_stats, + LDLM_POOL_SHRINK_REQTD_STAT, + nr); + lprocfs_counter_add(pl->pl_stats, + LDLM_POOL_SHRINK_FREED_STAT, + cancel); + CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks, " + "shrunk %d\n", pl->pl_name, nr, cancel); + } } - return 0; + return cancel; } EXPORT_SYMBOL(ldlm_pool_shrink); /* The purpose of this function is to re-setup limit and maximal allowed * slv according to the passed limit. 
*/ -int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit) +int ldlm_pool_setup(struct ldlm_pool *pl, int limit) { ENTRY; - if (ns_is_server(ldlm_pl2ns(pl))) - ldlm_pool_set_limit(pl, limit); + if (pl->pl_ops->po_setup != NULL) + RETURN(pl->pl_ops->po_setup(pl, limit)); RETURN(0); } EXPORT_SYMBOL(ldlm_pool_setup); @@ -368,10 +430,9 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, pl->pl_name); nr += snprintf(page + nr, count - nr, " SLV: "LPU64"\n", slv); - if (ns_is_client(ldlm_pl2ns(pl))) { - nr += snprintf(page + nr, count - nr, " LVF: %d\n", - atomic_read(&pl->pl_lock_volume_factor)); - } + nr += snprintf(page + nr, count - nr, " LVF: %d\n", + atomic_read(&pl->pl_lock_volume_factor)); + nr += snprintf(page + nr, count - nr, " GSP: %d%%\n", grant_step); nr += snprintf(page + nr, count - nr, " GP: %d\n", @@ -469,13 +530,11 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) pool_vars[0].write_fptr = lprocfs_wr_atomic; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - if (ns_is_client(ns)) { - snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); - pool_vars[0].data = &pl->pl_lock_volume_factor; - pool_vars[0].read_fptr = lprocfs_rd_uint; - pool_vars[0].write_fptr = lprocfs_wr_uint; - lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - } + snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); + pool_vars[0].data = &pl->pl_lock_volume_factor; + pool_vars[0].read_fptr = lprocfs_rd_uint; + pool_vars[0].write_fptr = lprocfs_wr_uint; + lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "state"); pool_vars[0].data = pl; @@ -483,13 +542,17 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT - - LDLM_POOL_GRANTED_STAT, 0); + LDLM_POOL_FIRST_STAT, 0); if (!pl->pl_stats) GOTO(out_free_name, rc = -ENOMEM); lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "granted", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, 0, + "grant", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, 0, + "cancel", "locks"); lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "grant_rate", "locks/s"); @@ -502,6 +565,15 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "slv", "slv"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "shrink_request", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "shrink_freed", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "recalc_freed", "locks"); lprocfs_register_stats(pl->pl_proc_dir, "stats", pl->pl_stats); EXIT; @@ -534,7 +606,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, spin_lock_init(&pl->pl_lock); atomic_set(&pl->pl_granted, 0); - pl->pl_update_time = cfs_time_current(); + pl->pl_recalc_time = cfs_time_current_sec(); atomic_set(&pl->pl_lock_volume_factor, 1); atomic_set(&pl->pl_grant_rate, 0); @@ -548,15 +620,13 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, ns->ns_name, idx); if (client == LDLM_NAMESPACE_SERVER) { - pl->pl_recalc = ldlm_srv_pool_recalc; - pl->pl_shrink = 
ldlm_srv_pool_shrink; + pl->pl_ops = &ldlm_srv_pool_ops; ldlm_pool_set_limit(pl, LDLM_POOL_HOST_L); ldlm_pool_set_slv(pl, ldlm_pool_slv_max(LDLM_POOL_HOST_L)); } else { ldlm_pool_set_slv(pl, 1); ldlm_pool_set_limit(pl, 1); - pl->pl_recalc = ldlm_cli_pool_recalc; - pl->pl_shrink = ldlm_cli_pool_shrink; + pl->pl_ops = &ldlm_cli_pool_ops; } rc = ldlm_pool_proc_init(pl); @@ -573,8 +643,7 @@ void ldlm_pool_fini(struct ldlm_pool *pl) { ENTRY; ldlm_pool_proc_fini(pl); - pl->pl_recalc = NULL; - pl->pl_shrink = NULL; + pl->pl_ops = NULL; EXIT; } EXPORT_SYMBOL(ldlm_pool_fini); @@ -586,9 +655,12 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) atomic_inc(&pl->pl_grant_rate); atomic_inc(&pl->pl_grant_speed); - /* No need to recalc client pools here as this is already done - * on enqueue/cancel and locks to cancel already packed to the - * rpc. */ + lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT); + + /* Do not do pool recalc for client side as all locks which + * potentially may be canceled has already been packed into + * enqueue/cancel rpc. Also we do not want to run out of stack + * with too long call paths. */ if (ns_is_server(ldlm_pl2ns(pl))) ldlm_pool_recalc(pl); EXIT; @@ -603,7 +675,8 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) atomic_inc(&pl->pl_cancel_rate); atomic_dec(&pl->pl_grant_speed); - /* Same as in ldlm_pool_add() */ + lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT); + if (ns_is_server(ldlm_pl2ns(pl))) ldlm_pool_recalc(pl); EXIT; @@ -675,11 +748,22 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); /* Find out how many resources we may release. */ - mutex_down(ldlm_namespace_lock(client)); - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) - total += ldlm_pool_granted(&ns->ns_pool); - mutex_up(ldlm_namespace_lock(client)); - + for (nr_ns = atomic_read(ldlm_namespace_nr(client)); + nr_ns > 0; nr_ns--) + { + mutex_down(ldlm_namespace_lock(client)); + if (list_empty(ldlm_namespace_list(client))) { + mutex_up(ldlm_namespace_lock(client)); + return 0; + } + ns = ldlm_namespace_first(client); + ldlm_namespace_get(ns); + ldlm_namespace_move(ns, client); + mutex_up(ldlm_namespace_lock(client)); + total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask); + ldlm_namespace_put(ns, 1); + } + if (nr == 0 || total == 0) return total; @@ -727,15 +811,18 @@ void ldlm_pools_recalc(ldlm_side_t client) { __u32 nr_l = 0, nr_p = 0, l; struct ldlm_namespace *ns; - int rc, nr, equal = 0; + int nr, equal = 0; - /* Check all modest namespaces. */ - mutex_down(ldlm_namespace_lock(client)); - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) { - if (ns->ns_appetite != LDLM_NAMESPACE_MODEST) - continue; + /* No need to setup pool limit for client pools. */ + if (client == LDLM_NAMESPACE_SERVER) { + /* Check all modest namespaces first. */ + mutex_down(ldlm_namespace_lock(client)); + list_for_each_entry(ns, ldlm_namespace_list(client), + ns_list_chain) + { + if (ns->ns_appetite != LDLM_NAMESPACE_MODEST) + continue; - if (client == LDLM_NAMESPACE_SERVER) { l = ldlm_pool_granted(&ns->ns_pool); if (l == 0) l = 1; @@ -747,21 +834,24 @@ void ldlm_pools_recalc(ldlm_side_t client) nr_l += l; nr_p++; } - } - /* Make sure that modest namespaces did not eat more that 2/3 of limit */ - if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) { - CWARN("Modest pools eat out 2/3 of locks limit. %d of %lu. 
" - "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L); - equal = 1; - } + /* Make sure that modest namespaces did not eat more that 2/3 + * of limit */ + if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) { + CWARN("\"Modest\" pools eat out 2/3 of server locks " + "limit (%d of %lu). This means that you have too " + "many clients for this amount of server RAM. " + "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L); + equal = 1; + } - /* The rest is given to greedy namespaces. */ - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) { - if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY) - continue; + /* The rest is given to greedy namespaces. */ + list_for_each_entry(ns, ldlm_namespace_list(client), + ns_list_chain) + { + if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY) + continue; - if (client == LDLM_NAMESPACE_SERVER) { if (equal) { /* In the case 2/3 locks are eaten out by * modest pools, we re-setup equal limit @@ -777,8 +867,8 @@ void ldlm_pools_recalc(ldlm_side_t client) } ldlm_pool_setup(&ns->ns_pool, l); } + mutex_up(ldlm_namespace_lock(client)); } - mutex_up(ldlm_namespace_lock(client)); /* Recalc at least ldlm_namespace_nr(client) namespaces. */ for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) { @@ -798,11 +888,7 @@ void ldlm_pools_recalc(ldlm_side_t client) mutex_up(ldlm_namespace_lock(client)); /* After setup is done - recalc the pool. */ - rc = ldlm_pool_recalc(&ns->ns_pool); - if (rc) - CERROR("%s: pool recalculation error " - "%d\n", ns->ns_pool.pl_name, rc); - + ldlm_pool_recalc(&ns->ns_pool); ldlm_namespace_put(ns, 1); } } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 8db7f8911a3f3a0d35b7d4697044a5e1b223e0c0..9c4621ac1a2273f0a034aaedab7442a97629b6f4 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -529,16 +529,20 @@ struct ptlrpc_request *ldlm_prep_enqueue_req(struct obd_export *exp, /* Estimate the amount of free space in the request. */ int avail = ldlm_req_handles_avail(exp, size, bufcount, LDLM_ENQUEUE_CANCEL_OFF); + int flags, cancel; LASSERT(avail >= count); + flags = ns_connect_lru_resize(ns) ? + LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED; + cancel = ns_connect_lru_resize(exp) ? 0 : 1; + /* Cancel lru locks here _only_ if the server supports * EARLY_CANCEL. Otherwise we have to send extra CANCEL * rpc right on enqueue, what will make it slower, vs. * asynchronous rpc in blocking thread. */ - count += ldlm_cancel_lru_local(ns, cancels, - ns_connect_lru_resize(ns) ? 0 : 1, - avail - count, LDLM_CANCEL_AGED); + count += ldlm_cancel_lru_local(ns, cancels, cancel, + avail - count, flags); size[DLM_LOCKREQ_OFF] = ldlm_request_bufsize(count, LDLM_ENQUEUE); } @@ -970,27 +974,42 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req) pl = ldlm_imp2pl(req->rq_import); spin_lock(&pl->pl_lock); -#ifdef __KERNEL__ + + /* Check if we need to wakeup pools thread for fast SLV change. + * This is only done when threads period is noticably long like + * 10s or more. 
*/
+#if defined(__KERNEL__) && (LDLM_POOLS_THREAD_PERIOD >= 10)
         {
-                __u64 old_slv, fast_slv_change;
+                __u64 old_slv, new_slv, fast_change;
 
                 old_slv = ldlm_pool_get_slv(pl);
-                fast_slv_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE;
-                do_div(fast_slv_change, 100);
-#endif
-        pl->pl_update_time = cfs_time_current();
-        ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg));
-        ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg));
-#ifdef __KERNEL__
+                new_slv = lustre_msg_get_slv(req->rq_repmsg);
+                fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE;
+                do_div(fast_change, 100);
+
                 /* Wake up pools thread only if SLV has changed more than
-                 * 5% since last update. In this case we want to react asap.
+                 * 50% since last update. In this case we want to react asap.
                  * Otherwise it is no sense to wake up pools as they are
-                 * re-calculated every 1s anyways. */
-                if (old_slv > ldlm_pool_get_slv(pl) &&
-                    old_slv - ldlm_pool_get_slv(pl) > fast_slv_change)
+                 * re-calculated every LDLM_POOLS_THREAD_PERIOD anyways. */
+                if (old_slv > new_slv && old_slv - new_slv > fast_change)
                         ldlm_pools_wakeup();
         }
 #endif
+        /* In some cases the RPC may contain slv and limit zeroed out. This is
+         * the case when the server does not support the lru resize feature.
+         * It is also possible in some recovery cases when server side reqs
+         * have no ref to the obd export and thus access to the server side
+         * namespace is not possible. */
+        if (lustre_msg_get_slv(req->rq_repmsg) != 0 &&
+            lustre_msg_get_limit(req->rq_repmsg) != 0) {
+                ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg));
+                ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg));
+        } else {
+                DEBUG_REQ(D_HA, req, "zero SLV or Limit found "
+                          "(SLV: "LPU64", Limit: %u)",
+                          lustre_msg_get_slv(req->rq_repmsg),
+                          lustre_msg_get_limit(req->rq_repmsg));
+        }
         spin_unlock(&pl->pl_lock);
 
         RETURN(0);
@@ -1081,6 +1100,123 @@ static int ldlm_cancel_list_local(struct list_head *cancels, int count)
         RETURN(count);
 }
 
+/* Return 1 if @lock should be canceled according to shrinker policy.
+ * Return zero otherwise. */
+static int ldlm_cancel_shrink_policy(struct ldlm_namespace *ns,
+                                     struct ldlm_lock *lock,
+                                     int unused, int added,
+                                     int asked)
+{
+        int lock_cost;
+        __u64 page_nr;
+
+        if (lock->l_resource->lr_type == LDLM_EXTENT) {
+                struct ldlm_extent *l_extent;
+
+                /* For all extent locks cost is 1 + number of pages in
+                 * their extent. */
+                l_extent = &lock->l_policy_data.l_extent;
+                page_nr = (l_extent->end - l_extent->start);
+                do_div(page_nr, CFS_PAGE_SIZE);
+
+#ifdef __KERNEL__
+                /* XXX: In fact this is an evil hack, we can't access the
+                 * inode here. To do this right we need somehow to know the
+                 * number of pages covered by the lock. This should be fixed
+                 * later when 10718 is landed. */
+                if (lock->l_ast_data != NULL) {
+                        struct inode *inode = lock->l_ast_data;
+                        if (page_nr > inode->i_mapping->nrpages)
+                                page_nr = inode->i_mapping->nrpages;
+                }
+#endif
+                lock_cost = 1 + page_nr;
+        } else {
+                /* For all locks which are not extent ones cost is 1 */
+                lock_cost = 1;
+        }
+
+        /* Keep all expensive locks in lru for the memory pressure time
+         * cancel policy. They may still be canceled by the lru resize
+         * policy if their CLV is not small enough. */
+        return (lock_cost <= ns->ns_shrink_thumb);
+}
+
+/* Return 1 if @lock should be canceled according to lru resize policy.
+ * Return zero otherwise.
*/ +static int ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + cfs_time_t cur = cfs_time_current(); + struct ldlm_pool *pl = &ns->ns_pool; + __u64 slv, lvf, lv; + cfs_time_t la; + + spin_lock(&pl->pl_lock); + slv = ldlm_pool_get_slv(pl); + lvf = atomic_read(&pl->pl_lock_volume_factor); + spin_unlock(&pl->pl_lock); + + la = cfs_duration_sec(cfs_time_sub(cur, + lock->l_last_used)); + + /* Stop when slv is not yet come from server or + * lv is smaller than it is. */ + lv = lvf * la * unused; + return (slv > 1 && lv >= slv); +} + +/* Return 1 if @lock should be canceled according to passed policy. + * Return zero otherwise. */ +static int ldlm_cancel_passed_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + /* Do nothing here, we allow canceling all locks which + * are passed here from upper layer logic. So that locks + * number to be canceled will be limited by @count and + * @max in ldlm_cancel_lru_local(). */ + return 1; +} + +/* Return 1 if @lock should be canceled according to aged policy. + * Return zero otherwise. */ +static int ldlm_cancel_aged_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + /* Cancel old locks if reached asked limit. */ + return !((added >= asked) && + cfs_time_before_64(cfs_time_current(), + cfs_time_add(lock->l_last_used, + ns->ns_max_age))); +} + +typedef int (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *, + struct ldlm_lock *, int, + int, int); + +static ldlm_cancel_lru_policy_t +ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) +{ + if (ns_connect_lru_resize(ns)) { + if (flags & LDLM_CANCEL_SHRINK) + return ldlm_cancel_shrink_policy; + else if (flags & LDLM_CANCEL_LRUR) + return ldlm_cancel_lrur_policy; + else if (flags & LDLM_CANCEL_PASSED) + return ldlm_cancel_passed_policy; + } else { + if (flags & LDLM_CANCEL_AGED) + return ldlm_cancel_aged_policy; + } + return NULL; +} + /* - Free space in lru for @count new locks, * redundant unused locks are canceled locally; * - also cancel locally unused aged locks; @@ -1092,13 +1228,25 @@ static int ldlm_cancel_list_local(struct list_head *cancels, int count) * There are the following use cases: ldlm_cancel_resource_local(), * ldlm_cancel_lru_local() and ldlm_cli_cancel(), which check&set this * flag properly. As any attempt to cancel a lock rely on this flag, - * l_bl_ast list is accessed later without any special locking. */ + * l_bl_ast list is accessed later without any special locking. + * + * Calling policies for enabled lru resize: + * ---------------------------------------- + * flags & LDLM_CANCEL_LRUR - use lru resize policy (SLV from server) to + * cancel not more than @count locks; + * + * flags & LDLM_CANCEL_PASSED - cancel @count number of old locks (located at + * the beginning of lru list); + * + * flags & LDLM_CANCEL_SHRINK - cancel not more than @count locks according to + * memory pressre policy function. 
+ */ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, int flags) { - int added = 0, unused; - cfs_time_t cur = cfs_time_current(); - struct ldlm_lock *lock; + ldlm_cancel_lru_policy_t cancel_lru_policy_func; + int added = 0, unused, cancel; + struct ldlm_lock *lock, *next; ENTRY; spin_lock(&ns->ns_unused_lock); @@ -1107,113 +1255,95 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, if (!ns_connect_lru_resize(ns)) count += unused - ns->ns_max_unused; - while (!list_empty(&ns->ns_unused_list)) { - struct ldlm_pool *pl; - __u64 slv, lvf, lv; + cancel_lru_policy_func = ldlm_cancel_lru_policy(ns, flags); + + list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { + /* Make sure that we skip locks being already in cancel. */ + if ((lock->l_flags & LDLM_FL_CANCELING) || + (lock->l_flags & LDLM_FL_BL_AST)) + continue; - if (max && added >= max) + /* For any flags, stop scanning if @max or passed @count is + * reached. */ + if ((max && added >= max) || (count && added >= count)) break; - list_for_each_entry(lock, &ns->ns_unused_list, l_lru) { - /* somebody is already doing CANCEL or there is a - * blocking request will send cancel. */ - if (!(lock->l_flags & LDLM_FL_CANCELING) && - !(lock->l_flags & LDLM_FL_BL_AST)) + /* Pass the lock through the policy filter and see if it + * should stay in lru. */ + if (cancel_lru_policy_func != NULL) { + cancel = cancel_lru_policy_func(ns, lock, unused, + added, count); + + /* Take next lock for shrink policy, we need to check + * whole list. Stop scanning for other policies. */ + if ((flags & LDLM_CANCEL_SHRINK) && !cancel) + continue; + else if (!cancel) break; } - if (&lock->l_lru == &ns->ns_unused_list) - break; - pl = &ns->ns_pool; - - if (ns_connect_lru_resize(ns)) { - cfs_time_t la; - - /* Cancel locks by lru only in the case of count == 0. */ - if (count == 0) { - /* Calculate lv for every lock. */ - spin_lock(&pl->pl_lock); - slv = ldlm_pool_get_slv(pl); - lvf = atomic_read(&pl->pl_lock_volume_factor); - spin_unlock(&pl->pl_lock); - - la = cfs_duration_sec(cfs_time_sub(cur, - lock->l_last_used)); - if (la == 0) - la = 1; - - /* Stop when slv is not yet come from server or - * lv is smaller than it is. */ - lv = lvf * la * unused; - if (slv == 1 || lv < slv) - break; - } else { - if (added >= count) - break; + if (cancels != NULL) { + LDLM_LOCK_GET(lock); /* dropped by bl thread */ + spin_unlock(&ns->ns_unused_lock); + + lock_res_and_lock(lock); + /* Check flags again under the lock. */ + if ((lock->l_flags & LDLM_FL_CANCELING) || + (lock->l_flags & LDLM_FL_BL_AST) || + (ldlm_lock_remove_from_lru(lock) == 0)) { + /* other thread is removing lock from lru or + * somebody is already doing CANCEL or + * there is a blocking request which will send + * cancel by itseft. */ + unlock_res_and_lock(lock); + LDLM_LOCK_PUT(lock); + spin_lock(&ns->ns_unused_lock); + continue; } - } else { - if ((added >= count) && - (!(flags & LDLM_CANCEL_AGED) || - cfs_time_before_64(cur, (__u64)ns->ns_max_age + - lock->l_last_used))) - break; - } - - LDLM_LOCK_GET(lock); /* dropped by bl thread */ - spin_unlock(&ns->ns_unused_lock); - - lock_res_and_lock(lock); - /* Check flags again under the lock. */ - if ((lock->l_flags & LDLM_FL_CANCELING) || - (lock->l_flags & LDLM_FL_BL_AST) || - (ldlm_lock_remove_from_lru(lock) == 0)) { - /* other thread is removing lock from lru or - * somebody is already doing CANCEL or - * there is a blocking request which will send - * cancel by itseft. 
*/ + LASSERT(!lock->l_readers && !lock->l_writers); + + /* If we have chosen to cancel this lock voluntarily, we + * better send cancel notification to server, so that it + * frees appropriate state. This might lead to a race + * where while we are doing cancel here, server is also + * silently cancelling this lock. */ + lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + + /* Setting the CBPENDING flag is a little misleading, but + * prevents an important race; namely, once CBPENDING is + * set, the lock can accumulate no more readers/writers. + * Since readers and writers are already zero here, + * ldlm_lock_decref() won't see this flag and call + * l_blocking_ast */ + lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; + + /* We can't re-add to l_lru as it confuses the refcounting + * in ldlm_lock_remove_from_lru() if an AST arrives after + * we drop ns_lock below. We use l_bl_ast and can't use + * l_pending_chain as it is used both on server and client + * nevertheless bug 5666 says it is used only on server */ + LASSERT(list_empty(&lock->l_bl_ast)); + list_add(&lock->l_bl_ast, cancels); unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); spin_lock(&ns->ns_unused_lock); - continue; } - LASSERT(!lock->l_readers && !lock->l_writers); - - /* If we have chosen to canecl this lock voluntarily, we better - send cancel notification to server, so that it frees - appropriate state. This might lead to a race where while - we are doing cancel here, server is also silently - cancelling this lock. */ - lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; - - /* Setting the CBPENDING flag is a little misleading, but - * prevents an important race; namely, once CBPENDING is set, - * the lock can accumulate no more readers/writers. Since - * readers and writers are already zero here, ldlm_lock_decref - * won't see this flag and call l_blocking_ast */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; - /* We can't re-add to l_lru as it confuses the refcounting in - * ldlm_lock_remove_from_lru() if an AST arrives after we drop - * ns_lock below. We use l_bl_ast and can't use l_pending_chain - * as it is used both on server and client nevertheles bug 5666 - * says it is used only on server. --umka */ - - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); - unlock_res_and_lock(lock); - spin_lock(&ns->ns_unused_lock); added++; unused--; } spin_unlock(&ns->ns_unused_lock); + + if (cancels == NULL) + RETURN(added); - RETURN (ldlm_cancel_list(cancels, added)); + RETURN(ldlm_cancel_list(cancels, added)); } /* when called with LDLM_ASYNC the blocking callback will be handled * in a thread and this function will return after the thread has been * asked to call the callback. when called with LDLM_SYNC the blocking * callback will be performed in this function. 
*/ -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync) +int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync, + int flags) { CFS_LIST_HEAD(cancels); int count, rc; @@ -1222,7 +1352,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync) #ifndef __KERNEL__ sync = LDLM_SYNC; /* force to be sync in user space */ #endif - count = ldlm_cancel_lru_local(ns, &cancels, nr, 0, 0); + count = ldlm_cancel_lru_local(ns, &cancels, nr, 0, flags); if (sync == LDLM_ASYNC) { struct ldlm_lock *lock, *next; list_for_each_entry_safe(lock, next, &cancels, l_bl_ast) { diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 1a4c475530563f7712ba1ad65cd6508f9587d6b4..ae353ae188c21cb8c8d88c9ecb541f9a07554024 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -152,7 +152,8 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, int canceled, unused = ns->ns_nr_unused; /* Try to cancel all @ns_nr_unused locks. */ - canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC); + canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC, + LDLM_CANCEL_PASSED); if (canceled < unused) { CERROR("not all requested locks are canceled, " "requested: %d, canceled: %d\n", unused, @@ -162,7 +163,7 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, } else { tmp = ns->ns_max_unused; ns->ns_max_unused = 0; - ldlm_cancel_lru(ns, 0, LDLM_SYNC); + ldlm_cancel_lru(ns, 0, LDLM_SYNC, LDLM_CANCEL_PASSED); ns->ns_max_unused = tmp; } return count; @@ -185,7 +186,7 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, CDEBUG(D_DLMTRACE, "changing namespace %s unused locks from %u to %u\n", ns->ns_name, ns->ns_nr_unused, (unsigned int)tmp); - ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC); + ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC, LDLM_CANCEL_PASSED); if (!lru_resize) { CDEBUG(D_DLMTRACE, "disable lru_resize for namespace %s\n", @@ -196,7 +197,7 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n", ns->ns_name, ns->ns_max_unused, (unsigned int)tmp); ns->ns_max_unused = (unsigned int)tmp; - ldlm_cancel_lru(ns, 0, LDLM_ASYNC); + ldlm_cancel_lru(ns, 0, LDLM_ASYNC, LDLM_CANCEL_PASSED); /* Make sure that originally lru resize was supported before * turning it on here. 
*/ @@ -247,6 +248,13 @@ void ldlm_proc_namespace(struct ldlm_namespace *ns) lock_vars[0].write_fptr = lprocfs_wr_lru_size; lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); + snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb", + ns->ns_name); + lock_vars[0].data = ns; + lock_vars[0].read_fptr = lprocfs_rd_uint; + lock_vars[0].write_fptr = lprocfs_wr_uint; + lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); + snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age", ns->ns_name); lock_vars[0].data = &ns->ns_max_age; @@ -308,6 +316,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, ldlm_side_t client, if (!ns->ns_name) GOTO(out_hash, NULL); + ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB; ns->ns_appetite = apt; strcpy(ns->ns_name, name); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 0f4e00187ed8da83ee6949c8c32fc4ee1a58b64a..063dbabb753995f4e176fe10b34a01ee16b8e7b2 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -383,6 +383,9 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int flags) lustre_msg_get_magic(req->rq_repmsg), req->rq_replen); } + if (req->rq_export && req->rq_export->exp_obd) + target_pack_pool_reply(req); + if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL); else diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 35c29def2f258c70696872539dcaa27aeb0c9074..4c32f386b7128411a842765549ead32f1ebfb158 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -4745,36 +4745,53 @@ run_test 124a "lru resize =======================================" test_124b() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 - cleanup -f || error "failed to unmount" - MOUNTOPT="$MOUNTOPT,nolruresize" - setup || error "setup failed" - NR=2000 - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` + LIMIT=`cat $NSDIR/pool/limit` + + #define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) + NR_CPU=$(awk '/processor/' /proc/cpuinfo | wc -l) + test $NR_CPU -gt 1 && SUFFIX="(s)" || SUFFIX="" + # 100 locks here is default value for non-shrinkable lru as well + # as the order to switch to static lru managing policy + LDLM_DEFAULT_LRU_SIZE=$((100 * NR_CPU)) + log "$NR_CPU CPU${SUFFIX} detected, LDLM_DEFAULT_LRU_SIZE = $LDLM_DEFAULT_LRU_SIZE" - createmany -o $DIR/$tdir/f $NR - log "doing ls -la $DIR/$tdir 3 times (lru resize disabled)" + log "disable lru resize for $(basename $NSDIR)" + echo $LDLM_DEFAULT_LRU_SIZE > $NSDIR/lru_size + + NR=$((LIMIT-(LIMIT/3))) + mkdir -p $DIR/$tdir/disable_lru_resize || + error "failed to create $DIR/$tdir/disable_lru_resize" + + createmany -o $DIR/$tdir/disable_lru_resize/f $NR + log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times" stime=`date +%s` - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null + ls -la $DIR/$tdir/disable_lru_resize > /dev/null + ls -la $DIR/$tdir/disable_lru_resize > /dev/null + ls -la $DIR/$tdir/disable_lru_resize > /dev/null etime=`date +%s` nolruresize_delta=$((etime-stime)) log "ls -la time: $nolruresize_delta seconds" + log "lru_size = $(cat $NSDIR/lru_size)" - cleanup -f || error "failed to unmount" - MOUNTOPT=`echo $MOUNTOPT | sed "s/nolruresize/lruresize/"` - setup || error "setup failed" + mkdir -p $DIR/$tdir/enable_lru_resize || + error "failed to create $DIR/$tdir/enable_lru_resize" + + # 0 locks means here flush lru and 
switch to lru resize policy + log "enable lru resize for $(basename $NSDIR)" + echo 0 > $NSDIR/lru_size - createmany -o $DIR/$tdir/f $NR - log "doing ls -la $DIR/$tdir 3 times (lru resize enabled)" + createmany -o $DIR/$tdir/enable_lru_resize/f $NR + log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times" stime=`date +%s` - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null + ls -la $DIR/$tdir/enable_lru_resize > /dev/null + ls -la $DIR/$tdir/enable_lru_resize > /dev/null + ls -la $DIR/$tdir/enable_lru_resize > /dev/null etime=`date +%s` lruresize_delta=$((etime-stime)) log "ls -la time: $lruresize_delta seconds" + log "lru_size = $(cat $NSDIR/lru_size)" if test $lruresize_delta -gt $nolruresize_delta; then log "ls -la is $((lruresize_delta - $nolruresize_delta))s slower with lru resize enabled" @@ -4783,8 +4800,6 @@ test_124b() { else log "lru resize performs the same with no lru resize" fi - - unlinkmany $DIR/$tdir/f $NR } run_test 124b "lru resize (performance test) ======================="
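
For reference, the LDLM_CANCEL_LRUR policy added above cancels an unused lock once its lock volume (idle age multiplied by the number of unused locks, scaled by pl_lock_volume_factor) reaches the server lock volume carried in the RPC reply. The stand-alone C sketch below only models that comparison in user space; the function name, the sample SLV/LVF numbers and the main() driver are illustrative assumptions, not Lustre code.

#include <stdio.h>
#include <stdint.h>

/* Simplified model of the LDLM_CANCEL_LRUR decision: cancel an unused lock
 * when lv = lvf * idle_age * unused reaches the server lock volume (SLV).
 * An SLV of 0 or 1 means the server has not reported one yet, so the lock
 * is kept. All numbers below are made up for illustration. */
static int lrur_should_cancel(uint64_t slv, uint64_t lvf,
                              uint64_t idle_age_sec, uint64_t unused)
{
        uint64_t lv = lvf * idle_age_sec * unused;
        return slv > 1 && lv >= slv;
}

int main(void)
{
        uint64_t slv = 100000, lvf = 1, unused = 1000;

        /* Recently used lock: volume 5 * 1000 stays below SLV, keep it. */
        printf("idle 5s:   cancel=%d\n", lrur_should_cancel(slv, lvf, 5, unused));
        /* Long idle lock: volume 200 * 1000 exceeds SLV, cancel it. */
        printf("idle 200s: cancel=%d\n", lrur_should_cancel(slv, lvf, 200, unused));
        return 0;
}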
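
Similarly, the LDLM_CANCEL_SHRINK policy uses ns_shrink_thumb (default LDLM_LOCK_SHRINK_THUMB = 256 pages, about 1M with 4K pages) as a cost threshold: a non-extent lock costs 1, an extent lock costs 1 plus the pages it covers, and only cheap locks are dropped under memory pressure. A minimal user-space sketch of that check follows; the page size, helper name and sample extents are assumptions for the example.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_BYTES     4096ULL  /* assumed 4K pages */
#define SHRINK_THUMB_PAGES  256      /* default ns_shrink_thumb from the patch */

/* Simplified model of ldlm_cancel_shrink_policy(): a non-extent lock costs 1,
 * an extent lock costs 1 + the number of pages it covers. Under memory
 * pressure only locks with cost <= shrink thumb are canceled; expensive
 * extent locks stay in the lru for the SLV-driven lru resize policy. */
static int shrink_should_cancel(int is_extent, uint64_t start, uint64_t end)
{
        uint64_t cost = 1;

        if (is_extent)
                cost += (end - start) / PAGE_SIZE_BYTES;
        return cost <= SHRINK_THUMB_PAGES;
}

int main(void)
{
        printf("ibits lock:  %d\n", shrink_should_cancel(0, 0, 0));
        printf("512K extent: %d\n", shrink_should_cancel(1, 0, 512 * 1024));
        printf("4M extent:   %d\n", shrink_should_cancel(1, 0, 4ULL << 20));
        return 0;
}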