diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index cef9318a15596c546179c5747383e5a53f5a8e39..6dc3840f84a2f21f65a7a614531c9511e9ff2e2b 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -13,7 +13,6 @@ #define D_ADAPTTO D_OTHER #define AT_BINS 4 /* "bin" means "N seconds of history" */ #define AT_FLG_NOHIST 0x1 /* use last reported value only */ -#define AT_FLG_MIN 0x2 /* use a minimum limit */ struct adaptive_timeout { time_t at_binstart; /* bin start time */ @@ -70,7 +69,6 @@ struct imp_at { int iat_portal[IMP_AT_MAX_PORTALS]; struct adaptive_timeout iat_net_latency; struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS]; - time_t iat_drain; /* hack to slow reconnect reqs */ }; struct obd_import { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 8bd5753b0d968cdcd5fcae0d0fd12d96f1a621b6..9b742d3da7da014d78f4343d285cab45bd9bee38 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -38,7 +38,6 @@ extern unsigned int obd_dump_on_eviction; networking / disk / timings affected by load (use Adaptive Timeouts) */ extern unsigned int obd_timeout; /* seconds */ extern unsigned int ldlm_timeout; /* seconds */ -extern unsigned int adaptive_timeout_min; /* seconds */ extern unsigned int adaptive_timeout_max; /* seconds */ extern unsigned int adaptive_timeout_history; /* seconds */ extern unsigned int obd_sync_filter; @@ -60,9 +59,14 @@ extern unsigned int obd_alloc_fail_rate; #define PING_EVICT_TIMEOUT (PING_INTERVAL * 5 / 2) #define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */ #define CONNECTION_SWITCH_MIN 5 /* Connection switching rate limiter */ +#define CONNECTION_SWITCH_MAX 50 /* Max connect interval for nonresponsive + servers; keep this within the recovery + period */ +#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */ #ifndef CRAY_XT3 /* In general this should be low to have quick detection of a system - running on a backup server. */ + running on a backup server. (If it's too low, import_select_connection + will increase the timeout anyhow.) */ #define INITIAL_CONNECT_TIMEOUT max_t(int,CONNECTION_SWITCH_MIN,obd_timeout/20) #else /* ...but for very large systems (e.g. CRAY) we need to keep the initial @@ -71,7 +75,6 @@ extern unsigned int obd_alloc_fail_rate; chance to generate adaptive timeout data. */ #define INITIAL_CONNECT_TIMEOUT max_t(int,CONNECTION_SWITCH_MIN,obd_timeout/2) #endif -#define LND_TIMEOUT 50 /* LNET LND-level RPC timeout */ #define LONG_UNLINK 300 /* Unlink should happen before now */ diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 874798c248fdaa098c9ccfb323cc1c59a6874094..bad9801cfe70c97c2a867ff914b03075818fbb84 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -135,7 +135,7 @@ static int lov_rd_stripecount(char *page, char **start, off_t off, int count, LASSERT(dev != NULL); desc = &dev->u.lov.desc; *eof = 1; - return snprintf(page, count, "%u\n", desc->ld_default_stripe_count); + return snprintf(page, count, "%d\n", desc->ld_default_stripe_count); } static int lov_wr_stripecount(struct file *file, const char *buffer, diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 632d7e3e013b521d5b978fe0e7319ae3d6ad0247..eb828604d9bf95ec627dcc36c3f7da508e5e2cb6 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -65,8 +65,6 @@ unsigned int obd_dump_on_timeout; unsigned int obd_dump_on_eviction; unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ -/* Covers the maximum expected network latency */ -unsigned int adaptive_timeout_min = 10; /* seconds */ unsigned int adaptive_timeout_max = 600; /* seconds */ /* We remember the slowest event that took place within history */ unsigned int adaptive_timeout_history = 600; /* seconds */ @@ -388,7 +386,6 @@ EXPORT_SYMBOL(obd_dump_on_timeout); EXPORT_SYMBOL(obd_dump_on_eviction); EXPORT_SYMBOL(obd_timeout); EXPORT_SYMBOL(ldlm_timeout); -EXPORT_SYMBOL(adaptive_timeout_min); EXPORT_SYMBOL(adaptive_timeout_max); EXPORT_SYMBOL(adaptive_timeout_history); EXPORT_SYMBOL(obd_max_dirty_pages); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 470cae73d966f7ac4f4ba98b6a8cb1fc332b4354..81765de1a10650e121b12c3822a7c9be766e998e 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -793,11 +793,7 @@ EXPORT_SYMBOL(class_import_put); static void init_imp_at(struct imp_at *at) { int i; - /* We need enough time to get an early response on a slow network. - Since we can't say for sure how slow a network might be, we use - a user-defined max expected network latency. We will adapt to slow - increases, but a sudden jump can still kill us. */ - at_init(&at->iat_net_latency, adaptive_timeout_min, AT_FLG_MIN); + at_init(&at->iat_net_latency, CONNECTION_SWITCH_INC, 0); for (i = 0; i < IMP_AT_MAX_PORTALS; i++) { /* max service estimates are tracked on the server side, so don't use the AT history here, just use the last reported @@ -805,7 +801,6 @@ static void init_imp_at(struct imp_at *at) { at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT, AT_FLG_NOHIST); } - at->iat_drain = 0; } struct obd_import *class_new_import(struct obd_device *obd) diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index 85cba69e50ee12b4c714eb3333324fa56038b3c4..0d366621dfd41669db68c342d7218e5c39874739 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -62,7 +62,6 @@ enum { OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */ OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */ OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */ - ADAPTIVE_MIN, /* Adaptive timeout lower limit */ ADAPTIVE_MAX, /* Adaptive timeout upper limit */ ADAPTIVE_HISTORY, /* Adaptive timeout timebase */ }; @@ -198,14 +197,6 @@ static cfs_sysctl_table_t obd_table[] = { .proc_handler = &proc_alloc_fail_rate }, #endif - { - .ctl_name = ADAPTIVE_MIN, - .procname = "adaptive_min", - .data = &adaptive_timeout_min, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, { .ctl_name = ADAPTIVE_MAX, .procname = "adaptive_max", diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 85e23edb60ee5f5a984c11fbe7ecb2bbbdcef48f..0eaddcdcaa97578d045835d30014317290eabd4d 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -215,14 +215,6 @@ static void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req) /* We could get even fancier here, using history to predict increased loading... */ - if (at->iat_drain > req->rq_timeout) { - /* If we're trying to drain the network queues, give this - req a long timeout */ - req->rq_timeout = at->iat_drain; - CDEBUG(D_ADAPTTO, "waiting %ds to let queues drain\n", - req->rq_timeout); - } - /* Let the server know what this RPC timeout is by putting it in the reqmsg*/ lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout); @@ -271,7 +263,7 @@ static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req) /* Network latency is total time less server processing time */ nl = max_t(int, now - req->rq_sent - st, 0) + 1/*st rounding*/; - if (st > now - req->rq_sent + 1 /* rounding */) + if (st > now - req->rq_sent + 2 /* rounding */) CERROR("Reported service time %u > total measured time %ld\n", st, now - req->rq_sent); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index a058f6b6cd8c5148859aeca8f08f76caca99eb11..327c2b78e6f5946ddc3bf8da4dc427d6718a5e09 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -188,8 +188,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp) struct list_head *tmp, *n; struct ptlrpc_request *req; struct l_wait_info lwi; - time_t last = 0; - int timeout, rc = 0; + int rc; atomic_inc(&imp->imp_inval_count); @@ -198,28 +197,16 @@ void ptlrpc_invalidate_import(struct obd_import *imp) LASSERT(imp->imp_invalid); - /* wait for all requests to error out and call completion callbacks */ - spin_lock(&imp->imp_lock); - list_for_each_safe(tmp, n, &imp->imp_sending_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - last = max(last, req->rq_deadline); - } - list_for_each_safe(tmp, n, &imp->imp_delayed_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - last = max(last, req->rq_deadline); - } - spin_unlock(&imp->imp_lock); + /* wait for all requests to error out and call completion callbacks. + Cap it at obd_timeout -- these should all have been locally + cancelled by ptlrpc_abort_inflight. */ + lwi = LWI_TIMEOUT_INTERVAL( + cfs_timeout_cap(cfs_time_seconds(obd_timeout)), + cfs_time_seconds(1), NULL, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + (atomic_read(&imp->imp_inflight) == 0), &lwi); - timeout = (int)(last - cfs_time_current_sec()); - if (timeout > 0) { - lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(timeout), - cfs_time_seconds(1), NULL, NULL); - rc = l_wait_event(imp->imp_recovery_waitq, - (atomic_read(&imp->imp_inflight) == 0), - &lwi); - } - - if (atomic_read(&imp->imp_inflight)) { + if (rc) { CERROR("%s: rc = %d waiting for callback (%d != 0)\n", obd2cli_tgt(imp->imp_obd), rc, atomic_read(&imp->imp_inflight)); @@ -339,18 +326,20 @@ static int import_select_connection(struct obd_import *imp) LASSERT(imp_conn->oic_conn); /* If we've tried everything, and we're back to the beginning of the - list, wait for LND_TIMEOUT to give the queues a chance to drain. */ + list, increase our timeout and try again. It will be reset when + we do finally connect. (FIXME: really we should wait for all network + state associated with the last connection attempt to drain before + trying to reconnect on it.) */ if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) { - int must_wait; + if (at_get(&imp->imp_at.iat_net_latency) < + CONNECTION_SWITCH_MAX) { + at_add(&imp->imp_at.iat_net_latency, + at_get(&imp->imp_at.iat_net_latency) + + CONNECTION_SWITCH_INC); + } LASSERT(imp_conn->oic_last_attempt); - must_wait = LND_TIMEOUT - - (int)cfs_duration_sec(cfs_time_current_64() - - imp_conn->oic_last_attempt); - imp->imp_at.iat_drain = max(0, must_wait); - CWARN("Tried all connections, %lus drain time\n", - imp->imp_at.iat_drain); - } else { - imp->imp_at.iat_drain = 0; + CWARN("Tried all connections, increasing latency to %ds\n", + at_get(&imp->imp_at.iat_net_latency)); } imp_conn->oic_last_attempt = cfs_time_current_64(); @@ -568,7 +557,6 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, ENTRY; spin_lock(&imp->imp_lock); - imp->imp_at.iat_drain = 0; if (imp->imp_state == LUSTRE_IMP_CLOSED) { spin_unlock(&imp->imp_lock); RETURN(0); @@ -1174,10 +1162,6 @@ int at_add(struct adaptive_timeout *at, unsigned int val) at->at_binstart += shift * binlimit; } - if ((at->at_flags & AT_FLG_MIN) && - (at->at_current < adaptive_timeout_min)) - at->at_current = adaptive_timeout_min; - if (at->at_current > at->at_worst_ever) { at->at_worst_ever = at->at_current; at->at_worst_time = now; diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 6f4914661213bffba6674dc91876373be5d6d270..af74dc78422da4c3cfc6e975dd824bc24fda208e 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -67,11 +67,12 @@ int ptlrpc_ping(struct obd_import *imp) void ptlrpc_update_next_ping(struct obd_import *imp) { #ifdef ENABLE_PINGER - int time = (imp->imp_state != LUSTRE_IMP_DISCON) ? PING_INTERVAL : - /* FIXME should this be limited to LND_TIMEOUT so we don't - build up pings in LND output queues? */ - max_t(int, CONNECTION_SWITCH_MIN, - at_get(&imp->imp_at.iat_net_latency)); + int time = PING_INTERVAL; + if (imp->imp_state == LUSTRE_IMP_DISCON) { + int dtime = max_t(int, CONNECTION_SWITCH_MIN, + at_get(&imp->imp_at.iat_net_latency)); + time = min(time, dtime); + } imp->imp_next_ping = cfs_time_shift(time); #endif /* ENABLE_PINGER */ }