/*
 * Patch summary (preamble text; everything from the first "diff --git"
 * header onward is the unmodified patch payload).
 *
 * NOTE: the line structure of this diff has been flattened, so each hunk
 * appears as running text rather than one source line per diff line.
 *
 * lustre/include/lustre_import.h
 *   - adds at_est2timeout(val): returns val + (val >> 2) + 5, i.e. the
 *     "125% + 5 sec" formula previously open-coded in client.c.
 *   - adds at_timeout2est(val): returns (val - 1) / 5 * 4, meant to map a
 *     timeout back to an estimate. Integer division makes the round trip
 *     inexact (estimate 5 -> timeout 11 -> estimate 8).
 *   - NOTE(review): val is unsigned, so at_timeout2est(0) computes
 *     (0 - 1) as a wrapped value and returns a very large estimate.
 *     Confirm the caller can never pass a zero timeout.
 *
 * lustre/ldlm/ldlm_lib.c, target_start_and_reset_recovery_timer()
 *   - req_timeout is now read unscaled from lustre_msg_get_timeout().
 *   - when !new_client, at_timeout2est(req_timeout) is fed to at_add() on
 *     the service's srv_at_estimate.
 *   - req_timeout is multiplied by OBD_RECOVERY_FACTOR only afterwards,
 *     before the comparison with obd->obd_recovery_timeout.
 *
 * lustre/ptlrpc/client.c
 *   - ptlrpc_at_set_req_timeout(): rq_timeout is now computed through
 *     at_est2timeout(serv_est) instead of the inline expression.
 *   - ptlrpc_at_adj_service(): returns early unless rq_send_state has a bit
 *     in common with (LUSTRE_IMP_FULL | LUSTRE_IMP_CONNECTING).
 *   - NOTE(review): that bitwise test is only a correct state check if the
 *     LUSTRE_IMP_* constants are distinct bit flags; their definition is
 *     not visible in this patch. Verify, or compare with == instead.
 *
 * lustre/ptlrpc/niobuf.c, ptlrpc_send_reply()
 *   - requests whose message flags include MSG_RESENT, MSG_REPLAY or
 *     MSG_LAST_REPLAY no longer contribute service_time to srv_at_estimate,
 *     in addition to the existing exclusion of early replies and errors.
 */
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index a8ae800e64eee217d2c65546250892a8a19ac780..491d0d08500cfa9585ca72b00ffc29edb6f8a84a 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -170,6 +170,18 @@ struct obd_import { }; /* import.c */ +static inline unsigned int at_est2timeout(unsigned int val) +{ + /* add an arbitrary minimum: 125% +5 sec */ + return (val + (val >> 2) + 5); +} + +static inline unsigned int at_timeout2est(unsigned int val) +{ + /* restore estimate value from timeout */ + return ((val - 1) / 5 * 4); +} + static inline void at_init(struct adaptive_timeout *at, int val, int flags) { memset(at, 0, sizeof(*at)); at->at_current = val; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index a9bb6b5eeb1cfc22e63d03dfc480b24af6e86f1c..6538c6609fe5c75d3fcab38db58fd7c8f34ba5b2 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1251,11 +1251,16 @@ target_start_and_reset_recovery_timer(struct obd_device *obd, struct ptlrpc_request *req, int new_client) { - int req_timeout = OBD_RECOVERY_FACTOR * - lustre_msg_get_timeout(req->rq_reqmsg); + int req_timeout = lustre_msg_get_timeout(req->rq_reqmsg); + + /* teach server about old server's estimates */ + if (!new_client) + at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate, + at_timeout2est(req_timeout)); check_and_start_recovery_timer(obd, handler); + req_timeout *= OBD_RECOVERY_FACTOR; if (req_timeout > obd->obd_recovery_timeout && !new_client) reset_recovery_timer(obd, req_timeout, 0); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 885fab84645b8f9529ec200e89d92ecc3bad37a3..6145e1e809447778bbbab3fb7e4fbf30d1e699a5 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -221,8 +221,7 @@ void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req) idx = import_at_get_index(req->rq_import, req->rq_request_portal); serv_est = at_get(&at->iat_service_estimate[idx]); - /* add an arbitrary 
minimum: 125% +5 sec */ - req->rq_timeout = serv_est + (serv_est >> 2) + 5; + req->rq_timeout = at_est2timeout(serv_est); /* We could get even fancier here, using history to predict increased loading... */ @@ -239,6 +238,10 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req, unsigned int oldse; struct imp_at *at; + /* do estimate only if is not in recovery */ + if (!(req->rq_send_state & (LUSTRE_IMP_FULL | LUSTRE_IMP_CONNECTING))) + return; + LASSERT(req->rq_import); at = &req->rq_import->imp_at; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 2ae3e50af3b124c28770aefe7be3e326a5a8fe3e..66aff1d6e47111668c067afb988f06ecb5036bdd 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -359,9 +359,11 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int flags) service_time = max_t(int, cfs_time_current_sec() - req->rq_arrival_time.tv_sec, 1); if (!(flags & PTLRPC_REPLY_EARLY) && - (req->rq_type != PTL_RPC_MSG_ERR)) { - /* early replies and errors don't count toward our service - time estimate */ + (req->rq_type != PTL_RPC_MSG_ERR) && + !(lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY | MSG_LAST_REPLAY))) { + /* early replies, errors and recovery requests don't count + * toward our service time estimate */ int oldse = at_add(&svc->srv_at_estimate, service_time); if (oldse != 0) DEBUG_REQ(D_ADAPTTO, req,