diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 8c5e54d9da2465510bdf731dcd2d234183ce6fb1..d7d1bea8d16f95b7e954eb7edd165443d7aec0ff 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -271,7 +271,7 @@ struct ptlrpc_request { unsigned int rq_intr:1, rq_replied:1, rq_err:1, rq_timedout:1, rq_resend:1, rq_restart:1, rq_replay:1, rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1; + rq_no_delay:1, rq_net_err:1; int rq_phase; /* client-side refcount for SENT race */ atomic_t rq_refcount; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 04ff0e9c7b2e1249f8f1096c2c88d6022dd7d4a9..fce6f392995abedeaaa897d6e08494425d25b666 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -400,6 +400,13 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req) DEBUG_REQ(D_NET, req, "REPLIED:"); GOTO(out, rc = 1); } + + if (req->rq_net_err && !req->rq_timedout) { + spin_unlock_irqrestore (&req->rq_lock, flags); + rc = ptlrpc_expire_one_request(req); + spin_lock_irqsave (&req->rq_lock, flags); + GOTO(out, rc); + } if (req->rq_err) { DEBUG_REQ(D_ERROR, req, "ABORTED:"); @@ -570,7 +577,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) rc = ptl_send_rpc(req); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc); - req->rq_timeout = 1; + req->rq_net_err = 1; RETURN(rc); } RETURN(0); @@ -611,6 +618,9 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) if (req->rq_phase == RQ_PHASE_INTERPRET) GOTO(interpret, req->rq_status); + if (req->rq_net_err && !req->rq_timedout) + ptlrpc_expire_one_request(req); + if (req->rq_err) { ptlrpc_unregister_reply(req); if (req->rq_status == 0) @@ -698,7 +708,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) DEBUG_REQ(D_HA, req, "send failed (%d)", rc); force_timer_recalc = 1; - req->rq_timeout = 0; + req->rq_net_err = 1; } /* need to reset the timeout */ force_timer_recalc = 1; @@ -1191,6 +1201,7 @@ void ptlrpc_resend_req(struct ptlrpc_request *req) spin_lock_irqsave (&req->rq_lock, flags); req->rq_resend = 1; + req->rq_net_err = 0; req->rq_timedout = 0; if (req->rq_bulk) { __u64 old_xid = req->rq_xid; diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 6e612364fbaf2439b4d34ca1b343e99b00f481f4..77334a291f6490b787884a035dbc72e77daf970c 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -58,7 +58,7 @@ void request_out_callback(ptl_event_t *ev) * like failing sends in client.c does currently... */ spin_lock_irqsave(&req->rq_lock, flags); - req->rq_timeout = 0; + req->rq_net_err = 1; spin_unlock_irqrestore(&req->rq_lock, flags); ptlrpc_wake_client_req(req); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index c22e66890b4de606114d5fc2f5637de8a29d37db..d6a9ae8571aec4fc149d99981be890c6983fb83c 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -471,6 +471,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_replied = 0; request->rq_err = 0; request->rq_timedout = 0; + request->rq_net_err = 0; request->rq_resend = 0; request->rq_restart = 0; spin_unlock_irqrestore (&request->rq_lock, flags);