diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index a4dd44c688b3fa5781426c0a452eb7ff9ae540fe..3fb9d7dca794b6c1fa99208340747d79ccf9ecd8 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -1842,6 +1842,17 @@ Details    : A security feature, which is to prevent users from being able
              configuration management server (MGS). The functionality also
              allows to specify sets of clients for which the remapping does
              not apply.
+
+Severity   : normal
+Bugzilla   : 16860
+Description: Excessive recovery window
+Details    : With adaptive timeouts (AT) enabled, the recovery window can be
+	     excessively long (6000+ seconds). To address this problem, we no
+	     longer use OBD_RECOVERY_FACTOR when extending the recovery window.
+	     The connect timeout no longer depends on the service time; it is
+	     now set to INITIAL_CONNECT_TIMEOUT, and clients report the old
+	     service time via pb_service_time.
+
 --------------------------------------------------------------------------------
 
 2007-08-10         Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h
index b594b855be1e59c8c3bc0f42504c6dd706c13ab9..4f6e83f88f862211c0a6912f7d1203aa43e9870b 100644
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -200,9 +200,9 @@ static inline unsigned int at_est2timeout(unsigned int val)
 
 static inline unsigned int at_timeout2est(unsigned int val)
 {
-        /* restore estimate value from timeout */
+        /* restore estimate value from timeout: e=4/5(t-5), at least 1 */
         LASSERT(val);
-        return ((val - 1) / 5 * 4);
+        return (max((val << 2) / 5, 5U) - 4);
 }
 
 static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index bc49e4b836e6d9801ef559e26a2ca73889b75ae8..7ac4c939c4a1efe401340e5a3be504640b49cee5 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1361,18 +1361,27 @@ target_start_and_reset_recovery_timer(struct obd_device *obd,
                                       struct ptlrpc_request *req,
                                       int new_client)
 {
-        int req_timeout = lustre_msg_get_timeout(req->rq_reqmsg);
+        int service_time = lustre_msg_get_service_time(req->rq_reqmsg);
 
-        /* teach server about old server's estimates */
-        if (!new_client)
+        if (!new_client && service_time)
+                /* Teach server about old server's estimates, as first guess
+                 * at how long new requests will take. */
                 at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate,
-                       at_timeout2est(req_timeout));
+                       service_time);
 
         check_and_start_recovery_timer(obd);
 
-        req_timeout *= OBD_RECOVERY_FACTOR;
-        if (req_timeout > obd->obd_recovery_timeout && !new_client)
-                reset_recovery_timer(obd, req_timeout, 0);
+        /* convert the service time to rpc timeout,
+         * reuse service_time to limit stack usage */
+        service_time = at_est2timeout(service_time);
+
+        /* We expect other clients to time out within service_time, then try
+         * to reconnect, then try the failover server.  The max delay between
+         * connect attempts is SWITCH_MAX + SWITCH_INC + INITIAL */
+        service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC +
+                             INITIAL_CONNECT_TIMEOUT);
+        if (service_time > obd->obd_recovery_timeout && !new_client)
+                reset_recovery_timer(obd, service_time, 0);
 }
 
 #ifdef __KERNEL__
@@ -1595,7 +1604,7 @@ static int handle_recovery_req(struct ptlrpc_thread *thread,
         if (!req_replay_done(req->rq_export) ||
             !lock_replay_done(req->rq_export))
                 reset_recovery_timer(class_exp2obd(req->rq_export),
-                       OBD_RECOVERY_FACTOR * AT_OFF ? obd_timeout :
+                       AT_OFF ? obd_timeout :
                        at_get(&req->rq_rqbd->rqbd_service->srv_at_estimate), 1);
         ptlrpc_free_clone(req);
         RETURN(0);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index 38cd423fa758872db6be5266d1ecab43ddde0866..5021179d8f2c922499f47bb83e5ced8c492afa7b 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -655,6 +655,19 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                 GOTO(out, rc);
         }
 
+        /* Report the rpc service time to the server so that it knows how long
+         * to wait for clients to join recovery */
+        lustre_msg_set_service_time(request->rq_reqmsg,
+                                    at_timeout2est(request->rq_timeout));
+
+        /* The amount of time we give the server to process the connect req.
+         * import_select_connection will increase the net latency on
+         * repeated reconnect attempts to cover slow networks.
+         * We override/ignore the server rpc completion estimate here,
+         * which may be large if this is a reconnect attempt */
+        request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
+        lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
+
 #ifndef __KERNEL__
         lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_LIBCLIENT);
 #endif
@@ -681,10 +694,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                 spin_unlock(&imp->imp_lock);
                 lustre_msg_add_op_flags(request->rq_reqmsg,
                                         MSG_CONNECT_INITIAL);
-                if (AT_OFF)
-                        /* AT will use INITIAL_CONNECT_TIMEOUT the first
-                           time, adaptive after that. */
-                        request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
         }
 
         if (set_transno)