From 0fa0cddbdb447cc35e7f872ebb76bb3de20189dc Mon Sep 17 00:00:00 2001
From: wangdi <wangdi>
Date: Wed, 27 Feb 2008 20:45:14 +0000
Subject: [PATCH] Branch: b1_6 b=13464 Set import disconnection before mgc
 reconnection, and refine the previous 13464 fix. i=nathan,adiger

---
 lustre/include/lustre_net.h |  1 +
 lustre/mgc/mgc_request.c    | 29 +----------------------------
 lustre/ptlrpc/client.c      | 16 ++++++++--------
 lustre/ptlrpc/import.c      | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 67d86bff7a..1bd2eabc5e 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -755,6 +755,7 @@ int ptlrpc_init_import(struct obd_import *imp);
 int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
 int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
 void ptlrpc_import_setasync(struct obd_import *imp, int count);
+int ptlrpc_reconnect_import(struct obd_import *imp);
 
 /* ptlrpc/pack_generic.c */
 int lustre_msg_swabbed(struct lustre_msg *msg);
diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c
index 8a671a1e77..7f388375e9 100644
--- a/lustre/mgc/mgc_request.c
+++ b/lustre/mgc/mgc_request.c
@@ -779,33 +779,6 @@ static int mgc_set_mgs_param(struct obd_export *exp,
         RETURN(rc);
 }
 
-int mgc_reconnect_import(struct obd_import *imp)
-{
-        /* Force a new connect attempt */
-        ptlrpc_invalidate_import(imp);
-        /* Do a fresh connect next time by zeroing the handle */
-        ptlrpc_disconnect_import(imp, 1);
-        /* Wait for all invalidate calls to finish */
-        if (atomic_read(&imp->imp_inval_count) > 0) {
-                int rc;
-                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
-                rc = l_wait_event(imp->imp_recovery_waitq,
-                                  (atomic_read(&imp->imp_inval_count) == 0),
-                                  &lwi);
-                if (rc)
-                        CERROR("Interrupted, inval=%d\n", 
-                               atomic_read(&imp->imp_inval_count));
-        }
-
-        /* Allow reconnect attempts */
-        imp->imp_obd->obd_no_recov = 0;
-        /* Remove 'invalid' flag */
-        ptlrpc_activate_import(imp);
-        /* Attempt a new connect */
-        ptlrpc_recover_import(imp, NULL);
-        return 0;
-}
-
 int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
                        void *key, obd_count vallen, void *val, 
                        struct ptlrpc_request_set *set)
@@ -844,7 +817,7 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
                        ptlrpc_import_state_name(imp->imp_state));
                 /* Resurrect if we previously died */
                 if (imp->imp_invalid || value > 1) 
-                        mgc_reconnect_import(imp);
+                        ptlrpc_reconnect_import(imp);
                 RETURN(0);
         }
         /* FIXME move this to mgc_process_config */
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index d20c451eb9..f46618fd8a 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -701,13 +701,12 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
         } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
                  imp->imp_state == LUSTRE_IMP_CONNECTING) {
                 /* allow CONNECT even if import is invalid */ ;
-        } else if (imp->imp_invalid) {
-                /* if it is mgc, wait for recovry. b=13464 */
-                if (imp->imp_recon_bk && !imp->imp_obd->obd_no_recov)
-                        delay = 1;
+        } else if (imp->imp_invalid && (!imp->imp_recon_bk ||
+                                         imp->imp_obd->obd_no_recov)) {
                 /* If the import has been invalidated (such as by an OST
-                 * failure) the request must fail with -ESHUTDOWN.  This
-                 * indicates the requests should be discarded; an -EIO
+                 * failure), and if the import (MGC) has tried its entire
+                 * connection list (Bug 13464), the request must fail with
+                 * -ESHUTDOWN so that the requests are discarded; an -EIO
                  * may result in a resend of the request. */              
                 if (!imp->imp_deactive)
                         DEBUG_REQ(D_ERROR, req, "IMP_INVALID");
@@ -716,8 +715,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
                 DEBUG_REQ(D_ERROR, req, "req wrong generation:");
                 *status = -EIO;
         } else if (req->rq_send_state != imp->imp_state) {
-                if (imp->imp_obd->obd_no_recov || imp->imp_dlm_fake ||
-                    req->rq_no_delay)
+                if (imp->imp_obd->obd_no_recov)
+                        *status = -ESHUTDOWN;
+                else if (imp->imp_dlm_fake || req->rq_no_delay) 
                         *status = -EWOULDBLOCK;
                 else
                         delay = 1;
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index ee7d3a5c9d..efae267bec 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -268,6 +268,42 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
         EXIT;
 }
 
+int ptlrpc_reconnect_import(struct obd_import *imp)
+{
+        /* Set import disconnection first, before the reconnect steps (b=13464) */
+        ptlrpc_set_import_discon(imp, 0); 
+        /* Force a new connect attempt */
+        ptlrpc_invalidate_import(imp);
+        /* Do a fresh connect next time by zeroing the handle */
+        ptlrpc_disconnect_import(imp, 1);
+        /* Wait for all invalidate calls to finish */
+        if (atomic_read(&imp->imp_inval_count) > 0) {
+                int rc;
+                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+                rc = l_wait_event(imp->imp_recovery_waitq,
+                                  (atomic_read(&imp->imp_inval_count) == 0),
+                                  &lwi);
+                if (rc) /* only logged; the reconnect below still proceeds */
+                        CERROR("Interrupted, inval=%d\n", 
+                               atomic_read(&imp->imp_inval_count));
+        }
+
+        /*
+         * Allow reconnect attempts.  Note: this function is currently
+         * only called by the MGC, so assume the import is recoverable
+         * and force it to be so.  Revisit this if other callers appear.
+         */
+        
+        imp->imp_obd->obd_no_recov = 0;
+        /* Remove 'invalid' flag */
+        ptlrpc_activate_import(imp);
+        /* Attempt a new connect */
+        ptlrpc_recover_import(imp, NULL);
+        return 0; /* unconditional: results of the calls above are ignored */
+}
+
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+
 static int import_select_connection(struct obd_import *imp)
 {
         struct obd_import_conn *imp_conn = NULL, *conn;
-- 
GitLab