From 910aa03b1c3e32ca144becbef6df105374a46641 Mon Sep 17 00:00:00 2001
From: shadow <shadow>
Date: Thu, 13 Dec 2007 10:20:23 +0000
Subject: [PATCH] If client_disconnect_export was called without the force
 flag set and a connect request is in flight, this can produce an access to a
 NULL pointer (or an already freed pointer) when connect_interpret stores ocd
 flags in obd_namespace.

  b=14260
  i=adilger
  i=johann
---
 lustre/ChangeLog            |  9 +++++++++
 lustre/ldlm/ldlm_lib.c      | 12 +++++++-----
 lustre/obdclass/obd_mount.c |  9 +++------
 lustre/ptlrpc/client.c      |  4 ++--
 lustre/ptlrpc/import.c      |  5 +++++
 5 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 8a0f5513e9..5b458bb184 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -14,6 +14,15 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        * Recommended e2fsprogs version: 1.40.2-cfs4
        * Note that reiserfs quotas are disabled on SLES 10 in this kernel.
 
+Severity   : major
+Bugzilla   : 14260
+Frequency  : rare, at shutdown
+Description: access to an already freed / NULL obd_namespace.
+Details    : if client_disconnect_export was called without the force flag
+             set and a connect request is in flight, this can produce an
+             access to a NULL pointer (or an already freed pointer) when
+             connect_interpret stores ocd flags in obd_namespace.
+
 Severity   : minor
 Bugzilla   : 14418
 Frequency  : only at startup
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index e587528657..01b886b495 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -474,14 +474,16 @@ int client_disconnect_export(struct obd_export *exp)
                                        NULL);
                 ldlm_namespace_free_prior(obd->obd_namespace);
                 to_be_freed = obd->obd_namespace;
-                obd->obd_namespace = NULL;
         }
 
-        /* Yeah, obd_force means "forced shutdown". */
-        if (!obd->obd_force)
-                rc = ptlrpc_disconnect_import(imp, 0);
+        rc = ptlrpc_disconnect_import(imp, 0);
 
         ptlrpc_invalidate_import(imp);
+        /* Set obd_namespace to NULL only after invalidating the import:
+         * connect requests may still be in flight, and they need to store
+         * their connect flags in obd_namespace. bug 14260 */
+        obd->obd_namespace = NULL;
+	
         ptlrpc_free_rq_pool(imp->imp_rq_pool);
         class_destroy_import(imp);
         cli->cl_import = NULL;
@@ -494,7 +496,7 @@ int client_disconnect_export(struct obd_export *exp)
  out_sem:
         mutex_up(&cli->cl_sem);
         if (to_be_freed)
-                ldlm_namespace_free_post(to_be_freed, obd->obd_no_recov);
+                ldlm_namespace_free_post(to_be_freed, obd->obd_force);
         RETURN(rc);
 }
 
diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c
index 3da90dcef6..ff67f74b64 100644
--- a/lustre/obdclass/obd_mount.c
+++ b/lustre/obdclass/obd_mount.c
@@ -755,9 +755,8 @@ static int lustre_stop_mgc(struct super_block *sb)
                 GOTO(out, rc = -EBUSY);
         }
 
-        /* MGC should disconnect nicely so MGS won't print eviction messages */
-        obd->obd_force = (lsi->lsi_flags & LSI_UMOUNT_FORCE) != 0;
-        /* The MGC has no recoverable data in any case. */
+        /* The MGC has no recoverable data in any case.
+         * Forced shutdown is set in umount_begin. */
         obd->obd_no_recov = 1;
 
         if (obd->u.cli.cl_mgc_mgsexp)
@@ -1367,12 +1366,10 @@ static void server_put_super(struct super_block *sb)
                 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
                 if (obd) {
                         CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
-                        if (lsi->lsi_flags & LSI_UMOUNT_FORCE)
-                                obd->obd_force = 1;
                         if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
                                 obd->obd_fail = 1;
                         /* We can't seem to give an error return code
-                           to .put_super, so we better make sure we clean up! */
+                         * to .put_super, so we better make sure we clean up! */
                         obd->obd_force = 1;
                         class_manual_cleanup(obd);
                 } else {
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 6fdc64b64c..5744e6343d 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1141,14 +1141,13 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 if (req->rq_bulk != NULL)
                         ptlrpc_unregister_bulk (req);
 
-                req->rq_phase = RQ_PHASE_COMPLETE;
-
                 if (req->rq_interpret_reply != NULL) {
                         int (*interpreter)(struct ptlrpc_request *,void *,int) =
                                 req->rq_interpret_reply;
                         req->rq_status = interpreter(req, &req->rq_async_args,
                                                      req->rq_status);
                 }
+                req->rq_phase = RQ_PHASE_COMPLETE;
 
                 CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:"
                        "opc %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
@@ -1317,6 +1316,7 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
                 if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
                       (req->rq_phase == RQ_PHASE_BULK) || 
                       (req->rq_phase == RQ_PHASE_NEW)))
+                        continue;
 
                 if (req->rq_timedout)   /* already timed out */
                         continue;
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index b7cff71100..aa6a7668c4 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -1032,8 +1032,12 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
 {
         struct ptlrpc_request *req;
         int rq_opc, rc = 0;
+        int nowait = imp->imp_obd->obd_force;
         ENTRY;
 
+        if (nowait)
+                GOTO(set_state, rc);
+
         switch (imp->imp_connect_op) {
         case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
         case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
@@ -1094,6 +1098,7 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                 ptlrpc_req_finished(req);
         }
 
+set_state:
         spin_lock(&imp->imp_lock);
 out:
         if (noclose) 
-- 
GitLab