From 7cb202a575e528d3193618011d6fbc24837d9bca Mon Sep 17 00:00:00 2001
From: green <green>
Date: Wed, 16 May 2007 20:55:21 +0000
Subject: [PATCH] b=11658 r=wangdi,adigler

Take import reference before releasing llog record semaphore
---
 lustre/ChangeLog              |  5 +++++
 lustre/include/obd_support.h  |  1 +
 lustre/ptlrpc/recov_thread.c  | 15 ++++++++++++++-
 lustre/tests/replay-single.sh | 18 ++++++++++++++++++
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 62841646c0..c698c0493e 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -109,6 +109,11 @@ behaviour.
 Details    : This will achieve local-only flock/fcntl locks
 	     coherentness.
 
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 11658
+Description: log_commit_thread vs filter_destroy race leads to crash
+Details    : Take import reference before releasing llog record semaphore
 
 --------------------------------------------------------------------------------
 
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index ee2c6bafa6..c514d165d1 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -169,6 +169,7 @@ extern int obd_race_state;
 #define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
 #define OBD_FAIL_PTLRPC_DROP_RPC         0x505
 #define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
+#define OBD_FAIL_PTLRPC_DELAY_RECOV      0x507
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c
index 2355264cce..b74c72ec1a 100644
--- a/lustre/ptlrpc/recov_thread.c
+++ b/lustre/ptlrpc/recov_thread.c
@@ -222,6 +222,7 @@ static int log_commit_thread(void *arg)
         struct llog_commit_master *lcm = arg;
         struct llog_commit_daemon *lcd;
         struct llog_canceld_ctxt *llcd, *n;
+        struct obd_import *import = NULL;
         ENTRY;
 
         OBD_ALLOC(lcd, sizeof(*lcd));
@@ -243,10 +244,13 @@ static int log_commit_thread(void *arg)
         CDEBUG(D_HA, "%s started\n", cfs_curproc_comm());
         do {
                 struct ptlrpc_request *request;
-                struct obd_import *import = NULL;
                 struct list_head *sending_list;
                 int rc = 0;
 
+                if (import)
+                        class_import_put(import);
+                import = NULL;
+
                 /* If we do not have enough pages available, allocate some */
                 while (atomic_read(&lcm->lcm_llcd_numfree) <
                        lcm->lcm_llcd_minfree) {
@@ -272,6 +276,8 @@ static int log_commit_thread(void *arg)
 
                 sending_list = &lcm->lcm_llcd_pending;
         resend:
+                if (import)
+                        class_import_put(import);
                 import = NULL;
                 if (lcm->lcm_flags & LLOG_LCM_FL_EXIT) {
                         lcm->lcm_llcd_maxfree = 0;
@@ -301,6 +307,8 @@ static int log_commit_thread(void *arg)
                                           typeof(*llcd), llcd_list);
                         LASSERT(llcd->llcd_lcm == lcm);
                         import = llcd->llcd_ctxt->loc_imp;
+                        if (import)
+                                class_import_get(import);
                 }
                 list_for_each_entry_safe(llcd, n, sending_list, llcd_list) {
                         LASSERT(llcd->llcd_lcm == lcm);
@@ -351,6 +359,8 @@ static int log_commit_thread(void *arg)
                                 continue;
                         }
 
+                        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10);
+
                         request = ptlrpc_prep_req(import, LUSTRE_LOG_VERSION,
                                                   OBD_LOG_CANCEL, 2, size,bufs);
                         if (request == NULL) {
@@ -404,6 +414,9 @@ static int log_commit_thread(void *arg)
                 }
         } while(1);
 
+        if (import)
+                class_import_put(import);
+
         /* If we are force exiting, just drop all of the cookies. */
         if (lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE) {
                 spin_lock(&lcm->lcm_llcd_lock);
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 1d385f85ca..2a806e47ba 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -1122,5 +1122,23 @@ test_58() {
 }
 run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)"
 
+# log_commit_thread vs filter_destroy race used to lead to import use after free
+# bug 11658
+test_59() { # regression test for bug 11658 (import use after free)
+    mkdir $DIR/$tdir
+    createmany -o $DIR/$tdir/$tfile-%d 200
+    sync
+    unlinkmany $DIR/$tdir/$tfile-%d 200 # NOTE(review): presumably these unlinks feed log_commit_thread -- confirm
+#define OBD_FAIL_PTLRPC_DELAY_RECOV       0x507
+    do_facet ost "sysctl -w lustre.fail_loc=0x507" # arm the OBD_FAIL_TIMEOUT hook in log_commit_thread
+    fail ost
+    fail mds
+    do_facet ost "sysctl -w lustre.fail_loc=0x0" # clear fail_loc again
+    sleep 20 # NOTE(review): presumably meant to outlast the injected delay -- confirm
+    rmdir $DIR/$tdir
+}
+run_test 59 "test log_commit_thread vs filter_destroy race"
+
+
 equals_msg `basename $0`: test complete, cleaning up
 $CLEANUP
-- 
GitLab