From ef5699424a8b2b40f286c8afc20d9ed9da825f1f Mon Sep 17 00:00:00 2001
From: bobijam <bobijam>
Date: Wed, 18 Jun 2008 01:41:40 +0000
Subject: [PATCH] Branch b1_8 b=14480 o=green i=shadow, bobijam

Description: LBUG during stress test
Details    : Need to properly lock accesses to the flock deadlock detection list.
---
 lustre/ChangeLog         |  5 +++++
 lustre/ldlm/ldlm_flock.c | 37 +++++++++++++++++++++++++++++++++----
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 0c81657d03..5bce61ddf7 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -24,6 +24,11 @@ tbd Sun Microsystems, Inc.
 	  	'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'.
 	  For more information, please refer to bugzilla 13904.
 
+Severity   : normal
+Bugzilla   : 14480
+Description: LBUG during stress test
+Details    : Need to properly lock accesses to the flock deadlock detection list.
+
 Severity   : minor
 Bugzilla   : 15837
 Description: oops in page fault handler
diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c
index bd92a84d8b..0b9f09b817 100644
--- a/lustre/ldlm/ldlm_flock.c
+++ b/lustre/ldlm/ldlm_flock.c
@@ -42,6 +42,7 @@
 #define l_flock_waitq   l_lru
 
 static struct list_head ldlm_flock_waitq = CFS_LIST_HEAD_INIT(ldlm_flock_waitq);
+spinlock_t ldlm_flock_waitq_lock = SPIN_LOCK_UNLOCKED;
 
 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                             void *data, int flag);
@@ -82,6 +83,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags)
         LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%x)",
                    mode, flags);
 
+        /* Safe to not lock here, since it should be empty anyway */
         LASSERT(list_empty(&lock->l_flock_waitq));
 
         list_del_init(&lock->l_res_link);
@@ -107,6 +109,7 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *blocking_lock)
         pid_t blocking_pid = blocking_lock->l_policy_data.l_flock.pid;
         struct ldlm_lock *lock;
 
+        spin_lock(&ldlm_flock_waitq_lock);
 restart:
         list_for_each_entry(lock, &ldlm_flock_waitq, l_flock_waitq) {
                 if ((lock->l_policy_data.l_flock.pid != blocking_pid) ||
@@ -116,11 +119,14 @@ restart:
                 blocking_pid = lock->l_policy_data.l_flock.blocking_pid;
                 blocking_export = (struct obd_export *)(long)
                         lock->l_policy_data.l_flock.blocking_export;
-                if (blocking_pid == req_pid && blocking_export == req_export)
+                if (blocking_pid == req_pid && blocking_export == req_export) {
+                        spin_unlock(&ldlm_flock_waitq_lock);
                         return 1;
+                }
 
                 goto restart;
         }
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         return 0;
 }
@@ -225,7 +231,9 @@ reprocess:
                                 (long)(void *)lock->l_export;
 
                         LASSERT(list_empty(&req->l_flock_waitq));
+                        spin_lock(&ldlm_flock_waitq_lock);
                         list_add_tail(&req->l_flock_waitq, &ldlm_flock_waitq);
+                        spin_unlock(&ldlm_flock_waitq_lock);
 
                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
                         *flags |= LDLM_FL_BLOCK_GRANTED;
@@ -242,7 +250,9 @@ reprocess:
 
         /* In case we had slept on this lock request take it off of the
          * deadlock detection waitq. */
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&req->l_flock_waitq);
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         /* Scan the locks owned by this process that overlap this request.
          * We may have to merge or split existing locks. */
@@ -341,7 +351,7 @@ reprocess:
                  * and restart processing this lock. */
                 if (!new2) {
                         unlock_res_and_lock(req);
-                         new2 = ldlm_lock_create(ns, res->lr_name, LDLM_FLOCK,
+                        new2 = ldlm_lock_create(ns, res->lr_name, LDLM_FLOCK,
                                         lock->l_granted_mode, NULL, NULL, NULL,
                                         NULL, 0);
                         lock_res_and_lock(req);
@@ -454,7 +464,9 @@ ldlm_flock_interrupted_wait(void *data)
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
 
         /* take lock off the deadlock detection waitq. */
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&lock->l_flock_waitq);
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
@@ -484,6 +496,21 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         CDEBUG(D_DLMTRACE, "flags: 0x%x data: %p getlk: %p\n",
                flags, data, getlk);
 
+        /* Import invalidation. We need to actually release the lock
+         * references being held, so that it can go away. No point in
+         * holding the lock even if app still believes it has it, since
+         * server already dropped it anyway. Only for granted locks too. */
+        lock_res_and_lock(lock);
+        if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) == 
+            (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
+                unlock_res_and_lock(lock);
+                if (lock->l_req_mode == lock->l_granted_mode &&
+                    lock->l_granted_mode != LCK_NL)
+                        ldlm_lock_decref_internal(lock, lock->l_req_mode);
+                RETURN(0);
+        }
+        unlock_res_and_lock(lock);
+
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
         if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
@@ -523,7 +550,9 @@ granted:
         lock_res_and_lock(lock);
 
         /* take lock off the deadlock detection waitq. */
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&lock->l_flock_waitq);
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         list_del_init(&lock->l_res_link);
@@ -572,8 +601,8 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         ns = lock->l_resource->lr_namespace;
 
         /* take lock off the deadlock detection waitq. */
-        lock_res_and_lock(lock);
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&lock->l_flock_waitq);
-        unlock_res_and_lock(lock);
+        spin_unlock(&ldlm_flock_waitq_lock);
         RETURN(0);
 }
-- 
GitLab