From e5d6d25f4c5e9bc0188d334c40879dcaa14f20a4 Mon Sep 17 00:00:00 2001
From: nathan <nathan>
Date: Fri, 27 Jul 2007 22:59:00 +0000
Subject: [PATCH] b=13129 i=adilger i=johann Block umount forever until the
 mount refcount is zero rather than giving up after an arbitrary timeout.

---
 lustre/ChangeLog            |  7 +++++++
 lustre/obdclass/obd_mount.c | 22 ++++++++++------------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 825907819f..1dd0526606 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -76,6 +76,13 @@ Details    : Merging of two LCONSOLE_ERROR_MSG into one.
         may be released separately in future.
 
 
+Severity   : normal
+Frequency  : rare
+Bugzilla   : 13129
+Description: server LBUG when shutting down
+Details    : Block umount indefinitely until the mount refcount is zero,
+	     rather than giving up after an arbitrary timeout.
+
 Severity   : enhancement
 Bugzilla   : 12194
 Description: add optional extra BUILD_VERSION info
diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c
index d04136f9be..2843b54393 100644
--- a/lustre/obdclass/obd_mount.c
+++ b/lustre/obdclass/obd_mount.c
@@ -1296,28 +1296,26 @@ out_free:
         RETURN(ERR_PTR(rc));
 }
 
+/* Poll, with no timeout, until mnt->mnt_count is at most 1.  Everything,
+   including lnet lnd expires, must finish before it is safe to free the sb */
 static void server_wait_finished(struct vfsmount *mnt)
 {
         wait_queue_head_t   waitq;
         struct l_wait_info  lwi;
-        int                 retries = 120;
+        int                 waited = 0; /* nominal seconds waited so far */
 
         init_waitqueue_head(&waitq);
 
-        while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) {
-                LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
-                              "%d secs...\n",
-                              atomic_read(&mnt->mnt_count), retries);
-
+        while (atomic_read(&mnt->mnt_count) > 1) {
+                if (waited && (waited % 30 == 0)) /* every 30s, never at 0 */
+                        LCONSOLE_WARN("Mount still busy with %d refs after "
+                                      "%d secs\n", atomic_read(&mnt->mnt_count), 
+                                      waited);
                 /* Wait for a bit */
-                retries -= 5;
-                lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
+                waited += 3; /* keep equal to the timeout set below */
+                lwi = LWI_TIMEOUT(cfs_time_seconds(3), NULL, NULL);
                 l_wait_event(waitq, 0, &lwi);
         }
-        if (atomic_read(&mnt->mnt_count) > 1) {
-                CERROR("Mount %p is still busy (%d refs), giving up.\n",
-                       mnt, atomic_read(&mnt->mnt_count));
-        }
 }
 
 static void server_put_super(struct super_block *sb)
-- 
GitLab