From 3c0b76d5a0aa4c75a9c7c2987e45194b0e68f7c3 Mon Sep 17 00:00:00 2001
From: rread <rread>
Date: Wed, 17 Mar 2004 21:56:43 +0000
Subject: [PATCH] b=2766 r=zab (partial)

Better handling for OST eviction.  Fixes for clearing the page cache,
imp_invalid handling, and improved import_event interface. Also
includes the tests and fixes added from 2732; one of those tests does
not yet pass, so it is excluded for now.
---
 lustre/ChangeLog                     |  3 +-
 lustre/include/linux/lustre_dlm.h    |  2 +-
 lustre/include/linux/lustre_ha.h     |  3 +-
 lustre/include/linux/lustre_import.h |  6 +-
 lustre/include/linux/lustre_net.h    |  3 +-
 lustre/include/linux/obd.h           |  3 +
 lustre/include/linux/obd_support.h   |  2 +
 lustre/ldlm/ldlm_lib.c               |  3 +-
 lustre/ldlm/ldlm_request.c           | 10 +--
 lustre/ldlm/ldlm_resource.c          |  2 +-
 lustre/llite/file.c                  |  6 +-
 lustre/llite/rw24.c                  |  8 ++-
 lustre/mdc/mdc_request.c             |  7 +-
 lustre/mds/mds_fs.c                  |  3 +-
 lustre/obdclass/genops.c             |  1 +
 lustre/osc/osc_request.c             | 51 +++++++++++----
 lustre/ptlrpc/client.c               | 20 ++++--
 lustre/ptlrpc/import.c               | 92 +++++++++++++++++---------
 lustre/ptlrpc/niobuf.c               |  6 ++
 lustre/ptlrpc/ptlrpc_module.c        |  1 +
 lustre/ptlrpc/ptlrpcd.c              | 96 +++++++++++++++++++---------
 lustre/ptlrpc/recover.c              | 28 ++++----
 lustre/tests/recovery-small.sh       | 78 ++++++++++++++++++++++
 lustre/tests/replay-ost-single.sh    |  3 +-
 lustre/tests/replay-single.sh        |  7 +-
 lustre/tests/test-framework.sh       |  1 -
 26 files changed, 326 insertions(+), 119 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 11e208b6ae..3bc4610b09 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -15,7 +15,8 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
 	- correct journal credits calculated for CANCEL_UNLINK_LOG (2931)
 	- don't close files for self_export to avoid uninitialized obd (2936)
         - let lustre could be mounted with the same name for node and mds (2939)
-
+	- clear page cache after eviction (2766)
+
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
        * bug fixes
diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h
index d85d7a166b..3063c09888 100644
--- a/lustre/include/linux/lustre_dlm.h
+++ b/lustre/include/linux/lustre_dlm.h
@@ -60,7 +60,7 @@ typedef enum {
 #define LDLM_FL_LOCAL_ONLY     0x000400 /* see ldlm_cli_cancel_unused */
 
 /* don't run the cancel callback under ldlm_cli_cancel_unused */
-#define LDLM_FL_NO_CALLBACK    0x000800
+#define LDLM_FL_FAILED         0x000800
 
 #define LDLM_FL_HAS_INTENT     0x001000 /* lock request has intent */
 #define LDLM_FL_CANCELING      0x002000 /* lock cancel has already been sent */
diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/linux/lustre_ha.h
index 808ff44ef4..fe83b7d9a6 100644
--- a/lustre/include/linux/lustre_ha.h
+++ b/lustre/include/linux/lustre_ha.h
@@ -19,7 +19,8 @@ void ptlrpc_free_committed(struct obd_import *imp);
 void ptlrpc_wake_delayed(struct obd_import *imp);
 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid);
 int ptlrpc_set_import_active(struct obd_import *imp, int active);
-void ptlrpc_invalidate_import(struct obd_import *imp);
+void ptlrpc_deactivate_import(struct obd_import *imp);
+void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc);
 void ptlrpc_fail_import(struct obd_import *imp, int generation);
 void ptlrpc_fail_export(struct obd_export *exp);
 
diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h
index 14943f8cde..d2af141039 100644
--- a/lustre/include/linux/lustre_import.h
+++ b/lustre/include/linux/lustre_import.h
@@ -41,8 +41,9 @@ static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
 
 enum obd_import_event {
         IMP_EVENT_DISCON     = 0x808001,
-        IMP_EVENT_INVALIDATE = 0x808002,
-        IMP_EVENT_ACTIVE     = 0x808003,
+        IMP_EVENT_INACTIVE   = 0x808002,
+        IMP_EVENT_INVALIDATE = 0x808003,
+        IMP_EVENT_ACTIVE     = 0x808004,
 };
 
 struct obd_import {
@@ -64,6 +65,7 @@ struct obd_import {
         struct obd_device        *imp_obd;
         wait_queue_head_t         imp_recovery_waitq;
         __u64                     imp_last_replay_transno;
+        atomic_t                  imp_inflight;
         atomic_t                  imp_replay_inflight;
         enum lustre_imp_state     imp_state;
         int                       imp_generation;
diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h
index 13ce57ecb3..b3d93084c1 100644
--- a/lustre/include/linux/lustre_net.h
+++ b/lustre/include/linux/lustre_net.h
@@ -304,6 +304,7 @@ struct ptlrpc_request {
         struct ptlrpc_request_set *rq_set;
         void *rq_interpret_reply;               /* Async completion handler */
         union ptlrpc_async_args rq_async_args;  /* Async completion context */
+        void * rq_ptlrpcd_data;
 };
 
 
@@ -647,7 +648,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp);
 int ptlrpc_pinger_del_import(struct obd_import *imp);
 
 /* ptlrpc/ptlrpcd.c */
-void ptlrpcd_wake(void);
+void ptlrpcd_wake(struct ptlrpc_request *req);
 void ptlrpcd_add_req(struct ptlrpc_request *req);
 int ptlrpcd_addref(void);
 void ptlrpcd_decref(void);
diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h
index 242498e2a5..9da934fedd 100644
--- a/lustre/include/linux/obd.h
+++ b/lustre/include/linux/obd.h
@@ -54,6 +54,7 @@ struct lov_oinfo {                 /* per-stripe data structure */
         /* _cli_ is poorly named, it should be _ready_ */
         struct list_head loi_cli_item;
         struct list_head loi_write_item;
+        struct list_head loi_read_item;
 
         int loi_kms_valid:1;
         __u64 loi_kms; /* known minimum size */
@@ -71,6 +72,7 @@ static inline void loi_init(struct lov_oinfo *loi)
         INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_group);
         INIT_LIST_HEAD(&loi->loi_cli_item);
         INIT_LIST_HEAD(&loi->loi_write_item);
+        INIT_LIST_HEAD(&loi->loi_read_item);
 }
 
 struct lov_stripe_md {
@@ -238,6 +240,7 @@ struct client_obd {
         spinlock_t               cl_loi_list_lock;
         struct list_head         cl_loi_ready_list;
         struct list_head         cl_loi_write_list;
+        struct list_head         cl_loi_read_list;
         int                      cl_brw_in_flight;
         /* just a sum of the loi/lop pending numbers to be exported by /proc */
         int                      cl_pending_w_pages;
diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h
index c39cb6f579..2e2137336e 100644
--- a/lustre/include/linux/obd_support.h
+++ b/lustre/include/linux/obd_support.h
@@ -124,6 +124,8 @@ extern wait_queue_head_t obd_race_waitq;
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
 #define OBD_FAIL_PTLRPC_RQBD             0x502
+#define OBD_FAIL_PTLRPC_BULK_GET_NET     0x503
+#define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 0d514db4a2..6c1f7503f9 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -102,6 +102,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
         INIT_LIST_HEAD(&cli->cl_cache_waiters);
         INIT_LIST_HEAD(&cli->cl_loi_ready_list);
         INIT_LIST_HEAD(&cli->cl_loi_write_list);
+        INIT_LIST_HEAD(&cli->cl_loi_read_list);
         spin_lock_init(&cli->cl_loi_list_lock);
         cli->cl_brw_in_flight = 0;
         spin_lock_init(&cli->cl_read_rpc_hist.oh_lock);
@@ -313,7 +314,7 @@ int client_disconnect_export(struct obd_export *exp, int failover)
 
         /* Yeah, obd_no_recov also (mainly) means "forced shutdown". */
         if (obd->obd_no_recov)
-                ptlrpc_invalidate_import(imp);
+                ptlrpc_invalidate_import(imp, 0);
         else
                 rc = ptlrpc_disconnect_import(imp);
 
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index a996da6acf..de9e7c5234 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -117,9 +117,9 @@ noreproc:
         /* Go to sleep until the lock is granted or cancelled. */
         rc = l_wait_event(lock->l_waitq,
                           ((lock->l_req_mode == lock->l_granted_mode) ||
-                           (lock->l_flags & LDLM_FL_CANCEL)), &lwi);
+                           (lock->l_flags & LDLM_FL_FAILED)), &lwi);
 
-        if (lock->l_destroyed) {
+        if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
                 RETURN(-EIO);
         }
@@ -665,11 +665,6 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
 
                 w->w_lock = LDLM_LOCK_GET(lock);
 
-                /* Prevent the cancel callback from being called by setting
-                 * LDLM_FL_CANCEL in the lock.  Very sneaky. -p */
-                if (flags & LDLM_FL_NO_CALLBACK)
-                        w->w_lock->l_flags |= LDLM_FL_CANCEL;
-
                 list_add(&w->w_list, &list);
         }
         l_unlock(&ns->ns_lock);
@@ -702,7 +697,6 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
  *
  * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying
  * to notify the server.
- * If flags & LDLM_FL_NO_CALLBACK, don't run the cancel callback.
  * If flags & LDLM_FL_WARN, print a warning if some locks are still in use. */
 int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
                            struct ldlm_res_id *res_id, int flags, void *opaque)
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index 80545d08b4..c04a14c318 100644
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -284,7 +284,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
                 LDLM_LOCK_GET(lock);
 
-                lock->l_flags |= LDLM_FL_CANCEL;
+                lock->l_flags |= LDLM_FL_FAILED;
                 lock->l_flags |= flags;
 
                 if (local_only && (lock->l_readers || lock->l_writers)) {
diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index 9e487d5e9b..c786211e04 100644
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -413,11 +413,11 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
 
                 tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1;
                 /* check to see if another DLM lock covers this page */
-                rc2 = ldlm_lock_match(lock->l_resource->lr_namespace,
+                ldlm_lock2handle(lock, &lockh);
+                rc2 = ldlm_lock_match(NULL, 
                                       LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING |
                                       LDLM_FL_TEST_LOCK,
-                                      &lock->l_resource->lr_name, LDLM_EXTENT,
-                                      &tmpex, LCK_PR | LCK_PW, &lockh);
+                                      NULL, 0, &tmpex, 0, &lockh);
                 if (rc2 == 0 && page->mapping != NULL) {
                         // checking again to account for writeback's lock_page()
                         LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c
index c645abd7df..22ffe38cbf 100644
--- a/lustre/llite/rw24.c
+++ b/lustre/llite/rw24.c
@@ -54,6 +54,7 @@ void ll_ap_completion_24(void *data, int cmd, int rc)
 {
         struct ll_async_page *llap;
         struct page *page;
+        ENTRY;
 
         llap = llap_from_cookie(data);
         if (IS_ERR(llap)) {
@@ -64,6 +65,8 @@ void ll_ap_completion_24(void *data, int cmd, int rc)
         page = llap->llap_page;
         LASSERT(PageLocked(page));
 
+        LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
+
         if (rc == 0)  {
                 if (cmd == OBD_BRW_READ) {
                         if (!llap->llap_defer_uptodate)
@@ -71,11 +74,13 @@ void ll_ap_completion_24(void *data, int cmd, int rc)
                 } else {
                         llap->llap_write_queued = 0;
                 }
+                ClearPageError(page);
         } else {
+                if (cmd == OBD_BRW_READ)
+                        llap->llap_defer_uptodate = 0;
                 SetPageError(page);
         }
 
-        LL_CDEBUG_PAGE(D_PAGE, page, "io complete, unlocking\n");
 
         unlock_page(page);
 
@@ -85,6 +90,7 @@ void ll_ap_completion_24(void *data, int cmd, int rc)
         }
 
         page_cache_release(page);
+        EXIT;
 }
 
 static int ll_writepage_24(struct page *page)
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
index e1c3fedc21..830c2cdcd1 100644
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -844,13 +844,16 @@ static int mdc_import_event(struct obd_device *obd,
         case IMP_EVENT_DISCON: {
                 break;
         }
+        case IMP_EVENT_INACTIVE: {
+                if (obd->obd_observer)
+                        rc = obd_notify(obd->obd_observer, obd, 0);
+                break;
+        }
         case IMP_EVENT_INVALIDATE: {
                 struct ldlm_namespace *ns = obd->obd_namespace;
                 
                 ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
 
-                if (obd->obd_observer)
-                        rc = obd_notify(obd->obd_observer, obd, 0);
                 break;
         }
         case IMP_EVENT_ACTIVE: {
diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c
index 7979092115..df159ac82f 100644
--- a/lustre/mds/mds_fs.c
+++ b/lustre/mds/mds_fs.c
@@ -641,7 +641,8 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
         down(&parent_inode->i_sem);
         de = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
         if (de == NULL || de->d_inode == NULL) {
-                CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
+                CERROR("destroying non-existent object "LPU64" %s\n", 
+                       oa->o_id, fidname);
                 GOTO(out_dput, rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT);
         }
 
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index 98ae3b5c7f..aee57a7344 100644
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -526,6 +526,7 @@ struct obd_import *class_new_import(void)
         init_waitqueue_head(&imp->imp_recovery_waitq);
 
         atomic_set(&imp->imp_refcount, 2);
+        atomic_set(&imp->imp_inflight, 0);
         atomic_set(&imp->imp_replay_inflight, 0);
         INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index a6a399242e..ecb2bf3ccc 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -1170,7 +1170,7 @@ static void osc_occ_interrupted(struct oig_callback_context *occ)
         /* ok, it's been put in an rpc. */
         if (oap->oap_request != NULL) {
                 ptlrpc_mark_interrupted(oap->oap_request);
-                ptlrpcd_wake();
+                ptlrpcd_wake(oap->oap_request);
                 GOTO(unlock, 0);
         }
 
@@ -1228,17 +1228,23 @@ static int brw_interpret_oap(struct ptlrpc_request *request,
         struct list_head *pos, *n;
         ENTRY;
 
-        CDEBUG(D_INODE, "request %p aa %p\n", request, aa);
 
         rc = osc_brw_fini_request(request, aa->aa_oa, aa->aa_requested_nob,
                                   aa->aa_nio_count, aa->aa_page_count,
                                   aa->aa_pga, rc);
 
+        CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc);
+
         cli = aa->aa_cli;
         /* in failout recovery we ignore writeback failure and want
          * to just tell llite to unlock the page and continue */
-        if (cli->cl_import == NULL || cli->cl_import->imp_invalid)
+        if (request->rq_reqmsg->opc == OST_WRITE && 
+            (cli->cl_import == NULL || cli->cl_import->imp_invalid)) {
+                CDEBUG(D_INODE, "flipping to rc 0 imp %p inv %d\n", 
+                       cli->cl_import, 
+                       cli->cl_import ? cli->cl_import->imp_invalid : -1);
                 rc = 0;
+        }
 
         spin_lock(&cli->cl_loi_list_lock);
 
@@ -1529,6 +1535,13 @@ static int lop_makes_rpc(struct client_obd *cli, struct loi_oap_pages *lop,
         if (lop->lop_num_pending == 0)
                 RETURN(0);
 
+        /* if we have an invalid import we want to drain the queued pages
+         * by forcing them through rpcs that immediately fail and complete
+         * the pages.  recovery relies on this to empty the queued pages
+         * before canceling the locks and evicting down the llite pages */
+        if (cli->cl_import == NULL || cli->cl_import->imp_invalid)
+                RETURN(1);
+
         /* stream rpcs in queue order as long as as there is an urgent page
          * queued.  this is our cheap solution for good batching in the case
          * where writepage marks some random page in the middle of the file as
@@ -1576,6 +1589,9 @@ static void loi_list_maint(struct client_obd *cli, struct lov_oinfo *loi)
 
         on_list(&loi->loi_write_item, &cli->cl_loi_write_list,
                 loi->loi_write_lop.lop_num_pending);
+
+        on_list(&loi->loi_read_item, &cli->cl_loi_read_list,
+                loi->loi_read_lop.lop_num_pending);
 }
 
 #define LOI_DEBUG(LOI, STR, args...)                                     \
@@ -1604,6 +1620,17 @@ struct lov_oinfo *osc_next_loi(struct client_obd *cli)
             !list_empty(&cli->cl_loi_write_list))
                 RETURN(list_entry(cli->cl_loi_write_list.next,
                                   struct lov_oinfo, loi_write_item));
+
+        /* then return all queued objects when we have an invalid import
+         * so that they get flushed */
+        if (cli->cl_import == NULL || cli->cl_import->imp_invalid) {
+                if (!list_empty(&cli->cl_loi_write_list))
+                        RETURN(list_entry(cli->cl_loi_write_list.next,
+                                          struct lov_oinfo, loi_write_item));
+                if (!list_empty(&cli->cl_loi_read_list))
+                        RETURN(list_entry(cli->cl_loi_read_list.next,
+                                          struct lov_oinfo, loi_read_item));
+        }
         RETURN(NULL);
 }
 
@@ -1653,6 +1680,8 @@ static void osc_check_rpcs(struct client_obd *cli)
                         list_del_init(&loi->loi_cli_item);
                 if (!list_empty(&loi->loi_write_item))
                         list_del_init(&loi->loi_write_item);
+                if (!list_empty(&loi->loi_read_item))
+                        list_del_init(&loi->loi_read_item);
 
                 loi_list_maint(cli, loi);
 
@@ -2873,25 +2902,25 @@ static int osc_import_event(struct obd_device *obd,
                 }
                 break;
         }
+        case IMP_EVENT_INACTIVE: {
+                if (obd->obd_observer)
+                        rc = obd_notify(obd->obd_observer, obd, 0);
+                break;
+        }
         case IMP_EVENT_INVALIDATE: {
                 struct ldlm_namespace *ns = obd->obd_namespace;
 
-                /* this used to try and tear down queued pages, but it was
-                 * not correctly implemented.  We'll have to do it again once
-                 * we call obd_invalidate_import() agian */
-                /* XXX And we still need to do this */
-
-                /* Reset grants, too */
+                /* Reset grants */
                 cli = &obd->u.cli;
                 spin_lock(&cli->cl_loi_list_lock);
                 cli->cl_avail_grant = 0;
                 cli->cl_lost_grant = 0;
+                /* all pages go to failing rpcs due to the invalid import */
+                osc_check_rpcs(cli);
                 spin_unlock(&cli->cl_loi_list_lock);
                 
                 ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
 
-                if (obd->obd_observer)
-                        rc = obd_notify(obd->obd_observer, obd, 0);
                 break;
         }
         case IMP_EVENT_ACTIVE: {
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 8dc4ad030e..25fd1eb1a8 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -327,6 +327,7 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
         list_add_tail(&req->rq_set_chain, &set->set_requests);
         req->rq_set = set;
         set->set_remaining++;
+        atomic_inc(&req->rq_import->imp_inflight);
 }
 
 /* lock so many callers can add things, the context that owns the set
@@ -370,6 +371,11 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
                 DEBUG_REQ(D_ERROR, req, "IMP_CLOSED ");
                 *status = -EIO;
         }
+        /* allow CONNECT even if import is invalid */
+        else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
+                 imp->imp_state == LUSTRE_IMP_CONNECTING) {
+                ;
+        }
         /*
          * If the import has been invalidated (such as by an OST failure), the
          * request must fail with -EIO.  
@@ -533,13 +539,6 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
         imp = req->rq_import;
         spin_lock_irqsave(&imp->imp_lock, flags);
 
-        if (imp->imp_invalid) {
-                spin_unlock_irqrestore(&imp->imp_lock, flags);
-                req->rq_status = -EIO;
-                req->rq_phase = RQ_PHASE_INTERPRET;
-                RETURN(-EIO);
-        }
-
         req->rq_import_generation = imp->imp_generation;
 
         if (ptlrpc_import_delay_req(imp, req, &rc)) {
@@ -797,6 +796,9 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                        req->rq_reqmsg->opc);
 
                 set->set_remaining--;
+
+                atomic_dec(&imp->imp_inflight);
+                wake_up(&imp->imp_recovery_waitq);
         }
 
         /* If we hit an error, we want to recover promptly. */
@@ -1312,6 +1314,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 
         LASSERT(req->rq_set == NULL);
         LASSERT(!req->rq_receiving_reply);
+        atomic_inc(&imp->imp_inflight);
 
         /* for distributed debugging */
         req->rq_reqmsg->status = current->pid;
@@ -1486,6 +1489,9 @@ restart:
 
         LASSERT(!req->rq_receiving_reply);
         req->rq_phase = RQ_PHASE_INTERPRET;
+
+        atomic_dec(&imp->imp_inflight);
+        wake_up(&imp->imp_recovery_waitq);
         RETURN(rc);
 }
 
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index dece441dcd..74e5a35372 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -42,7 +42,6 @@
 struct ptlrpc_connect_async_args {
          __u64 pcaa_peer_committed;
         int pcaa_initial_connect;
-        int pcaa_was_invalid;
 };
 
 /* A CLOSED import should remain so. */
@@ -115,29 +114,67 @@ int ptlrpc_set_import_discon(struct obd_import *imp)
         return rc;
 }
 
-void ptlrpc_invalidate_import(struct obd_import *imp)
+/*
+ * This acts as a barrier; all existing requests are rejected, and
+ * no new requests will be accepted until the import is valid again.
+ */
+void ptlrpc_deactivate_import(struct obd_import *imp)
 {
-        struct obd_device *obd = imp->imp_obd;
         unsigned long flags;
         ENTRY;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
-        /* This is a bit of a hack, but invalidating replayable
-         * imports makes a temporary reconnect failure into a much more
-         * ugly -- and hard to remedy -- situation. */
-        if (!imp->imp_replayable) {
-                CDEBUG(D_HA, "setting import %s INVALID\n",
-                       imp->imp_target_uuid.uuid);
-                imp->imp_invalid = 1;
-        }
+        CDEBUG(D_HA, "setting import %s INVALID\n",
+               imp->imp_target_uuid.uuid);
+        imp->imp_invalid = 1;
         imp->imp_generation++;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         ptlrpc_abort_inflight(imp);
-        obd_import_event(obd, imp, IMP_EVENT_INVALIDATE);
+        obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
 }
 
-void ptlrpc_validate_import(struct obd_import *imp)
+/*
+ * This function will invalidate the import, if necessary, then block
+ * for all the RPC completions, and finally notify the obd to
+ * invalidate its state (ie cancel locks, clear pending requests,
+ * etc).
+ *
+ * in_rpc: true if this is called while processing an rpc, like
+ *    CONNECT. It will allow for one RPC to be inflight while
+ *    waiting for requests to complete. Ugly, yes, but I don't see a
+ *    cleaner way right now.
+ */
+void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc)
+{
+        struct l_wait_info lwi;
+        int inflight = 0;
+        int rc;
+
+        if (!imp->imp_invalid)
+                ptlrpc_deactivate_import(imp);
+
+        LASSERT(imp->imp_invalid);
+
+        if (in_rpc)
+                inflight = 1;
+        /* wait for all requests to error out and call completion
+           callbacks */
+        lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL,
+                               NULL, NULL);
+        rc = l_wait_event(imp->imp_recovery_waitq,
+                          (atomic_read(&imp->imp_inflight) == inflight),
+                          &lwi);
+
+        if (rc)
+                CERROR("%s: rc = %d waiting for callback (%d != %d)\n",
+                       imp->imp_target_uuid.uuid, rc,
+                       atomic_read(&imp->imp_inflight), inflight);
+
+        obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
+}
+
+static void ptlrpc_activate_import(struct obd_import *imp)
 {
         struct obd_device *obd = imp->imp_obd;
         unsigned long flags;
@@ -164,7 +201,7 @@ void ptlrpc_fail_import(struct obd_import *imp, int generation)
                                imp->imp_target_uuid.uuid,
                                imp->imp_connection->c_remote_uuid.uuid,
                                imp->imp_obd->obd_name);
-                        ptlrpc_invalidate_import(imp);
+                        ptlrpc_deactivate_import(imp);
                 }
                 
                 CDEBUG(D_HA, "%s: waking up pinger\n", 
@@ -186,7 +223,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
         int initial_connect = 0;
         int rc;
         __u64 committed_before_reconnect = 0;
-        int was_invalid = 0;
         struct ptlrpc_request *request;
         int size[] = {sizeof(imp->imp_target_uuid),
                                  sizeof(obd->obd_uuid),
@@ -224,10 +260,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
 
         }
 
-        if (imp->imp_invalid) {
-                imp->imp_invalid = 0;
-                was_invalid = 1;
-        }
 
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
@@ -284,7 +316,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
 
         aa->pcaa_peer_committed = committed_before_reconnect;
         aa->pcaa_initial_connect = initial_connect;
-        aa->pcaa_was_invalid = was_invalid;
 
         if (aa->pcaa_initial_connect)
                 imp->imp_replayable = 1;
@@ -360,11 +391,14 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                                imp->imp_target_uuid.uuid, 
                                imp->imp_connection->c_remote_uuid.uuid);
                 }
-                IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+
+                if (imp->imp_invalid)
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+                else
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
         } 
-        else if (MSG_CONNECT_RECOVERING & msg_flags) {
+        else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
                 LASSERT(imp->imp_replayable);
-                imp->imp_state = LUSTRE_IMP_RECOVER;
                 imp->imp_remote_handle = request->rq_repmsg->handle;
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
         } 
@@ -392,9 +426,6 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
 finish:
         rc = ptlrpc_import_recovery_state_machine(imp);
         if (rc != 0) {
-                if (aa->pcaa_was_invalid)
-                        ptlrpc_invalidate_import(imp);
-
                 if (rc == -ENOTCONN) {
                         CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;"
                                "invalidating and reconnecting\n",
@@ -408,7 +439,7 @@ finish:
         if (rc != 0) {
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
                 if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
-                        ptlrpc_invalidate_import(imp);
+                        ptlrpc_deactivate_import(imp);
                 }
                 CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n",
                        imp->imp_target_uuid.uuid,
@@ -449,7 +480,6 @@ static int signal_completed_replay(struct obd_import *imp)
         RETURN(0);
 }
 
-
 int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 {
         int rc = 0;
@@ -459,7 +489,9 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                 CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
                        imp->imp_target_uuid.uuid,
                        imp->imp_connection->c_remote_uuid.uuid);
-                ptlrpc_invalidate_import(imp);
+
+                ptlrpc_invalidate_import(imp, 1);
+
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
         } 
         
@@ -502,7 +534,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                 if (rc)
                         GOTO(out, rc);
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
-                ptlrpc_validate_import(imp);
+                ptlrpc_activate_import(imp);
         } 
 
         if (imp->imp_state == LUSTRE_IMP_FULL) {
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index 29b4e36473..b885e89e5f 100644
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -102,6 +102,9 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
         __u64               xid;
         ENTRY;
 
+        if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_PUT_NET)) 
+                RETURN(0);
+
         /* NB no locking required until desc is on the network */
         LASSERT (!desc->bd_network_rw);
         LASSERT (desc->bd_type == BULK_PUT_SOURCE ||
@@ -215,6 +218,9 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
         ptl_md_t         md;
         ENTRY;
 
+        if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_GET_NET)) 
+                RETURN(0);
+
         /* NB no locking required until desc is on the network */
         LASSERT (desc->bd_nob > 0);
         LASSERT (!desc->bd_network_rw);
diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c
index dfb963594f..caadab07fb 100644
--- a/lustre/ptlrpc/ptlrpc_module.c
+++ b/lustre/ptlrpc/ptlrpc_module.c
@@ -181,6 +181,7 @@ EXPORT_SYMBOL(ptlrpc_disconnect_import);
 EXPORT_SYMBOL(ptlrpc_resend);
 EXPORT_SYMBOL(ptlrpc_wake_delayed);
 EXPORT_SYMBOL(ptlrpc_set_import_active);
+EXPORT_SYMBOL(ptlrpc_deactivate_import);
 EXPORT_SYMBOL(ptlrpc_invalidate_import);
 EXPORT_SYMBOL(ptlrpc_fail_import);
 EXPORT_SYMBOL(ptlrpc_fail_export);
diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c
index 4e688a8020..687f58826a 100644
--- a/lustre/ptlrpc/ptlrpcd.c
+++ b/lustre/ptlrpc/ptlrpcd.c
@@ -54,7 +54,7 @@
 #include <linux/lprocfs_status.h>
 
 #define LIOD_STOP 0
-static struct ptlrpcd_ctl {
+struct ptlrpcd_ctl {
         unsigned long             pc_flags;
         spinlock_t                pc_lock;
         struct completion         pc_starting;
@@ -62,23 +62,40 @@ static struct ptlrpcd_ctl {
         struct list_head          pc_req_list;
         wait_queue_head_t         pc_waitq;
         struct ptlrpc_request_set *pc_set;
-} ptlrpcd_pc;
+#ifndef __KERNEL__
+        int                       pc_recurred;
+        void                     *pc_callback;
+#endif
+};
+
+static struct ptlrpcd_ctl ptlrpcd_pc;
+static struct ptlrpcd_ctl ptlrpcd_recovery_pc;
 
 static DECLARE_MUTEX(ptlrpcd_sem);
 static int ptlrpcd_users = 0;
 
-void ptlrpcd_wake(void)
+void ptlrpcd_wake(struct ptlrpc_request *req)
 {
-        struct ptlrpcd_ctl *pc = &ptlrpcd_pc;
+        struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data;
+
+        LASSERT(pc != NULL);
+
         wake_up(&pc->pc_waitq);
 }
 
 void ptlrpcd_add_req(struct ptlrpc_request *req)
 {
-        struct ptlrpcd_ctl *pc = &ptlrpcd_pc;
+        struct ptlrpcd_ctl *pc;
+
+        if (req->rq_send_state == LUSTRE_IMP_FULL)
+                pc = &ptlrpcd_pc;
+        else 
+                pc = &ptlrpcd_recovery_pc;
 
         ptlrpc_set_add_new_req(pc->pc_set, req);
-        ptlrpcd_wake();
+        req->rq_ptlrpcd_data = pc;
+                
+        ptlrpcd_wake(req);
 }
 
 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
@@ -169,8 +186,6 @@ static int ptlrpcd(void *arg)
         return 0;
 }
 #else
-static int ptlrpcd_recurred = 0;
-static void *ptlrpcd_callback;
 
 int ptlrpcd_check_async_rpcs(void *arg)
 {
@@ -178,25 +193,19 @@ int ptlrpcd_check_async_rpcs(void *arg)
         int                  rc = 0;
 
         /* single threaded!! */
-        ptlrpcd_recurred++;
+        pc->pc_recurred++;
 
-        if (ptlrpcd_recurred == 1)
+        if (pc->pc_recurred == 1)
                 rc = ptlrpcd_check(pc);
 
-        ptlrpcd_recurred--;
+        pc->pc_recurred--;
         return rc;
 }
 #endif
 
-int ptlrpcd_addref(void)
+static int ptlrpcd_start(struct ptlrpcd_ctl *pc)
 {
-        struct ptlrpcd_ctl *pc = &ptlrpcd_pc;
         int rc = 0;
-        ENTRY;
-
-        down(&ptlrpcd_sem);
-        if (++ptlrpcd_users != 1)
-                GOTO(out, rc);
 
         memset(pc, 0, sizeof(*pc));
         init_completion(&pc->pc_starting);
@@ -218,9 +227,46 @@ int ptlrpcd_addref(void)
 
         wait_for_completion(&pc->pc_starting);
 #else
-        ptlrpcd_callback =
+        pc->pc_callback =
                 liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc);
 #endif
+out:
+        RETURN(rc);
+}
+
+static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
+{
+        set_bit(LIOD_STOP, &pc->pc_flags);
+        wake_up(&pc->pc_waitq);
+#ifdef __KERNEL__
+        wait_for_completion(&pc->pc_finishing);
+#else
+        liblustre_deregister_wait_callback(pc->pc_callback);
+#endif
+        ptlrpc_set_destroy(pc->pc_set);
+}
+
+int ptlrpcd_addref(void)
+{
+        int rc = 0;
+        ENTRY;
+
+        down(&ptlrpcd_sem);
+        if (++ptlrpcd_users != 1)
+                GOTO(out, rc);
+
+        rc = ptlrpcd_start(&ptlrpcd_pc);
+        if (rc) {
+                --ptlrpcd_users;
+                GOTO(out, rc);
+        }
+
+        rc = ptlrpcd_start(&ptlrpcd_recovery_pc);
+        if (rc) {
+                ptlrpcd_stop(&ptlrpcd_pc);
+                --ptlrpcd_users;
+                GOTO(out, rc);
+        }
 out:
         up(&ptlrpcd_sem);
         RETURN(rc);
@@ -228,18 +274,10 @@ out:
 
 void ptlrpcd_decref(void)
 {
-        struct ptlrpcd_ctl *pc = &ptlrpcd_pc;
-
         down(&ptlrpcd_sem);
         if (--ptlrpcd_users == 0) {
-                set_bit(LIOD_STOP, &pc->pc_flags);
-                wake_up(&pc->pc_waitq);
-#ifdef __KERNEL__
-                wait_for_completion(&pc->pc_finishing);
-#else
-                liblustre_deregister_wait_callback(ptlrpcd_callback);
-#endif
-                ptlrpc_set_destroy(pc->pc_set);
+                ptlrpcd_stop(&ptlrpcd_pc);
+                ptlrpcd_stop(&ptlrpcd_recovery_pc);
         }
         up(&ptlrpcd_sem);
 }
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c
index 1ff5f306c7..613cb4828e 100644
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -258,9 +258,19 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
                imp->imp_target_uuid.uuid,
                imp->imp_connection->c_remote_uuid.uuid);
         
-        ptlrpc_set_import_discon(imp);
+        if (ptlrpc_set_import_discon(imp)) {
+                if (!imp->imp_replayable) {
+                        CDEBUG(D_HA, "import %s@%s for %s not replayable, "
+                               "auto-deactivating\n",
+                               imp->imp_target_uuid.uuid,
+                               imp->imp_connection->c_remote_uuid.uuid,
+                               imp->imp_obd->obd_name);
+                        ptlrpc_deactivate_import(imp);
+                }
+
+                rc = ptlrpc_connect_import(imp, NULL);
+        }
 
-        rc = ptlrpc_connect_import(imp, NULL);
         
         /* Wait for recovery to complete and resend. If evicted, then
            this request will be errored out later.*/
@@ -276,10 +286,9 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
  * This should only be called by the ioctl interface, currently
  * with the lctl deactivate and activate commands.
  */
-int ptlrpc_set_import_active(struct obd_import *imp, int active)
+int  ptlrpc_set_import_active(struct obd_import *imp, int active)
 {
         struct obd_device *obd = imp->imp_obd;
-        unsigned long flags;
         int rc = 0;
 
         LASSERT(obd);
@@ -287,23 +296,14 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
         /* When deactivating, mark import invalid, and abort in-flight
          * requests. */
         if (!active) {
-                ptlrpc_invalidate_import(imp);
+                ptlrpc_invalidate_import(imp, 0);
         } 
 
         /* When activating, mark import valid, and attempt recovery */
         if (active) {
                 CDEBUG(D_HA, "setting import %s VALID\n",
                        imp->imp_target_uuid.uuid);
-                spin_lock_irqsave(&imp->imp_lock, flags);
-                imp->imp_invalid = 0;
-                spin_unlock_irqrestore(&imp->imp_lock, flags);
-
                 rc = ptlrpc_recover_import(imp, NULL);
-                if (rc) {
-                        spin_lock_irqsave(&imp->imp_lock, flags);
-                        imp->imp_invalid = 1;
-                        spin_unlock_irqrestore(&imp->imp_lock, flags);
-                }
         }
 
         RETURN(rc);
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index 5cd1e62334..e68831369f 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -2,6 +2,10 @@
 
 set -e
 
+# 17 = bug 2732
+ALWAYS_EXCEPT="17"
+
+
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 UPCALL=${UPCALL:-$PWD/recovery-small-upcall.sh}
 . $LUSTRE/tests/test-framework.sh
@@ -60,6 +64,12 @@ if [ ! -z "$EVAL" ]; then
     exit $?
 fi
 
+if [ "$ONLY" == "cleanup" ]; then
+    sysctl -w portals.debug=0 || true
+    cleanup
+    exit
+fi
+
 REFORMAT=--reformat $SETUP
 unset REFORMAT
 
@@ -199,4 +209,72 @@ test_15() {
 }
 run_test 15 "failed open (-ENOMEM)"
 
+test_16() {
+# OBD_FAIL_PTLRPC_BULK_PUT_NET | OBD_FAIL_ONCE
+    do_facet client cp /etc/termcap $MOUNT
+    sync
+
+    sysctl -w lustre.fail_loc=0x80000504
+    cancel_lru_locks OSC
+    # will get evicted here
+    do_facet client "diff /etc/termcap $MOUNT/termcap"  && return 1
+    sysctl -w lustre.fail_loc=0
+    do_facet client "diff /etc/termcap $MOUNT/termcap"  || return 2
+
+}
+run_test 16 "timeout bulk put, evict client (2732)"
+
+test_17() {
+# OBD_FAIL_PTLRPC_BULK_GET_NET | OBD_FAIL_ONCE
+    # will get evicted here
+    sysctl -w lustre.fail_loc=0x80000503
+    do_facet client cp /etc/termcap $MOUNT && return 1
+
+    do_facet client "diff /etc/termcap $MOUNT/termcap"  && return 1
+    sysctl -w lustre.fail_loc=0
+    do_facet client "diff /etc/termcap $MOUNT/termcap"  || return 2
+
+}
+run_test 17 "timeout bulk get, evict client (2732)"
+
+test_18() {
+# OBD_FAIL_PTLRPC_BULK_GET_NET|OBD_FAIL_ONCE
+    do_facet client mkdir -p $MOUNT/$tdir
+    f=$MOUNT/$tdir/$tfile
+    f2=$MOUNT/$tdir/${tfile}-2
+
+    cancel_lru_locks OSC
+    for a in /proc/fs/lustre/llite/*/dump_page_cache; do
+        if [ `wc -l $a | awk '{print $1}'` -gt 1 ]; then
+                echo there is still data in page cache $a ?
+                cat $a;
+                return 1;
+        fi
+    done
+
+    # shouldn't have to set stripe size if count==1
+    lfs setstripe $f $((128 * 1024)) 0 1
+    lfs setstripe $f2 $((128 * 1024)) 0 1
+
+    do_facet client cp /etc/termcap $f
+    sync
+    # just use this write to trigger the client's eviction from the ost
+    sysctl -w lustre.fail_loc=0x80000503
+    do_facet client dd if=/dev/zero of=$f2 bs=4k count=1
+    sync
+    sysctl -w lustre.fail_loc=0
+    # allow recovery to complete
+    sleep 10
+    # my understanding is that there should be nothing in the page
+    # cache after the client reconnects?     
+    for a in /proc/fs/lustre/llite/*/dump_page_cache; do
+        if [ `wc -l $a | awk '{print $1}'` -gt 1 ]; then
+                echo there is still data in page cache $a ?
+                cat $a;
+                return 1;
+        fi
+    done
+}
+run_test 18 "eviction and reconnect clears page cache (2766)"
+
 $CLEANUP
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh
index bd109b938b..d0e534f530 100755
--- a/lustre/tests/replay-ost-single.sh
+++ b/lustre/tests/replay-ost-single.sh
@@ -132,11 +132,12 @@ test_5() {
 run_test 5 "Fail OST during iozone"
 
 kbytesfree() {
-   cat /proc/fs/lustre/osc/OSC_*MNT*/kbytesfree | awk '{total+=$1} END {print total}'
+   awk '{total+=$1} END {print total}' /proc/fs/lustre/osc/OSC_*MNT*/kbytesfree
 }
 
 test_6() {
     f=$DIR/$tfile
+    sync
     before=`kbytesfree`
     dd if=/dev/urandom bs=1024 count=5120 of=$f
 #define OBD_FAIL_MDS_REINT_NET_REP       0x119
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 8b1c6e308d..9ce73ccfb4 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -823,14 +823,15 @@ run_test 41 "read from a valid osc while other oscs are invalid"
 
 # test MDS recovery after ost failure
 test_42() {
+    blocks=`df $MOUNT | tail -1 | awk '{ print $1 }'`
     createmany -o $DIR/$tfile-%d 800
     replay_barrier ost
     unlinkmany $DIR/$tfile-%d 0 400
     facet_failover ost
     
-    # osc is evicted after
-    df $MOUNT && return 1
-    df $MOUNT || return 2
+    # osc is evicted, fs is smaller
+    blocks_after=`df $MOUNT | tail -1 | awk '{ print $1 }'`
+    [ $blocks_after -lt $blocks ] || return 1
     echo wait for MDS to timeout and recover
     sleep $((TIMEOUT * 2))
     unlinkmany $DIR/$tfile-%d 400 400
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index b493c9cf25..a23f6d877b 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -125,7 +125,6 @@ reboot_facet() {
 wait_for_host() {
    HOST=$1
    check_network  $HOST 900
-   while ! do_node $HOST "$CHECKSTAT -t dir $LUSTRE"; do sleep 5; done
    while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do sleep 5; done
 }
 
-- 
GitLab