diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index a211ae2971e65274cd6655b07395a6dca00f97d1..4e238cbbdac01cfc98f343a3ed0ba1f9b7da2475 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -19,6 +19,15 @@ Bugzilla   : 12786
 Description: lfs setstripe enhancement
 Details    : Make lfs setstripe understand 'k', 'm' and 'g' for stripe size.
 
+Severity   : normal
+Frequency  : mds/oss recovery 
+Bugzilla   : 10800
+Description: llog ctxt is referenced after it has been freed.
+Details    : A refcount was added to the llog ctxt to avoid the race between
+	     ctxt free and the llog recovery process. Each llog user must hold
+	     a ctxt reference before it accesses the llog, and the llog ctxt
+	     can only be freed when its refcount drops to zero.
+
 --------------------------------------------------------------------------------
 
 2007-07-30         Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h
index 88730590c099210bd6f03ba07c71463ebd1a665f..46b5fc831ef402939cec6591f73898704812fed0 100644
--- a/lustre/include/liblustre.h
+++ b/lustre/include/liblustre.h
@@ -752,9 +752,6 @@ typedef enum {
 cap_t   cap_get_proc(void);
 int     cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
 
-/* log related */
-static inline int llog_init_commit_master(void) { return 0; }
-static inline int llog_cleanup_commit_master(int force) { return 0; }
 static inline void libcfs_run_lbug_upcall(char *file, const char *fn,
                                            const int l){}
 
diff --git a/lustre/include/lustre_commit_confd.h b/lustre/include/lustre_commit_confd.h
index 40b19788146c6df071ef0ccb649d3252bf0d8a4a..1804615d8b0cc919e1c1e315b6e85a1297472831 100644
--- a/lustre/include/lustre_commit_confd.h
+++ b/lustre/include/lustre_commit_confd.h
@@ -51,8 +51,8 @@ struct llog_commit_daemon {
 };
 
 /* ptlrpc/recov_thread.c */
-int llog_start_commit_thread(void);
-struct llog_canceld_ctxt *llcd_grab(void);
-void llcd_send(struct llog_canceld_ctxt *llcd);
+int llog_start_commit_thread(struct llog_commit_master *);
 
+int llog_init_commit_master(struct llog_commit_master *);
+int llog_cleanup_commit_master(struct llog_commit_master *lcm, int force);
 #endif /* _LUSTRE_COMMIT_CONFD_H */
diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h
index 99adec6b3fd408ca3289b07b58532c69681918dd..83bfc064e339986a37ae139f84986cc4716f8663 100644
--- a/lustre/include/lustre_log.h
+++ b/lustre/include/lustre_log.h
@@ -120,6 +120,7 @@ int llog_cat_set_first_idx(struct llog_handle *cathandle, int index);
 /* llog_obd.c */
 int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
                int count,  struct llog_logid *logid,struct llog_operations *op);
+int __llog_ctxt_put(struct llog_ctxt *ctxt);
 int llog_cleanup(struct llog_ctxt *);
 int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
 int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
@@ -212,6 +213,8 @@ struct llog_ctxt {
         struct llog_handle      *loc_handle;
         struct llog_canceld_ctxt *loc_llcd;
         struct semaphore         loc_sem; /* protects loc_llcd and loc_imp */
+        atomic_t                 loc_refcount;
+        struct llog_commit_master *loc_lcm;
         void                    *llog_proc_cb;
 };
 
@@ -267,13 +270,49 @@ static inline int llog_data_len(int len)
         return size_round(len);
 }
 
+#define llog_ctxt_get(ctxt) /* take a ref; evaluates to the ctxt pointer */ \
+({                                                                          \
+         struct llog_ctxt *ctxt_ = ctxt; /* argument is evaluated once */   \
+         LASSERT(atomic_read(&ctxt_->loc_refcount) > 0);                    \
+         atomic_inc(&ctxt_->loc_refcount);                                  \
+         CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt_,       \
+                atomic_read(&ctxt_->loc_refcount));                         \
+         ctxt_;                                                             \
+})
+/* Drop one ctxt reference; NULL is a no-op. @ctxt is evaluated repeatedly. */
+#define llog_ctxt_put(ctxt)                                                 \
+do {                                                                        \
+         if ((ctxt) == NULL)                                                \
+                 break;                                                     \
+         CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", (ctxt),      \
+                atomic_read(&(ctxt)->loc_refcount) - 1);                    \
+         LASSERT(atomic_read(&(ctxt)->loc_refcount) > 0);                   \
+         LASSERT(atomic_read(&(ctxt)->loc_refcount) < 0x5a5a5a);            \
+         __llog_ctxt_put(ctxt);                                             \
+} while (0)
+/* Return obd's ctxt for @index with a reference taken, or NULL if absent. */
 static inline struct llog_ctxt *llog_get_context(struct obd_device *obd,
-                                                 int index)
+                                                   int index)
 {
-        if (index < 0 || index >= LLOG_MAX_CTXTS)
-                return NULL;
+         struct llog_ctxt *ctxt; /* caller releases it via llog_ctxt_put() */
+         /* reject out-of-range slots before touching the array */
+         if (index < 0 || index >= LLOG_MAX_CTXTS)
+                 return NULL;
+         /* check the slot and take the reference under obd_dev_lock */
+         spin_lock(&obd->obd_dev_lock);  
+         if (obd->obd_llog_ctxt[index] == NULL) {
+                 spin_unlock(&obd->obd_dev_lock);
+                 CWARN("obd %p and ctxt index %d is NULL \n", obd, index);
+                 return NULL;
+         }
+         ctxt = llog_ctxt_get(obd->obd_llog_ctxt[index]);
+         spin_unlock(&obd->obd_dev_lock);
+         return ctxt;
+}
 
-        return obd->obd_llog_ctxt[index];
+static inline int llog_ctxt_null(struct obd_device *obd, int index)
+{       /* nonzero when the slot is empty; no lock, no ref, @index unchecked */
+        return (obd->obd_llog_ctxt[index] == NULL);
 }
 
 static inline int llog_write_rec(struct llog_handle *handle,
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 83d0c52633a55cf519e119b0012fff31340e343c..817058d15ddd072d2b92ee2c404c8d13cbf7d943 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -310,6 +310,7 @@ struct filter_obd {
 
         int                      fo_fmd_max_num; /* per exp filter_mod_data */
         int                      fo_fmd_max_age; /* jiffies to fmd expiry */
+        void                     *fo_lcm;
 };
 
 #define OSC_MAX_RIF_DEFAULT       8
@@ -727,6 +728,7 @@ struct obd_device {
         struct lustre_class_hash_body *obd_nid_hash_body; 
         atomic_t obd_refcount;
         cfs_waitq_t             obd_refcount_waitq;
+        cfs_waitq_t             obd_llog_waitq;
         struct list_head        obd_exports;
         int                     obd_num_exports;
         struct ldlm_namespace  *obd_namespace;
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 40aa2b3f2d14cc43676b6283a8dfdca13c59f31d..3fdc2fa47dc942a7d107d242a2ccda2f7172f625 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -104,6 +104,7 @@ extern int obd_race_state;
 #define OBD_FAIL_MDS_LLOG_CREATE_FAILED  0x137
 #define OBD_FAIL_MDS_LOV_SYNC_RACE       0x138
 #define OBD_FAIL_MDS_OSC_PRECREATE       0x139
+#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT   0x13a
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -138,6 +139,8 @@ extern int obd_race_state;
 #define OBD_FAIL_OST_SETATTR_CREDITS     0x21e
 #define OBD_FAIL_OST_HOLD_WRITE_RPC      0x21f
 #define OBD_FAIL_OST_BRW_WRITE_BULK2     0x220
+#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c
index d179c10e69ae7c5f1f76da4cb99d105b7a0a7959..4da1a09b4cdaebfd82b1e15b1d367eda857e1402 100644
--- a/lustre/liblustre/llite_lib.c
+++ b/lustre/liblustre/llite_lib.c
@@ -167,9 +167,10 @@ int liblustre_process_log(struct config_llog_instance *cfg,
 
         exp = class_conn2export(&mgc_conn);
 
-        ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT];
+        ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
         cfg->cfg_flags |= CFG_F_COMPAT146;
         rc = class_config_parse_llog(ctxt, profile, cfg);
+        llog_ctxt_put(ctxt);
         if (rc) {
                 CERROR("class_config_parse_llog failed: rc = %d\n", rc);
         }
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index 3a3a041deb5afed0f047110b1a58f396191cb264..6835b02e9cf88a0d2a4868327a922e70820e1699 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -832,6 +832,7 @@ static int old_lustre_process_log(struct super_block *sb, char *newprofile,
          */
         rc = class_config_dump_llog(ctxt, profile, cfg);
 #endif
+        llog_ctxt_put(ctxt);
         switch (rc) {
         case 0: {
                 /* Set the caller's profile name to the old-style */
diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c
index 7392d83f0f70f21a68864f1e43956a6b27184169..860125fd053aac5fe24ca259c552241856015766 100755
--- a/lustre/lov/lov_ea.c
+++ b/lustre/lov/lov_ea.c
@@ -468,7 +468,7 @@ static int lsm_revalidate_join(struct lov_stripe_md *lsm,
         LASSERT(ctxt);
 
         if (lsm->lsm_array && lsm->lsm_array->lai_ext_array)
-                RETURN(0);
+                GOTO(release_ctxt, rc = 0);
 
         CDEBUG(D_INFO, "get lsm logid: "LPU64":"LPU64"\n",
                lsm->lsm_array->lai_array_id.lgl_oid,
@@ -476,7 +476,7 @@ static int lsm_revalidate_join(struct lov_stripe_md *lsm,
         OBD_ALLOC(lsm->lsm_array->lai_ext_array,lsm->lsm_array->lai_ext_count *
                                                 sizeof (struct lov_extent));
         if (!lsm->lsm_array->lai_ext_array)
-                RETURN(-ENOMEM);
+                GOTO(release_ctxt, rc = -ENOMEM);        
 
         CDEBUG(D_INFO, "get lsm logid: "LPU64":"LPU64"\n",
                lsm->lsm_array->lai_array_id.lgl_oid,
@@ -497,6 +497,8 @@ static int lsm_revalidate_join(struct lov_stripe_md *lsm,
 out:
         if (rc)
                 lovea_free_array_info(lsm);
+release_ctxt:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -509,16 +511,15 @@ int lsm_destroy_join(struct lov_stripe_md *lsm, struct obdo *oa,
         ENTRY;
 
         LASSERT(md_exp != NULL);
+        /*for those orphan inode, we should keep array id*/
+        if (!(oa->o_valid & OBD_MD_FLCOOKIE))
+                RETURN(rc);
+
         ctxt = llog_get_context(md_exp->exp_obd, LLOG_LOVEA_REPL_CTXT);
         if (!ctxt)
-                GOTO(out, rc = -EINVAL);
+                RETURN(-EINVAL);
 
         LASSERT(lsm->lsm_array != NULL);
-        /*for those orphan inode, we should keep array id*/
-        if (!(oa->o_valid & OBD_MD_FLCOOKIE))
-                RETURN(0);
-
-        LASSERT(ctxt != NULL);
         rc = llog_create(ctxt, &llh, &lsm->lsm_array->lai_array_id,
                          NULL);
         if (rc)
@@ -530,6 +531,7 @@ int lsm_destroy_join(struct lov_stripe_md *lsm, struct obdo *oa,
         }
         llog_free_handle(llh);
 out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c
index 232df7ae0030fd292b219dfe97c28b8f2a46fe56..6e59ecfd23654f66acbecb471f03cd211f50f0af 100644
--- a/lustre/lov/lov_log.c
+++ b/lustre/lov/lov_log.c
@@ -92,6 +92,7 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt,
 
                 rc += llog_add(cctxt, rec, NULL, logcookies + rc,
                                 numcookies - rc);
+                llog_ctxt_put(cctxt);
         }
 
         RETURN(rc);
@@ -120,6 +121,8 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
                 child = lov->lov_tgts[i]->ltd_exp->exp_obd;
                 cctxt = llog_get_context(child, ctxt->loc_idx);
                 rc = llog_connect(cctxt, 1, logid, gen, uuid);
+                llog_ctxt_put(cctxt);
+ 
                 if (rc) {
                         CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc);
                         if (!err) 
@@ -154,6 +157,7 @@ static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls
                 int err;
 
                 err = llog_cancel(cctxt, NULL, 1, cookies, flags);
+                llog_ctxt_put(cctxt);
                 if (err && lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
                         CERROR("error: objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
index 43dc9a4d44d844b52081b36fedd1ce870e95c385..aa55c180282583509a2c399c6123e72dc58c473f 100644
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -828,6 +828,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         case OBD_IOC_PARSE: {
                 ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
                 rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL);
+                llog_ctxt_put(ctxt);
                 GOTO(out, rc);
         }
 #ifdef __KERNEL__
@@ -835,7 +836,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         case OBD_IOC_LLOG_PRINT: {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
                 rc = llog_ioctl(ctxt, cmd, data);
-
+                llog_ctxt_put(ctxt);
                 GOTO(out, rc);
         }
 #endif
@@ -1284,6 +1285,7 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt,
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
                 ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_ctxt_put(ctxt);
         }
 
         rc = llog_setup(obd, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL,
@@ -1291,6 +1293,7 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt,
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT);
                 ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_ctxt_put(ctxt);
         }
 
         RETURN(rc);
diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c
index de9aa4b956c5530244da9f1949c3c88ccc3098b3..07e565cd52dec1b44aee548ef366dd9205ead0fc 100644
--- a/lustre/mds/handler.c
+++ b/lustre/mds/handler.c
@@ -1958,10 +1958,6 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
                 GOTO(err_ns, rc);
         }
 
-        rc = llog_start_commit_thread();
-        if (rc < 0)
-                GOTO(err_fs, rc);
-
         if (lcfg->lcfg_bufcount >= 4 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
                 class_uuid_t uuid;
 
@@ -2133,6 +2129,7 @@ err_cleanup:
 
 int mds_postrecov(struct obd_device *obd)
 {
+        struct llog_ctxt *ctxt;
         int rc;
         ENTRY;
 
@@ -2140,7 +2137,9 @@ int mds_postrecov(struct obd_device *obd)
                 RETURN(0);
 
         LASSERT(!obd->obd_recovering);
-        LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); 
+        LASSERT(ctxt != NULL);
+        llog_ctxt_put(ctxt);
 
         /* set nextid first, so we are sure it happens */
         mutex_down(&obd->obd_dev_sem);
diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c
index d2db5cb5aa93d04e2a4372ffd3ed96413e54cd00..1015d4eb3b736123df80e43792efad0a81d9e4c0 100644
--- a/lustre/mds/mds_join.c
+++ b/lustre/mds/mds_join.c
@@ -343,7 +343,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req,
         struct lov_mds_md_join *head_lmmj = NULL, *tail_lmmj = NULL;
         int lmm_size, rc = 0, cleanup_phase = 0, size;
         struct llog_handle *llh_head = NULL, *llh_tail = NULL;
-        struct llog_ctxt *ctxt;
+        struct llog_ctxt *ctxt = NULL;
         struct mds_rec_join *join_rec;
         ENTRY;
 
@@ -392,6 +392,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req,
 
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         ctxt = llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT);
+        LASSERT(ctxt != NULL);
         cleanup_phase = 2;
         if (le32_to_cpu(head_lmm->lmm_magic) == LOV_MAGIC) { /*simple file */
                 struct llog_logid *llog_array;
@@ -482,6 +483,7 @@ cleanup:
         case 3:
                 llog_close(llh_head);
         case 2:
+                llog_ctxt_put(ctxt);
                 if (head_lmmj && ((void*)head_lmmj != (void*)head_lmm))
                         OBD_FREE_PTR(head_lmmj);
 
diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c
index 5878a2d8184a806f9d252768b941ff91994d0d64..a33a313964acc1a05994b86801f59a133a03daee 100644
--- a/lustre/mds/mds_log.c
+++ b/lustre/mds/mds_log.c
@@ -56,6 +56,8 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt,
 
         lctxt = llog_get_context(lov_obd, ctxt->loc_idx);
         rc = llog_add(lctxt, rec, lsm, logcookies, numcookies);
+        llog_ctxt_put(lctxt);
+
         RETURN(rc);
 }
 
@@ -72,6 +74,7 @@ static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count,
 
         lctxt = llog_get_context(lov_obd, ctxt->loc_idx);
         rc = llog_connect(lctxt, count, logid, gen, uuid);
+        llog_ctxt_put(lctxt);
         RETURN(rc);
 }
 
@@ -86,6 +89,7 @@ static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls
 
         lctxt = llog_get_context(lov_obd, ctxt->loc_idx);
         rc = llog_cancel(lctxt, lsm, count, cookies, flags);
+        llog_ctxt_put(lctxt);
         RETURN(rc);
 }
 
@@ -119,6 +123,7 @@ int mds_log_op_unlink(struct obd_device *obd,
         ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
         rc = llog_add(ctxt, &lur->lur_hdr, lsm, logcookies,
                       cookies_size / sizeof(struct llog_cookie));
+        llog_ctxt_put(ctxt);
 
         OBD_FREE(lur, sizeof(*lur));
 out:
@@ -163,6 +168,8 @@ int mds_log_op_setattr(struct obd_device *obd, struct inode *inode,
         rc = llog_add(ctxt, &lsr->lsr_hdr, lsm, logcookies,
                       cookies_size / sizeof(struct llog_cookie));
 
+        llog_ctxt_put(ctxt);
+
         OBD_FREE(lsr, sizeof(*lsr));
  out:
         obd_free_memmd(mds->mds_osc_exp, &lsm);
diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c
index 4df5c5b41c6ca33645167506de151d5acaf3828f..1a2b681eeff38e3eda7db2e722adca215122ddb2 100644
--- a/lustre/mds/mds_lov.c
+++ b/lustre/mds/mds_lov.c
@@ -452,12 +452,15 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         switch (cmd) {
         case OBD_IOC_RECORD: {
                 char *name = data->ioc_inlbuf1;
+                struct llog_ctxt *ctxt;
+
                 if (mds->mds_cfg_llh)
                         RETURN(-EBUSY);
 
+                ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-                rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                                 &mds->mds_cfg_llh, NULL, name);
+                rc = llog_create(ctxt, &mds->mds_cfg_llh, NULL, name);
+                llog_ctxt_put(ctxt);
                 if (rc == 0)
                         llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN,
                                          &cfg_uuid);
@@ -482,12 +485,14 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 
         case OBD_IOC_CLEAR_LOG: {
                 char *name = data->ioc_inlbuf1;
+                struct llog_ctxt *ctxt;
                 if (mds->mds_cfg_llh)
                         RETURN(-EBUSY);
 
+                ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-                rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                                 &mds->mds_cfg_llh, NULL, name);
+                rc = llog_create(ctxt, &mds->mds_cfg_llh, NULL, name);
+                llog_ctxt_put(ctxt);
                 if (rc == 0) {
                         llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN,
                                          NULL);
@@ -540,6 +545,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL);
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+                llog_ctxt_put(ctxt);
                 if (rc)
                         RETURN(rc);
 
@@ -552,6 +558,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+                llog_ctxt_put(ctxt);
                 if (rc)
                         RETURN(rc);
 
@@ -600,6 +607,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 rc = llog_ioctl(ctxt, cmd, data);
                 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
                 llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL);
+                llog_ctxt_put(ctxt);
                 rc2 = obd_set_info_async(mds->mds_osc_exp,
                                          strlen(KEY_MDS_CONN), KEY_MDS_CONN,
                                          0, NULL, NULL);
@@ -615,6 +623,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
                 rc = llog_ioctl(ctxt, cmd, data);
                 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
+                llog_ctxt_put(ctxt);
 
                 RETURN(rc);
         }
@@ -663,6 +672,7 @@ static int __mds_lov_synchronize(void *data)
         struct mds_obd *mds = &obd->u.mds;
         struct obd_uuid *uuid;
         __u32  idx = mlsi->mlsi_index;
+        struct llog_ctxt *ctxt;
         int rc = 0;
         ENTRY;
 
@@ -686,9 +696,15 @@ static int __mds_lov_synchronize(void *data)
         if (rc != 0)
                 GOTO(out, rc);
 
-        rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
-                          mds->mds_lov_desc.ld_tgt_count,
-                          NULL, NULL, uuid);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
+        if (!ctxt) 
+              RETURN(-ENODEV); 
+        
+        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT, 60);
+
+        rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count, 
+                          NULL, NULL, uuid); 
+        llog_ctxt_put(ctxt);
 
         if (rc != 0) {
                 CERROR("%s failed at llog_origin_connect: %d\n",
@@ -836,7 +852,7 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched,
                 RETURN(rc);
         }
 
-        LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
+        LASSERT(!llog_ctxt_null(obd, LLOG_MDS_OST_ORIG_CTXT));
         rc = mds_lov_start_synchronize(obd, watched, data,
                                        !(ev == OBD_NOTIFY_SYNC));
 
diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c
index 67a4f2671c64001f28f3ab4a5d691f71e1e1d2e3..3ec098884717bff62b37833797232d48d92ee7c8 100644
--- a/lustre/mds/mds_reint.c
+++ b/lustre/mds/mds_reint.c
@@ -91,6 +91,8 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
                 rc = llog_cancel(ctxt, lsm, mlcd->mlcd_cookielen /
                                                 sizeof(*mlcd->mlcd_cookies),
                                  mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW);
+                llog_ctxt_put(ctxt);
+
                 if (rc)
                         CERROR("error cancelling %d log cookies: rc %d\n",
                                (int)(mlcd->mlcd_cookielen /
diff --git a/lustre/mgc/libmgc.c b/lustre/mgc/libmgc.c
index 49a8260c8cec3dcdb61759edc1bbccf62f31cdde..b72e8bbc6267ce45bbcd4d56ee8f3fd99000d667 100644
--- a/lustre/mgc/libmgc.c
+++ b/lustre/mgc/libmgc.c
@@ -112,6 +112,7 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
                 ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_ctxt_put(ctxt);
         }
 
         RETURN(rc);
diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c
index 1d22aa1b37f403182a615db09a3934696c019b38..550073dc5187b2d4680224ca1088958dfd8adc66 100644
--- a/lustre/mgc/mgc_request.c
+++ b/lustre/mgc/mgc_request.c
@@ -873,6 +873,7 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
                 ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_ctxt_put(ctxt);
         }
 
         RETURN(rc);
@@ -1085,6 +1086,7 @@ static int mgc_process_log(struct obd_device *mgc,
                 /* Now, whether we copied or not, start using the local llog.
                    If we failed to copy, we'll start using whatever the old 
                    log has. */
+                llog_ctxt_put(ctxt);
                 ctxt = lctxt;
         }
 
@@ -1092,8 +1094,11 @@ static int mgc_process_log(struct obd_device *mgc,
            copy of the instance for the update.  The cfg_last_idx will
            be updated here. */
         rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg);
-        
- out_pop:
+ 
+out_pop:
+        llog_ctxt_put(ctxt);
+        if (ctxt != lctxt)
+                llog_ctxt_put(lctxt);
         if (must_pop) 
                 pop_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL);
 
diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c
index 093e1b09efd8d800146de04879afc34df94d3bf3..e93134dfc7f076c47a019932b5dbcfede06763b4 100644
--- a/lustre/mgs/mgs_handler.c
+++ b/lustre/mgs/mgs_handler.c
@@ -157,10 +157,6 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf)
                 GOTO(err_ns, rc);
         }
 
-        rc = llog_start_commit_thread();
-        if (rc < 0)
-                GOTO(err_fs, rc);
-
         rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL,
                         &llog_lvfs_ops);
         if (rc)
@@ -635,6 +631,7 @@ out_free:
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+                llog_ctxt_put(ctxt);
                 if (rc)
                         RETURN(rc);
 
@@ -650,6 +647,7 @@ out_free:
                 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
                 rc = llog_ioctl(ctxt, cmd, data);
                 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
+                llog_ctxt_put(ctxt);
 
                 RETURN(rc);
         }
diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c
index 3d8b607b6217a8da9cb6a703e7b5a19a8c95d513..95af9c86013775c7e8a6f795fe9c7676d85151d3 100644
--- a/lustre/mgs/mgs_llog.c
+++ b/lustre/mgs/mgs_llog.c
@@ -224,15 +224,16 @@ static int mgs_get_fsdb_from_llog(struct obd_device *obd, struct fs_db *fsdb)
         char *logname;
         struct llog_handle *loghandle;
         struct lvfs_run_ctxt saved;
+        struct llog_ctxt *ctxt;
         int rc, rc2;
         ENTRY;
 
+        ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+        LASSERT(ctxt != NULL);
         name_create(&logname, fsdb->fsdb_name, "-client");
         down(&fsdb->fsdb_sem);
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-        
-        rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                         &loghandle, NULL, logname);
+        rc = llog_create(ctxt, &loghandle, NULL, logname);
         if (rc)
                 GOTO(out_pop, rc);
 
@@ -249,8 +250,8 @@ out_close:
         rc2 = llog_close(loghandle);
         if (!rc)
                 rc = rc2;
-
 out_pop:
+        llog_ctxt_put(ctxt);
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         up(&fsdb->fsdb_sem);
         name_destroy(&logname);
@@ -564,6 +565,7 @@ static int mgs_modify(struct obd_device *obd, struct fs_db *fsdb,
 {
         struct llog_handle *loghandle;
         struct lvfs_run_ctxt saved;
+        struct llog_ctxt *ctxt;
         struct mgs_modify_lookup *mml;
         int rc, rc2;
         ENTRY;
@@ -571,9 +573,10 @@ static int mgs_modify(struct obd_device *obd, struct fs_db *fsdb,
         CDEBUG(D_MGS, "modify %s/%s/%s\n", logname, devname, comment);
 
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-        
-        rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                         &loghandle, NULL, logname);
+       
+        ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+        LASSERT(ctxt != NULL);
+        rc = llog_create(ctxt, &loghandle, NULL, logname);
         if (rc)
                 GOTO(out_pop, rc);
 
@@ -602,8 +605,8 @@ out_close:
         rc2 = llog_close(loghandle);
         if (!rc)
                 rc = rc2;
-
 out_pop:
+        llog_ctxt_put(ctxt);
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         if (rc && rc != -ENODEV) 
                 CERROR("modify %s/%s failed %d\n",
@@ -777,22 +780,25 @@ static int record_start_log(struct obd_device *obd,
 {
         static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
         struct lvfs_run_ctxt saved;
+        struct llog_ctxt *ctxt;
         int rc = 0;
         
-        if (*llh) {
+        if (*llh) 
                 GOTO(out, rc = -EBUSY);
-        }
 
+        ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+        if (!ctxt)
+                GOTO(out, rc = -ENODEV);
+        
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-        rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                         llh, NULL, name);
+        rc = llog_create(ctxt, llh, NULL, name);
         if (rc == 0)
                 llog_init_handle(*llh, LLOG_F_IS_PLAIN, &cfg_uuid);
         else
                 *llh = NULL;
 
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        llog_ctxt_put(ctxt);
 
 out:
         if (rc) {
@@ -819,17 +825,20 @@ static int mgs_log_is_empty(struct obd_device *obd, char *name)
 {
         struct lvfs_run_ctxt saved;
         struct llog_handle *llh;
+        struct llog_ctxt *ctxt;
         int rc = 0;
 
+        ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+        LASSERT(ctxt != NULL);
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-        rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                         &llh, NULL, name);
+        rc = llog_create(ctxt, &llh, NULL, name);
         if (rc == 0) {
                 llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
                 rc = llog_get_size(llh);
                 llog_close(llh);
         }
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        llog_ctxt_put(ctxt);
         /* header is record 1 */
         return(rc <= 1);
 }
@@ -1812,18 +1821,22 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti)
 int mgs_erase_log(struct obd_device *obd, char *name)
 {
         struct lvfs_run_ctxt saved;
+        struct llog_ctxt *ctxt;
         struct llog_handle *llh;
         int rc = 0;
 
+        ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+        LASSERT(ctxt != NULL);
+
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-        rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                         &llh, NULL, name);
+        rc = llog_create(ctxt, &llh, NULL, name);
         if (rc == 0) {
                 llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
                 rc = llog_destroy(llh);
                 llog_free_handle(llh);
         }
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        llog_ctxt_put(ctxt);
 
         if (rc)
                 CERROR("failed to clear log %s: %d\n", name, rc);
diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c
index c1eff406c09e2d8095ad7f492a70925de54ce82b..ff52ef622b7df6c7c81af0601e5b052ac800e175 100644
--- a/lustre/obdclass/llog_obd.c
+++ b/lustre/obdclass/llog_obd.c
@@ -38,24 +38,78 @@
 #include "llog_internal.h"
 
 /* helper functions for calling the llog obd methods */
+static struct llog_ctxt* llog_new_ctxt(struct obd_device *obd)
+{
+        struct llog_ctxt *ctxt;
 
-int llog_cleanup(struct llog_ctxt *ctxt)
+        OBD_ALLOC(ctxt, sizeof(*ctxt));
+        if (!ctxt)
+                return NULL;
+        
+        ctxt->loc_obd = obd;
+        atomic_set(&ctxt->loc_refcount, 1);
+        
+        return ctxt;
+}
+
+static void llog_ctxt_destroy(struct llog_ctxt *ctxt)
 {
+        if (ctxt->loc_exp)
+                class_export_put(ctxt->loc_exp);
+        OBD_FREE(ctxt, sizeof(*ctxt));
+        return;
+}
+
+int __llog_ctxt_put(struct llog_ctxt *ctxt)
+{
+        struct obd_device *obd;
         int rc = 0;
+
+        obd = ctxt->loc_obd;
+        spin_lock(&obd->obd_dev_lock);
+        if (!atomic_dec_and_test(&ctxt->loc_refcount)) {
+                spin_unlock(&obd->obd_dev_lock);
+                return rc;
+        }
+        obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
+        spin_unlock(&obd->obd_dev_lock);
+
+        LASSERT(obd->obd_stopping == 1);
+        /* cleanup the llog ctxt here */
+        if (CTXTP(ctxt, cleanup))
+                rc = CTXTP(ctxt, cleanup)(ctxt);
+ 
+        llog_ctxt_destroy(ctxt);
+        wake_up(&obd->obd_llog_waitq);
+        return rc;
+}
+EXPORT_SYMBOL(__llog_ctxt_put);
+ 
+int llog_cleanup(struct llog_ctxt *ctxt)
+{
+        struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+        struct obd_device *obd;
+        int rc, idx;
         ENTRY;
 
         if (!ctxt) {
                 CERROR("No ctxt\n");
                 RETURN(-ENODEV);
         }
-        
-        if (CTXTP(ctxt, cleanup))
-                rc = CTXTP(ctxt, cleanup)(ctxt);
 
-        ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
-        if (ctxt->loc_exp)
-                class_export_put(ctxt->loc_exp);
-        OBD_FREE(ctxt, sizeof(*ctxt));
+        obd = ctxt->loc_obd;    /* only safe after the NULL check above */
+        idx = ctxt->loc_idx;
+        /* balance the ctxt get taken when calling llog_cleanup */
+        llog_ctxt_put(ctxt);
+
+        /* sync with other llog ctxt user threads */
+        spin_lock(&obd->obd_dev_lock);
+        LASSERT(obd->obd_stopping == 1);
+        spin_unlock(&obd->obd_dev_lock);
+
+        /* try to free the ctxt, then wait for llog_ctxt_null(obd, idx) */
+        rc = __llog_ctxt_put(ctxt);
+        l_wait_event(obd->obd_llog_waitq, llog_ctxt_null(obd, idx), &lwi);
 
         RETURN(rc);
 }
@@ -71,24 +125,23 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
         if (index < 0 || index >= LLOG_MAX_CTXTS)
                 RETURN(-EFAULT);
 
-        if (obd->obd_llog_ctxt[index]) {
+        ctxt = llog_get_context(obd, index); 
+        if (ctxt) {
                 /* mds_lov_update_mds might call here multiple times. So if the
                    llog is already set up then don't to do it again. */
                 CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n", 
                        obd->obd_name, index);
-                ctxt = obd->obd_llog_ctxt[index];
                 LASSERT(ctxt->loc_obd == obd);
                 LASSERT(ctxt->loc_exp == disk_obd->obd_self_export);
                 LASSERT(ctxt->loc_logops == op);
+                llog_ctxt_put(ctxt); 
                 GOTO(out, rc = 0);
         }
-        
-        OBD_ALLOC(ctxt, sizeof(*ctxt));
+        ctxt = llog_new_ctxt(obd);
         if (!ctxt)
                 GOTO(out, rc = -ENOMEM);
 
         obd->obd_llog_ctxt[index] = ctxt;
-        ctxt->loc_obd = obd;
         ctxt->loc_exp = class_export_get(disk_obd->obd_self_export);
         ctxt->loc_idx = index;
         ctxt->loc_logops = op;
@@ -96,13 +149,12 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
 
         if (op->lop_setup)
                 rc = op->lop_setup(obd, index, disk_obd, count, logid);
-        
+
         if (rc) {
+                /* unhook the ctxt so the table never points at freed memory */
                 obd->obd_llog_ctxt[index] = NULL;
-                class_export_put(ctxt->loc_exp);
-                OBD_FREE(ctxt, sizeof(*ctxt));
+                llog_ctxt_destroy(ctxt);
         }
-        
 out:
         RETURN(rc);
 }
@@ -247,7 +299,8 @@ int llog_obd_origin_setup(struct obd_device *obd, int index,
         rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL, NULL);
         if (rc)
                 CERROR("llog_process with cat_cancel_cb failed: %d\n", rc);
- out:
+out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 EXPORT_SYMBOL(llog_obd_origin_setup);
diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c
index d991f519c946ab4840b7d0cda0f8982d55ab0efb..c98223a4e52fca4a26c638f0fce3abe712f80166 100644
--- a/lustre/obdclass/llog_test.c
+++ b/lustre/obdclass/llog_test.c
@@ -94,6 +94,7 @@ static int llog_test_1(struct obd_device *obd, char *name)
         rc = llog_create(ctxt, &llh, NULL, name);
         if (rc) {
                 CERROR("1a: llog_create with name %s failed: %d\n", name, rc);
+                llog_ctxt_put(ctxt);
                 RETURN(rc);
         }
         llog_init_handle(llh, LLOG_F_IS_PLAIN, &uuid);
@@ -104,6 +105,7 @@ static int llog_test_1(struct obd_device *obd, char *name)
  out:
         CWARN("1b: close newly-created log\n");
         rc2 = llog_close(llh);
+        llog_ctxt_put(ctxt);
         if (rc2) {
                 CERROR("1b: close log %s failed: %d\n", name, rc2);
                 if (rc == 0)
@@ -126,18 +128,18 @@ static int llog_test_2(struct obd_device *obd, char *name,
         rc = llog_create(ctxt, llh, NULL, name);
         if (rc) {
                 CERROR("2a: re-open log with name %s failed: %d\n", name, rc);
-                RETURN(rc);
+                GOTO(out, rc);
         }
         llog_init_handle(*llh, LLOG_F_IS_PLAIN, &uuid);
 
         if ((rc = verify_handle("2", *llh, 1)))
-                RETURN(rc);
+                GOTO(out, rc);
 
         CWARN("2b: create a log without specified NAME & LOGID\n");
         rc = llog_create(ctxt, &loghandle, NULL, NULL);
         if (rc) {
                 CERROR("2b: create log failed\n");
-                RETURN(rc);
+                GOTO(out, rc);
         }
         llog_init_handle(loghandle, LLOG_F_IS_PLAIN, &uuid);
         logid = loghandle->lgh_id;
@@ -147,7 +149,7 @@ static int llog_test_2(struct obd_device *obd, char *name,
         rc = llog_create(ctxt, &loghandle, &logid, NULL);
         if (rc) {
                 CERROR("2b: re-open log by LOGID failed\n");
-                RETURN(rc);
+                GOTO(out, rc);
         }
         llog_init_handle(loghandle, LLOG_F_IS_PLAIN, &uuid);
 
@@ -155,9 +157,11 @@ static int llog_test_2(struct obd_device *obd, char *name,
         rc = llog_destroy(loghandle);
         if (rc) {
                 CERROR("2b: destroy log failed\n");
-                RETURN(rc);
+                GOTO(out, rc);
         }
         llog_free_handle(loghandle);
+out:
+        llog_ctxt_put(ctxt);
 
         RETURN(rc);
 }
@@ -261,10 +265,10 @@ static int llog_test_4(struct obd_device *obd)
         }
         num_recs++;
         if ((rc = verify_handle("4b", cath, 2)))
-                RETURN(rc);
+                GOTO(ctxt_release, rc);
 
         if ((rc = verify_handle("4b", cath->u.chd.chd_current_log, num_recs)))
-                RETURN(rc);
+                GOTO(ctxt_release, rc);
 
         CWARN("4c: cancel 1 log record\n");
         rc = llog_cat_cancel_records(cath, 1, &cookie);
@@ -275,7 +279,7 @@ static int llog_test_4(struct obd_device *obd)
         num_recs--;
 
         if ((rc = verify_handle("4c", cath->u.chd.chd_current_log, num_recs)))
-                RETURN(rc);
+                GOTO(ctxt_release, rc);
 
         CWARN("4d: write 40,000 more log records\n");
         for (i = 0; i < 40000; i++) {
@@ -311,6 +315,8 @@ static int llog_test_4(struct obd_device *obd)
  out:
         CWARN("4f: put newly-created catalog\n");
         rc = llog_cat_put(cath);
+ctxt_release:
+        llog_ctxt_put(ctxt);
         if (rc)
                 CERROR("1b: close log %s failed: %d\n", name, rc);
         RETURN(rc);
@@ -437,6 +443,8 @@ static int llog_test_5(struct obd_device *obd)
                 rc = llog_cat_put(llh);
         if (rc)
                 CERROR("1b: close log %s failed: %d\n", name, rc);
+        llog_ctxt_put(ctxt);
+
         RETURN(rc);
 }
 
@@ -458,13 +466,13 @@ static int llog_test_6(struct obd_device *obd, char *name)
         if (mdc_obd == NULL) {
                 CERROR("6: no MDC devices connected to %s found.\n",
                        mds_uuid->uuid);
-                RETURN(-ENOENT);
+                GOTO(ctxt_release, rc = -ENOENT);
         }
 
         rc = obd_connect(&exph, mdc_obd, &uuid, NULL /* obd_connect_data */);
         if (rc) {
                 CERROR("6: failed to connect to MDC: %s\n", mdc_obd->obd_name);
-                RETURN(rc);
+                GOTO(ctxt_release, rc);
         }
         exp = class_conn2export(&exph);
 
@@ -472,7 +480,8 @@ static int llog_test_6(struct obd_device *obd, char *name)
         rc = llog_create(nctxt, &llh, NULL, name);
         if (rc) {
                 CERROR("6: llog_create failed %d\n", rc);
-                RETURN(rc);
+                llog_ctxt_put(nctxt);
+                GOTO(ctxt_release, rc);
         }
 
         rc = llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
@@ -491,12 +500,13 @@ static int llog_test_6(struct obd_device *obd, char *name)
 
 parse_out:
         rc = llog_close(llh);
+        llog_ctxt_put(nctxt);
         if (rc) {
                 CERROR("6: llog_close failed: rc = %d\n", rc);
         }
-
         rc = obd_disconnect(exp);
-
+ctxt_release:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -516,7 +526,7 @@ static int llog_test_7(struct obd_device *obd)
         rc = llog_create(ctxt, &llh, NULL, name);
         if (rc) {
                 CERROR("7: llog_create with name %s failed: %d\n", name, rc);
-                RETURN(rc);
+                GOTO(ctxt_release, rc);
         }
         llog_init_handle(llh, LLOG_F_IS_PLAIN, &uuid);
 
@@ -525,7 +535,7 @@ static int llog_test_7(struct obd_device *obd)
         rc = llog_write_rec(llh,  &lcr.lcr_hdr, NULL, 0, NULL, -1);
         if (rc) {
                 CERROR("7: write one log record failed: %d\n", rc);
-                RETURN(rc);
+                GOTO(ctxt_release, rc);
         }
 
         rc = llog_destroy(llh);
@@ -533,6 +543,8 @@ static int llog_test_7(struct obd_device *obd)
                 CERROR("7: llog_destroy failed: %d\n", rc);
         else
                 llog_free_handle(llh); 
+ctxt_release:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -591,7 +603,7 @@ static int llog_run_tests(struct obd_device *obd)
         case 0:
                 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
         }
-
+        llog_ctxt_put(ctxt);
         return rc;
 }
 
diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c
index bba8d3b770fc203b291bc83fc0138240c27cf4ef..a1d9fd739a754cca066f72514279ea5268c461cc 100644
--- a/lustre/obdclass/obd_config.c
+++ b/lustre/obdclass/obd_config.c
@@ -203,6 +203,7 @@ int class_attach(struct lustre_cfg *lcfg)
         cfs_init_timer(&obd->obd_recovery_timer);
         spin_lock_init(&obd->obd_processing_task_lock);
         cfs_waitq_init(&obd->obd_next_transno_waitq);
+        cfs_waitq_init(&obd->obd_llog_waitq);
         CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue);
         CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue);
 
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c
index 393c2bdb22a91cf54399747683dd9825df9fada5..ebb8caaeba90974d8e994cd532233cf3de28980f 100644
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -1824,10 +1824,20 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt,
                             int count, struct llog_catid *catid,
                             struct obd_uuid *uuid)
 {
+        struct filter_obd *filter = &obd->u.filter;
         struct llog_ctxt *ctxt;
         int rc;
         ENTRY;
 
+        OBD_ALLOC(filter->fo_lcm, sizeof(struct llog_commit_master));
+        if (!filter->fo_lcm)
+                RETURN(-ENOMEM);
+
+        rc = llog_init_commit_master((struct llog_commit_master *)
+                                     filter->fo_lcm);
+        if (rc)
+                GOTO(cleanup, rc);
+
         filter_mds_ost_repl_logops = llog_client_ops;
         filter_mds_ost_repl_logops.lop_cancel = llog_obd_repl_cancel;
         filter_mds_ost_repl_logops.lop_connect = llog_repl_connect;
@@ -1836,14 +1846,26 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt,
         rc = llog_setup(obd, LLOG_MDS_OST_REPL_CTXT, tgt, 0, NULL,
                         &filter_mds_ost_repl_logops);
         if (rc)
-                RETURN(rc);
+                GOTO(cleanup, rc);
 
         /* FIXME - assign unlink_cb for filter's recovery */
         ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb;
+        ctxt->loc_lcm = obd->u.filter.fo_lcm;
+        rc = llog_start_commit_thread(ctxt->loc_lcm);
+        llog_ctxt_put(ctxt);
+        if (rc)
+                GOTO(cleanup, rc);
 
         rc = llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL,
                         &filter_size_orig_logops);
+
+cleanup:
+        if (rc) {
+                llog_cleanup_commit_master(filter->fo_lcm, 0);
+                OBD_FREE(filter->fo_lcm, sizeof(struct llog_commit_master));
+                filter->fo_lcm = NULL;
+        }
         RETURN(rc);
 }
 
@@ -1853,6 +1875,14 @@ static int filter_llog_finish(struct obd_device *obd, int count)
         int rc = 0, rc2 = 0;
         ENTRY;
 
+        if (obd->u.filter.fo_lcm) { 
+                llog_cleanup_commit_master((struct llog_commit_master *)
+                                           obd->u.filter.fo_lcm, 0);
+                OBD_FREE(obd->u.filter.fo_lcm, 
+                         sizeof(struct llog_commit_master));
+                obd->u.filter.fo_lcm = NULL;
+        }
+
         ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         if (ctxt)
                 rc = llog_cleanup(ctxt);
@@ -2246,6 +2276,8 @@ static int filter_disconnect(struct obd_export *exp)
         /* flush any remaining cancel messages out to the target */
         ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         err = llog_sync(ctxt, exp);
+        llog_ctxt_put(ctxt);
+
         if (err)
                 CERROR("error flushing logs to MDS: rc %d\n", err);
 
@@ -3032,14 +3064,16 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
                        oa->o_id);
                 /* If object already gone, cancel cookie right now */
                 if (oa->o_valid & OBD_MD_FLCOOKIE) {
+                        struct llog_ctxt *ctxt;
                         fcc = obdo_logcookie(oa);
-                        llog_cancel(llog_get_context(obd, fcc->lgc_subsys + 1),
-                                    NULL, 1, fcc, 0);
+                        ctxt = llog_get_context(obd, fcc->lgc_subsys + 1);
+                        llog_cancel(ctxt, NULL, 1, fcc, 0);
+                        llog_ctxt_put(ctxt);
                         fcc = NULL; /* we didn't allocate fcc, don't free it */
                 }
                 GOTO(cleanup, rc = -ENOENT);
         }
-
+        
         filter_prepare_destroy(obd, oa->o_id);
 
         /* Our MDC connection is established by the MDS to us */
@@ -3192,6 +3226,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
                 /* flush any remaining cancel messages out to the target */
                 ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_REPL_CTXT);
                 llog_sync(ctxt, exp);
+                llog_ctxt_put(ctxt);
                 RETURN(rc);
         }
 
@@ -3290,6 +3325,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
         /* setup llog imports */
         ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
+        llog_ctxt_put(ctxt);
 
         lquota_setinfo(filter_quota_interface_ref, exp, obd);
 
diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c
index e0230c3ad1f4a3f4f7dee7a094305fddb43caea3..3b3bb2d01d0cc856a65b53af1a511377a4f8a2a1 100644
--- a/lustre/obdfilter/filter_log.c
+++ b/lustre/obdfilter/filter_log.c
@@ -102,19 +102,27 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
                               void *cb_data, int error)
 {
         struct llog_cookie *cookie = cb_data;
+        struct llog_ctxt *ctxt;
         int rc;
 
-        if (error != 0) {
-                CDEBUG(D_INODE, "not cancelling llog cookie on error %d\n",
-                       error);
+        if (error != 0 || obd->obd_stopping) {
+                CDEBUG(D_INODE, "not cancel logcookie err %d stopping %d \n",
+                       error, obd->obd_stopping);
                 OBD_FREE(cookie, sizeof(*cookie));
                 return;
         }
 
-        rc = llog_cancel(llog_get_context(obd, cookie->lgc_subsys + 1),
-                         NULL, 1, cookie, 0);
+        ctxt = llog_get_context(obd, cookie->lgc_subsys + 1);
+        if (!ctxt)
+                GOTO(out, rc = 0);
+
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT, 30);
+
+        rc = llog_cancel(ctxt, NULL, 1, cookie, 0);
         if (rc)
                 CERROR("error cancelling log cookies: rc = %d\n", rc);
+out:
+        llog_ctxt_put(ctxt);
         OBD_FREE(cookie, sizeof(*cookie));
 }
 
@@ -134,7 +142,7 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt,
 
         lur = (struct llog_unlink_rec *)rec;
         OBDO_ALLOC(oa);
-        if (oa == NULL) 
+        if (oa == NULL)
                 RETURN(-ENOMEM);
         oa->o_valid |= OBD_MD_FLCOOKIE;
         oa->o_id = lur->lur_oid;
@@ -205,11 +213,15 @@ int filter_recov_log_mds_ost_cb(struct llog_handle *llh,
         int rc = 0;
         ENTRY;
 
+        if (ctxt->loc_obd->obd_stopping)
+                RETURN(LLOG_PROC_BREAK);
+
         if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) {
                 CERROR("log is not plain\n");
                 RETURN(-EINVAL);
         }
 
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT, 30);
         cookie.lgc_lgl = llh->lgh_id;
         cookie.lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
         cookie.lgc_index = rec->lrh_index;
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index 9c9adc459dc58174a0010560c86bf527abce662b..3b02c24f3bb4c583919a094812e72336d4604b43 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -3272,6 +3272,7 @@ static int osc_setinfo_mds_conn_interpret(struct ptlrpc_request *req,
                                "ctxt %p: %d\n", ctxt, rc);
         }
 
+        llog_ctxt_put(ctxt);
         spin_lock(&imp->imp_lock);
         imp->imp_server_timeout = 1;
         imp->imp_pingable = 1;
@@ -3456,6 +3457,8 @@ static int osc_disconnect(struct obd_export *exp)
         if (obd->u.cli.cl_conn_count == 1)
                 /* flush any remaining cancel messages out to the target */
                 llog_sync(ctxt, exp);
+        
+        llog_ctxt_put(ctxt);
 
         rc = client_disconnect_export(exp);
         return rc;
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c
index 9d0d065313b6d83573eb63482f9f243657df0458..512c1c63754164d12093a3a0c9a8b8eda524ba47 100644
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -1671,11 +1671,6 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf)
         rc = cleanup_group_info();
         if (rc)
                 RETURN(rc);
-
-        rc = llog_start_commit_thread();
-        if (rc < 0)
-                RETURN(rc);
-
         lprocfs_init_vars(ost, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 
diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c
index 870207185a8f8741dad218c957ede06f7ed34363..eee2b4f2f23805a641061a73dd7a8d5e670c9085 100644
--- a/lustre/ptlrpc/llog_net.c
+++ b/lustre/ptlrpc/llog_net.c
@@ -117,6 +117,8 @@ int llog_handle_connect(struct ptlrpc_request *req)
         ctxt = llog_get_context(obd, req_body->lgdc_ctxt_idx);
         rc = llog_connect(ctxt, 1, &req_body->lgdc_logid,
                           &req_body->lgdc_gen, NULL);
+
+        llog_ctxt_put(ctxt);
         if (rc != 0)
                 CERROR("failed at llog_relp_connect\n");
 
diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c
index 7b82f4eb42b1c77bfe6238ab00d50a274abfde05..3de3553266f8b9fc12db397a11dc6e2c34b4bf86 100644
--- a/lustre/ptlrpc/llog_server.c
+++ b/lustre/ptlrpc/llog_server.c
@@ -63,7 +63,7 @@ int llog_origin_handle_create(struct ptlrpc_request *req)
                                  lustre_swab_llogd_body);
         if (body == NULL) {
                 CERROR ("Can't unpack llogd_body\n");
-                GOTO(out, rc =-EFAULT);
+                RETURN(-EFAULT);
         }
 
         if (body->lgd_logid.lgl_oid > 0)
@@ -73,14 +73,14 @@ int llog_origin_handle_create(struct ptlrpc_request *req)
                 name = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF + 1, 0);
                 if (name == NULL) {
                         CERROR("Can't unpack name\n");
-                        GOTO(out, rc = -EFAULT);
+                        RETURN(-EFAULT);
                 }
                 CDEBUG(D_INFO, "opening log %s\n", name);
         }
 
         ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
         if (ctxt == NULL)
-                GOTO(out, rc = -EINVAL);
+                RETURN(-EINVAL);
         disk_obd = ctxt->loc_exp->exp_obd;
         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
 
@@ -101,7 +101,7 @@ out_close:
                 rc = rc2;
 out_pop:
         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
-out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -124,7 +124,7 @@ int llog_origin_handle_destroy(struct ptlrpc_request *req)
                                  lustre_swab_llogd_body);
         if (body == NULL) {
                 CERROR ("Can't unpack llogd_body\n");
-                GOTO(out, rc =-EFAULT);
+                RETURN(-EFAULT);
         }
 
         if (body->lgd_logid.lgl_oid > 0)
@@ -132,7 +132,8 @@ int llog_origin_handle_destroy(struct ptlrpc_request *req)
 
         ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
         if (ctxt == NULL)
-                GOTO(out, rc = -EINVAL);
+                RETURN(-EINVAL);
+
         disk_obd = ctxt->loc_exp->exp_obd;
         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
 
@@ -160,7 +161,7 @@ out_close:
                 llog_close(loghandle);
 out_pop:
         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
-out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -186,12 +187,12 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req)
                                   lustre_swab_llogd_body);
         if (body == NULL) {
                 CERROR ("Can't unpack llogd_body\n");
-                GOTO(out, rc =-EFAULT);
+                RETURN(-EFAULT);
         }
 
         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
         if (!buf)
-                GOTO(out, rc = -ENOMEM);
+                RETURN(-ENOMEM);
 
         ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
         if (ctxt == NULL)
@@ -233,9 +234,9 @@ out_close:
 
 out_pop:
         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+        llog_ctxt_put(ctxt);
 out_free:
         OBD_FREE(buf, LLOG_CHUNK_SIZE);
-out:
         RETURN(rc);
 }
 
@@ -261,12 +262,12 @@ int llog_origin_handle_prev_block(struct ptlrpc_request *req)
                                   lustre_swab_llogd_body);
         if (body == NULL) {
                 CERROR ("Can't unpack llogd_body\n");
-                GOTO(out, rc =-EFAULT);
+                RETURN(-EFAULT);
         }
 
         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
         if (!buf)
-                GOTO(out, rc = -ENOMEM);
+                RETURN(-ENOMEM);
 
         ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
         LASSERT(ctxt != NULL);
@@ -288,7 +289,6 @@ int llog_origin_handle_prev_block(struct ptlrpc_request *req)
         if (rc)
                 GOTO(out_close, rc);
 
-
         rc = lustre_pack_reply(req, 3, size, NULL);
         if (rc)
                 GOTO(out_close, rc = -ENOMEM);
@@ -306,8 +306,8 @@ out_close:
 
 out_pop:
         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+        llog_ctxt_put(ctxt);
         OBD_FREE(buf, LLOG_CHUNK_SIZE);
-out:
         RETURN(rc);
 }
 
@@ -330,12 +330,12 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req)
                                   lustre_swab_llogd_body);
         if (body == NULL) {
                 CERROR ("Can't unpack llogd_body\n");
-                GOTO(out, rc =-EFAULT);
+                RETURN(-EFAULT);
         }
 
         ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
         if (ctxt == NULL)
-                GOTO(out, rc = -EINVAL);
+                RETURN(-EINVAL);
         disk_obd = ctxt->loc_exp->exp_obd;
         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
 
@@ -360,11 +360,9 @@ out_close:
         rc2 = llog_close(loghandle);
         if (!rc)
                 rc = rc2;
-
 out_pop:
         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
-
-out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -382,7 +380,7 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req)
         struct obd_device *obd = req->rq_export->exp_obd;
         struct obd_device *disk_obd;
         struct llog_cookie *logcookies;
-        struct llog_ctxt *ctxt;
+        struct llog_ctxt *ctxt = NULL;
         int num_cookies, rc = 0, err, i;
         struct lvfs_run_ctxt saved;
         struct llog_handle *cathandle;
@@ -436,6 +434,7 @@ pop_ctxt:
         else
                 CDEBUG(D_RPCTRACE, "cancel %d llog-records\n", num_cookies);
 
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 EXPORT_SYMBOL(llog_origin_handle_cancel);
@@ -452,7 +451,7 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len,
         char *out = buf;
 
         if (ctxt == NULL || mds == NULL)
-                RETURN(-EOPNOTSUPP);
+                GOTO(release_ctxt, rc = -EOPNOTSUPP);
 
         push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
 
@@ -491,6 +490,8 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len,
         }
 out_pop:
         pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
+release_ctxt:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
@@ -506,7 +507,7 @@ static int llog_catinfo_cb(struct llog_handle *cat,
 {
         static char *out = NULL;
         static int remains = 0;
-        struct llog_ctxt *ctxt;
+        struct llog_ctxt *ctxt = NULL;
         struct llog_handle *handle;
         struct llog_logid *logid;
         struct llog_logid_rec *lir;
@@ -518,11 +519,13 @@ static int llog_catinfo_cb(struct llog_handle *cat,
                 remains = cbd->remains;
                 cbd->init = 0;
         }
         ctxt = cbd->ctxt;
 
         if (!(cat->lgh_hdr->llh_flags & LLOG_F_IS_CAT))
                 RETURN(-EINVAL);
+        if (!ctxt) /* no llog ctxt to hand to llog_create() below */
+                RETURN(-EINVAL);
 
         lir = (struct llog_logid_rec *)rec;
         logid = &lir->lid_id;
         rc = llog_create(ctxt, &handle, logid, NULL);
@@ -572,14 +575,14 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf,
         struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
 
         if (ctxt == NULL || mds == NULL)
-                RETURN(-EOPNOTSUPP);
-
+                GOTO(release_ctxt, rc = -EOPNOTSUPP);
+       
         count = mds->mds_lov_desc.ld_tgt_count;
         size = sizeof(*idarray) * count;
 
         OBD_ALLOC(idarray, size);
         if (!idarray)
-                RETURN(-ENOMEM);
+                GOTO(release_ctxt, rc = -ENOMEM);
 
         rc = llog_get_cat_list(obd, obd, name, count, idarray);
         if (rc)
@@ -626,6 +629,9 @@ out_pop:
         pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
 out_free:
         OBD_FREE(idarray, size);
+release_ctxt:
+        llog_ctxt_put(ctxt);
+
         RETURN(rc);
 }
 
diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h
index c1238b933a8f637ce6e1f5cb5e4c034c20c5cdbc..0a299650e68116d86aaafc778b06afe3388b9f0e 100644
--- a/lustre/ptlrpc/ptlrpc_internal.h
+++ b/lustre/ptlrpc/ptlrpc_internal.h
@@ -63,8 +63,6 @@ void ptlrpc_lprocfs_do_request_stat (struct ptlrpc_request *req,
 #endif /* LPROCFS */
 
 /* recovd_thread.c */
-int llog_init_commit_master(void);
-int llog_cleanup_commit_master(int force);
 
 static inline int opcode_offset(__u32 opc) {
         if (opc < OST_LAST_OPC) {
diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c
index 40a1018d11635a953760074167c425710c43fb66..ebfd6cedef110e6a46b4e4fca71090c7cc53cc4c 100644
--- a/lustre/ptlrpc/ptlrpc_module.c
+++ b/lustre/ptlrpc/ptlrpc_module.c
@@ -68,17 +68,12 @@ __init int ptlrpc_init(void)
                 GOTO(cleanup, rc);
         cleanup_phase = 2;
 
-        rc = llog_init_commit_master();
-        if (rc)
-                GOTO(cleanup, rc);
-        cleanup_phase = 3;
-
         ptlrpc_put_connection_superhack = ptlrpc_put_connection;
 
         rc = ptlrpc_start_pinger();
         if (rc)
                 GOTO(cleanup, rc);
-        cleanup_phase = 4;
+        cleanup_phase = 3;
 
         rc = ldlm_init();
         if (rc)
@@ -87,10 +82,8 @@ __init int ptlrpc_init(void)
 
 cleanup:
         switch(cleanup_phase) {
-        case 4:
-                ptlrpc_stop_pinger();
         case 3:
-                llog_cleanup_commit_master(1);
+                ptlrpc_stop_pinger();
         case 2:
                 ptlrpc_cleanup_connection();
         case 1:
@@ -108,7 +101,6 @@ static void __exit ptlrpc_exit(void)
         ptlrpc_stop_pinger();
         ptlrpc_exit_portals();
         ptlrpc_cleanup_connection();
-        llog_cleanup_commit_master(0);
 }
 
 /* connection.c */
diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c
index e0b21d64c5b69f0b7a859f0be487cceebfe0dc93..49adcbdc64942b44359756376a10218106008bfc 100644
--- a/lustre/ptlrpc/recov_thread.c
+++ b/lustre/ptlrpc/recov_thread.c
@@ -55,13 +55,10 @@
 
 #ifdef __KERNEL__
 
-static struct llog_commit_master lustre_lcm;
-static struct llog_commit_master *lcm = &lustre_lcm;
-
 /* Allocate new commit structs in case we do not have enough.
  * Make the llcd size small enough that it fits into a single page when we
  * are sending/receiving it. */
-static int llcd_alloc(void)
+static int llcd_alloc(struct llog_commit_master *lcm)
 {
         struct llog_canceld_ctxt *llcd;
         int llcd_size;
@@ -85,7 +82,7 @@ static int llcd_alloc(void)
 }
 
 /* Get a free cookie struct from the list */
-struct llog_canceld_ctxt *llcd_grab(void)
+static struct llog_canceld_ctxt *llcd_grab(struct llog_commit_master *lcm)
 {
         struct llog_canceld_ctxt *llcd;
 
@@ -93,7 +90,7 @@ repeat:
         spin_lock(&lcm->lcm_llcd_lock);
         if (list_empty(&lcm->lcm_llcd_free)) {
                 spin_unlock(&lcm->lcm_llcd_lock);
-                if (llcd_alloc() < 0) {
+                if (llcd_alloc(lcm) < 0) {
                         CERROR("unable to allocate log commit data!\n");
                         return NULL;
                 }
@@ -110,10 +107,12 @@ repeat:
 
         return llcd;
 }
-EXPORT_SYMBOL(llcd_grab);
 
 static void llcd_put(struct llog_canceld_ctxt *llcd)
 {
+        struct llog_commit_master *lcm = llcd->llcd_lcm;
+
+        llog_ctxt_put(llcd->llcd_ctxt);
         if (atomic_read(&lcm->lcm_llcd_numfree) >= lcm->lcm_llcd_maxfree) {
                 int llcd_size = llcd->llcd_size +
                          offsetof(struct llog_canceld_ctxt, llcd_cookies);
@@ -127,15 +126,16 @@ static void llcd_put(struct llog_canceld_ctxt *llcd)
 }
 
 /* Send some cookies to the appropriate target */
-void llcd_send(struct llog_canceld_ctxt *llcd)
+static void llcd_send(struct llog_canceld_ctxt *llcd)
 {
-        spin_lock(&llcd->llcd_lcm->lcm_llcd_lock);
-        list_add_tail(&llcd->llcd_list, &llcd->llcd_lcm->lcm_llcd_pending);
-        spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
-
+        if (!(llcd->llcd_lcm->lcm_flags & LLOG_LCM_FL_EXIT)) {
+                spin_lock(&llcd->llcd_lcm->lcm_llcd_lock);
+                list_add_tail(&llcd->llcd_list,
+                              &llcd->llcd_lcm->lcm_llcd_pending);
+                spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
+        }
         cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1);
 }
-EXPORT_SYMBOL(llcd_send);
 
 /* deleted objects have a commit callback that cancels the MDS
  * log record for the deletion.  The commit callback calls this
@@ -161,7 +161,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
 
         if (count > 0 && cookies != NULL) {
                 if (llcd == NULL) {
-                        llcd = llcd_grab();
+                        llcd = llcd_grab(ctxt->loc_lcm);
                         if (llcd == NULL) {
                                 CERROR("couldn't get an llcd - dropped "LPX64
                                        ":%x+%u\n",
@@ -170,7 +170,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
                                        cookies->lgc_index);
                                 GOTO(out, rc = -ENOMEM);
                         }
-                        llcd->llcd_ctxt = ctxt;
+                        llcd->llcd_ctxt = llog_ctxt_get(ctxt);
                         ctxt->loc_llcd = llcd;
                 }
 
@@ -254,7 +254,7 @@ static int log_commit_thread(void *arg)
                 /* If we do not have enough pages available, allocate some */
                 while (atomic_read(&lcm->lcm_llcd_numfree) <
                        lcm->lcm_llcd_minfree) {
-                        if (llcd_alloc() < 0)
+                        if (llcd_alloc(lcm) < 0)
                                 break;
                 }
 
@@ -291,7 +291,7 @@ static int log_commit_thread(void *arg)
 
                 if (atomic_read(&lcm->lcm_thread_numidle) <= 1 &&
                     atomic_read(&lcm->lcm_thread_total) < lcm->lcm_thread_max) {
-                        rc = llog_start_commit_thread();
+                        rc = llog_start_commit_thread(lcm);
                         if (rc < 0)
                                 CERROR("error starting thread: rc %d\n", rc);
                 }
@@ -444,7 +444,7 @@ static int log_commit_thread(void *arg)
         return 0;
 }
 
-int llog_start_commit_thread(void)
+int llog_start_commit_thread(struct llog_commit_master *lcm)
 {
         int rc;
         ENTRY;
@@ -470,7 +470,7 @@ static struct llog_process_args {
         void                    *llpa_arg;
 } llpa;
 
-int llog_init_commit_master(void)
+int llog_init_commit_master(struct llog_commit_master *lcm)
 {
         CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy);
         CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle);
@@ -488,8 +488,10 @@ int llog_init_commit_master(void)
         sema_init(&llpa.llpa_sem, 1);
         return 0;
 }
+EXPORT_SYMBOL(llog_init_commit_master);
 
-int llog_cleanup_commit_master(int force)
+int llog_cleanup_commit_master(struct llog_commit_master *lcm,
+                               int force)
 {
         lcm->lcm_flags |= LLOG_LCM_FL_EXIT;
         if (force)
@@ -500,6 +502,7 @@ int llog_cleanup_commit_master(int force)
                                  atomic_read(&lcm->lcm_thread_total) == 0);
         return 0;
 }
+EXPORT_SYMBOL(llog_cleanup_commit_master);
 
 static int log_process_thread(void *args)
 {
@@ -517,12 +520,12 @@ static int log_process_thread(void *args)
         rc = llog_create(ctxt, &llh, &logid, NULL);
         if (rc) {
                 CERROR("llog_create failed %d\n", rc);
-                RETURN(rc);
+                GOTO(out, rc);
         }
         rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL);
         if (rc) {
                 CERROR("llog_init_handle failed %d\n", rc);
-                GOTO(out, rc);
+                GOTO(release_llh, rc);
         }
 
         if (cb) {
@@ -536,24 +539,33 @@ static int log_process_thread(void *args)
         CDEBUG(D_HA, "send llcd %p:%p forcibly after recovery\n",
                ctxt->loc_llcd, ctxt);
         llog_sync(ctxt, NULL);
-out:
+
+release_llh:
         rc = llog_cat_put(llh);
         if (rc)
                 CERROR("llog_cat_put failed %d\n", rc);
-
+out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
 
 static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg)
 {
+        struct obd_device *obd = ctxt->loc_obd;
         int rc;
         ENTRY;
 
+        if (obd->obd_stopping)
+                RETURN(-ENODEV);
+
         mutex_down(&llpa.llpa_sem);
-        llpa.llpa_ctxt = ctxt;
         llpa.llpa_cb = handle;
         llpa.llpa_arg = arg;
-
+        llpa.llpa_ctxt = llog_get_context(ctxt->loc_obd, ctxt->loc_idx);
+        if (!llpa.llpa_ctxt) {
+                mutex_up(&llpa.llpa_sem); /* pairs with mutex_down above */
+                RETURN(-ENODEV);
+        }
         rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
         if (rc < 0)
                 CERROR("error starting log_process_thread: %d\n", rc);
@@ -581,13 +593,13 @@ int llog_repl_connect(struct llog_ctxt *ctxt, int count,
 
         mutex_down(&ctxt->loc_sem);
         ctxt->loc_gen = *gen;
-        llcd = llcd_grab();
+        llcd = llcd_grab(ctxt->loc_lcm);
         if (llcd == NULL) {
                 CERROR("couldn't get an llcd\n");
                 mutex_up(&ctxt->loc_sem);
                 RETURN(-ENOMEM);
         }
-        llcd->llcd_ctxt = ctxt;
+        llcd->llcd_ctxt = llog_ctxt_get(ctxt);
         ctxt->loc_llcd = llcd;
         mutex_up(&ctxt->loc_sem);
 
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index 7d3ffc6341ef838449c5d764f638fe1d13fee106..873a3c9f20e4e6950fdc5e29abe7cf5cb2628394 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -2,8 +2,8 @@
 
 set -e
 
-# bug number:  6088 10124 10800
-ALWAYS_EXCEPT="8    15c   17    $REPLAY_DUAL_EXCEPT"
+# bug number:  6088 10124 
+ALWAYS_EXCEPT="8    15c   $REPLAY_DUAL_EXCEPT"
 
 PTLDEBUG=${PTLDEBUG:--1}
 LUSTRE=${LUSTRE:-`dirname $0`/..}
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 6fc1d5227fda8be6f8ac414982a4b0f34cd4f9cc..971c82d466c0897eecf7c8f08ed9cf48641261dc 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -1154,5 +1154,45 @@ test_60() {
 }
 run_test 60 "test llog post recovery init vs llog unlink"
 
+# Test the race between the llog recovery thread and llog cleanup.
+test_61() {
+    mkdir $DIR/$tdir
+    createmany -o $DIR/$tdir/$tfile-%d 800
+    replay_barrier ost1 
+#   OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221 (set via fail_loc two lines below)
+    unlinkmany $DIR/$tdir/$tfile-%d 800 
+    do_facet ost "sysctl -w lustre.fail_loc=0x80000221"
+    facet_failover ost1
+    sleep 10 
+    fail ost1
+    sleep 30
+    do_facet ost "sysctl -w lustre.fail_loc=0x0"
+    $CHECKSTAT -t file $DIR/$tdir/$tfile-* && return 1
+    rmdir $DIR/$tdir
+}
+run_test 61 "test race llog recovery vs llog cleanup"
+
+# Test the race between MDS llog sync and llog cleanup.
+test_61b() {
+#   OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a (set via fail_loc on the next line)
+    do_facet mds "sysctl -w lustre.fail_loc=0x8000013a"
+    facet_failover mds 
+    sleep 10
+    fail mds
+    do_facet client dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 || return 1
+}
+run_test 61b "test race mds llog sync vs llog cleanup"
+
+# Test the race between the cancel-cookie callback and llog cleanup.
+test_61c() {
+#   OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222 (set via fail_loc two lines below)
+    touch $DIR/$tfile
+    do_facet ost "sysctl -w lustre.fail_loc=0x80000222"
+    rm $DIR/$tfile
+    sleep 10
+    fail ost1
+}
+run_test 61c "test race cancel cookie cb vs llog cleanup"
+
 equals_msg `basename $0`: test complete, cleaning up
 $CLEANUP