From 29379abecac22bc9d99adfea4a16981555be2297 Mon Sep 17 00:00:00 2001 From: wangdi <wangdi> Date: Thu, 26 Jul 2007 04:48:02 +0000 Subject: [PATCH] Branch: b1_6 land 10800(adding llog ctxt refcount) to b1_6. b=10800 i=green i=shadow --- lustre/ChangeLog | 9 +++ lustre/include/liblustre.h | 3 - lustre/include/lustre_commit_confd.h | 6 +- lustre/include/lustre_log.h | 47 ++++++++++++-- lustre/include/obd.h | 2 + lustre/include/obd_support.h | 3 + lustre/liblustre/llite_lib.c | 3 +- lustre/llite/llite_lib.c | 1 + lustre/lov/lov_ea.c | 18 +++--- lustre/lov/lov_log.c | 4 ++ lustre/mdc/mdc_request.c | 5 +- lustre/mds/handler.c | 9 ++- lustre/mds/mds_join.c | 4 +- lustre/mds/mds_log.c | 7 +++ lustre/mds/mds_lov.c | 32 +++++++--- lustre/mds/mds_reint.c | 2 + lustre/mgc/libmgc.c | 1 + lustre/mgc/mgc_request.c | 9 ++- lustre/mgs/mgs_handler.c | 6 +- lustre/mgs/mgs_llog.c | 47 +++++++++----- lustre/obdclass/llog_obd.c | 92 +++++++++++++++++++++------- lustre/obdclass/llog_test.c | 44 ++++++++----- lustre/obdclass/obd_config.c | 1 + lustre/obdfilter/filter.c | 44 +++++++++++-- lustre/obdfilter/filter_log.c | 24 ++++++-- lustre/osc/osc_request.c | 3 + lustre/ost/ost_handler.c | 5 -- lustre/ptlrpc/llog_net.c | 2 + lustre/ptlrpc/llog_server.c | 60 ++++++++++-------- lustre/ptlrpc/ptlrpc_internal.h | 2 - lustre/ptlrpc/ptlrpc_module.c | 12 +--- lustre/ptlrpc/recov_thread.c | 68 +++++++++++--------- lustre/tests/replay-dual.sh | 4 +- lustre/tests/replay-single.sh | 40 ++++++++++++ 34 files changed, 441 insertions(+), 178 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index a211ae2971..4e238cbbda 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -19,6 +19,15 @@ Bugzilla : 12786 Description: lfs setstripe enhancement Details : Make lfs setstripe understand 'k', 'm' and 'g' for stripe size. +Severity : normal +Frequency : mds/oss recovery +Bugzilla : 10800 +Description: llog ctxt is referenced after it has been freed. 
+Details : llog ctxt refcount was added to avoid the race between ctxt free + and llog recovery process. Each llog user must hold ctxt refcount + before it accesses the llog. And the llog ctxt can only be freed + when its refcount is zero. + -------------------------------------------------------------------------------- 2007-07-30 Cluster File Systems, Inc. <info@clusterfs.com> diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 88730590c0..46b5fc831e 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -752,9 +752,6 @@ typedef enum { cap_t cap_get_proc(void); int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *); -/* log related */ -static inline int llog_init_commit_master(void) { return 0; } -static inline int llog_cleanup_commit_master(int force) { return 0; } static inline void libcfs_run_lbug_upcall(char *file, const char *fn, const int l){} diff --git a/lustre/include/lustre_commit_confd.h b/lustre/include/lustre_commit_confd.h index 40b1978814..1804615d8b 100644 --- a/lustre/include/lustre_commit_confd.h +++ b/lustre/include/lustre_commit_confd.h @@ -51,8 +51,8 @@ struct llog_commit_daemon { }; /* ptlrpc/recov_thread.c */ -int llog_start_commit_thread(void); -struct llog_canceld_ctxt *llcd_grab(void); -void llcd_send(struct llog_canceld_ctxt *llcd); +int llog_start_commit_thread(struct llog_commit_master *); +int llog_init_commit_master(struct llog_commit_master *); +int llog_cleanup_commit_master(struct llog_commit_master *lcm, int force); #endif /* _LUSTRE_COMMIT_CONFD_H */ diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index 99adec6b3f..83bfc064e3 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -120,6 +120,7 @@ int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); /* llog_obd.c */ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, int count, struct llog_logid *logid,struct llog_operations *op); 
+int __llog_ctxt_put(struct llog_ctxt *ctxt); int llog_cleanup(struct llog_ctxt *); int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, @@ -212,6 +213,8 @@ struct llog_ctxt { struct llog_handle *loc_handle; struct llog_canceld_ctxt *loc_llcd; struct semaphore loc_sem; /* protects loc_llcd and loc_imp */ + atomic_t loc_refcount; + struct llog_commit_master *loc_lcm; void *llog_proc_cb; }; @@ -267,13 +270,49 @@ static inline int llog_data_len(int len) return size_round(len); } +#define llog_ctxt_get(ctxt) \ +({ \ + struct llog_ctxt *ctxt_ = ctxt; \ + LASSERT(atomic_read(&ctxt_->loc_refcount) > 0); \ + atomic_inc(&ctxt_->loc_refcount); \ + CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt_, \ + atomic_read(&ctxt_->loc_refcount)); \ + ctxt_; \ +}) + +#define llog_ctxt_put(ctxt) \ +do { \ + if ((ctxt) == NULL) \ + break; \ + CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", (ctxt), \ + atomic_read(&(ctxt)->loc_refcount) - 1); \ + LASSERT(atomic_read(&(ctxt)->loc_refcount) > 0); \ + LASSERT(atomic_read(&(ctxt)->loc_refcount) < 0x5a5a5a); \ + __llog_ctxt_put(ctxt); \ +} while (0) + static inline struct llog_ctxt *llog_get_context(struct obd_device *obd, - int index) + int index) { - if (index < 0 || index >= LLOG_MAX_CTXTS) - return NULL; + struct llog_ctxt *ctxt; + + if (index < 0 || index >= LLOG_MAX_CTXTS) + return NULL; + + spin_lock(&obd->obd_dev_lock); + if (obd->obd_llog_ctxt[index] == NULL) { + spin_unlock(&obd->obd_dev_lock); + CWARN("obd %p and ctxt index %d is NULL \n", obd, index); + return NULL; + } + ctxt = llog_ctxt_get(obd->obd_llog_ctxt[index]); + spin_unlock(&obd->obd_dev_lock); + return ctxt; +} - return obd->obd_llog_ctxt[index]; +static inline int llog_ctxt_null(struct obd_device *obd, int index) +{ + return (obd->obd_llog_ctxt[index] == NULL); } static inline int llog_write_rec(struct llog_handle *handle, diff --git a/lustre/include/obd.h b/lustre/include/obd.h 
index 83d0c52633..817058d15d 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -310,6 +310,7 @@ struct filter_obd { int fo_fmd_max_num; /* per exp filter_mod_data */ int fo_fmd_max_age; /* jiffies to fmd expiry */ + void *fo_lcm; }; #define OSC_MAX_RIF_DEFAULT 8 @@ -727,6 +728,7 @@ struct obd_device { struct lustre_class_hash_body *obd_nid_hash_body; atomic_t obd_refcount; cfs_waitq_t obd_refcount_waitq; + cfs_waitq_t obd_llog_waitq; struct list_head obd_exports; int obd_num_exports; struct ldlm_namespace *obd_namespace; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 40aa2b3f2d..3fdc2fa47d 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -104,6 +104,7 @@ extern int obd_race_state; #define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137 #define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138 #define OBD_FAIL_MDS_OSC_PRECREATE 0x139 +#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -138,6 +139,8 @@ extern int obd_race_state; #define OBD_FAIL_OST_SETATTR_CREDITS 0x21e #define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f #define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 +#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221 +#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index d179c10e69..4da1a09b4c 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -167,9 +167,10 @@ int liblustre_process_log(struct config_llog_instance *cfg, exp = class_conn2export(&mgc_conn); - ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT]; + ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); cfg->cfg_flags |= CFG_F_COMPAT146; rc = class_config_parse_llog(ctxt, profile, cfg); + llog_ctxt_put(ctxt); if (rc) { CERROR("class_config_parse_llog failed: rc = %d\n", rc); } diff --git a/lustre/llite/llite_lib.c 
b/lustre/llite/llite_lib.c index 3a3a041deb..6835b02e9c 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -832,6 +832,7 @@ static int old_lustre_process_log(struct super_block *sb, char *newprofile, */ rc = class_config_dump_llog(ctxt, profile, cfg); #endif + llog_ctxt_put(ctxt); switch (rc) { case 0: { /* Set the caller's profile name to the old-style */ diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 7392d83f0f..860125fd05 100755 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -468,7 +468,7 @@ static int lsm_revalidate_join(struct lov_stripe_md *lsm, LASSERT(ctxt); if (lsm->lsm_array && lsm->lsm_array->lai_ext_array) - RETURN(0); + GOTO(release_ctxt, rc = 0); CDEBUG(D_INFO, "get lsm logid: "LPU64":"LPU64"\n", lsm->lsm_array->lai_array_id.lgl_oid, @@ -476,7 +476,7 @@ static int lsm_revalidate_join(struct lov_stripe_md *lsm, OBD_ALLOC(lsm->lsm_array->lai_ext_array,lsm->lsm_array->lai_ext_count * sizeof (struct lov_extent)); if (!lsm->lsm_array->lai_ext_array) - RETURN(-ENOMEM); + GOTO(release_ctxt, rc = -ENOMEM); CDEBUG(D_INFO, "get lsm logid: "LPU64":"LPU64"\n", lsm->lsm_array->lai_array_id.lgl_oid, @@ -497,6 +497,8 @@ static int lsm_revalidate_join(struct lov_stripe_md *lsm, out: if (rc) lovea_free_array_info(lsm); +release_ctxt: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -509,16 +511,15 @@ int lsm_destroy_join(struct lov_stripe_md *lsm, struct obdo *oa, ENTRY; LASSERT(md_exp != NULL); + /*for those orphan inode, we should keep array id*/ + if (!(oa->o_valid & OBD_MD_FLCOOKIE)) + RETURN(rc); + ctxt = llog_get_context(md_exp->exp_obd, LLOG_LOVEA_REPL_CTXT); if (!ctxt) - GOTO(out, rc = -EINVAL); + RETURN(-EINVAL); LASSERT(lsm->lsm_array != NULL); - /*for those orphan inode, we should keep array id*/ - if (!(oa->o_valid & OBD_MD_FLCOOKIE)) - RETURN(0); - - LASSERT(ctxt != NULL); rc = llog_create(ctxt, &llh, &lsm->lsm_array->lai_array_id, NULL); if (rc) @@ -530,6 +531,7 @@ int lsm_destroy_join(struct lov_stripe_md *lsm, struct 
obdo *oa, } llog_free_handle(llh); out: + llog_ctxt_put(ctxt); RETURN(rc); } diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 232df7ae00..6e59ecfd23 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -92,6 +92,7 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, rc += llog_add(cctxt, rec, NULL, logcookies + rc, numcookies - rc); + llog_ctxt_put(cctxt); } RETURN(rc); @@ -120,6 +121,8 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count, child = lov->lov_tgts[i]->ltd_exp->exp_obd; cctxt = llog_get_context(child, ctxt->loc_idx); rc = llog_connect(cctxt, 1, logid, gen, uuid); + llog_ctxt_put(cctxt); + if (rc) { CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc); if (!err) @@ -154,6 +157,7 @@ static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls int err; err = llog_cancel(cctxt, NULL, 1, cookies, flags); + llog_ctxt_put(cctxt); if (err && lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { CERROR("error: objid "LPX64" subobj "LPX64 " on OST idx %d: rc = %d\n", lsm->lsm_object_id, diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 43dc9a4d44..aa55c18028 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -828,6 +828,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_PARSE: { ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL); + llog_ctxt_put(ctxt); GOTO(out, rc); } #ifdef __KERNEL__ @@ -835,7 +836,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_LLOG_PRINT: { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); rc = llog_ioctl(ctxt, cmd, data); - + llog_ctxt_put(ctxt); GOTO(out, rc); } #endif @@ -1284,6 +1285,7 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); ctxt->loc_imp = obd->u.cli.cl_import; + 
llog_ctxt_put(ctxt); } rc = llog_setup(obd, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, @@ -1291,6 +1293,7 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, if (rc == 0) { ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); ctxt->loc_imp = obd->u.cli.cl_import; + llog_ctxt_put(ctxt); } RETURN(rc); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index de9aa4b956..07e565cd52 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1958,10 +1958,6 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_ns, rc); } - rc = llog_start_commit_thread(); - if (rc < 0) - GOTO(err_fs, rc); - if (lcfg->lcfg_bufcount >= 4 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { class_uuid_t uuid; @@ -2133,6 +2129,7 @@ err_cleanup: int mds_postrecov(struct obd_device *obd) { + struct llog_ctxt *ctxt; int rc; ENTRY; @@ -2140,7 +2137,9 @@ int mds_postrecov(struct obd_device *obd) RETURN(0); LASSERT(!obd->obd_recovering); - LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); + LASSERT(ctxt != NULL); + llog_ctxt_put(ctxt); /* set nextid first, so we are sure it happens */ mutex_down(&obd->obd_dev_sem); diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c index d2db5cb5aa..1015d4eb3b 100644 --- a/lustre/mds/mds_join.c +++ b/lustre/mds/mds_join.c @@ -343,7 +343,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, struct lov_mds_md_join *head_lmmj = NULL, *tail_lmmj = NULL; int lmm_size, rc = 0, cleanup_phase = 0, size; struct llog_handle *llh_head = NULL, *llh_tail = NULL; - struct llog_ctxt *ctxt; + struct llog_ctxt *ctxt = NULL; struct mds_rec_join *join_rec; ENTRY; @@ -392,6 +392,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); ctxt = llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT); + LASSERT(ctxt != NULL); cleanup_phase = 2; if (le32_to_cpu(head_lmm->lmm_magic) == 
LOV_MAGIC) { /*simple file */ struct llog_logid *llog_array; @@ -482,6 +483,7 @@ cleanup: case 3: llog_close(llh_head); case 2: + llog_ctxt_put(ctxt); if (head_lmmj && ((void*)head_lmmj != (void*)head_lmm)) OBD_FREE_PTR(head_lmmj); diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index 5878a2d818..a33a313964 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -56,6 +56,8 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt, lctxt = llog_get_context(lov_obd, ctxt->loc_idx); rc = llog_add(lctxt, rec, lsm, logcookies, numcookies); + llog_ctxt_put(lctxt); + RETURN(rc); } @@ -72,6 +74,7 @@ static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count, lctxt = llog_get_context(lov_obd, ctxt->loc_idx); rc = llog_connect(lctxt, count, logid, gen, uuid); + llog_ctxt_put(lctxt); RETURN(rc); } @@ -86,6 +89,7 @@ static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls lctxt = llog_get_context(lov_obd, ctxt->loc_idx); rc = llog_cancel(lctxt, lsm, count, cookies, flags); + llog_ctxt_put(lctxt); RETURN(rc); } @@ -119,6 +123,7 @@ int mds_log_op_unlink(struct obd_device *obd, ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); rc = llog_add(ctxt, &lur->lur_hdr, lsm, logcookies, cookies_size / sizeof(struct llog_cookie)); + llog_ctxt_put(ctxt); OBD_FREE(lur, sizeof(*lur)); out: @@ -163,6 +168,8 @@ int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, rc = llog_add(ctxt, &lsr->lsr_hdr, lsm, logcookies, cookies_size / sizeof(struct llog_cookie)); + llog_ctxt_put(ctxt); + OBD_FREE(lsr, sizeof(*lsr)); out: obd_free_memmd(mds->mds_osc_exp, &lsm); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 4df5c5b41c..1a2b681eef 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -452,12 +452,15 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { case OBD_IOC_RECORD: { char *name = data->ioc_inlbuf1; + struct llog_ctxt *ctxt; + if (mds->mds_cfg_llh) RETURN(-EBUSY); + 
ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &mds->mds_cfg_llh, NULL, name); + rc = llog_create(ctxt, &mds->mds_cfg_llh, NULL, name); + llog_ctxt_put(ctxt); if (rc == 0) llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN, &cfg_uuid); @@ -482,12 +485,14 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_CLEAR_LOG: { char *name = data->ioc_inlbuf1; + struct llog_ctxt *ctxt; if (mds->mds_cfg_llh) RETURN(-EBUSY); + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &mds->mds_cfg_llh, NULL, name); + rc = llog_create(ctxt, &mds->mds_cfg_llh, NULL, name); + llog_ctxt_put(ctxt); if (rc == 0) { llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN, NULL); @@ -540,6 +545,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); if (rc) RETURN(rc); @@ -552,6 +558,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); if (rc) RETURN(rc); @@ -600,6 +607,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); + llog_ctxt_put(ctxt); rc2 = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN), KEY_MDS_CONN, 0, NULL, NULL); @@ -615,6 +623,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, push_ctxt(&saved, 
&ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); RETURN(rc); } @@ -663,6 +672,7 @@ static int __mds_lov_synchronize(void *data) struct mds_obd *mds = &obd->u.mds; struct obd_uuid *uuid; __u32 idx = mlsi->mlsi_index; + struct llog_ctxt *ctxt; int rc = 0; ENTRY; @@ -686,9 +696,15 @@ static int __mds_lov_synchronize(void *data) if (rc != 0) GOTO(out, rc); - rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), - mds->mds_lov_desc.ld_tgt_count, - NULL, NULL, uuid); + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); + if (!ctxt) + RETURN(-ENODEV); + + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT, 60); + + rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count, + NULL, NULL, uuid); + llog_ctxt_put(ctxt); if (rc != 0) { CERROR("%s failed at llog_origin_connect: %d\n", @@ -836,7 +852,7 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, RETURN(rc); } - LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); + LASSERT(!llog_ctxt_null(obd, LLOG_MDS_OST_ORIG_CTXT)); rc = mds_lov_start_synchronize(obd, watched, data, !(ev == OBD_NOTIFY_SYNC)); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 67a4f2671c..3ec0988847 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -91,6 +91,8 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, rc = llog_cancel(ctxt, lsm, mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies), mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW); + llog_ctxt_put(ctxt); + if (rc) CERROR("error cancelling %d log cookies: rc %d\n", (int)(mlcd->mlcd_cookielen / diff --git a/lustre/mgc/libmgc.c b/lustre/mgc/libmgc.c index 49a8260c8c..b72e8bbc62 100644 --- a/lustre/mgc/libmgc.c +++ b/lustre/mgc/libmgc.c @@ -112,6 +112,7 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt, if (rc == 0) { ctxt = llog_get_context(obd, 
LLOG_CONFIG_REPL_CTXT); ctxt->loc_imp = obd->u.cli.cl_import; + llog_ctxt_put(ctxt); } RETURN(rc); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 1d22aa1b37..550073dc51 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -873,6 +873,7 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt, if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); ctxt->loc_imp = obd->u.cli.cl_import; + llog_ctxt_put(ctxt); } RETURN(rc); @@ -1085,6 +1086,7 @@ static int mgc_process_log(struct obd_device *mgc, /* Now, whether we copied or not, start using the local llog. If we failed to copy, we'll start using whatever the old log has. */ + llog_ctxt_put(ctxt); ctxt = lctxt; } @@ -1092,8 +1094,11 @@ static int mgc_process_log(struct obd_device *mgc, copy of the instance for the update. The cfg_last_idx will be updated here. */ rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg); - - out_pop: + +out_pop: + llog_ctxt_put(ctxt); + if (ctxt != lctxt) + llog_ctxt_put(lctxt); if (must_pop) pop_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL); diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 093e1b09ef..e93134dfc7 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -157,10 +157,6 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_ns, rc); } - rc = llog_start_commit_thread(); - if (rc < 0) - GOTO(err_fs, rc); - rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, &llog_lvfs_ops); if (rc) @@ -635,6 +631,7 @@ out_free: push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); if (rc) RETURN(rc); @@ -650,6 +647,7 @@ out_free: push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); RETURN(rc); } 
diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 3d8b607b62..95af9c8601 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -224,15 +224,16 @@ static int mgs_get_fsdb_from_llog(struct obd_device *obd, struct fs_db *fsdb) char *logname; struct llog_handle *loghandle; struct lvfs_run_ctxt saved; + struct llog_ctxt *ctxt; int rc, rc2; ENTRY; + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + LASSERT(ctxt != NULL); name_create(&logname, fsdb->fsdb_name, "-client"); down(&fsdb->fsdb_sem); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &loghandle, NULL, logname); + rc = llog_create(ctxt, &loghandle, NULL, logname); if (rc) GOTO(out_pop, rc); @@ -249,8 +250,8 @@ out_close: rc2 = llog_close(loghandle); if (!rc) rc = rc2; - out_pop: + llog_ctxt_put(ctxt); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); up(&fsdb->fsdb_sem); name_destroy(&logname); @@ -564,6 +565,7 @@ static int mgs_modify(struct obd_device *obd, struct fs_db *fsdb, { struct llog_handle *loghandle; struct lvfs_run_ctxt saved; + struct llog_ctxt *ctxt; struct mgs_modify_lookup *mml; int rc, rc2; ENTRY; @@ -571,9 +573,10 @@ static int mgs_modify(struct obd_device *obd, struct fs_db *fsdb, CDEBUG(D_MGS, "modify %s/%s/%s\n", logname, devname, comment); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &loghandle, NULL, logname); + + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + LASSERT(ctxt != NULL); + rc = llog_create(ctxt, &loghandle, NULL, logname); if (rc) GOTO(out_pop, rc); @@ -602,8 +605,8 @@ out_close: rc2 = llog_close(loghandle); if (!rc) rc = rc2; - out_pop: + llog_ctxt_put(ctxt); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc && rc != -ENODEV) CERROR("modify %s/%s failed %d\n", @@ -777,22 +780,25 @@ static int record_start_log(struct obd_device *obd, { static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" }; struct 
lvfs_run_ctxt saved; + struct llog_ctxt *ctxt; int rc = 0; - if (*llh) { + if (*llh) GOTO(out, rc = -EBUSY); - } + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + if (!ctxt) + GOTO(out, rc = -ENODEV); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - llh, NULL, name); + rc = llog_create(ctxt, llh, NULL, name); if (rc == 0) llog_init_handle(*llh, LLOG_F_IS_PLAIN, &cfg_uuid); else *llh = NULL; pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); out: if (rc) { @@ -819,17 +825,20 @@ static int mgs_log_is_empty(struct obd_device *obd, char *name) { struct lvfs_run_ctxt saved; struct llog_handle *llh; + struct llog_ctxt *ctxt; int rc = 0; + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + LASSERT(ctxt != NULL); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &llh, NULL, name); + rc = llog_create(ctxt, &llh, NULL, name); if (rc == 0) { llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL); rc = llog_get_size(llh); llog_close(llh); } pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); /* header is record 1 */ return(rc <= 1); } @@ -1812,18 +1821,22 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) int mgs_erase_log(struct obd_device *obd, char *name) { struct lvfs_run_ctxt saved; + struct llog_ctxt *ctxt; struct llog_handle *llh; int rc = 0; + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + LASSERT(ctxt != NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &llh, NULL, name); + rc = llog_create(ctxt, &llh, NULL, name); if (rc == 0) { llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL); rc = llog_destroy(llh); llog_free_handle(llh); } pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); if (rc) CERROR("failed to clear log %s: %d\n", name, rc); diff --git a/lustre/obdclass/llog_obd.c 
b/lustre/obdclass/llog_obd.c index c1eff406c0..ff52ef622b 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -38,24 +38,78 @@ #include "llog_internal.h" /* helper functions for calling the llog obd methods */ +static struct llog_ctxt* llog_new_ctxt(struct obd_device *obd) +{ + struct llog_ctxt *ctxt; -int llog_cleanup(struct llog_ctxt *ctxt) + OBD_ALLOC(ctxt, sizeof(*ctxt)); + if (!ctxt) + return NULL; + + ctxt->loc_obd = obd; + atomic_set(&ctxt->loc_refcount, 1); + + return ctxt; +} + +static void llog_ctxt_destroy(struct llog_ctxt *ctxt) { + if (ctxt->loc_exp) + class_export_put(ctxt->loc_exp); + OBD_FREE(ctxt, sizeof(*ctxt)); + return; +} + +int __llog_ctxt_put(struct llog_ctxt *ctxt) +{ + struct obd_device *obd; int rc = 0; + + obd = ctxt->loc_obd; + spin_lock(&obd->obd_dev_lock); + if (!atomic_dec_and_test(&ctxt->loc_refcount)) { + spin_unlock(&obd->obd_dev_lock); + return rc; + } + obd->obd_llog_ctxt[ctxt->loc_idx] = NULL; + spin_unlock(&obd->obd_dev_lock); + + LASSERT(obd->obd_stopping == 1); + /* cleanup the llog ctxt here */ + if (CTXTP(ctxt, cleanup)) + rc = CTXTP(ctxt, cleanup)(ctxt); + + llog_ctxt_destroy(ctxt); + wake_up(&obd->obd_llog_waitq); + return rc; +} +EXPORT_SYMBOL(__llog_ctxt_put); + +int llog_cleanup(struct llog_ctxt *ctxt) +{ + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + struct obd_device *obd = ctxt->loc_obd; + int rc, idx; ENTRY; if (!ctxt) { CERROR("No ctxt\n"); RETURN(-ENODEV); } - - if (CTXTP(ctxt, cleanup)) - rc = CTXTP(ctxt, cleanup)(ctxt); - ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL; - if (ctxt->loc_exp) - class_export_put(ctxt->loc_exp); - OBD_FREE(ctxt, sizeof(*ctxt)); + /*balance the ctxt get when calling llog_cleanup */ + llog_ctxt_put(ctxt); + + /* sync with other llog ctxt user thread */ + spin_lock(&obd->obd_dev_lock); + LASSERT(obd->obd_stopping == 1); + spin_unlock(&obd->obd_dev_lock); + + idx = ctxt->loc_idx; + /*try to free the ctxt */ + rc = __llog_ctxt_put(ctxt); + + 
l_wait_event(obd->obd_llog_waitq, llog_ctxt_null(obd, idx), &lwi); RETURN(rc); } @@ -71,24 +125,23 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, if (index < 0 || index >= LLOG_MAX_CTXTS) RETURN(-EFAULT); - if (obd->obd_llog_ctxt[index]) { + ctxt = llog_get_context(obd, index); + if (ctxt) { /* mds_lov_update_mds might call here multiple times. So if the llog is already set up then don't to do it again. */ CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n", obd->obd_name, index); - ctxt = obd->obd_llog_ctxt[index]; LASSERT(ctxt->loc_obd == obd); LASSERT(ctxt->loc_exp == disk_obd->obd_self_export); LASSERT(ctxt->loc_logops == op); + llog_ctxt_put(ctxt); GOTO(out, rc = 0); } - - OBD_ALLOC(ctxt, sizeof(*ctxt)); + ctxt = llog_new_ctxt(obd); if (!ctxt) GOTO(out, rc = -ENOMEM); obd->obd_llog_ctxt[index] = ctxt; - ctxt->loc_obd = obd; ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); ctxt->loc_idx = index; ctxt->loc_logops = op; @@ -96,13 +149,9 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, if (op->lop_setup) rc = op->lop_setup(obd, index, disk_obd, count, logid); - - if (rc) { - obd->obd_llog_ctxt[index] = NULL; - class_export_put(ctxt->loc_exp); - OBD_FREE(ctxt, sizeof(*ctxt)); - } - + + if (rc) + llog_ctxt_destroy(ctxt); out: RETURN(rc); } @@ -247,7 +296,8 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL, NULL); if (rc) CERROR("llog_process with cat_cancel_cb failed: %d\n", rc); - out: +out: + llog_ctxt_put(ctxt); RETURN(rc); } EXPORT_SYMBOL(llog_obd_origin_setup); diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index d991f519c9..c98223a4e5 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -94,6 +94,7 @@ static int llog_test_1(struct obd_device *obd, char *name) rc = llog_create(ctxt, &llh, NULL, name); if (rc) { CERROR("1a: llog_create with name %s failed: %d\n", 
name, rc); + llog_ctxt_put(ctxt); RETURN(rc); } llog_init_handle(llh, LLOG_F_IS_PLAIN, &uuid); @@ -104,6 +105,7 @@ static int llog_test_1(struct obd_device *obd, char *name) out: CWARN("1b: close newly-created log\n"); rc2 = llog_close(llh); + llog_ctxt_put(ctxt); if (rc2) { CERROR("1b: close log %s failed: %d\n", name, rc2); if (rc == 0) @@ -126,18 +128,18 @@ static int llog_test_2(struct obd_device *obd, char *name, rc = llog_create(ctxt, llh, NULL, name); if (rc) { CERROR("2a: re-open log with name %s failed: %d\n", name, rc); - RETURN(rc); + GOTO(out, rc); } llog_init_handle(*llh, LLOG_F_IS_PLAIN, &uuid); if ((rc = verify_handle("2", *llh, 1))) - RETURN(rc); + GOTO(out, rc); CWARN("2b: create a log without specified NAME & LOGID\n"); rc = llog_create(ctxt, &loghandle, NULL, NULL); if (rc) { CERROR("2b: create log failed\n"); - RETURN(rc); + GOTO(out, rc); } llog_init_handle(loghandle, LLOG_F_IS_PLAIN, &uuid); logid = loghandle->lgh_id; @@ -147,7 +149,7 @@ static int llog_test_2(struct obd_device *obd, char *name, rc = llog_create(ctxt, &loghandle, &logid, NULL); if (rc) { CERROR("2b: re-open log by LOGID failed\n"); - RETURN(rc); + GOTO(out, rc); } llog_init_handle(loghandle, LLOG_F_IS_PLAIN, &uuid); @@ -155,9 +157,11 @@ static int llog_test_2(struct obd_device *obd, char *name, rc = llog_destroy(loghandle); if (rc) { CERROR("2b: destroy log failed\n"); - RETURN(rc); + GOTO(out, rc); } llog_free_handle(loghandle); +out: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -261,10 +265,10 @@ static int llog_test_4(struct obd_device *obd) } num_recs++; if ((rc = verify_handle("4b", cath, 2))) - RETURN(rc); + GOTO(ctxt_release, rc); if ((rc = verify_handle("4b", cath->u.chd.chd_current_log, num_recs))) - RETURN(rc); + GOTO(ctxt_release, rc); CWARN("4c: cancel 1 log record\n"); rc = llog_cat_cancel_records(cath, 1, &cookie); @@ -275,7 +279,7 @@ static int llog_test_4(struct obd_device *obd) num_recs--; if ((rc = verify_handle("4c", cath->u.chd.chd_current_log, num_recs))) - 
RETURN(rc); + GOTO(ctxt_release, rc); CWARN("4d: write 40,000 more log records\n"); for (i = 0; i < 40000; i++) { @@ -311,6 +315,8 @@ static int llog_test_4(struct obd_device *obd) out: CWARN("4f: put newly-created catalog\n"); rc = llog_cat_put(cath); +ctxt_release: + llog_ctxt_put(ctxt); if (rc) CERROR("1b: close log %s failed: %d\n", name, rc); RETURN(rc); @@ -437,6 +443,8 @@ static int llog_test_5(struct obd_device *obd) rc = llog_cat_put(llh); if (rc) CERROR("1b: close log %s failed: %d\n", name, rc); + llog_ctxt_put(ctxt); + RETURN(rc); } @@ -458,13 +466,13 @@ static int llog_test_6(struct obd_device *obd, char *name) if (mdc_obd == NULL) { CERROR("6: no MDC devices connected to %s found.\n", mds_uuid->uuid); - RETURN(-ENOENT); + GOTO(ctxt_release, rc = -ENOENT); } rc = obd_connect(&exph, mdc_obd, &uuid, NULL /* obd_connect_data */); if (rc) { CERROR("6: failed to connect to MDC: %s\n", mdc_obd->obd_name); - RETURN(rc); + GOTO(ctxt_release, rc); } exp = class_conn2export(&exph); @@ -472,7 +480,8 @@ static int llog_test_6(struct obd_device *obd, char *name) rc = llog_create(nctxt, &llh, NULL, name); if (rc) { CERROR("6: llog_create failed %d\n", rc); - RETURN(rc); + llog_ctxt_put(nctxt); + GOTO(ctxt_release, rc); } rc = llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL); @@ -491,12 +500,13 @@ static int llog_test_6(struct obd_device *obd, char *name) parse_out: rc = llog_close(llh); + llog_ctxt_put(nctxt); if (rc) { CERROR("6: llog_close failed: rc = %d\n", rc); } - rc = obd_disconnect(exp); - +ctxt_release: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -516,7 +526,7 @@ static int llog_test_7(struct obd_device *obd) rc = llog_create(ctxt, &llh, NULL, name); if (rc) { CERROR("7: llog_create with name %s failed: %d\n", name, rc); - RETURN(rc); + GOTO(ctxt_release, rc); } llog_init_handle(llh, LLOG_F_IS_PLAIN, &uuid); @@ -525,7 +535,7 @@ static int llog_test_7(struct obd_device *obd) rc = llog_write_rec(llh, &lcr.lcr_hdr, NULL, 0, NULL, -1); if (rc) { CERROR("7: write one 
log record failed: %d\n", rc); - RETURN(rc); + GOTO(ctxt_release, rc); } rc = llog_destroy(llh); @@ -533,6 +543,8 @@ static int llog_test_7(struct obd_device *obd) CERROR("7: llog_destroy failed: %d\n", rc); else llog_free_handle(llh); +ctxt_release: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -591,7 +603,7 @@ static int llog_run_tests(struct obd_device *obd) case 0: pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); } - + llog_ctxt_put(ctxt); return rc; } diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index bba8d3b770..a1d9fd739a 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -203,6 +203,7 @@ int class_attach(struct lustre_cfg *lcfg) cfs_init_timer(&obd->obd_recovery_timer); spin_lock_init(&obd->obd_processing_task_lock); cfs_waitq_init(&obd->obd_next_transno_waitq); + cfs_waitq_init(&obd->obd_llog_waitq); CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue); CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 393c2bdb22..ebb8caaeba 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1824,10 +1824,20 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, int count, struct llog_catid *catid, struct obd_uuid *uuid) { + struct filter_obd *filter = &obd->u.filter; struct llog_ctxt *ctxt; int rc; ENTRY; + OBD_ALLOC(filter->fo_lcm, sizeof(struct llog_commit_master)); + if (!filter->fo_lcm) + RETURN(-ENOMEM); + + rc = llog_init_commit_master((struct llog_commit_master *) + filter->fo_lcm); + if (rc) + GOTO(cleanup, rc); + filter_mds_ost_repl_logops = llog_client_ops; filter_mds_ost_repl_logops.lop_cancel = llog_obd_repl_cancel; filter_mds_ost_repl_logops.lop_connect = llog_repl_connect; @@ -1836,14 +1846,26 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, rc = llog_setup(obd, LLOG_MDS_OST_REPL_CTXT, tgt, 0, NULL, &filter_mds_ost_repl_logops); if (rc) - RETURN(rc); + 
GOTO(cleanup, rc); /* FIXME - assign unlink_cb for filter's recovery */ ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb; + ctxt->loc_lcm = obd->u.filter.fo_lcm; + rc = llog_start_commit_thread(ctxt->loc_lcm); + llog_ctxt_put(ctxt); + if (rc) + GOTO(cleanup, rc); rc = llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL, &filter_size_orig_logops); + +cleanup: + if (rc) { + llog_cleanup_commit_master(filter->fo_lcm, 0); + OBD_FREE(filter->fo_lcm, sizeof(struct llog_commit_master)); + filter->fo_lcm = NULL; + } RETURN(rc); } @@ -1853,6 +1875,14 @@ static int filter_llog_finish(struct obd_device *obd, int count) int rc = 0, rc2 = 0; ENTRY; + if (obd->u.filter.fo_lcm) { + llog_cleanup_commit_master((struct llog_commit_master *) + obd->u.filter.fo_lcm, 0); + OBD_FREE(obd->u.filter.fo_lcm, + sizeof(struct llog_commit_master)); + obd->u.filter.fo_lcm = NULL; + } + ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); if (ctxt) rc = llog_cleanup(ctxt); @@ -2246,6 +2276,8 @@ static int filter_disconnect(struct obd_export *exp) /* flush any remaining cancel messages out to the target */ ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); err = llog_sync(ctxt, exp); + llog_ctxt_put(ctxt); + if (err) CERROR("error flushing logs to MDS: rc %d\n", err); @@ -3032,14 +3064,16 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, oa->o_id); /* If object already gone, cancel cookie right now */ if (oa->o_valid & OBD_MD_FLCOOKIE) { + struct llog_ctxt *ctxt; fcc = obdo_logcookie(oa); - llog_cancel(llog_get_context(obd, fcc->lgc_subsys + 1), - NULL, 1, fcc, 0); + ctxt = llog_get_context(obd, fcc->lgc_subsys + 1); + llog_cancel(ctxt, NULL, 1, fcc, 0); + llog_ctxt_put(ctxt); fcc = NULL; /* we didn't allocate fcc, don't free it */ } GOTO(cleanup, rc = -ENOENT); } - + filter_prepare_destroy(obd, oa->o_id); /* Our MDC connection is established by the MDS to us */ @@ -3192,6 +3226,7 @@ static int filter_sync(struct obd_export 
*exp, struct obdo *oa, /* flush any remaining cancel messages out to the target */ ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_REPL_CTXT); llog_sync(ctxt, exp); + llog_ctxt_put(ctxt); RETURN(rc); } @@ -3290,6 +3325,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, /* setup llog imports */ ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse); + llog_ctxt_put(ctxt); lquota_setinfo(filter_quota_interface_ref, exp, obd); diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index e0230c3ad1..3b3bb2d01d 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -102,19 +102,27 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, void *cb_data, int error) { struct llog_cookie *cookie = cb_data; + struct llog_ctxt *ctxt; int rc; - if (error != 0) { - CDEBUG(D_INODE, "not cancelling llog cookie on error %d\n", - error); + if (error != 0 || obd->obd_stopping) { + CDEBUG(D_INODE, "not cancel logcookie err %d stopping %d \n", + error, obd->obd_stopping); OBD_FREE(cookie, sizeof(*cookie)); return; } - rc = llog_cancel(llog_get_context(obd, cookie->lgc_subsys + 1), - NULL, 1, cookie, 0); + ctxt = llog_get_context(obd, cookie->lgc_subsys + 1); + if (!ctxt) + GOTO(out, rc = 0); + + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT, 30); + + rc = llog_cancel(ctxt, NULL, 1, cookie, 0); if (rc) CERROR("error cancelling log cookies: rc = %d\n", rc); +out: + llog_ctxt_put(ctxt); OBD_FREE(cookie, sizeof(*cookie)); } @@ -134,7 +142,7 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt, lur = (struct llog_unlink_rec *)rec; OBDO_ALLOC(oa); - if (oa == NULL) + if (oa == NULL) RETURN(-ENOMEM); oa->o_valid |= OBD_MD_FLCOOKIE; oa->o_id = lur->lur_oid; @@ -205,11 +213,15 @@ int filter_recov_log_mds_ost_cb(struct llog_handle *llh, int rc = 0; ENTRY; + if (ctxt->loc_obd->obd_stopping) + RETURN(LLOG_PROC_BREAK); + if 
(!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) { CERROR("log is not plain\n"); RETURN(-EINVAL); } + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT, 30); cookie.lgc_lgl = llh->lgh_id; cookie.lgc_subsys = LLOG_MDS_OST_ORIG_CTXT; cookie.lgc_index = rec->lrh_index; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 9c9adc459d..3b02c24f3b 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3272,6 +3272,7 @@ static int osc_setinfo_mds_conn_interpret(struct ptlrpc_request *req, "ctxt %p: %d\n", ctxt, rc); } + llog_ctxt_put(ctxt); spin_lock(&imp->imp_lock); imp->imp_server_timeout = 1; imp->imp_pingable = 1; @@ -3456,6 +3457,8 @@ static int osc_disconnect(struct obd_export *exp) if (obd->u.cli.cl_conn_count == 1) /* flush any remaining cancel messages out to the target */ llog_sync(ctxt, exp); + + llog_ctxt_put(ctxt); rc = client_disconnect_export(exp); return rc; diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 9d0d065313..512c1c6375 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1671,11 +1671,6 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) rc = cleanup_group_info(); if (rc) RETURN(rc); - - rc = llog_start_commit_thread(); - if (rc < 0) - RETURN(rc); - lprocfs_init_vars(ost, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 870207185a..eee2b4f2f2 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -117,6 +117,8 @@ int llog_handle_connect(struct ptlrpc_request *req) ctxt = llog_get_context(obd, req_body->lgdc_ctxt_idx); rc = llog_connect(ctxt, 1, &req_body->lgdc_logid, &req_body->lgdc_gen, NULL); + + llog_ctxt_put(ctxt); if (rc != 0) CERROR("failed at llog_relp_connect\n"); diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index 7b82f4eb42..3de3553266 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -63,7 +63,7 @@ 
int llog_origin_handle_create(struct ptlrpc_request *req) lustre_swab_llogd_body); if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); - GOTO(out, rc =-EFAULT); + RETURN(-EFAULT); } if (body->lgd_logid.lgl_oid > 0) @@ -73,14 +73,14 @@ int llog_origin_handle_create(struct ptlrpc_request *req) name = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF + 1, 0); if (name == NULL) { CERROR("Can't unpack name\n"); - GOTO(out, rc = -EFAULT); + RETURN(-EFAULT); } CDEBUG(D_INFO, "opening log %s\n", name); } ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - GOTO(out, rc = -EINVAL); + RETURN(-EINVAL); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -101,7 +101,7 @@ out_close: rc = rc2; out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); -out: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -124,7 +124,7 @@ int llog_origin_handle_destroy(struct ptlrpc_request *req) lustre_swab_llogd_body); if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); - GOTO(out, rc =-EFAULT); + RETURN(-EFAULT); } if (body->lgd_logid.lgl_oid > 0) @@ -132,7 +132,8 @@ int llog_origin_handle_destroy(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - GOTO(out, rc = -EINVAL); + RETURN(-EINVAL); + disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -160,7 +161,7 @@ out_close: llog_close(loghandle); out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); -out: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -186,12 +187,12 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) lustre_swab_llogd_body); if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); - GOTO(out, rc =-EFAULT); + RETURN(-EFAULT); } OBD_ALLOC(buf, LLOG_CHUNK_SIZE); if (!buf) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) @@ -233,9 +234,9 @@ out_close: out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); + 
llog_ctxt_put(ctxt); out_free: OBD_FREE(buf, LLOG_CHUNK_SIZE); -out: RETURN(rc); } @@ -261,12 +262,12 @@ int llog_origin_handle_prev_block(struct ptlrpc_request *req) lustre_swab_llogd_body); if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); - GOTO(out, rc =-EFAULT); + RETURN(-EFAULT); } OBD_ALLOC(buf, LLOG_CHUNK_SIZE); if (!buf) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); ctxt = llog_get_context(obd, body->lgd_ctxt_idx); LASSERT(ctxt != NULL); @@ -288,7 +289,6 @@ int llog_origin_handle_prev_block(struct ptlrpc_request *req) if (rc) GOTO(out_close, rc); - rc = lustre_pack_reply(req, 3, size, NULL); if (rc) GOTO(out_close, rc = -ENOMEM); @@ -306,8 +306,8 @@ out_close: out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); OBD_FREE(buf, LLOG_CHUNK_SIZE); -out: RETURN(rc); } @@ -330,12 +330,12 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) lustre_swab_llogd_body); if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); - GOTO(out, rc =-EFAULT); + RETURN(-EFAULT); } ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - GOTO(out, rc = -EINVAL); + RETURN(-EINVAL); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -360,11 +360,9 @@ out_close: rc2 = llog_close(loghandle); if (!rc) rc = rc2; - out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); - -out: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -382,7 +380,7 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) struct obd_device *obd = req->rq_export->exp_obd; struct obd_device *disk_obd; struct llog_cookie *logcookies; - struct llog_ctxt *ctxt; + struct llog_ctxt *ctxt = NULL; int num_cookies, rc = 0, err, i; struct lvfs_run_ctxt saved; struct llog_handle *cathandle; @@ -436,6 +434,7 @@ pop_ctxt: else CDEBUG(D_RPCTRACE, "cancel %d llog-records\n", num_cookies); + llog_ctxt_put(ctxt); RETURN(rc); } EXPORT_SYMBOL(llog_origin_handle_cancel); @@ -452,7 +451,7 @@ static int llog_catinfo_config(struct 
obd_device *obd, char *buf, int buf_len, char *out = buf; if (ctxt == NULL || mds == NULL) - RETURN(-EOPNOTSUPP); + GOTO(release_ctxt, rc = -EOPNOTSUPP); push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); @@ -491,6 +490,8 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len, } out_pop: pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); +release_ctxt: + llog_ctxt_put(ctxt); RETURN(rc); } @@ -506,7 +507,7 @@ static int llog_catinfo_cb(struct llog_handle *cat, { static char *out = NULL; static int remains = 0; - struct llog_ctxt *ctxt; + struct llog_ctxt *ctxt = NULL; struct llog_handle *handle; struct llog_logid *logid; struct llog_logid_rec *lir; @@ -518,11 +519,13 @@ static int llog_catinfo_cb(struct llog_handle *cat, remains = cbd->remains; cbd->init = 0; } - ctxt = cbd->ctxt; - if (!(cat->lgh_hdr->llh_flags & LLOG_F_IS_CAT)) + if (!(cat->lgh_hdr->llh_flags & LLOG_F_IS_CAT)) RETURN(-EINVAL); + if (!cbd->ctxt) + RETURN(-EINVAL); + lir = (struct llog_logid_rec *)rec; logid = &lir->lid_id; rc = llog_create(ctxt, &handle, logid, NULL); @@ -572,14 +575,14 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf, struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); if (ctxt == NULL || mds == NULL) - RETURN(-EOPNOTSUPP); - + GOTO(release_ctxt, rc = -EOPNOTSUPP); + count = mds->mds_lov_desc.ld_tgt_count; size = sizeof(*idarray) * count; OBD_ALLOC(idarray, size); if (!idarray) - RETURN(-ENOMEM); + GOTO(release_ctxt, rc = -ENOMEM); rc = llog_get_cat_list(obd, obd, name, count, idarray); if (rc) @@ -626,6 +629,9 @@ out_pop: pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); out_free: OBD_FREE(idarray, size); +release_ctxt: + llog_ctxt_put(ctxt); + RETURN(rc); } diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index c1238b933a..0a299650e6 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -63,8 +63,6 @@ void 
ptlrpc_lprocfs_do_request_stat (struct ptlrpc_request *req, #endif /* LPROCFS */ /* recovd_thread.c */ -int llog_init_commit_master(void); -int llog_cleanup_commit_master(int force); static inline int opcode_offset(__u32 opc) { if (opc < OST_LAST_OPC) { diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 40a1018d11..ebfd6cedef 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -68,17 +68,12 @@ __init int ptlrpc_init(void) GOTO(cleanup, rc); cleanup_phase = 2; - rc = llog_init_commit_master(); - if (rc) - GOTO(cleanup, rc); - cleanup_phase = 3; - ptlrpc_put_connection_superhack = ptlrpc_put_connection; rc = ptlrpc_start_pinger(); if (rc) GOTO(cleanup, rc); - cleanup_phase = 4; + cleanup_phase = 3; rc = ldlm_init(); if (rc) @@ -87,10 +82,8 @@ __init int ptlrpc_init(void) cleanup: switch(cleanup_phase) { - case 4: - ptlrpc_stop_pinger(); case 3: - llog_cleanup_commit_master(1); + ptlrpc_stop_pinger(); case 2: ptlrpc_cleanup_connection(); case 1: @@ -108,7 +101,6 @@ static void __exit ptlrpc_exit(void) ptlrpc_stop_pinger(); ptlrpc_exit_portals(); ptlrpc_cleanup_connection(); - llog_cleanup_commit_master(0); } /* connection.c */ diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index e0b21d64c5..49adcbdc64 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -55,13 +55,10 @@ #ifdef __KERNEL__ -static struct llog_commit_master lustre_lcm; -static struct llog_commit_master *lcm = &lustre_lcm; - /* Allocate new commit structs in case we do not have enough. * Make the llcd size small enough that it fits into a single page when we * are sending/receiving it. 
*/ -static int llcd_alloc(void) +static int llcd_alloc(struct llog_commit_master *lcm) { struct llog_canceld_ctxt *llcd; int llcd_size; @@ -85,7 +82,7 @@ static int llcd_alloc(void) } /* Get a free cookie struct from the list */ -struct llog_canceld_ctxt *llcd_grab(void) +static struct llog_canceld_ctxt *llcd_grab(struct llog_commit_master *lcm) { struct llog_canceld_ctxt *llcd; @@ -93,7 +90,7 @@ repeat: spin_lock(&lcm->lcm_llcd_lock); if (list_empty(&lcm->lcm_llcd_free)) { spin_unlock(&lcm->lcm_llcd_lock); - if (llcd_alloc() < 0) { + if (llcd_alloc(lcm) < 0) { CERROR("unable to allocate log commit data!\n"); return NULL; } @@ -110,10 +107,12 @@ repeat: return llcd; } -EXPORT_SYMBOL(llcd_grab); static void llcd_put(struct llog_canceld_ctxt *llcd) { + struct llog_commit_master *lcm = llcd->llcd_lcm; + + llog_ctxt_put(llcd->llcd_ctxt); if (atomic_read(&lcm->lcm_llcd_numfree) >= lcm->lcm_llcd_maxfree) { int llcd_size = llcd->llcd_size + offsetof(struct llog_canceld_ctxt, llcd_cookies); @@ -127,15 +126,16 @@ static void llcd_put(struct llog_canceld_ctxt *llcd) } /* Send some cookies to the appropriate target */ -void llcd_send(struct llog_canceld_ctxt *llcd) +static void llcd_send(struct llog_canceld_ctxt *llcd) { - spin_lock(&llcd->llcd_lcm->lcm_llcd_lock); - list_add_tail(&llcd->llcd_list, &llcd->llcd_lcm->lcm_llcd_pending); - spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock); - + if (!(llcd->llcd_lcm->lcm_flags & LLOG_LCM_FL_EXIT)) { + spin_lock(&llcd->llcd_lcm->lcm_llcd_lock); + list_add_tail(&llcd->llcd_list, + &llcd->llcd_lcm->lcm_llcd_pending); + spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock); + } cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1); } -EXPORT_SYMBOL(llcd_send); /* deleted objects have a commit callback that cancels the MDS * log record for the deletion. 
The commit callback calls this @@ -161,7 +161,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, if (count > 0 && cookies != NULL) { if (llcd == NULL) { - llcd = llcd_grab(); + llcd = llcd_grab(ctxt->loc_lcm); if (llcd == NULL) { CERROR("couldn't get an llcd - dropped "LPX64 ":%x+%u\n", @@ -170,7 +170,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, cookies->lgc_index); GOTO(out, rc = -ENOMEM); } - llcd->llcd_ctxt = ctxt; + llcd->llcd_ctxt = llog_ctxt_get(ctxt); ctxt->loc_llcd = llcd; } @@ -254,7 +254,7 @@ static int log_commit_thread(void *arg) /* If we do not have enough pages available, allocate some */ while (atomic_read(&lcm->lcm_llcd_numfree) < lcm->lcm_llcd_minfree) { - if (llcd_alloc() < 0) + if (llcd_alloc(lcm) < 0) break; } @@ -291,7 +291,7 @@ static int log_commit_thread(void *arg) if (atomic_read(&lcm->lcm_thread_numidle) <= 1 && atomic_read(&lcm->lcm_thread_total) < lcm->lcm_thread_max) { - rc = llog_start_commit_thread(); + rc = llog_start_commit_thread(lcm); if (rc < 0) CERROR("error starting thread: rc %d\n", rc); } @@ -444,7 +444,7 @@ static int log_commit_thread(void *arg) return 0; } -int llog_start_commit_thread(void) +int llog_start_commit_thread(struct llog_commit_master *lcm) { int rc; ENTRY; @@ -470,7 +470,7 @@ static struct llog_process_args { void *llpa_arg; } llpa; -int llog_init_commit_master(void) +int llog_init_commit_master(struct llog_commit_master *lcm) { CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy); CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle); @@ -488,8 +488,10 @@ int llog_init_commit_master(void) sema_init(&llpa.llpa_sem, 1); return 0; } +EXPORT_SYMBOL(llog_init_commit_master); -int llog_cleanup_commit_master(int force) +int llog_cleanup_commit_master(struct llog_commit_master *lcm, + int force) { lcm->lcm_flags |= LLOG_LCM_FL_EXIT; if (force) @@ -500,6 +502,7 @@ int llog_cleanup_commit_master(int force) atomic_read(&lcm->lcm_thread_total) == 0); return 0; } +EXPORT_SYMBOL(llog_cleanup_commit_master); static int 
log_process_thread(void *args) { @@ -517,12 +520,12 @@ static int log_process_thread(void *args) rc = llog_create(ctxt, &llh, &logid, NULL); if (rc) { CERROR("llog_create failed %d\n", rc); - RETURN(rc); + GOTO(out, rc); } rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL); if (rc) { CERROR("llog_init_handle failed %d\n", rc); - GOTO(out, rc); + GOTO(release_llh, rc); } if (cb) { @@ -536,24 +539,33 @@ static int log_process_thread(void *args) CDEBUG(D_HA, "send llcd %p:%p forcibly after recovery\n", ctxt->loc_llcd, ctxt); llog_sync(ctxt, NULL); -out: + +release_llh: rc = llog_cat_put(llh); if (rc) CERROR("llog_cat_put failed %d\n", rc); - +out: + llog_ctxt_put(ctxt); RETURN(rc); } static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg) { + struct obd_device *obd = ctxt->loc_obd; int rc; ENTRY; + if (obd->obd_stopping) + RETURN(-ENODEV); + mutex_down(&llpa.llpa_sem); - llpa.llpa_ctxt = ctxt; llpa.llpa_cb = handle; llpa.llpa_arg = arg; - + llpa.llpa_ctxt = llog_get_context(ctxt->loc_obd, ctxt->loc_idx); + if (!llpa.llpa_ctxt) { + up(&llpa.llpa_sem); + RETURN(-ENODEV); + } rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES); if (rc < 0) CERROR("error starting log_process_thread: %d\n", rc); @@ -581,13 +593,13 @@ int llog_repl_connect(struct llog_ctxt *ctxt, int count, mutex_down(&ctxt->loc_sem); ctxt->loc_gen = *gen; - llcd = llcd_grab(); + llcd = llcd_grab(ctxt->loc_lcm); if (llcd == NULL) { CERROR("couldn't get an llcd\n"); mutex_up(&ctxt->loc_sem); RETURN(-ENOMEM); } - llcd->llcd_ctxt = ctxt; + llcd->llcd_ctxt = llog_ctxt_get(ctxt); ctxt->loc_llcd = llcd; mutex_up(&ctxt->loc_sem); diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 7d3ffc6341..873a3c9f20 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -2,8 +2,8 @@ set -e -# bug number: 6088 10124 10800 -ALWAYS_EXCEPT="8 15c 17 $REPLAY_DUAL_EXCEPT" +# bug number: 6088 10124 +ALWAYS_EXCEPT="8 15c $REPLAY_DUAL_EXCEPT" 
PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 6fc1d5227f..971c82d466 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1154,5 +1154,45 @@ test_60() { } run_test 60 "test llog post recovery init vs llog unlink" +#test race llog recovery thread vs llog cleanup +test_61() { + mkdir $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 800 + replay_barrier ost1 +# OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221 + unlinkmany $DIR/$tdir/$tfile-%d 800 + do_facet ost "sysctl -w lustre.fail_loc=0x80000221" + facet_failover ost1 + sleep 10 + fail ost1 + sleep 30 + do_facet ost "sysctl -w lustre.fail_loc=0x0" + $CHECKSTAT -t file $DIR/$tdir/$tfile-* && return 1 + rmdir $DIR/$tdir +} +run_test 61 "test race llog recovery vs llog cleanup" + +#test race mds llog sync vs llog cleanup +test_61b() { +# OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a + do_facet mds "sysctl -w lustre.fail_loc=0x8000013a" + facet_failover mds + sleep 10 + fail mds + do_facet client dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 || return 1 +} +run_test 61b "test race mds llog sync vs llog cleanup" + +#test race cancel cookie cb vs llog cleanup +test_61c() { +# OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222 + touch $DIR/$tfile + do_facet ost "sysctl -w lustre.fail_loc=0x80000222" + rm $DIR/$tfile + sleep 10 + fail ost1 +} +run_test 61c "test race cancel cookie cb vs llog cleanup" + equals_msg `basename $0`: test complete, cleaning up $CLEANUP -- GitLab