From 6aaaa8bcb8a94f0f685ce1abbaa4f3ed3e2e8c1b Mon Sep 17 00:00:00 2001 From: wangdi <wangdi> Date: Thu, 24 May 2007 10:06:31 +0000 Subject: [PATCH] Branch:b1_6 Fix lov_mds_md in mdd_create_objects error handler. Checking whether obd_fail before fsfilt_start. Free fcc no matter whether fsfilt_commit success or failed. b=10818 i=adilger i=green --- lustre/ChangeLog | 12 ++++++ lustre/mds/mds_internal.h | 3 +- lustre/mds/mds_join.c | 2 +- lustre/mds/mds_open.c | 9 +++-- lustre/mds/mds_reint.c | 36 ++++++++++------- lustre/mds/mds_xattr.c | 2 +- lustre/obdfilter/filter.c | 65 ++++++++++++++++++------------ lustre/obdfilter/filter_internal.h | 3 +- lustre/obdfilter/filter_io_24.c | 2 +- lustre/obdfilter/filter_io_26.c | 2 +- lustre/obdfilter/filter_log.c | 1 + lustre/tests/replay-single.sh | 4 +- lustre/tests/test-framework.sh | 1 - 13 files changed, 91 insertions(+), 51 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 9be0f36fe2..b32404892f 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -170,6 +170,18 @@ Description: Short directio read returns full requested size rather than Details : Direct I/O operations should return actual amount of bytes transferred rather than requested size. +Severity : normal +Frequency : rare +Bugzilla : 10818 +Description: Memory leak in recovery +Details : Lov_mds_md was not free in an error handler in mds_create_object. + It should also check obd_fail before fsfilt_start, otherwise if + fsfilt_start return -EROFS,(failover mds during mds recovery). + then the req will return with repmsg->transno = 0 and rc = EROFS. + and we met hit the assert LASSERT(req->rq_reqmsg->transno == + req->rq_repmsg->transno) in ptlrpc_replay_interpret. Fcc should + be freed no matter whether fsfilt_commit success or not. + -------------------------------------------------------------------------------- 2007-05-03 Cluster File Systems, Inc. <info@clusterfs.com> diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index af32d2f2f1..c5355e9683 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -132,7 +132,8 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, ldlm_policy_data_t *p2_policy); void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error); int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data); + struct ptlrpc_request *req, int rc, __u32 op_data, + int force_sync); void mds_reconstruct_generic(struct ptlrpc_request *req); void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd); int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c index d0bb7f2175..a0ab48e5b8 100644 --- a/lustre/mds/mds_join.c +++ b/lustre/mds/mds_join.c @@ -477,7 +477,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, sizeof(struct lov_mds_md_join), "lov"); mds_finish_join(mds, req, head_inode, head_lmmj); cleanup: - rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0); + rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0, 0); switch(cleanup_phase){ case 3: llog_close(llh_head); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index d12865ba8a..0bb9878d24 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -475,13 +475,14 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, if (IS_ERR(*handle)) { rc = PTR_ERR(*handle); *handle = NULL; - GOTO(out_oa, rc); + GOTO(free_diskmd, rc); } rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov"); lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size); LASSERT(lmm_buf); memcpy(lmm_buf, lmm, lmm_size); + free_diskmd: obd_free_diskmd(mds->mds_osc_exp, &lmm); out_oa: oti_free_cookies(&oti); @@ -797,7 +798,7 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, NULL); rc = mds_finish_transno(mds, dchild->d_inode, handle, - req, rc, rep ? rep->lock_policy_res1 : 0); + req, rc, rep ? rep->lock_policy_res1 : 0, 0); /* XXX what do we do here if mds_finish_transno itself failed? */ l_dput(dchild); @@ -1179,7 +1180,7 @@ found_child: cleanup: rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, - req, rc, rep ? rep->lock_policy_res1 : 0); + req, rc, rep ? rep->lock_policy_res1 : 0, 0); cleanup_no_trans: switch (cleanup_phase) { @@ -1390,7 +1391,7 @@ out: cleanup: if (req != NULL && reply_body != NULL) { - rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0); + rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0, 0); } else if (handle) { int err = fsfilt_commit(obd, pending_dir, handle, 0); if (err) { diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 75b9dbe97f..503d2b75c6 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -102,7 +102,8 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, /* Assumes caller has already pushed us into the kernel context. */ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data) + struct ptlrpc_request *req, int rc, __u32 op_data, + int force_sync) { struct mds_export_data *med = &req->rq_export->exp_mds_data; struct mds_client_data *mcd = med->med_mcd; @@ -119,7 +120,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, } /* if the export has already been failed, we have no last_rcvd slot */ - if (req->rq_export->exp_failed) { + if (req->rq_export->exp_failed || obd->obd_fail) { CWARN("commit transaction for disconnected client %s: rc %d\n", req->rq_export->exp_client_uuid.uuid, rc); if (rc == 0) @@ -184,11 +185,18 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off); err = -EINVAL; } else { - fsfilt_add_journal_cb(req->rq_export->exp_obd, transno, handle, - mds_commit_cb, NULL); + struct obd_export *exp = req->rq_export; + + if (!force_sync) + force_sync = fsfilt_add_journal_cb(exp->exp_obd,transno, + handle, mds_commit_cb, + NULL); + err = fsfilt_write_record(obd, mds->mds_rcvd_filp, mcd, - sizeof(*mcd), &off, - req->rq_export->exp_need_sync); + sizeof(*mcd), &off, + force_sync | exp->exp_need_sync); + if (force_sync) + mds_commit_cb(obd, transno, NULL, err); } if (err) { @@ -495,7 +503,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, struct lov_mds_md *lmm = NULL; struct llog_cookie *logcookies = NULL; int lmm_size = 0, need_lock = 1, cookie_size = 0; - int rc = 0, cleanup_phase = 0, err, locked = 0; + int rc = 0, cleanup_phase = 0, err, locked = 0, sync = 0; unsigned int qcids[MAXQUOTAS] = { 0, 0 }; unsigned int qpids[MAXQUOTAS] = { rec->ur_iattr.ia_uid, rec->ur_iattr.ia_gid }; @@ -674,9 +682,9 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, EXIT; cleanup: if (mlcd != NULL) - fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, - mds_cancel_cookies_cb, mlcd); - err = mds_finish_transno(mds, inode, handle, req, rc, 0); + sync = fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, + mds_cancel_cookies_cb, mlcd); + err = mds_finish_transno(mds, inode, handle, req, rc, 0, sync); /* do mds to ost setattr if needed */ if (!rc && !err && lmm_size) mds_osc_setattr_async(obd, inode, lmm, lmm_size, @@ -933,7 +941,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, EXIT; cleanup: - err = mds_finish_transno(mds, dir, handle, req, rc, 0); + err = mds_finish_transno(mds, dir, handle, req, rc, 0, 0); if (rc && created) { /* Destroy the file we just created. This should not need @@ -1724,7 +1732,7 @@ cleanup: } rc = mds_finish_transno(mds, dparent ? dparent->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0, 0); if (!rc) (void)obd_set_info_async(mds->mds_osc_exp, strlen("unlinked"), "unlinked", 0, NULL, NULL); @@ -1874,7 +1882,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, CERROR("vfs_link error %d\n", rc); cleanup: rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0, 0); EXIT; switch (cleanup_phase) { @@ -2266,7 +2274,7 @@ no_unlink: GOTO(cleanup, rc); cleanup: rc = mds_finish_transno(mds, de_tgtdir ? de_tgtdir->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0, 0); switch (cleanup_phase) { case 4: diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c index 5b16864dda..8a049d8c38 100644 --- a/lustre/mds/mds_xattr.c +++ b/lustre/mds/mds_xattr.c @@ -310,7 +310,7 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) LASSERT(rc <= 0); out_trans: - err = mds_finish_transno(mds, inode, handle, req, rc, 0); + err = mds_finish_transno(mds, inode, handle, req, rc, 0, 0); out_dput: l_dput(de); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 38b80ad62c..c643b1be33 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -73,7 +73,7 @@ static void filter_commit_cb(struct obd_device *obd, __u64 transno, /* Assumes caller has already pushed us into the kernel context. */ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, - int rc) + int rc, int force_sync) { struct filter_obd *filter = &exp->exp_obd->u.filter; struct filter_export_data *fed = &exp->exp_filter_data; @@ -115,11 +115,18 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, fed->fed_lr_idx, fed->fed_lr_off); err = -EINVAL; } else { - fsfilt_add_journal_cb(exp->exp_obd, last_rcvd, oti->oti_handle, - filter_commit_cb, NULL); + if (!force_sync) + force_sync = fsfilt_add_journal_cb(exp->exp_obd, + last_rcvd, + oti->oti_handle, + filter_commit_cb, + NULL); + err = fsfilt_write_record(exp->exp_obd, filter->fo_rcvd_filp, fcd, sizeof(*fcd), &off, - exp->exp_need_sync); + force_sync | exp->exp_need_sync); + if (force_sync) + filter_commit_cb(exp->exp_obd, last_rcvd, NULL, err); } if (err) { log_pri = D_ERROR; @@ -2341,7 +2348,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, unsigned int orig_ids[MAXQUOTAS] = {0, 0}; struct llog_cookie *fcc = NULL; struct filter_obd *filter; - int rc, err, locked = 0; + int rc, err, locked = 0, sync = 0; unsigned int ia_valid; struct inode *inode; struct iattr iattr; @@ -2421,18 +2428,11 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, EXT3_IOC_SETFLAGS, (long)&oa->o_flags); } else { rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); - if (fcc != NULL) { + if (fcc != NULL) /* set cancel cookie callback function */ - if (fsfilt_add_journal_cb(exp->exp_obd, 0, handle, - filter_cancel_cookies_cb, - fcc)) { - spin_lock(&exp->exp_lock); - exp->exp_need_sync = 1; - spin_unlock(&exp->exp_lock); - } else { - fcc = NULL; - } - } + sync = fsfilt_add_journal_cb(exp->exp_obd, 0, handle, + filter_cancel_cookies_cb, + fcc); } if (OBD_FAIL_CHECK(OBD_FAIL_OST_SETATTR_CREDITS)) @@ -2441,13 +2441,19 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, /* The truncate might have used up our transaction credits. Make * sure we have one left for the last_rcvd update. */ err = fsfilt_extend(exp->exp_obd, inode, 1, handle); - rc = filter_finish_transno(exp, oti, rc); + rc = filter_finish_transno(exp, oti, rc, sync); + if (sync) { + filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc); + fcc = NULL; + } err = fsfilt_commit(exp->exp_obd, inode, handle, 0); if (err) { CERROR("error on commit, err = %d\n", err); if (!rc) rc = err; + } else { + fcc = NULL; } if (locked) { @@ -2993,7 +2999,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lvfs_run_ctxt saved; void *handle = NULL; struct llog_cookie *fcc = NULL; - int rc, rc2, cleanup_phase = 0; + int rc, rc2, cleanup_phase = 0, sync = 0; struct iattr iattr; ENTRY; @@ -3088,19 +3094,28 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, cleanup: switch(cleanup_phase) { case 4: - if (fcc != NULL) { - if (fsfilt_add_journal_cb(obd, 0, oti ? - oti->oti_handle : handle, - filter_cancel_cookies_cb, - fcc) == 0) - fcc = NULL; + if (fcc != NULL) + sync = fsfilt_add_journal_cb(obd, 0, oti ? + oti->oti_handle : handle, + filter_cancel_cookies_cb, + fcc); + /* If add_journal_cb failed, then filter_finish_transno + * will commit the handle and we will do a sync + * on commit. then we call callback directly to free + * the fcc. + */ + rc = filter_finish_transno(exp, oti, rc, sync); + if (sync) { + filter_cancel_cookies_cb(obd, 0, fcc, rc); + fcc = NULL; } - rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) rc = rc2; + } else { + fcc = NULL; } case 3: filter_parent_unlock(dparent); diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 1c5d0f5466..093976febb 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -98,7 +98,8 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa, const char *what, int quiet); #define filter_oa2dentry(obd, oa) __filter_oa2dentry(obd, oa, __FUNCTION__, 0) -int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc); +int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc, + int force_sync); __u64 filter_next_id(struct filter_obd *, struct obdo *); __u64 filter_last_id(struct filter_obd *, obd_gr group); int filter_update_fidea(struct obd_export *exp, struct inode *inode, diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 49ca0feada..19645c83d9 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -255,7 +255,7 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf, up(&inode->i_sem); cleanup_phase = 3; - rc = filter_finish_transno(exp, oti, 0); + rc = filter_finish_transno(exp, oti, 0, 0); if (rc) GOTO(cleanup, rc); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 73bb316636..b8b9a7c3ee 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -532,7 +532,7 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, UNLOCK_INODE_MUTEX(inode); - rc2 = filter_finish_transno(exp, oti, 0); + rc2 = filter_finish_transno(exp, oti, 0, 0); if (rc2 != 0) { CERROR("can't close transaction: %d\n", rc2); if (rc == 0) diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index 8cda048157..0ffa3960a4 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -107,6 +107,7 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, if (error != 0) { CDEBUG(D_INODE, "not cancelling llog cookie on error %d\n", error); + OBD_FREE(cookie, sizeof(*cookie)); return; } diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 35e5714ea9..bf11532299 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -740,10 +740,12 @@ run_test 32 "close() notices client eviction; close() after client eviction" # Abort recovery before client complete test_33() { replay_barrier mds - touch $DIR/$tfile + createmany -o $DIR/$tfile-%d 100 fail_abort mds # this file should be gone, because the replay was aborted $CHECKSTAT -t file $DIR/$tfile && return 3 + $CHECKSTAT -t file $DIR/$tfile-* && return 3 + unlinkmany $DIR/$tfile-%d 0 100 return 0 } run_test 33 "abort recovery before client does replay" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 2d9a6eef79..9fc7125189 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -188,7 +188,6 @@ unload_modules() { echo "$LEAK_PORTALS" 1>&2 mv $TMP/debug $TMP/debug-leak.`date +%s` || true echo "Memory leaks detected" - [ "$LEAK_LUSTRE" -a $(echo $LEAK_LUSTRE | awk 'leaked=$8 {print leaked % 56}') == 0 ] && echo "ignoring known bug 10818" && return 0 return 254 fi echo "modules unloaded." -- GitLab