From 6aaaa8bcb8a94f0f685ce1abbaa4f3ed3e2e8c1b Mon Sep 17 00:00:00 2001
From: wangdi <wangdi>
Date: Thu, 24 May 2007 10:06:31 +0000
Subject: [PATCH] Branch:b1_6 Fix lov_mds_md in mdd_create_objects error
 handler. Checking whether obd_fail before fsfilt_start. Free fcc no matter
 whether fsfilt_commit success or failed. b=10818 i=adilger i=green

---
 lustre/ChangeLog                   | 12 ++++++
 lustre/mds/mds_internal.h          |  3 +-
 lustre/mds/mds_join.c              |  2 +-
 lustre/mds/mds_open.c              |  9 +++--
 lustre/mds/mds_reint.c             | 36 ++++++++++-------
 lustre/mds/mds_xattr.c             |  2 +-
 lustre/obdfilter/filter.c          | 65 ++++++++++++++++++------------
 lustre/obdfilter/filter_internal.h |  3 +-
 lustre/obdfilter/filter_io_24.c    |  2 +-
 lustre/obdfilter/filter_io_26.c    |  2 +-
 lustre/obdfilter/filter_log.c      |  1 +
 lustre/tests/replay-single.sh      |  4 +-
 lustre/tests/test-framework.sh     |  1 -
 13 files changed, 91 insertions(+), 51 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 9be0f36fe2..b32404892f 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -170,6 +170,18 @@ Description: Short directio read returns full requested size rather than
 Details    : Direct I/O operations should return actual amount of bytes
              transferred rather than requested size.
 
+Severity   : normal
+Frequency  : rare 
+Bugzilla   : 10818
+Description: Memory leak in recovery
+Details    : Lov_mds_md was not free in an error handler in mds_create_object.
+             It should also check obd_fail before fsfilt_start, otherwise if 
+             fsfilt_start return -EROFS,(failover mds during mds recovery).
+             then the req will return with repmsg->transno = 0 and rc = EROFS. 
+             and we met hit the assert LASSERT(req->rq_reqmsg->transno ==
+             req->rq_repmsg->transno) in ptlrpc_replay_interpret.  Fcc should 
+             be freed no matter whether fsfilt_commit success or not.
+
 --------------------------------------------------------------------------------
 
 2007-05-03  Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h
index af32d2f2f1..c5355e9683 100644
--- a/lustre/mds/mds_internal.h
+++ b/lustre/mds/mds_internal.h
@@ -132,7 +132,8 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id,
                           ldlm_policy_data_t *p2_policy);
 void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error);
 int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
-                       struct ptlrpc_request *req, int rc, __u32 op_data);
+                       struct ptlrpc_request *req, int rc, __u32 op_data, 
+                       int force_sync);
 void mds_reconstruct_generic(struct ptlrpc_request *req);
 void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd);
 int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds,
diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c
index d0bb7f2175..a0ab48e5b8 100644
--- a/lustre/mds/mds_join.c
+++ b/lustre/mds/mds_join.c
@@ -477,7 +477,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req,
                       sizeof(struct lov_mds_md_join), "lov");
         mds_finish_join(mds, req, head_inode, head_lmmj);
 cleanup:
-        rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0);
+        rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0, 0);
         switch(cleanup_phase){
         case 3:
                 llog_close(llh_head);
diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c
index d12865ba8a..0bb9878d24 100644
--- a/lustre/mds/mds_open.c
+++ b/lustre/mds/mds_open.c
@@ -475,13 +475,14 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
         if (IS_ERR(*handle)) {
                 rc = PTR_ERR(*handle);
                 *handle = NULL;
-                GOTO(out_oa, rc);
+                GOTO(free_diskmd, rc);
         }
 
         rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
         lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size);
         LASSERT(lmm_buf);
         memcpy(lmm_buf, lmm, lmm_size);
+ free_diskmd:
         obd_free_diskmd(mds->mds_osc_exp, &lmm);
  out_oa:
         oti_free_cookies(&oti);
@@ -797,7 +798,7 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid,
 
         rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, NULL);
         rc = mds_finish_transno(mds, dchild->d_inode, handle,
-                                req, rc, rep ? rep->lock_policy_res1 : 0);
+                                req, rc, rep ? rep->lock_policy_res1 : 0, 0);
         /* XXX what do we do here if mds_finish_transno itself failed? */
 
         l_dput(dchild);
@@ -1179,7 +1180,7 @@ found_child:
 
  cleanup:
         rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
-                                req, rc, rep ? rep->lock_policy_res1 : 0);
+                                req, rc, rep ? rep->lock_policy_res1 : 0, 0);
 
  cleanup_no_trans:
         switch (cleanup_phase) {
@@ -1390,7 +1391,7 @@ out:
 
  cleanup:
         if (req != NULL && reply_body != NULL) {
-                rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0);
+                rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0, 0);
         } else if (handle) {
                 int err = fsfilt_commit(obd, pending_dir, handle, 0);
                 if (err) {
diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c
index 75b9dbe97f..503d2b75c6 100644
--- a/lustre/mds/mds_reint.c
+++ b/lustre/mds/mds_reint.c
@@ -102,7 +102,8 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
 
 /* Assumes caller has already pushed us into the kernel context. */
 int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
-                       struct ptlrpc_request *req, int rc, __u32 op_data)
+                       struct ptlrpc_request *req, int rc, __u32 op_data, 
+                       int force_sync)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
@@ -119,7 +120,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
         }
 
         /* if the export has already been failed, we have no last_rcvd slot */
-        if (req->rq_export->exp_failed) {
+        if (req->rq_export->exp_failed || obd->obd_fail) {
                 CWARN("commit transaction for disconnected client %s: rc %d\n",
                       req->rq_export->exp_client_uuid.uuid, rc);
                 if (rc == 0)
@@ -184,11 +185,18 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
                 CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off);
                 err = -EINVAL;
         } else {
-                fsfilt_add_journal_cb(req->rq_export->exp_obd, transno, handle,
-                                      mds_commit_cb, NULL);
+                struct obd_export *exp = req->rq_export;
+
+                if (!force_sync)
+                        force_sync = fsfilt_add_journal_cb(exp->exp_obd,transno, 
+                                                          handle, mds_commit_cb,
+                                                          NULL);
+
                 err = fsfilt_write_record(obd, mds->mds_rcvd_filp, mcd,
-                                          sizeof(*mcd), &off,
-                                          req->rq_export->exp_need_sync);
+                                          sizeof(*mcd), &off, 
+                                          force_sync | exp->exp_need_sync);
+                if (force_sync)
+                        mds_commit_cb(obd, transno, NULL, err);
         }
 
         if (err) {
@@ -495,7 +503,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         struct lov_mds_md *lmm = NULL;
         struct llog_cookie *logcookies = NULL;
         int lmm_size = 0, need_lock = 1, cookie_size = 0;
-        int rc = 0, cleanup_phase = 0, err, locked = 0;
+        int rc = 0, cleanup_phase = 0, err, locked = 0, sync = 0;
         unsigned int qcids[MAXQUOTAS] = { 0, 0 };
         unsigned int qpids[MAXQUOTAS] = { rec->ur_iattr.ia_uid, 
                                           rec->ur_iattr.ia_gid };
@@ -674,9 +682,9 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         EXIT;
  cleanup:
         if (mlcd != NULL)
-                fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle,
-                                      mds_cancel_cookies_cb, mlcd);
-        err = mds_finish_transno(mds, inode, handle, req, rc, 0);
+                sync = fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle,
+                                             mds_cancel_cookies_cb, mlcd);
+        err = mds_finish_transno(mds, inode, handle, req, rc, 0, sync);
         /* do mds to ost setattr if needed */
         if (!rc && !err && lmm_size)
                 mds_osc_setattr_async(obd, inode, lmm, lmm_size,
@@ -933,7 +941,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         EXIT;
 
 cleanup:
-        err = mds_finish_transno(mds, dir, handle, req, rc, 0);
+        err = mds_finish_transno(mds, dir, handle, req, rc, 0, 0);
 
         if (rc && created) {
                 /* Destroy the file we just created.  This should not need
@@ -1724,7 +1732,7 @@ cleanup:
         }
 
         rc = mds_finish_transno(mds, dparent ? dparent->d_inode : NULL,
-                                handle, req, rc, 0);
+                                handle, req, rc, 0, 0);
         if (!rc)
                 (void)obd_set_info_async(mds->mds_osc_exp, strlen("unlinked"),
                                          "unlinked", 0, NULL, NULL);
@@ -1874,7 +1882,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
                 CERROR("vfs_link error %d\n", rc);
 cleanup:
         rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL,
-                                handle, req, rc, 0);
+                                handle, req, rc, 0, 0);
         EXIT;
 
         switch (cleanup_phase) {
@@ -2266,7 +2274,7 @@ no_unlink:
         GOTO(cleanup, rc);
 cleanup:
         rc = mds_finish_transno(mds, de_tgtdir ? de_tgtdir->d_inode : NULL,
-                                handle, req, rc, 0);
+                                handle, req, rc, 0, 0);
 
         switch (cleanup_phase) {
         case 4:
diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c
index 5b16864dda..8a049d8c38 100644
--- a/lustre/mds/mds_xattr.c
+++ b/lustre/mds/mds_xattr.c
@@ -310,7 +310,7 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body)
 
         LASSERT(rc <= 0);
 out_trans:
-        err = mds_finish_transno(mds, inode, handle, req, rc, 0);
+        err = mds_finish_transno(mds, inode, handle, req, rc, 0, 0);
 
 out_dput:
         l_dput(de);
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c
index 38b80ad62c..c643b1be33 100644
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -73,7 +73,7 @@ static void filter_commit_cb(struct obd_device *obd, __u64 transno,
 
 /* Assumes caller has already pushed us into the kernel context. */
 int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
-                          int rc)
+                          int rc, int force_sync)
 {
         struct filter_obd *filter = &exp->exp_obd->u.filter;
         struct filter_export_data *fed = &exp->exp_filter_data;
@@ -115,11 +115,18 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
                        fed->fed_lr_idx, fed->fed_lr_off);
                 err = -EINVAL;
         } else {
-                fsfilt_add_journal_cb(exp->exp_obd, last_rcvd, oti->oti_handle,
-                                      filter_commit_cb, NULL);
+                if (!force_sync)
+                        force_sync = fsfilt_add_journal_cb(exp->exp_obd, 
+                                                           last_rcvd,
+                                                           oti->oti_handle,
+                                                           filter_commit_cb,
+                                                           NULL);
+
                 err = fsfilt_write_record(exp->exp_obd, filter->fo_rcvd_filp,
                                           fcd, sizeof(*fcd), &off,
-                                          exp->exp_need_sync);
+                                          force_sync | exp->exp_need_sync);
+                if (force_sync)
+                        filter_commit_cb(exp->exp_obd, last_rcvd, NULL, err);
         }
         if (err) {
                 log_pri = D_ERROR;
@@ -2341,7 +2348,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         unsigned int orig_ids[MAXQUOTAS] = {0, 0};
         struct llog_cookie *fcc = NULL;
         struct filter_obd *filter;
-        int rc, err, locked = 0;
+        int rc, err, locked = 0, sync = 0;
         unsigned int ia_valid;
         struct inode *inode;
         struct iattr iattr;
@@ -2421,18 +2428,11 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                                       EXT3_IOC_SETFLAGS, (long)&oa->o_flags);
         } else {
                 rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
-                if (fcc != NULL) {
+                if (fcc != NULL)
                         /* set cancel cookie callback function */
-                        if (fsfilt_add_journal_cb(exp->exp_obd, 0, handle,
-                                                  filter_cancel_cookies_cb,
-                                                  fcc)) {
-                                spin_lock(&exp->exp_lock);
-                                exp->exp_need_sync = 1;
-                                spin_unlock(&exp->exp_lock);
-                        } else {
-                                fcc = NULL;
-                }
-        }
+                        sync = fsfilt_add_journal_cb(exp->exp_obd, 0, handle,
+                                                     filter_cancel_cookies_cb,
+                                                     fcc);
         }
 
         if (OBD_FAIL_CHECK(OBD_FAIL_OST_SETATTR_CREDITS))
@@ -2441,13 +2441,19 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         /* The truncate might have used up our transaction credits.  Make
          * sure we have one left for the last_rcvd update. */
         err = fsfilt_extend(exp->exp_obd, inode, 1, handle);
-        rc = filter_finish_transno(exp, oti, rc);
+        rc = filter_finish_transno(exp, oti, rc, sync);
+        if (sync) {
+                filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc);
+                fcc = NULL;
+        }
 
         err = fsfilt_commit(exp->exp_obd, inode, handle, 0);
         if (err) {
                 CERROR("error on commit, err = %d\n", err);
                 if (!rc)
                         rc = err;
+        } else {
+                fcc = NULL;
         }
 
         if (locked) {
@@ -2993,7 +2999,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         struct lvfs_run_ctxt saved;
         void *handle = NULL;
         struct llog_cookie *fcc = NULL;
-        int rc, rc2, cleanup_phase = 0;
+        int rc, rc2, cleanup_phase = 0, sync = 0;
         struct iattr iattr;
         ENTRY;
 
@@ -3088,19 +3094,28 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
 cleanup:
         switch(cleanup_phase) {
         case 4:
-                if (fcc != NULL) {
-                        if (fsfilt_add_journal_cb(obd, 0, oti ?
-                                                  oti->oti_handle : handle,
-                                                  filter_cancel_cookies_cb,
-                                                  fcc) == 0)
-                                fcc = NULL;
+                if (fcc != NULL)
+                        sync = fsfilt_add_journal_cb(obd, 0, oti ?
+                                                     oti->oti_handle : handle,
+                                                     filter_cancel_cookies_cb,
+                                                     fcc);
+                /* If add_journal_cb failed, then filter_finish_transno
+                 * will commit the handle and we will do a sync 
+                 * on commit. then we call callback directly to free 
+                 * the fcc. 
+                 */
+                rc = filter_finish_transno(exp, oti, rc, sync);
+                if (sync) {
+                        filter_cancel_cookies_cb(obd, 0, fcc, rc); 
+                        fcc = NULL;
                 }
-                rc = filter_finish_transno(exp, oti, rc);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
                         if (!rc)
                                 rc = rc2;
+                } else {
+                        fcc = NULL;
                 }
         case 3:
                 filter_parent_unlock(dparent);
diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h
index 1c5d0f5466..093976febb 100644
--- a/lustre/obdfilter/filter_internal.h
+++ b/lustre/obdfilter/filter_internal.h
@@ -98,7 +98,8 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa,
                                   const char *what, int quiet);
 #define filter_oa2dentry(obd, oa) __filter_oa2dentry(obd, oa, __FUNCTION__, 0)
 
-int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc);
+int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc,
+                          int force_sync);
 __u64 filter_next_id(struct filter_obd *, struct obdo *);
 __u64 filter_last_id(struct filter_obd *, obd_gr group);
 int filter_update_fidea(struct obd_export *exp, struct inode *inode,
diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c
index 49ca0feada..19645c83d9 100644
--- a/lustre/obdfilter/filter_io_24.c
+++ b/lustre/obdfilter/filter_io_24.c
@@ -255,7 +255,7 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf,
                 up(&inode->i_sem);
                 cleanup_phase = 3;
 
-                rc = filter_finish_transno(exp, oti, 0);
+                rc = filter_finish_transno(exp, oti, 0, 0);
                 if (rc)
                         GOTO(cleanup, rc);
 
diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c
index 73bb316636..b8b9a7c3ee 100644
--- a/lustre/obdfilter/filter_io_26.c
+++ b/lustre/obdfilter/filter_io_26.c
@@ -532,7 +532,7 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
 
                 UNLOCK_INODE_MUTEX(inode);
 
-                rc2 = filter_finish_transno(exp, oti, 0);
+                rc2 = filter_finish_transno(exp, oti, 0, 0);
                 if (rc2 != 0) {
                         CERROR("can't close transaction: %d\n", rc2);
                         if (rc == 0)
diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c
index 8cda048157..0ffa3960a4 100644
--- a/lustre/obdfilter/filter_log.c
+++ b/lustre/obdfilter/filter_log.c
@@ -107,6 +107,7 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
         if (error != 0) {
                 CDEBUG(D_INODE, "not cancelling llog cookie on error %d\n",
                        error);
+                OBD_FREE(cookie, sizeof(*cookie));
                 return;
         }
 
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 35e5714ea9..bf11532299 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -740,10 +740,12 @@ run_test 32 "close() notices client eviction; close() after client eviction"
 # Abort recovery before client complete
 test_33() {
     replay_barrier mds
-    touch $DIR/$tfile
+    createmany -o $DIR/$tfile-%d 100 
     fail_abort mds
     # this file should be gone, because the replay was aborted
     $CHECKSTAT -t file $DIR/$tfile && return 3
+    $CHECKSTAT -t file $DIR/$tfile-* && return 3 
+    unlinkmany $DIR/$tfile-%d 0 100
     return 0
 }
 run_test 33 "abort recovery before client does replay"
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 2d9a6eef79..9fc7125189 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -188,7 +188,6 @@ unload_modules() {
         echo "$LEAK_PORTALS" 1>&2
         mv $TMP/debug $TMP/debug-leak.`date +%s` || true
         echo "Memory leaks detected"
-	[ "$LEAK_LUSTRE" -a $(echo $LEAK_LUSTRE | awk 'leaked=$8 {print leaked % 56}') == 0 ] && echo "ignoring known bug 10818" && return 0
         return 254
     fi
     echo "modules unloaded."
-- 
GitLab