From fee2650edeee1d2dc7a3d2680f48eda881426741 Mon Sep 17 00:00:00 2001 From: bobijam <bobijam> Date: Fri, 16 Nov 2007 02:32:37 +0000 Subject: [PATCH] Branch b1_6 b=3462 i=adilger, johann Description: Fix a replay issue Details : In some cases, an older replay request will revert mcd->mcd_last_xid on the MDS, which is used to record the client's latest sent request. --- lustre/ChangeLog | 7 +++++ lustre/mds/mds_reint.c | 12 +++++--- lustre/tests/replay-single.sh | 58 +++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index ea3d71e4d7..6f9268720f 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -14,6 +14,13 @@ tbd Cluster File Systems, Inc. <info@clusterfs.com> * Recommended e2fsprogs version: 1.40.2-cfs4 * Note that reiserfs quotas are disabled on SLES 10 in this kernel. +Severity : normal +Bugzilla : 3462 +Description: Fix a replay issue +Details : In some cases, older replay request will revert the + mcd->mcd_last_xid on MDS which is used to record the client's + latest sent request. + Severity : normal Bugzilla : 13969 Description: Update to RHEL5 kernel 2.6.18-8.1.15.el5. 
diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index f09bcc60f2..869fb271ba 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -174,10 +174,14 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, mcd->mcd_last_close_data = cpu_to_le32(op_data); } else { prev_transno = le64_to_cpu(mcd->mcd_last_transno); - mcd->mcd_last_transno = cpu_to_le64(transno); - mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_result = cpu_to_le32(rc); - mcd->mcd_last_data = cpu_to_le32(op_data); + if (((lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY)) == 0) || + (transno > prev_transno)) { + mcd->mcd_last_transno = cpu_to_le64(transno); + mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); + mcd->mcd_last_result = cpu_to_le32(rc); + mcd->mcd_last_data = cpu_to_le32(op_data); + } } /* update the server data to not lose the greatest transno. Bug 11125 */ if ((transno == 0) && (prev_transno == mds->mds_last_transno)) diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index cab4c23c42..9019697ee9 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1198,6 +1198,64 @@ test_53e() { } run_test 53e "|X| open reply while two MDC requests in flight" +test_53f() { + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + mcreate $DIR/${tdir}-2/f & + open_pid=$! 
+ sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x8000013b" + kill -USR1 $close_pid + cancel_lru_locks MDC + + replay_barrier_nodf mds + fail_nodf mds + wait $open_pid || return 1 + sleep 2 + #close should be gone + [ -d /proc/$close_pid ] && return 2 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4 + rm -rf $DIR/${tdir}-* +} +run_test 53f "|X| open reply and close reply while two MDC requests in flight" + +test_53g() { + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + mcreate $DIR/${tdir}-2/f & + open_pid=$! + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + + do_facet mds "sysctl -w lustre.fail_loc=0" + replay_barrier_nodf mds + fail_nodf mds + wait $open_pid || return 1 + sleep 2 + # close should be gone + [ -d /proc/$close_pid ] && return 2 + + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4 + rm -rf $DIR/${tdir}-* +} +run_test 53g "|X| drop open reply and close request while close and open are both in flight" + test_53h() { mkdir -p $DIR/${tdir}-1 mkdir -p $DIR/${tdir}-2 -- GitLab