diff --git a/lustre/ChangeLog b/lustre/ChangeLog index ea3d71e4d717c01a71f51853de7e04665406cdf9..6f9268720f23b895348b566ffb50d36ed9b06689 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -14,6 +14,13 @@ tbd Cluster File Systems, Inc. <info@clusterfs.com> * Recommended e2fsprogs version: 1.40.2-cfs4 * Note that reiserfs quotas are disabled on SLES 10 in this kernel. +Severity : normal +Bugzilla : 3462 +Description: Fix a replay issue +Details : In some cases, an older replayed request would revert + mcd->mcd_last_xid on the MDS, which is used to record the client's + latest sent request. + Severity : normal Bugzilla : 13969 Description: Update to RHEL5 kernel 2.6.18-8.1.15.el5. diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index f09bcc60f2615db50e05823f9f6b32498512fbe2..869fb271ba9c18bbba9da765177346e00950b174 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -174,10 +174,14 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, mcd->mcd_last_close_data = cpu_to_le32(op_data); } else { prev_transno = le64_to_cpu(mcd->mcd_last_transno); - mcd->mcd_last_transno = cpu_to_le64(transno); - mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_result = cpu_to_le32(rc); - mcd->mcd_last_data = cpu_to_le32(op_data); + if (((lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY)) == 0) || + (transno > prev_transno)) { + mcd->mcd_last_transno = cpu_to_le64(transno); + mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); + mcd->mcd_last_result = cpu_to_le32(rc); + mcd->mcd_last_data = cpu_to_le32(op_data); + } } /* update the server data to not lose the greatest transno.
Bug 11125 */ if ((transno == 0) && (prev_transno == mds->mds_last_transno)) diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index cab4c23c4287821b13201cb813bc5a2e7eaf8b5e..9019697ee9226cec53dfd28040280fa8eb2c2b5b 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1198,6 +1198,64 @@ test_53e() { } run_test 53e "|X| open reply while two MDC requests in flight" +test_53f() { + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + mcreate $DIR/${tdir}-2/f & + open_pid=$! + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x8000013b" + kill -USR1 $close_pid + cancel_lru_locks MDC + + replay_barrier_nodf mds + fail_nodf mds + wait $open_pid || return 1 + sleep 2 + #close should be gone + [ -d /proc/$close_pid ] && return 2 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4 + rm -rf $DIR/${tdir}-* +} +run_test 53f "|X| open reply and close reply while two MDC requests in flight" + +test_53g() { + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + mcreate $DIR/${tdir}-2/f & + open_pid=$! + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + + do_facet mds "sysctl -w lustre.fail_loc=0" + replay_barrier_nodf mds + fail_nodf mds + wait $open_pid || return 1 + sleep 2 + # close should be gone + [ -d /proc/$close_pid ] && return 2 + + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4 + rm -rf $DIR/${tdir}-* +} +run_test 53g "|X| drop open reply and close request while close and open are both in flight" + test_53h() { mkdir -p $DIR/${tdir}-1 mkdir -p $DIR/${tdir}-2