diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 99c3a94462d28543f24bf6394283a92058540da1..ae83eb2a9f8434192e60da1a4bc0cdc45b850a43 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -275,6 +275,16 @@ Description: service threads may hog cpus when ther eis a lot of requests Details : Insert cond_resched to give other threads a chance to use some of the cpu +Severity : normal +Frequency : rare +Bugzilla : 12086 +Description: the cat log was not initialized in recovery +Details : When mds(mgs) does recovery, the tgt_count might be zero, so the + unlink log on mds will not be initialized until mds post + recovery. Also, in mds post recovery, the unlink log + initialization is done asynchronously, so there is a race + between adding an unlink log and unlink log initialization. + -------------------------------------------------------------------------------- 2007-05-03 Cluster File Systems, Inc. <info@clusterfs.com> diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index a411b89035917db62e1b165c9420ca2802786eb6..869661155ad46997ceeacdbf79281eae4a470ee3 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -235,6 +235,13 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) "%d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize, stripes); + /* If we added a target we have to reconnect the llogs */ + /* We only _need_ to do this at first add (idx), or the first time + after recovery. However, it should now be safe to call anytime. */ + mutex_down(&obd->obd_dev_sem); + llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); + mutex_up(&obd->obd_dev_sem); + out: OBD_FREE(ld, sizeof(*ld)); RETURN(rc); @@ -292,14 +299,6 @@ static int mds_lov_update_mds(struct obd_device *obd, mds->mds_lov_objids[idx], idx); } - /* If we added a target we have to reconnect the llogs */ - /* We only _need_ to do this at first add (idx), or the first time - after recovery. However, it should now be safe to call anytime. 
 */ - CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx); - mutex_down(&obd->obd_dev_sem); - llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, uuid); - mutex_up(&obd->obd_dev_sem); - RETURN(rc); } diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 32ffd8bf8d1839bdc7e479c33d12feb702ba75be..73f6cabfb6562a3c79c8dd81d4af99ff1b341a4d 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1141,6 +1141,19 @@ test_59() { } run_test 59 "test log_commit_thread vs filter_destroy race" +# race between add unlink llog vs cat log init in post_recovery (only for b1_6) +# bug 12086: there should be no oops and no "No ctxt" error for this test +test_60() { + mkdir $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 200 + replay_barrier mds + unlinkmany $DIR/$tdir/$tfile-%d 0 100 + fail mds + unlinkmany $DIR/$tdir/$tfile-%d 100 100 + local no_ctxt=`dmesg | grep "No ctxt"` + [ -z "$no_ctxt" ] || error "ctxt is not initialized in recovery" +} +run_test 60 "test llog post recovery init vs llog unlink" equals_msg `basename $0`: test complete, cleaning up $CLEANUP