From d911b48194ae1913a740c18cd3040f9945ba2520 Mon Sep 17 00:00:00 2001 From: alex <alex> Date: Fri, 25 Jun 2004 09:26:06 +0000 Subject: [PATCH] changes to get test #46 working - bunch of fixes to handle dir splitting over N < MDS nodes - fixes to MDS code to handle dir splitting over N < MDS nodes properly - mds_try_to_split() should ignore requests to split dir over 1 node - checks for NULL exports in LMV - checks for wrong -ERESTART handling: because of software errors it is possible that lmv will loop forever. we need to assert on this. - one more sanity test 3c to test lfs dirstripe and activity on such dirs --- lustre/lmv/lmv_intent.c | 15 +++++--- lustre/lmv/lmv_obd.c | 74 +++++++++++++++++++++++++------------- lustre/mds/handler.c | 14 ++++---- lustre/mds/mds_lmv.c | 5 ++- lustre/mds/mds_open.c | 10 +++--- lustre/mds/mds_reint.c | 10 +++--- lustre/tests/sanity-lmv.sh | 22 ++++++++++++ 7 files changed, 104 insertions(+), 46 deletions(-) diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c index d860e86d4c..e012cbb8e2 100644 --- a/lustre/lmv/lmv_intent.c +++ b/lustre/lmv/lmv_intent.c @@ -130,25 +130,28 @@ int lmv_intent_open(struct obd_export *exp, struct ll_uctxt *uctxt, struct ll_fid rpfid = *pfid; struct lmv_obj *obj; struct mea *mea; - int rc, mds; + int rc, mds, loop = 0; ENTRY; /* IT_OPEN is intended to open (and create, possible) an object.
Parent * (pfid) may be splitted dir */ repeat: + LASSERT(++loop <= 2); mds = rpfid.mds; obj = lmv_grab_obj(obd, &rpfid); if (obj) { /* directory is already splitted, so we have to forward * request to the right MDS */ mds = raw_name2idx(obj->objcount, (char *)name, len); - CDEBUG(D_OTHER, "forward to MDS #%u\n", mds); + CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds, + (unsigned long) rpfid.mds, (unsigned long) rpfid.id, + (unsigned long) rpfid.generation); rpfid = obj->objs[mds].fid; lmv_put_obj(obj); } - rc = md_intent_lock(lmv->tgts[mds].ltd_exp, uctxt, &rpfid, name, + rc = md_intent_lock(lmv->tgts[rpfid.mds].ltd_exp, uctxt, &rpfid, name, len, lmm, lmmsize, cfid, it, flags, reqp, cb_blocking); if (rc == -ERESTART) { @@ -244,6 +247,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct ll_uctxt *uctxt, /* in fact, we need not this with current intent_lock(), * but it may change some day */ rpfid = obj->objs[mds].fid; + mds = rpfid.mds; lmv_put_obj(obj); } rc = md_intent_lock(lmv->tgts[mds].ltd_exp, uctxt, &rpfid, name, @@ -278,6 +282,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct ll_uctxt *uctxt, /* directory is already splitted. calculate mds */ mds = raw_name2idx(obj->objcount, (char *) name, len); rpfid = obj->objs[mds].fid; + mds = rpfid.mds; lmv_put_obj(obj); CDEBUG(D_OTHER, "forward to MDS #%u (slave %lu/%lu/%lu)\n", @@ -473,7 +478,7 @@ int lmv_intent_lookup(struct obd_export *exp, struct ll_uctxt *uctxt, struct ll_fid rpfid = *pfid; struct lmv_obj *obj; struct mea *mea; - int rc, mds; + int rc, mds, loop = 0; ENTRY; /* IT_LOOKUP is intended to produce name -> fid resolving (let's call @@ -505,6 +510,7 @@ int lmv_intent_lookup(struct obd_export *exp, struct ll_uctxt *uctxt, mds = pfid->mds; repeat: + LASSERT(++loop <= 2); /* this is lookup. during lookup we have to update all the attributes, * because returned values will be put in struct inode */ @@ -514,6 +520,7 @@ repeat: /* directory is already splitted. 
calculate mds */ mds = raw_name2idx(obj->objcount, (char *)name, len); rpfid = obj->objs[mds].fid; + mds = rpfid.mds; } lmv_put_obj(obj); } diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 3819209511..b9e67b1925 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -395,8 +395,12 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, for (i = 0; i < lmv->desc.ld_tgt_count; i++) { int err; - err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, - len, karg, uarg); + if (lmv->tgts[i].ltd_exp == NULL) { + CWARN("%s: NULL export for %d\n", obddev->obd_name, i); + continue; + } + + err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg); if (err) { if (lmv->tgts[i].active) { CERROR("error: iocontrol MDC %s on MDT" @@ -489,6 +493,11 @@ static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs, RETURN(rc); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + if (lmv->tgts[i].ltd_exp == NULL) { + CWARN("%s: NULL export for %d\n", obd->obd_name, i); + continue; + } + rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, &temp, max_age); if (rc) { CERROR("can't stat MDS #%d (%s)\n", i, @@ -578,6 +587,12 @@ static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid, for (i = 0; i < obj->objcount; i++) { + if (lmv->tgts[i].ltd_exp == NULL) { + CWARN("%s: NULL export for %d\n", + obd->obd_name, i); + continue; + } + /* skip master obj. */ if (fid_equal(&obj->fid, &obj->objs[i].fid)) continue; @@ -644,6 +659,7 @@ static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid, if (obj) { /* directory is splitted. look for right mds for this name. 
*/ mds = raw_name2idx(obj->objcount, name, len); + mds = obj->objs[mds].fid.mds; lmv_put_obj(obj); } rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, cfid, it, data); @@ -737,7 +753,7 @@ int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data, struct lmv_obd *lmv = &obd->u.lmv; struct mds_body *body; struct lmv_obj *obj; - int rc, mds; + int rc, mds, loop = 0; ENTRY; rc = lmv_check_connect(obd); @@ -747,6 +763,7 @@ int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data, if (!lmv->desc.ld_active_tgt_count) RETURN(-EIO); repeat: + LASSERT(++loop <= 2); obj = lmv_grab_obj(obd, &op_data->fid1); if (obj) { mds = raw_name2idx(obj->objcount, op_data->name, @@ -819,13 +836,13 @@ int lmv_enqueue_slaves(struct obd_export *exp, int locktype, LASSERT(mea != NULL); for (i = 0; i < mea->mea_count; i++) { - if (lmv->tgts[i].ltd_exp == NULL) - continue; - memset(&data2, 0, sizeof(data2)); data2.fid1 = mea->mea_fids[i]; mds = data2.fid1.mds; + if (lmv->tgts[mds].ltd_exp == NULL) + continue; + rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it, lockmode, &data2, lockh + i, lmm, lmmsize, cb_completion, cb_blocking, cb_data); @@ -909,7 +926,7 @@ int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid, struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; struct ll_fid rfid = *fid; - int rc, mds = fid->mds; + int rc, mds = fid->mds, loop = 0; struct mds_body *body; struct lmv_obj *obj; ENTRY; @@ -917,6 +934,7 @@ int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid, if (rc) RETURN(rc); repeat: + LASSERT(++loop <= 2); obj = lmv_grab_obj(obd, fid); if (obj) { /* directory is splitted. 
look for right mds for this name */ @@ -931,7 +949,7 @@ repeat: (unsigned long)rfid.mds, (unsigned long)rfid.id, (unsigned long)rfid.generation); - rc = md_getattr_name(lmv->tgts[mds].ltd_exp, &rfid, filename, + rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid, filename, namelen, valid, ea_size, request); if (rc == 0) { /* this could be cross-node reference. in this case all we have @@ -1099,10 +1117,10 @@ int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data, { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; - int rc = 0, i = data->fid1.mds; struct ptlrpc_request *req; struct mds_body *body; struct lmv_obj *obj; + int rc = 0, i; ENTRY; rc = lmv_check_connect(obd); @@ -1120,8 +1138,8 @@ int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data, for (i = 0; i < obj->objcount; i++) { data->fid1 = obj->objs[i].fid; - rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, - ea, ealen, ea2, ea2len, &req); + rc = md_setattr(lmv->tgts[data->fid1.mds].ltd_exp, data, + iattr, ea, ealen, ea2, ea2len, &req); if (fid_equal(&obj->fid, &obj->objs[i].fid)) { /* this is master object and this request should @@ -1136,14 +1154,14 @@ int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data, } lmv_put_obj(obj); } else { - LASSERT(i < lmv->desc.ld_tgt_count); - rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea, - ealen, ea2, ea2len, request); + LASSERT(data->fid1.mds < lmv->desc.ld_tgt_count); + rc = md_setattr(lmv->tgts[data->fid1.mds].ltd_exp, data, + iattr, ea, ealen, ea2, ea2len, request); if (rc == 0) { body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body)); LASSERT(body != NULL); - LASSERT(body->mds == i); + LASSERT(body->mds == data->fid1.mds); } } RETURN(rc); @@ -1161,7 +1179,8 @@ int lmv_sync(struct obd_export *exp, struct ll_fid *fid, if (rc) RETURN(rc); - rc = md_sync(lmv->tgts[0].ltd_exp, fid, request); + CWARN("%s: ->m_sync() isn't implemented yet\n", obd->obd_name); + rc = 
md_sync(lmv->tgts[fid->mds].ltd_exp, fid, request); RETURN(rc); } @@ -1282,13 +1301,14 @@ int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data, LASSERT(mea != NULL); for (i = 0; i < mea->mea_count; i++) { - if (lmv->tgts[i].ltd_exp == NULL) - continue; - memset(&data2, 0, sizeof(data2)); data2.fid1 = mea->mea_fids[i]; data2.create_mode = MDS_MODE_DONT_LOCK | S_IFDIR; mds = data2.fid1.mds; + + if (lmv->tgts[mds].ltd_exp == NULL) + continue; + rc = md_unlink(lmv->tgts[mds].ltd_exp, &data2, req); CDEBUG(D_OTHER, "unlink slave %lu/%lu/%lu -> %d\n", (unsigned long) mea->mea_fids[i].mds, @@ -1369,6 +1389,7 @@ struct obd_device *lmv_get_real_obd(struct obd_export *exp, rc = lmv_check_connect(obd); if (rc) RETURN(ERR_PTR(rc)); +#warning "we need well-desgined readdir() implementation to remove this mess" obd = lmv->tgts[0].ltd_exp->exp_obd; EXIT; return obd; @@ -1396,6 +1417,11 @@ int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize) RETURN(0); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + if (lmv->tgts[i].ltd_exp == NULL) { + CWARN("%s: NULL export for %d\n", obd->obd_name, i); + continue; + } + rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize); if (rc) { CERROR("obd_init_ea_size() failed on MDT target %d, " @@ -1436,9 +1462,10 @@ int lmv_obd_create(struct obd_export *exp, struct obdo *oa, { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; - struct mea *mea; int i, c, rc = 0; + struct mea *mea; struct ll_fid mfid; + int lcount; ENTRY; rc = lmv_check_connect(obd); @@ -1473,9 +1500,8 @@ int lmv_obd_create(struct obd_export *exp, struct obdo *oa, mea->mea_count = lmv->desc.ld_tgt_count; mea->mea_master = -1; - - for (i = 0, c = 0; c < mea->mea_count && - i < lmv->desc.ld_tgt_count; i++) { + lcount = lmv->desc.ld_tgt_count; + for (i = 0, c = 0; c < mea->mea_count && i < lcount; i++) { struct lov_stripe_md obj_md; struct lov_stripe_md *obj_mdp = &obj_md; @@ -1510,8 +1536,6 @@ int 
lmv_obd_create(struct obd_export *exp, struct obdo *oa, c++; CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n", i, oa->o_id, oa->o_generation); - CDEBUG(D_ERROR, "dirobj at mds %d: "LPU64"/%u\n", - i, oa->o_id, oa->o_generation); } LASSERT(c == mea->mea_count); CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index dd203ae174..5f9f3266fe 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -304,6 +304,7 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, if (!inode) RETURN(ERR_PTR(-ENOENT)); +#warning "I think we need something another here -bzzz" #if 0 /* here we disabled generation check, as root inode i_generation * of cache mds and real mds are different. */ @@ -821,9 +822,9 @@ int mds_check_mds_num(struct obd_device *obd, struct inode* inode, * should live at this MDS or at another one */ int i; i = mea_name2idx(mea, name, namelen - 1); - if (mea->mea_master != i) { - CERROR("inapropriate MDS(%d) for %s. should be %d\n", - mea->mea_master, name, i); + if (mea->mea_master != mea->mea_fids[i].mds) { + CERROR("inapropriate MDS(%d) for %s. 
should be %d(%d)\n", + mea->mea_master, name, mea->mea_fids[i].mds, i); rc = -ERESTART; } } @@ -1450,10 +1451,11 @@ repeat: rc = fsfilt_set_md(obd, new->d_inode, handle, mea, mealen); up(&new->d_inode->i_sem); OBD_FREE(mea, mealen); + CDEBUG(D_OTHER, "%s: mark non-splittable %lu/%u - %d\n", + obd->obd_name, new->d_inode->i_ino, + new->d_inode->i_generation, flags); } else if (rc == 0 && body->oa.o_easize) { - flags = mds_try_to_split_dir(obd, new, NULL, body->oa.o_easize); - CERROR("%s: splitted %lu/%u - %d\n", obd->obd_name, - new->d_inode->i_ino, new->d_inode->i_generation, flags); + mds_try_to_split_dir(obd, new, NULL, body->oa.o_easize); } cleanup: diff --git a/lustre/mds/mds_lmv.c b/lustre/mds/mds_lmv.c index 8b72d9c186..0b7eeac1be 100644 --- a/lustre/mds/mds_lmv.c +++ b/lustre/mds/mds_lmv.c @@ -537,15 +537,14 @@ int mds_try_to_split_dir(struct obd_device *obd, struct dentry *dentry, return 0; if (rc == MDS_NO_SPLIT_EXPECTED && nstripes == 0) return 0; + if (nstripes && nstripes == 1) + return 0; LASSERT(mea == NULL || *mea == NULL); CDEBUG(D_OTHER, "%s: split directory %u/%lu/%lu\n", obd->obd_name, mds->mds_num, dir->i_ino, (unsigned long) dir->i_generation); - CDEBUG(D_ERROR, "%s: split directory %u/%lu/%lu: %d/%d\n", - obd->obd_name, mds->mds_num, dir->i_ino, - (unsigned long) dir->i_generation, rc, nstripes); if (mea == NULL) mea = &tmea; diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index c0fe7ef2e5..9c86d33d78 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -905,10 +905,12 @@ int mds_open(struct mds_update_record *rec, int offset, * should live at this MDS or at another one */ int i; i = mea_name2idx(mea, rec->ur_name, rec->ur_namelen - 1); - if (mea->mea_master != i) { - CERROR("inapropriate MDS(%d) for %lu/%u:%s. should be %d\n", - mea->mea_master, dparent->d_inode->i_ino, - dparent->d_inode->i_generation, rec->ur_name, i); + if (mea->mea_master != mea->mea_fids[i].mds) { + CERROR("%s: inapropriate MDS(%d) for %lu/%u:%s." 
+ " should be %d(%d)\n", obd->obd_name, + mea->mea_master, dparent->d_inode->i_ino, + dparent->d_inode->i_generation, rec->ur_name, + mea->mea_fids[i].mds, i); GOTO(cleanup, rc = -ERESTART); } } diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 5e7886e631..870ce10390 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -602,10 +602,12 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, * should live at this MDS or at another one */ int i; i = mea_name2idx(mea, rec->ur_name, rec->ur_namelen - 1); - if (mea->mea_master != i) { - CERROR("inapropriate MDS(%d) for %lu/%u:%s. should be %d\n", - mea->mea_master, dparent->d_inode->i_ino, - dparent->d_inode->i_generation, rec->ur_name, i); + if (mea->mea_master != mea->mea_fids[i].mds) { + CERROR("inapropriate MDS(%d) for %lu/%u:%s." + " should be %d(%d)\n", + mea->mea_master, dparent->d_inode->i_ino, + dparent->d_inode->i_generation, rec->ur_name, + mea->mea_fids[i].mds, i); GOTO(cleanup, rc = -ERESTART); } } diff --git a/lustre/tests/sanity-lmv.sh b/lustre/tests/sanity-lmv.sh index c9d6a538e6..29df4305a0 100644 --- a/lustre/tests/sanity-lmv.sh +++ b/lustre/tests/sanity-lmv.sh @@ -298,6 +298,28 @@ test_3a() { } run_test 3a " dir splitting with cross-ref =============================" +test_3b() { + mkdir $DIR/3b1 || error + createmany -m $DIR/3b1/f 5000 || error + rm -rf $DIR/3b1 || error +} +run_test 3b " dir splitting via createmany -m =============================" + +test_3c() { + mkdir $DIR/3c1 || error + echo "MDS nodes: $MDSCOUNT" + for j in `seq 3`; do + for i in `seq 10`; do + $LFS dirstripe $DIR/3c1/d-${j}-${i} $j || error + createmany -m $DIR/3c1/d-${j}-${i}/m 200 || error + createmany -o $DIR/3c1/d-${j}-${i}/o 200 || error + done + done + rm -rf $DIR/3c1 || error +} + +run_test 3c " dir splitting via lfs stripe =============================" + TMPDIR=$OLDTMPDIR TMP=$OLDTMP HOME=$OLDHOME -- GitLab