From a7520963267b0d9dfaf93199df7179737c42f61f Mon Sep 17 00:00:00 2001 From: shadow <shadow> Date: Tue, 5 Aug 2008 08:53:37 +0000 Subject: [PATCH] don't fail open with -ERANGE. If a client connects before the MDS knows the real OST count, getting the LOV EA can fail because the MDS has not allocated a large enough buffer for the LOV EA. Branch b1_6 b=16080 i=tappro i=johann --- lustre/ChangeLog | 7 ++ lustre/include/obd.h | 7 +- lustre/lov/lov_obd.c | 138 ++++++++++++---------- lustre/mds/mds_internal.h | 1 + lustre/mds/mds_lov.c | 226 ++++++++++++++++++++---------------- lustre/mds/mds_open.c | 5 +- lustre/tests/conf-sanity.sh | 5 - 7 files changed, 220 insertions(+), 169 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 9ace837dd4..440c9453aa 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -38,6 +38,13 @@ tbd Sun Microsystems, Inc. * Output of lfs quota has been made less detailed by default, old (verbose) output can be obtained by using -v option. +Severity : normal +Bugzilla : 16080 +Description: don't fail open with -ERANGE +Details : if a client connects before the MDS knows the real OST count, + getting the LOV EA can fail because the MDS has not allocated a + large enough buffer for the LOV EA.
+ Severity : normal Frequency : only for filesystems larger than 8TB Bugzilla : 16101 diff --git a/lustre/include/obd.h b/lustre/include/obd.h index e18dc6f3c7..a21c90e57f 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -525,6 +525,8 @@ struct mds_obd { mds_fl_acl:1, mds_fl_cfglog:1, mds_fl_synced:1, + mds_fl_target:1, /* mds have one or + * more targets */ mds_evict_ost_nids:1; uid_t mds_squash_uid; @@ -773,8 +775,9 @@ enum obd_notify_event { OBD_NOTIFY_CONFIG }; -#define CONFIG_LOG 0x1 /* finished processing config log */ -#define CONFIG_SYNC 0x2 /* mdt synced 1 ost */ +#define CONFIG_LOG 0x1 /* finished processing config log */ +#define CONFIG_SYNC 0x2 /* mdt synced 1 ost */ +#define CONFIG_TARGET 0x4 /* one target is added */ /* * Data structure used to pass obd_notify()-event to non-obd listeners (llite diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 5c26d48beb..fc847c2785 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -191,6 +191,68 @@ static int lov_unregister_lock_cancel_cb(struct obd_export *exp, return rc; } +static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, + int activate); + +static int lov_notify(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev, void *data) +{ + int rc = 0; + ENTRY; + + if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { + struct obd_uuid *uuid; + + LASSERT(watched); + + if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + CERROR("unexpected notification of %s %s!\n", + watched->obd_type->typ_name, + watched->obd_name); + RETURN(-EINVAL); + } + uuid = &watched->u.cli.cl_target_uuid; + + /* Set OSC as active before notifying the observer, so the + * observer can use the OSC normally. + */ + rc = lov_set_osc_active(obd, uuid, ev == OBD_NOTIFY_ACTIVE); + if (rc) { + CERROR("%sactivation of %s failed: %d\n", + (ev == OBD_NOTIFY_ACTIVE) ? 
"" : "de", + obd_uuid2str(uuid), rc); + RETURN(rc); + } + } + + /* Pass the notification up the chain. */ + if (watched) { + rc = obd_notify_observer(obd, watched, ev, data); + } else { + /* NULL watched means all osc's in the lov (only for syncs) */ + struct lov_obd *lov = &obd->u.lov; + struct obd_device *tgt_obd; + int i; + lov_getref(obd); + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + if (!lov->lov_tgts[i]) + continue; + tgt_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); + rc = obd_notify_observer(obd, tgt_obd, ev, data); + if (rc) { + CERROR("%s: notify %s of %s failed %d\n", + obd->obd_name, + obd->obd_observer->obd_name, + tgt_obd->obd_name, rc); + break; + } + } + lov_putref(obd); + } + + RETURN(rc); +} + #define MAX_STRING_SIZE 128 static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) @@ -355,7 +417,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, /* Why should there ever be more than 1 connect? 
*/ lov->lov_connects++; LASSERT(lov->lov_connects == 1); - + memset(&lov->lov_ocd, 0, sizeof(lov->lov_ocd)); if (data) lov->lov_ocd = *data; @@ -373,9 +435,19 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, obd->obd_name, i, rc); continue; } + /* connect to administrative disabled ost */ + if (!lov->lov_tgts[i]->ltd_exp) + continue; + + rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd, + OBD_NOTIFY_ACTIVE, (void *)&i); + if (rc) { + CERROR("%s error sending notify %d\n", + obd->obd_name, rc); + } } lov_putref(obd); - + RETURN(0); } @@ -536,64 +608,6 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, RETURN(rc); } -static int lov_notify(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data) -{ - int rc = 0; - ENTRY; - - if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { - struct obd_uuid *uuid; - - LASSERT(watched); - - if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { - CERROR("unexpected notification of %s %s!\n", - watched->obd_type->typ_name, - watched->obd_name); - RETURN(-EINVAL); - } - uuid = &watched->u.cli.cl_target_uuid; - - /* Set OSC as active before notifying the observer, so the - * observer can use the OSC normally. - */ - rc = lov_set_osc_active(obd, uuid, ev == OBD_NOTIFY_ACTIVE); - if (rc) { - CERROR("%sactivation of %s failed: %d\n", - (ev == OBD_NOTIFY_ACTIVE) ? "" : "de", - obd_uuid2str(uuid), rc); - RETURN(rc); - } - } - - /* Pass the notification up the chain. 
*/ - if (watched) { - rc = obd_notify_observer(obd, watched, ev, data); - } else { - /* NULL watched means all osc's in the lov (only for syncs) */ - struct lov_obd *lov = &obd->u.lov; - struct obd_device *tgt_obd; - int i; - lov_getref(obd); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i]) - continue; - tgt_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); - rc = obd_notify_observer(obd, tgt_obd, ev, data); - if (rc) { - CERROR("%s: notify %s of %s failed %d\n", - obd->obd_name, - obd->obd_observer->obd_name, - tgt_obd->obd_name, rc); - break; - } - } - lov_putref(obd); - } - - RETURN(rc); -} static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, __u32 index, int gen, int active) @@ -689,6 +703,10 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, if (rc) GOTO(out, rc); + /* connect to administrative disabled ost */ + if (!tgt->ltd_exp) + GOTO(out, rc = 0); + rc = lov_notify(obd, tgt->ltd_exp->exp_obd, active ? OBD_NOTIFY_ACTIVE : OBD_NOTIFY_INACTIVE, (void *)&index); diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 32b0a20ebb..c57ea74e7a 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -223,6 +223,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name); int mds_lov_disconnect(struct obd_device *obd); int mds_lov_write_objids(struct obd_device *obd); +int mds_lov_prepare_objids(struct obd_device *obd, struct lov_mds_md *lmm); void mds_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm); int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index ad4db6ec13..6107d0485e 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -165,6 +165,77 @@ void mds_lov_destroy_objids(struct obd_device *obd) } EXPORT_SYMBOL(mds_lov_destroy_objids); +/** + * currently exist two ways for know about ost count and max ost index. 
+ * first - after ost is connected to mds and sync process finished + * second - get from lmm in recovery process, in case when mds not have configs, + * and ost isn't registered in mgs. + * + * \param mds pointer to mds structure + * \param index maxium ost index + * + * \retval -ENOMEM is not hame memory for new page + * \retval 0 is update passed + */ +static int mds_lov_update_max_ost(struct mds_obd *mds, obd_id index) +{ + __u32 page = index / OBJID_PER_PAGE(); + obd_id *data = mds->mds_lov_page_array[page]; + + if (data == NULL) { + OBD_ALLOC(data, MDS_LOV_ALLOC_SIZE); + if (data == NULL) + RETURN(-ENOMEM); + + mds->mds_lov_page_array[page] = data; + } + + /* XXX mds_lov_objid_count max index (not count) now. this need fix in + * lov first */ + if (index > mds->mds_lov_objid_count) { + __u32 off = index % OBJID_PER_PAGE(); + __u32 stripes; + + mds->mds_lov_objid_count = index; + mds->mds_lov_objid_lastpage = page; + mds->mds_lov_objid_lastidx = off; + + stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT, + index); + + mds->mds_max_mdsize = lov_mds_md_size(stripes); + mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie); + CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize for %d stripes: " + "%d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize, + stripes); + } + EXIT; + return 0; +} + +int mds_lov_prepare_objids(struct obd_device *obd, struct lov_mds_md *lmm) +{ + int rc = 0; + __u32 j; + + /* if we create file without objects - lmm is NULL */ + if (lmm == NULL) + return 0; + + mutex_down(&obd->obd_dev_sem); + for (j = 0; j < le32_to_cpu(lmm->lmm_stripe_count); j++) { + __u32 i = le32_to_cpu(lmm->lmm_objects[j].l_ost_idx); + if (mds_lov_update_max_ost(&obd->u.mds, i)) { + rc = -ENOMEM; + break; + } + } + mutex_up(&obd->obd_dev_sem); + + RETURN(rc); +} +EXPORT_SYMBOL(mds_lov_prepare_objids); + void mds_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm) { struct mds_obd *mds = &obd->u.mds; @@ -176,13 +247,15 @@ void 
mds_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm) return; for (j = 0; j < le32_to_cpu(lmm->lmm_stripe_count); j++) { - int i = le32_to_cpu(lmm->lmm_objects[j].l_ost_idx); + __u32 i = le32_to_cpu(lmm->lmm_objects[j].l_ost_idx); obd_id id = le64_to_cpu(lmm->lmm_objects[j].l_object_id); - int page = i / OBJID_PER_PAGE(); - int idx = i % OBJID_PER_PAGE(); - obd_id *data = mds->mds_lov_page_array[page]; + __u32 page = i / OBJID_PER_PAGE(); + __u32 idx = i % OBJID_PER_PAGE(); + obd_id *data; + + data = mds->mds_lov_page_array[page]; - CDEBUG(D_INODE,"update last object for ost %d" + CDEBUG(D_INODE,"update last object for ost %u" " - new "LPU64" old "LPU64"\n", i, id, data[idx]); if (id > data[idx]) { data[idx] = id; @@ -190,6 +263,7 @@ void mds_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm) } } EXIT; + return; } EXPORT_SYMBOL(mds_lov_update_objids); @@ -211,15 +285,15 @@ static int mds_lov_read_objids(struct obd_device *obd) page = (size/(OBJID_PER_PAGE()*sizeof(obd_id)))+1; CDEBUG(D_INFO, "file size %d pages %d\n", (int)size, page); for(i=0; i < page; i++) { - obd_id *data = mds->mds_lov_page_array[i]; + obd_id *data; loff_t off_old = off; - LASSERT(data == NULL); - OBD_ALLOC(data, MDS_LOV_ALLOC_SIZE); - if (data == NULL) - GOTO(out, rc = -ENOMEM); - - mds->mds_lov_page_array[i] = data; + LASSERT(mds->mds_lov_page_array[i] == NULL); + if (mds_lov_update_max_ost(mds, i)) { + CERROR("Can't update mds data\n"); + GOTO(out, rc = -EIO); + } + data = mds->mds_lov_page_array[i]; rc = fsfilt_read_record(obd, mds->mds_lov_objid_filp, data, OBJID_PER_PAGE()*sizeof(obd_id), &off); @@ -232,12 +306,7 @@ static int mds_lov_read_objids(struct obd_device *obd) count += (off-off_old)/sizeof(obd_id); } - mds->mds_lov_objid_count = count; - if (count) { - count --; - mds->mds_lov_objid_lastpage = count / OBJID_PER_PAGE(); - mds->mds_lov_objid_lastidx = count % OBJID_PER_PAGE(); - } + CDEBUG(D_INFO, "Read %u - %u %u objid\n", count, 
mds->mds_lov_objid_lastpage, mds->mds_lov_objid_lastidx); out: @@ -283,7 +352,7 @@ int mds_lov_write_objids(struct obd_device *obd) EXPORT_SYMBOL(mds_lov_write_objids); static int mds_lov_get_objid(struct obd_device * obd, - __u32 idx) + obd_id idx) { struct mds_obd *mds = &obd->u.mds; unsigned int page; @@ -294,15 +363,8 @@ static int mds_lov_get_objid(struct obd_device * obd, page = idx / OBJID_PER_PAGE(); off = idx % OBJID_PER_PAGE(); - data = mds->mds_lov_page_array[page]; - if (data == NULL) { - OBD_ALLOC(data, MDS_LOV_ALLOC_SIZE); - if (data == NULL) - GOTO(out, rc = -ENOMEM); - - mds->mds_lov_page_array[page] = data; - } + data = mds->mds_lov_page_array[page]; if (data[off] == 0) { /* We never read this lastid; ask the osc */ struct obd_id_info lastid; @@ -315,14 +377,9 @@ static int mds_lov_get_objid(struct obd_device * obd, if (rc) GOTO(out, rc); - if (idx > mds->mds_lov_objid_count) { - mds->mds_lov_objid_count = idx; - mds->mds_lov_objid_lastpage = page; - mds->mds_lov_objid_lastidx = off; - } cfs_bitmap_set(mds->mds_lov_page_dirty, page); } - CDEBUG(D_INFO, "idx %d - %p - %d/%d - "LPU64"\n", + CDEBUG(D_INFO, "idx "LPU64" - %p - %d/%d - "LPU64"\n", idx, data, page, off, data[off]); out: RETURN(rc); @@ -394,7 +451,7 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) { struct mds_obd *mds = &obd->u.mds; struct lov_desc *ld; - __u32 stripes, valsize = sizeof(mds->mds_lov_desc); + __u32 valsize = sizeof(mds->mds_lov_desc); int rc = 0; ENTRY; @@ -413,14 +470,8 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) CDEBUG(D_CONFIG, "updated lov_desc, tgt_count: %d\n", mds->mds_lov_desc.ld_tgt_count); - stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT, - mds->mds_lov_desc.ld_tgt_count); - - mds->mds_max_mdsize = lov_mds_md_size(stripes); - mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie); - CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize for %d stripes: " - "%d/%d\n", mds->mds_max_mdsize, 
mds->mds_max_cookiesize, - stripes); + if (mds_lov_update_max_ost(mds, mds->mds_lov_desc.ld_tgt_count)) + GOTO(out, rc = -ENOMEM); /* If we added a target we have to reconnect the llogs */ /* We only _need_ to do this at first add (idx), or the first time @@ -512,8 +563,25 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) mds->mds_osc_obd = class_name2obd(lov_name); if (!mds->mds_osc_obd) { CERROR("MDS cannot locate LOV %s\n", lov_name); - mds->mds_osc_obd = ERR_PTR(-ENOTCONN); - RETURN(-ENOTCONN); + GOTO(error_exit, rc = -ENOTCONN); + } + + mutex_down(&obd->obd_dev_sem); + rc = mds_lov_read_objids(obd); + mutex_up(&obd->obd_dev_sem); + if (rc) { + CERROR("cannot read lov_objids: rc = %d\n", rc); + GOTO(error_exit, rc); + } + + /* Deny new client connections until we are sure we have some OSTs */ + obd->obd_no_conn = 1; + + rc = obd_register_observer(mds->mds_osc_obd, obd); + if (rc) { + CERROR("MDS cannot register as observer of LOV %s (%d)\n", + lov_name, rc); + GOTO(error_exit, rc); } OBD_ALLOC(data, sizeof(*data)); @@ -531,64 +599,16 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) OBD_FREE(data, sizeof(*data)); if (rc) { CERROR("MDS cannot connect to LOV %s (%d)\n", lov_name, rc); - mds->mds_osc_obd = ERR_PTR(rc); - RETURN(rc); + GOTO(error_exit, rc); } mds->mds_osc_exp = class_conn2export(&conn); + /* we not want postrecov in case clean fs, in other cases postrecov will + * be called from ldlm. 
otherwise we can call postrecov twice - in case + * short recovery */ - rc = obd_register_observer(mds->mds_osc_obd, obd); - if (rc) { - CERROR("MDS cannot register as observer of LOV %s (%d)\n", - lov_name, rc); - GOTO(err_discon, rc); - } - - /* Deny new client connections until we are sure we have some OSTs */ - obd->obd_no_conn = 1; - - mutex_down(&obd->obd_dev_sem); - rc = mds_lov_read_objids(obd); - if (rc) { - CERROR("cannot read %s: rc = %d\n", "lov_objids", rc); - GOTO(err_reg, rc); - } - - rc = mds_lov_update_desc(obd, mds->mds_osc_exp); - if (rc) - GOTO(err_reg, rc); - - /* If we're mounting this code for the first time on an existing FS, - * we need to populate the objids array from the real OST values */ - if (mds->mds_lov_desc.ld_tgt_count > mds->mds_lov_objid_count) { - __u32 i = mds->mds_lov_objid_count; - for(; i <= mds->mds_lov_desc.ld_tgt_count; i++) { - rc = mds_lov_get_objid(obd, i); - if (rc != 0) - break; - } - if (rc == 0) - rc = mds_lov_write_objids(obd); - if (rc) - CERROR("got last objids from OSTs, but error " - "in update objids file: %d\n", rc); - } - - mutex_up(&obd->obd_dev_sem); - - /* I want to see a callback happen when the OBD moves to a - * "For General Use" state, and that's when we'll call - * set_nextid(). The class driver can help us here, because - * it can use the obd_recovering flag to determine when the - * the OBD is full available. 
*/ - if (!obd->obd_recovering) - rc = mds_postrecov(obd); RETURN(rc); -err_reg: - mutex_up(&obd->obd_dev_sem); - obd_register_observer(mds->mds_osc_obd, NULL); -err_discon: - obd_disconnect(mds->mds_osc_exp); +error_exit: mds->mds_osc_exp = NULL; mds->mds_osc_obd = ERR_PTR(rc); RETURN(rc); @@ -827,7 +847,10 @@ static void mds_allow_cli(struct obd_device *obd, unsigned long flag) obd->u.mds.mds_fl_cfglog = 1; if (flag & CONFIG_SYNC) obd->u.mds.mds_fl_synced = 1; - if (obd->u.mds.mds_fl_cfglog /* bz11778: && obd->u.mds.mds_fl_synced */) + if (flag & CONFIG_TARGET) + obd->u.mds.mds_fl_target = 1; + if (obd->u.mds.mds_fl_cfglog && obd->u.mds.mds_fl_target + /* bz11778: && obd->u.mds.mds_fl_synced */) /* Open for clients */ obd->obd_no_conn = 0; } @@ -996,9 +1019,13 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, int rc = 0; ENTRY; + CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev); + switch (ev) { /* We only handle these: */ case OBD_NOTIFY_ACTIVE: + /* lov want one or more _active_ targets for work */ + mds_allow_cli(obd, CONFIG_TARGET); case OBD_NOTIFY_SYNC: case OBD_NOTIFY_SYNC_NONBLOCK: break; @@ -1008,8 +1035,6 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, RETURN(0); } - CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev); - if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) != 0) { CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, watched->obd_name); @@ -1030,7 +1055,6 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, RETURN(rc); } - LASSERT(!llog_ctxt_null(obd, LLOG_MDS_OST_ORIG_CTXT)); rc = mds_lov_start_synchronize(obd, watched, data, !(ev == OBD_NOTIFY_SYNC)); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 041b991166..39dd754fe0 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -775,6 +775,9 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, RETURN(rc); } + if ((rc = 
mds_lov_prepare_objids(obd,lmm)) != 0) + RETURN(rc); + intent_set_disposition(rep, DISP_OPEN_OPEN); mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, flags, req); if (IS_ERR(mfd)) @@ -785,7 +788,7 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, mds_lov_update_objids(obd, lmm); - if (rc) /* coverity[deadcode] */ + if (rc) mds_mfd_unlink(mfd, 1); mds_mfd_put(mfd); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index df554395bf..740bc36dca 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -799,11 +799,6 @@ test_22() { #reformat to remove all logs reformat start_mds - echo Client mount before any osts are in the logs - mount_client $MOUNT - check_mount && return 41 - umount_client $MOUNT - pass echo Client mount with ost in logs, but none running start_ost -- GitLab