Commit a336d7c7 authored by Lai Siyao's avatar Lai Siyao Committed by Oleg Drokin

LU-11025 dne: directory restripe and auto split



A dedicated restriper thread is created for each MDT; it performs
three tasks in a loop (a simplified sketch follows the list):
1. If a directory's total sub-file count exceeds the threshold
   (50000 by default, changeable with "lctl set_param
   mdt.*.dir_split_count=N"), split this directory by adding new
   stripes (4 new stripes by default, adjustable with
   "lctl set_param mdt.*.dir_split_delta=N").
2. If a directory stripe LMV is marked 'MIGRATION', migrate sub
   files starting from the current offset, and update the offset to
   the next file.
3. If a directory master LMV is marked 'RESTRIPING', check whether
   the 'MIGRATION' flag has been cleared on all stripe LMVs; if so,
   clear the 'RESTRIPING' flag and update the directory LMV.
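
The following is a minimal, userspace-only sketch of that loop, for
illustration; the real thread lives in the new mdt_restripe.c and
uses kernel lists, locking and wait queues, and the handle_*()
helpers here are hypothetical:

/* build with: cc -o restriper_sketch restriper_sketch.c */
#include <stdbool.h>
#include <stdio.h>

struct dir_work {
	const char	*name;
	bool		 done;
};

/* 1. split a directory whose sub-file count exceeded dir_split_count */
static void handle_auto_split(struct dir_work *d)
{
	printf("split %s: add dir_split_delta new stripes\n", d->name);
	d->done = true;
}

/* 2. migrate sub files of a stripe whose LMV is marked 'MIGRATION' */
static void handle_migrate(struct dir_work *d)
{
	printf("migrate %s: move entries from current offset\n", d->name);
	d->done = true;
}

/* 3. clear 'RESTRIPING' once all stripes finished migration */
static void handle_layout_update(struct dir_work *d)
{
	printf("update %s: clear RESTRIPING, write new LMV\n", d->name);
	d->done = true;
}

int main(void)
{
	struct dir_work split   = { "dir-to-split", false };
	struct dir_work migrate = { "stripe-migrating", false };
	struct dir_work update  = { "dir-updating", false };

	/* the real thread sleeps on a wait queue until work is queued */
	while (!split.done || !migrate.done || !update.done) {
		if (!split.done)
			handle_auto_split(&split);     /* mdr_auto_splitting */
		if (!migrate.done)
			handle_migrate(&migrate);      /* mdr_migrating */
		if (!update.done)
			handle_layout_update(&update); /* mdr_updating */
	}
	return 0;
}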

The previous patch implemented the first part of manual directory
restripe; this patch adds sub-file migration and directory layout
update. Directory auto-split works in a similar way, except that the
first step is also performed by this thread.

Directory auto-split can be enabled/disabled with "lctl set_param
mdt.*.enable_dir_auto_split=[0|1]"; it is turned on by default.

Auto split is triggered at the end of getattr(): since the attr now
contains the dirent count, check whether it exceeds the threshold;
if so, add this directory to the mdr_auto_split list and wake up the
dir restriper thread.

Restripe migration is also triggered in getattr(): if the object is
a directory stripe and its LMV 'MIGRATION' flag is set, add this
object to the mdr_restripe_migrate list and wake up the dir
restriper thread.

Directory layout update is similar: if the current directory is
striped and its LMV 'RESTRIPING' flag is set, add this directory to
the mdr_restripe_update list and wake up the restriper thread. The
three getattr-time checks are sketched below.
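
A condensed sketch of those three getattr()-time checks, assuming a
simplified view of the state involved (the real checks are in
mdt_getattr_internal(); the struct and helper below are illustrative
only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum trigger { NONE, AUTO_SPLIT, RESTRIPE_MIGRATE, RESTRIPE_UPDATE };

struct dir_state {
	bool		is_stripe;	/* shard (LMV_MAGIC_STRIPE) vs master dir */
	bool		restriping;	/* LMV MIGRATION/RESTRIPING flag set */
	bool		auto_split_on;	/* mdt_enable_dir_auto_split */
	bool		is_root;
	uint64_t	dirent_count;	/* la_dirent_count from getattr */
	uint64_t	split_count;	/* mdr_dir_split_count threshold */
	uint32_t	stripe_count;
	uint32_t	mdt_count;
};

static enum trigger getattr_trigger(const struct dir_state *d)
{
	/* directory stripe with MIGRATION set: queue sub-file migration */
	if (d->is_stripe && d->restriping)
		return RESTRIPE_MIGRATE;

	/* striped master dir with RESTRIPING set: queue layout update */
	if (!d->is_stripe && d->restriping)
		return RESTRIPE_UPDATE;

	/* plain dir over threshold: queue for auto split */
	if (d->auto_split_on && !d->is_root &&
	    d->dirent_count > d->split_count &&
	    d->stripe_count < d->mdt_count)
		return AUTO_SPLIT;

	return NONE;
}

int main(void)
{
	struct dir_state d = {
		.auto_split_on = true, .dirent_count = 60000,
		.split_count = 50000, .stripe_count = 1, .mdt_count = 4,
	};

	printf("trigger = %d\n", getattr_trigger(&d)); /* AUTO_SPLIT */
	return 0;
}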

By default restripe migrates dirents only and leaves inodes
unchanged; this can be adjusted with "lctl set_param
mdt.*.dir_restripe_nsonly=[0|1]".

Currently DoM file inode migration is not supported; for such files
only the dirent is migrated, to avoid leaving a directory
migration/restripe unfinished.

Add sanity.sh tests 230o, 230p and 230q; adjust 230j since DoM files
now migrate only the dirent.
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I8c83b42e4acbaab067d0092d0b232de37f956588
Reviewed-on: https://review.whamcloud.com/37284

Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
parent e8347675
......@@ -203,6 +203,8 @@ __out: __ret; \
#ifndef TASK_NOLOAD
#define TASK_IDLE TASK_INTERRUPTIBLE
#define ___wait_event_idle(wq_head, condition, exclusive, ret, cmd) \
({ \
wait_queue_entry_t __wq_entry; \
......
......@@ -171,7 +171,6 @@ struct lu_device_operations {
* \param[in] parent parent object
* \param[in] name lu_name
*
* \retval 0 on success
* \retval 0 0 FID allocated successfully.
* \retval 1 1 FID allocated successfully and new sequence
* requested from seq meta server
......
......@@ -444,32 +444,42 @@ insane:
static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
{
LASSERT(lmv_is_sane2(lmv));
if (!lmv_is_sane2(lmv))
return false;
return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type));
}
static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
{
LASSERT(lmv_is_sane2(lmv));
if (!lmv_is_sane2(lmv))
return false;
return lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
}
static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
{
LASSERT(lmv_is_sane(lmv));
if (!lmv_is_sane(lmv))
return false;
return lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
}
static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
{
LASSERT(lmv_is_sane2(lmv));
if (!lmv_is_sane2(lmv))
return false;
return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
}
static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
{
LASSERT(lmv_is_sane2(lmv));
if (!lmv_is_sane2(lmv))
return false;
return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type)) ||
lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
......
......@@ -204,11 +204,16 @@ struct md_layout_change {
__u32 *mlc_resync_ids;
}; /* file */
struct {
struct md_object *mlc_parent; /* parent obj in plain dir split */
struct md_object *mlc_target; /* target obj in plain dir split */
struct lu_attr *mlc_attr; /* target attr in plain dir split */
const struct lu_name *mlc_name; /* target name in plain dir split */
struct md_op_spec *mlc_spec; /* dir split spec */
/* parent obj in plain dir split */
struct md_object *mlc_parent;
/* target obj in plain dir split */
struct md_object *mlc_target;
/* target attr in plain dir split */
struct lu_attr *mlc_attr;
/* target name in plain dir split */
const struct lu_name *mlc_name;
/* dir split spec */
struct md_op_spec *mlc_spec;
}; /* dir */
};
};
......
......@@ -533,6 +533,24 @@ static inline __kernel_size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
return (size + 7) & ~7;
}
static inline __u16 lu_dirent_type_get(struct lu_dirent *ent)
{
__u16 type = 0;
struct luda_type *lt;
int len = 0;
if (__le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
const unsigned int align = sizeof(struct luda_type) - 1;
len = __le16_to_cpu(ent->lde_namelen);
len = (len + align) & ~align;
lt = (void *)ent->lde_name + len;
type = __le16_to_cpu(lt->lt_type);
}
return type;
}
#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
/**
......@@ -2171,7 +2189,8 @@ struct lmv_mds_md_v1 {
};
#define LMV_DEBUG(mask, lmv, msg) \
CDEBUG(mask, "%s LMV: magic=%#x count=%u index=%u hash=%#x version=%u migrate offset=%u migrate hash=%u.\n", \
CDEBUG(mask, \
"%s LMV: magic=%#x count=%u index=%u hash=%#x version=%u migrate offset=%u migrate hash=%u.\n", \
msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count, \
(lmv)->lmv_master_mdt_index, (lmv)->lmv_hash_type, \
(lmv)->lmv_layout_version, (lmv)->lmv_migrate_offset, \
......
......@@ -175,29 +175,6 @@ void ll_release_page(struct inode *inode, struct page *page,
put_page(page);
}
/**
* return IF_* type for given lu_dirent entry.
* IF_* flag shld be converted to particular OS file type in
* platform llite module.
*/
static u16 ll_dirent_type_get(struct lu_dirent *ent)
{
u16 type = 0;
struct luda_type *lt;
int len = 0;
if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
const unsigned align = sizeof(struct luda_type) - 1;
len = le16_to_cpu(ent->lde_namelen);
len = (len + align) & ~align;
lt = (void *)ent->lde_name + len;
type = IFTODT(le16_to_cpu(lt->lt_type));
}
return type;
}
#ifdef HAVE_DIR_CONTEXT
int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
struct dir_context *ctx)
......@@ -256,7 +233,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
lhash = hash;
fid_le_to_cpu(&fid, &ent->lde_fid);
ino = cl_fid_build_ino(&fid, is_api32);
type = ll_dirent_type_get(ent);
type = IFTODT(lu_dirent_type_get(ent));
/* For ll_nfs_get_name_filldir(), it will try to access
* 'ent' through 'lde_name', so the parameter 'name'
* for 'filldir()' must be part of the 'ent'. */
......
......@@ -901,10 +901,6 @@ static void sa_statahead(struct dentry *parent, const char *name, int len,
EXIT;
}
#ifndef TASK_IDLE
#define TASK_IDLE TASK_INTERRUPTIBLE
#endif
/* async glimpse (agl) thread main function */
static int ll_agl_thread(void *arg)
{
......
......@@ -1606,8 +1606,8 @@ static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
* the FIDs of all shards of the striped directory. */
if (le32_to_cpu(lmv1->lmv_magic) == LMV_MAGIC_V1)
rc = lmv_mds_md_size(
le32_to_cpu(lmv1->lmv_stripe_count),
le32_to_cpu(lmv1->lmv_magic));
le32_to_cpu(lmv1->lmv_stripe_count),
le32_to_cpu(lmv1->lmv_magic));
} else {
lmv1 = buf->lb_buf;
if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
......
......@@ -4040,7 +4040,7 @@ static int mdd_migrate_create(const struct lu_env *env,
RETURN(rc);
}
/* NB: if user issued different migrate command, we can't ajust it silently
/* NB: if user issued different migrate command, we can't adjust it silently
* here, because this command will decide target MDT in subdir migration in
* LMV.
*/
......@@ -4180,8 +4180,7 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj,
GOTO(out, rc = -EINVAL);
GOTO(out, rc = -EALREADY);
}
if (S_ISDIR(attr->la_mode))
nsonly = spec->sp_migrate_nsonly;
nsonly = spec->sp_migrate_nsonly;
} else {
spobj = tpobj;
mdd_object_get(spobj);
......@@ -4646,7 +4645,7 @@ static int mdd_dir_declare_split_plain(const struct lu_env *env,
return rc;
/* tobj mode will be used in lod_declare_xattr_set(), but it's not
* createb yet.
* created yet.
*/
tobj->mod_obj.mo_lu.lo_header->loh_attr |= S_IFDIR;
......
MODULES := mdt
mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o
mdt-objs += mdt_open.o mdt_identity.o mdt_lproc.o mdt_fs.o mdt_som.o
mdt-objs += mdt_lvb.o mdt_hsm.o mdt_mds.o mdt_io.o
mdt-objs += mdt_lvb.o mdt_hsm.o mdt_mds.o mdt_io.o mdt_restripe.o
mdt-objs += mdt_hsm_cdt_actions.o
mdt-objs += mdt_hsm_cdt_requests.o
mdt-objs += mdt_hsm_cdt_client.o
......
......@@ -58,6 +58,7 @@
#include <uapi/linux/lustre/lustre_param.h>
#include <lustre_quota.h>
#include <lustre_swab.h>
#include <lustre_lmv.h>
#include <obd.h>
#include <obd_support.h>
#include <lustre_barrier.h>
......@@ -979,8 +980,8 @@ int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
RETURN(rc);
}
int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
struct md_attr *ma, const char *name)
int __mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
struct md_attr *ma, const char *name)
{
struct md_object *next = mdt_object_child(o);
struct lu_buf *buf = &info->mti_buf;
......@@ -1056,6 +1057,40 @@ got:
return rc;
}
int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
struct md_attr *ma, const char *name)
{
int rc;
if (!info->mti_big_lmm) {
OBD_ALLOC(info->mti_big_lmm, PAGE_SIZE);
if (!info->mti_big_lmm)
return -ENOMEM;
info->mti_big_lmmsize = PAGE_SIZE;
}
if (strcmp(name, XATTR_NAME_LOV) == 0) {
ma->ma_lmm = info->mti_big_lmm;
ma->ma_lmm_size = info->mti_big_lmmsize;
ma->ma_valid &= ~MA_LOV;
} else if (strcmp(name, XATTR_NAME_LMV) == 0) {
ma->ma_lmv = info->mti_big_lmm;
ma->ma_lmv_size = info->mti_big_lmmsize;
ma->ma_valid &= ~MA_LMV;
} else {
LBUG();
}
LASSERT(!info->mti_big_lmm_used);
rc = __mdt_stripe_get(info, o, ma, name);
/* since big_lmm is always used here, clear 'used' flag to avoid
* assertion in mdt_big_xattr_get().
*/
info->mti_big_lmm_used = 0;
return rc;
}
int mdt_attr_get_pfid(struct mdt_thread_info *info, struct mdt_object *o,
struct lu_fid *pfid)
{
......@@ -1103,6 +1138,51 @@ int mdt_attr_get_pfid(struct mdt_thread_info *info, struct mdt_object *o,
RETURN(0);
}
int mdt_attr_get_pfid_name(struct mdt_thread_info *info, struct mdt_object *o,
struct lu_fid *pfid, struct lu_name *lname)
{
struct lu_buf *buf = &info->mti_buf;
struct link_ea_header *leh;
struct link_ea_entry *lee;
int reclen;
int rc;
buf->lb_buf = info->mti_xattr_buf;
buf->lb_len = sizeof(info->mti_xattr_buf);
rc = mo_xattr_get(info->mti_env, mdt_object_child(o), buf,
XATTR_NAME_LINK);
if (rc == -ERANGE) {
rc = mdt_big_xattr_get(info, o, XATTR_NAME_LINK);
buf->lb_buf = info->mti_big_lmm;
buf->lb_len = info->mti_big_lmmsize;
}
if (rc < 0)
return rc;
if (rc < sizeof(*leh)) {
CERROR("short LinkEA on "DFID": rc = %d\n",
PFID(mdt_object_fid(o)), rc);
return -ENODATA;
}
leh = (struct link_ea_header *)buf->lb_buf;
lee = (struct link_ea_entry *)(leh + 1);
if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
leh->leh_magic = LINK_EA_MAGIC;
leh->leh_reccount = __swab32(leh->leh_reccount);
leh->leh_len = __swab64(leh->leh_len);
}
if (leh->leh_magic != LINK_EA_MAGIC)
return -EINVAL;
if (leh->leh_reccount == 0)
return -ENODATA;
linkea_entry_unpack(lee, &reclen, lname, pfid);
return 0;
}
int mdt_attr_get_complex(struct mdt_thread_info *info,
struct mdt_object *o, struct md_attr *ma)
{
......@@ -1140,19 +1220,19 @@ int mdt_attr_get_complex(struct mdt_thread_info *info,
}
if (need & MA_LOV && (S_ISREG(mode) || S_ISDIR(mode))) {
rc = mdt_stripe_get(info, o, ma, XATTR_NAME_LOV);
rc = __mdt_stripe_get(info, o, ma, XATTR_NAME_LOV);
if (rc)
GOTO(out, rc);
}
if (need & MA_LMV && S_ISDIR(mode)) {
rc = mdt_stripe_get(info, o, ma, XATTR_NAME_LMV);
rc = __mdt_stripe_get(info, o, ma, XATTR_NAME_LMV);
if (rc != 0)
GOTO(out, rc);
}
if (need & MA_LMV_DEF && S_ISDIR(mode)) {
rc = mdt_stripe_get(info, o, ma, XATTR_NAME_DEFAULT_LMV);
rc = __mdt_stripe_get(info, o, ma, XATTR_NAME_DEFAULT_LMV);
if (rc != 0)
GOTO(out, rc);
}
......@@ -1202,19 +1282,21 @@ out:
}
static int mdt_getattr_internal(struct mdt_thread_info *info,
struct mdt_object *o, int ma_need)
struct mdt_object *o, int ma_need)
{
struct md_object *next = mdt_object_child(o);
const struct mdt_body *reqbody = info->mti_body;
struct ptlrpc_request *req = mdt_info_req(info);
struct md_attr *ma = &info->mti_attr;
struct lu_attr *la = &ma->ma_attr;
struct req_capsule *pill = info->mti_pill;
const struct lu_env *env = info->mti_env;
struct mdt_body *repbody;
struct lu_buf *buffer = &info->mti_buf;
struct obd_export *exp = info->mti_exp;
int rc;
struct mdt_device *mdt = info->mti_mdt;
struct md_object *next = mdt_object_child(o);
const struct mdt_body *reqbody = info->mti_body;
struct ptlrpc_request *req = mdt_info_req(info);
struct md_attr *ma = &info->mti_attr;
struct lu_attr *la = &ma->ma_attr;
struct req_capsule *pill = info->mti_pill;
const struct lu_env *env = info->mti_env;
struct mdt_body *repbody;
struct lu_buf *buffer = &info->mti_buf;
struct obd_export *exp = info->mti_exp;
int rc;
ENTRY;
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK))
......@@ -1301,13 +1383,13 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
}
}
if (S_ISDIR(lu_object_attr(&next->mo_lu)) &&
if (S_ISDIR(lu_object_attr(&next->mo_lu)) &&
reqbody->mbo_valid & OBD_MD_FLDIREA &&
lustre_msg_get_opc(req->rq_reqmsg) == MDS_GETATTR) {
/* get default stripe info for this dir. */
ma->ma_need |= MA_LOV_DEF;
}
ma->ma_need |= ma_need;
lustre_msg_get_opc(req->rq_reqmsg) == MDS_GETATTR) {
/* get default stripe info for this dir. */
ma->ma_need |= MA_LOV_DEF;
}
ma->ma_need |= ma_need;
rc = mdt_attr_get_complex(info, o, ma);
if (unlikely(rc)) {
......@@ -1326,22 +1408,27 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
repbody->mbo_t_state = MS_RESTORE;
}
if (likely(ma->ma_valid & MA_INODE))
mdt_pack_attr2body(info, repbody, la, mdt_object_fid(o));
else
RETURN(-EFAULT);
if (unlikely(!(ma->ma_valid & MA_INODE)))
RETURN(-EFAULT);
mdt_pack_attr2body(info, repbody, la, mdt_object_fid(o));
if (mdt_body_has_lov(la, reqbody)) {
if (ma->ma_valid & MA_LOV) {
LASSERT(ma->ma_lmm_size);
if (mdt_body_has_lov(la, reqbody)) {
u32 stripe_count = 1;
if (ma->ma_valid & MA_LOV) {
LASSERT(ma->ma_lmm_size);
repbody->mbo_eadatasize = ma->ma_lmm_size;
if (S_ISDIR(la->la_mode))
repbody->mbo_valid |= OBD_MD_FLDIREA;
else
repbody->mbo_valid |= OBD_MD_FLEASIZE;
mdt_dump_lmm(D_INFO, ma->ma_lmm, repbody->mbo_valid);
}
}
if (ma->ma_valid & MA_LMV) {
struct lmv_mds_md_v1 *lmv = &ma->ma_lmv->lmv_md_v1;
u32 magic = le32_to_cpu(lmv->lmv_magic);
/* Return -ENOTSUPP for old client */
if (!mdt_is_striped_client(req->rq_export))
RETURN(-ENOTSUPP);
......@@ -1350,6 +1437,13 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
mdt_dump_lmv(D_INFO, ma->ma_lmv);
repbody->mbo_eadatasize = ma->ma_lmv_size;
repbody->mbo_valid |= (OBD_MD_FLDIREA|OBD_MD_MEA);
stripe_count = le32_to_cpu(lmv->lmv_stripe_count);
if (magic == LMV_MAGIC_STRIPE && lmv_is_restriping(lmv))
mdt_restripe_migrate_add(info, o);
else if (magic == LMV_MAGIC_V1 &&
lmv_is_restriping(lmv))
mdt_restripe_update_add(info, o);
}
if (ma->ma_valid & MA_LMV_DEF) {
/* Return -ENOTSUPP for old client */
......@@ -1366,6 +1460,18 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
repbody->mbo_valid |= (OBD_MD_FLDIREA |
OBD_MD_DEFAULT_MEA);
}
CDEBUG(D_VFSTRACE,
"dirent count %llu stripe count %u MDT count %d\n",
ma->ma_attr.la_dirent_count, stripe_count,
atomic_read(&mdt->mdt_mds_mds_conns) + 1);
if (ma->ma_attr.la_dirent_count != LU_DIRENT_COUNT_UNSET &&
ma->ma_attr.la_dirent_count >
mdt->mdt_restriper.mdr_dir_split_count &&
!fid_is_root(mdt_object_fid(o)) &&
mdt->mdt_enable_dir_auto_split &&
!o->mot_restriping &&
stripe_count < atomic_read(&mdt->mdt_mds_mds_conns) + 1)
mdt_auto_split_add(info, o);
} else if (S_ISLNK(la->la_mode) &&
reqbody->mbo_valid & OBD_MD_LINKNAME) {
buffer->lb_buf = ma->ma_lmm;
......@@ -1403,8 +1509,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
print_limit < rc ? "..." : "", print_limit,
(char *)ma->ma_lmm + rc - print_limit, rc);
rc = 0;
}
}
}
}
if (reqbody->mbo_valid & OBD_MD_FLMODEASIZE) {
repbody->mbo_max_mdsize = info->mti_mdt->mdt_max_mdsize;
......@@ -1426,10 +1532,10 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
#endif
out:
if (rc == 0)
if (rc == 0)
mdt_counter_incr(req, LPROC_MDT_GETATTR);
RETURN(rc);
RETURN(rc);
}
static int mdt_getattr(struct tgt_session_info *tsi)
......@@ -5369,6 +5475,8 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
next->md_ops->mdo_iocontrol(env, next, OBD_IOC_STOP_LFSCK, 0, &stop);
mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child));
mdt_restriper_stop(m);
ping_evictor_stop();
/* Remove the HSM /proc entry so the coordinator cannot be
......@@ -5510,10 +5618,12 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
m->mdt_enable_remote_dir = 1;
m->mdt_enable_striped_dir = 1;
m->mdt_enable_dir_migration = 1;
m->mdt_enable_dir_restripe = 1;
m->mdt_enable_dir_restripe = 0;
m->mdt_enable_dir_auto_split = 0;
m->mdt_enable_remote_dir_gid = 0;
m->mdt_enable_chprojid_gid = 0;
m->mdt_enable_remote_rename = 1;
m->mdt_dir_restripe_nsonly = 1;
atomic_set(&m->mdt_mds_mds_conns, 0);
atomic_set(&m->mdt_async_commit_count, 0);
......@@ -5674,7 +5784,14 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_LOCAL_RECOV))
m->mdt_lut.lut_local_recovery = 1;
rc = mdt_restriper_start(m);
if (rc)
GOTO(err_ping_evictor, rc);
RETURN(0);
err_ping_evictor:
ping_evictor_stop();
err_procfs:
mdt_tunables_fini(m);
err_recovery:
......@@ -5824,6 +5941,8 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env,
init_rwsem(&mo->mot_dom_sem);
init_rwsem(&mo->mot_open_sem);
atomic_set(&mo->mot_open_count, 0);
mo->mot_restripe_offset = 0;
INIT_LIST_HEAD(&mo->mot_restripe_linkage);
RETURN(o);
}
RETURN(NULL);
......
......@@ -59,6 +59,7 @@
#include <lustre_eacl.h>
#include <lustre_quota.h>
#include <lustre_linkea.h>
#include <lustre_lmv.h>
struct mdt_object;
......@@ -205,6 +206,36 @@ struct mdt_statfs_cache {
__u64 msf_age;
};
/* split directory automatically when sub file count exceeds 50k */
#define DIR_SPLIT_COUNT_DEFAULT 50000
/* directory auto-split allocate delta new stripes each time */
#define DIR_SPLIT_DELTA_DEFAULT 4
struct mdt_dir_restriper {
struct lu_env mdr_env;
struct lu_context mdr_session;
struct task_struct *mdr_task;
/* lock for below fields */
spinlock_t mdr_lock;
/* auto split when plain dir/shard sub files exceed threshold */
u64 mdr_dir_split_count;
/* auto split growth delta */
u32 mdr_dir_split_delta;
/* directories to split */
struct list_head mdr_auto_splitting;
/* directories under which sub files are migrating */
struct list_head mdr_migrating;
/* directories waiting to update layout after migration */
struct list_head mdr_updating;
/* time to update directory layout after migration */
time64_t mdr_update_time;
/* lum used in split/migrate/layout_change */
union lmv_mds_md mdr_lmv;
/* page used in readdir */
struct page *mdr_page;
};
struct mdt_device {
/* super-class */
struct lu_device mdt_lu_dev;
......@@ -256,9 +287,12 @@ struct mdt_device {
mdt_enable_striped_dir:1,
mdt_enable_dir_migration:1,
mdt_enable_dir_restripe:1,
mdt_enable_dir_auto_split:1,
mdt_enable_remote_rename:1,
mdt_skip_lfsck:1,
mdt_readonly:1;
mdt_readonly:1,
/* dir restripe migrate dirent only */
mdt_dir_restripe_nsonly:1;
/* user with gid can create remote/striped
* dir, and set default dir stripe */
......@@ -293,6 +327,8 @@ struct mdt_device {
atomic_t mdt_async_commit_count;