diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 3df013c81975ba6cdcc418c32cf1d200c7cb469f..5cf43a6da43ec3939da2994c2ee3d9de3588e8a1 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -13,6 +13,14 @@ tbd Sun Microsystems, Inc. removed cwd "./" (refer to Bugzilla 14399). * File join has been disabled in this release, refer to Bugzilla 16929. +Severity : enhancement +Bugzilla : 15699 +Description: Changelogs +Details : Changelogs are a lightweight mechanism to track filesystem + metadata and namespace changes. The changelog is recorded + permanently on the MDTs, and is periodically "consumed" / purged + when records are no longer needed. + Severity : enhancement Bugzilla : 15957 Description: compact fld format with extents diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 66284e8d4894e6f8ee2fede172b110728ed45d8c..08a573fda46a21e1df30856f4fcafe509be6fa72 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -934,6 +934,25 @@ LB_LINUX_TRY_COMPILE([ ]) ]) +# LC_SEQ_LOCK +# after 2.6.18 seq_file has lock instead of sem +AC_DEFUN([LC_SEQ_LOCK], +[AC_MSG_CHECKING([if struct seq_file has lock field]) +LB_LINUX_TRY_COMPILE([ + #include <linux/seq_file.h> +],[ + struct seq_file seq; + + mutex_unlock(&seq.lock); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SEQ_LOCK, 1, + [after 2.6.18 seq_file has lock instead of sem]) +],[ + AC_MSG_RESULT(NO) +]) +]) + # LC_DQUOTOFF_MUTEX # after 2.6.17 dquote use mutex instead if semaphore AC_DEFUN([LC_DQUOTOFF_MUTEX], @@ -1466,7 +1485,7 @@ EXTRA_KCFLAGS="$tmp_flags" AC_DEFUN([LC_CONST_ACL_SIZE], [AC_MSG_CHECKING([calc acl size]) tmp_flags="$CFLAGS" -CFLAGS="$CFLAGS -I $LINUX_OBJ/include $EXTRA_KCFLAGS" +CFLAGS="$CFLAGS -I$LINUX/include -I$LINUX_OBJ/include -I$LINUX_OBJ/include2 $EXTRA_KCFLAGS" AC_TRY_RUN([ #define __KERNEL__ #include <linux/autoconf.h> @@ -1632,6 +1651,7 @@ AC_DEFUN([LC_PROG_LINUX], LC_VFS_KERN_MOUNT LC_INVALIDATEPAGE_RETURN_INT LC_UMOUNTBEGIN_HAS_VFSMOUNT 
+ LC_SEQ_LOCK if test x$enable_server = xyes ; then LC_EXPORT_INVALIDATE_MAPPING_PAGES LC_EXPORT_FILEMAP_FDATAWRITE_RANGE diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c index 7cbf87d097026fb37004126f8e1951c63ce418b9..bb5fd7614b8d324c4c043de558c2a66d115c3b13 100644 --- a/lustre/cmm/cmm_object.c +++ b/lustre/cmm/cmm_object.c @@ -345,6 +345,15 @@ static int cml_capa_get(const struct lu_env *env, struct md_object *mo, RETURN(rc); } +static int cml_path(const struct lu_env *env, struct md_object *mo, + char *path, int pathlen, __u64 recno, int *linkno) +{ + int rc; + ENTRY; + rc = mo_path(env, md_object_next(mo), path, pathlen, recno, linkno); + RETURN(rc); +} + static int cml_object_sync(const struct lu_env *env, struct md_object *mo) { int rc; @@ -370,6 +379,7 @@ static const struct md_object_operations cml_mo_ops = { .moo_readlink = cml_readlink, .moo_capa_get = cml_capa_get, .moo_object_sync = cml_object_sync, + .moo_path = cml_path, }; /* md_dir operations */ @@ -931,6 +941,12 @@ static int cmr_capa_get(const struct lu_env *env, struct md_object *mo, return -EFAULT; } +static int cmr_path(const struct lu_env *env, struct md_object *obj, + char *path, int pathlen, __u64 recno, int *linkno) +{ + return -EREMOTE; +} + static int cmr_object_sync(const struct lu_env *env, struct md_object *mo) { return -EFAULT; @@ -953,6 +969,7 @@ static const struct md_object_operations cmr_mo_ops = { .moo_readlink = cmr_readlink, .moo_capa_get = cmr_capa_get, .moo_object_sync = cmr_object_sync, + .moo_path = cmr_path, }; /* remote part of md_dir operations */ diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 13c0385a6fa12aaafc5768101ed0bec3ec70c970..184a772edcaa192adc22fbef7ddacb10eb778ad4 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -98,6 +98,14 @@ do {mutex_lock_nested(&(inode)->i_mutex, I_MUTEX_PARENT); } while(0) #define LOCK_INODE_MUTEX_PARENT(inode) 
LOCK_INODE_MUTEX(inode) #endif /* HAVE_INODE_I_MUTEX */ +#ifdef HAVE_SEQ_LOCK +#define LL_SEQ_LOCK(seq) mutex_lock(&(seq)->lock) +#define LL_SEQ_UNLOCK(seq) mutex_unlock(&(seq)->lock) +#else +#define LL_SEQ_LOCK(seq) down(&(seq)->sem) +#define LL_SEQ_UNLOCK(seq) up(&(seq)->sem) +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) #define d_child d_u.d_child #define d_rcu d_u.d_rcu @@ -321,7 +329,7 @@ static inline int filemap_fdatawrite_range(struct address_space *mapping, #ifdef mapping_cap_writeback_dirty if (!mapping_cap_writeback_dirty(mapping)) - rc = 0; + rc = 0; #else if (mapping->backing_dev_info->memory_backed) rc = 0; @@ -377,7 +385,7 @@ static inline u32 get_sb_time_gran(struct super_block *sb) #endif #ifdef HAVE_RW_TREE_LOCK -#define TREE_READ_LOCK_IRQ(mapping) read_lock_irq(&(mapping)->tree_lock) +#define TREE_READ_LOCK_IRQ(mapping) read_lock_irq(&(mapping)->tree_lock) #define TREE_READ_UNLOCK_IRQ(mapping) read_unlock_irq(&(mapping)->tree_lock) #else #define TREE_READ_LOCK_IRQ(mapping) spin_lock_irq(&(mapping)->tree_lock) @@ -408,9 +416,9 @@ int ll_unregister_blkdev(unsigned int dev, const char *name) #endif #ifdef HAVE_FS_RENAME_DOES_D_MOVE -#define LL_RENAME_DOES_D_MOVE FS_RENAME_DOES_D_MOVE +#define LL_RENAME_DOES_D_MOVE FS_RENAME_DOES_D_MOVE #else -#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME +#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME #endif /* add a lustre compatible layer for crypto API */ @@ -507,7 +515,7 @@ struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname, { char buf[CRYPTO_MAX_ALG_NAME + 1]; const char *pan = algname; - u32 flag = 0; + u32 flag = 0; if (strncmp("cbc(", algname, 4) == 0) flag |= CRYPTO_TFM_MODE_CBC; @@ -526,7 +534,7 @@ struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname, return crypto_alloc_tfm(pan, flag); } -static inline +static inline struct ll_crypto_hash *ll_crypto_alloc_hash(const char *alg, u32 type, u32 mask) { char buf[CRYPTO_MAX_ALG_NAME + 1]; diff --git 
a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index b5443416fbaa67c9396a289d02899287c653e229..17e30a5bcb137a0d6a50396391d50d6043bc2947 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -58,8 +58,6 @@ struct fsfilt_objinfo { int fso_bufcnt; }; -#define XATTR_LUSTRE_MDS_LOV_EA "lov" - struct lustre_dquot; struct fsfilt_operations { struct list_head fs_list; diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 1b00b02c429f61502bc214eb6c710f3e2c3a2230..54ef3dfb5496da4adfd8d1c0805d77a3ad14a730 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -1289,6 +1289,7 @@ struct lu_name { struct lu_buf { void *lb_buf; ssize_t lb_len; + unsigned int lb_vmalloc:1; /* unsigned: a signed 1-bit field holds only 0 and -1 */ }; /** null buffer */ diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index 3f4fd1e3c5a77c3ddc429376be319b463569f626..b82d3b38db3292dc716706a45c9abdbca304c96d 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -61,7 +61,7 @@ enum llapi_message_level { /* the bottom three bits reserved for llapi_message_level */ #define LLAPI_MSG_MASK 0x00000007 -#define LLAPI_MSG_NO_ERRNO 0x00000010 +#define LLAPI_MSG_NO_ERRNO 0x00000010 /* liblustreapi.c */ extern void llapi_msg_set_level(int level); @@ -171,4 +171,10 @@ extern int llapi_rsetfacl(int argc, char *argv[]); extern int llapi_rgetfacl(int argc, char *argv[]); extern int llapi_cp(int argc, char *argv[]); extern int llapi_ls(int argc, char *argv[]); +extern int llapi_changelog_open(const char *mdtname, long long startrec); +extern int llapi_changelog_clear(const char *mdtname, long long endrec); +struct lu_fid; +extern int llapi_fid2path(char *device, char *fid, char *path, int pathlen, + __u64 recno, int *linkno); #endif + diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 
48bbc4bd8bf5e17b4648e5aeac32c22c6b19a34e..9eb187aa54ef4e9211b0c055f21e5c593a52d007 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -370,7 +370,7 @@ static inline __u32 lu_igif_gen(const struct lu_fid *fid) return fid_oid(fid); } -#define DFID "[0x%16.16"LPF64"x/0x%8.8x:0x%8.8x]" +#define DFID "["LPX64":0x%x:0x%x]" #define PFID(fid) \ fid_seq(fid), \ @@ -459,12 +459,12 @@ extern void lustre_swab_lu_seq_range(struct lu_seq_range *range); static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) { - /* Check that there is no alignment padding. */ - CLASSERT(sizeof *f0 == + /* Check that there is no alignment padding. */ + CLASSERT(sizeof *f0 == sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver); LASSERTF(fid_is_igif(f0) || fid_ver(f0) == 0, DFID, PFID(f0)); LASSERTF(fid_is_igif(f1) || fid_ver(f1) == 0, DFID, PFID(f1)); - return memcmp(f0, f1, sizeof *f0) == 0; + return memcmp(f0, f1, sizeof *f0) == 0; } #define __diff_normalize(val0, val1) \ @@ -2013,6 +2013,7 @@ struct cfg_marker { extern void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size); + /* * Opcodes for multiple servers. 
*/ @@ -2068,6 +2069,8 @@ typedef enum { PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, /* obsolete */ LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, + /** changelog record type */ + CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000, LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, } llog_op_type; @@ -2180,6 +2183,62 @@ struct llog_size_change_rec { struct llog_rec_tail lsc_tail; } __attribute__((packed)); +#define CHANGELOG_MAGIC 0xca103000 +/** Changelog record types + * When adding record types, update mdd_lproc.c's changelog_str + */ +enum changelog_rec_type { + CL_MARK = 0, + CL_CREATE = 1, /* namespace */ + CL_MKDIR = 2, /* namespace */ + CL_HARDLINK = 3, /* namespace */ + CL_SOFTLINK = 4, /* namespace */ + CL_MKNOD = 5, /* namespace */ + CL_UNLINK = 6, /* namespace */ + CL_RMDIR = 7, /* namespace */ + CL_RENAME = 8, /* namespace */ + CL_EXT = 9, /* namespace extended record (2nd half of rename) */ + CL_OPEN = 10, /* not currently used */ + CL_CLOSE = 11, /* may be written to log only with mtime change */ + CL_IOCTL = 12, + CL_TRUNC = 13, + CL_SETATTR = 14, + CL_XATTR = 15, + CL_LAST +}; + +/** \a changelog_rec_type's that can't be masked */ +#define CL_MINMASK (1 << CL_MARK) +/** bits covering all \a changelog_rec_type's */ +#define CL_ALLMASK 0XFFFF +/** default \a changelog_rec_type mask */ +#define CL_DEFMASK CL_ALLMASK + +/* per-record flags */ +#define CLF_VERSION 0x1000 +#define CLF_FLAGMASK 0x0FFF +#define CLF_HSM 0x0001 + +/** changelog record */ +struct llog_changelog_rec { + struct llog_rec_hdr cr_hdr; + __u16 cr_flags; /**< (flags&CLF_FLAGMASK)|CLF_VERSION */ + __u16 cr_namelen; + __u32 cr_type; /**< \a changelog_rec_type */ + __u64 cr_index; + __u64 cr_prev; /**< last index for this target fid */ + __u64 cr_time; + union { + struct lu_fid cr_tfid; /**< target fid */ + __u32 cr_markerflags; /**< CL_MARK flags */ + }; + struct lu_fid cr_pfid; /**< parent fid */ + union { + char cr_name[0]; /**< 
last element */ + struct llog_rec_tail cr_tail; /**< for_sizeof_only */ + }; +} __attribute__((packed)); + struct llog_gen { __u64 mnt_cnt; __u64 conn_cnt; @@ -2511,6 +2570,27 @@ struct lustre_capa_key { extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); -#endif +/** The link ea holds 1 \a link_ea_entry for each hardlink */ +#define LINK_EA_MAGIC 0x01EA0000 +struct link_ea_header { + __u32 leh_magic; + __u32 leh_reccount; + __u64 leh_len; /* total size */ + /* future use */ + __u32 padding1; + __u32 padding2; +}; +/** Hardlink data is name and parent fid. + * Stored in this crazy struct for maximum packing and endian-neutrality + */ +struct link_ea_entry { + /** __u16 stored big-endian, unaligned */ + char lee_reclen[2]; + struct lu_fid_pack lee_parent_fid; /**< variable length */ + /** logically after lee_parent_fid; don't use directly */ + char lee_name[0]; +}; + +#endif /** @} lustreidl */ diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 50495326b32ede361e64a281713d59ae69886361..491439092c52b08252ba006434dc09e859e369fb 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -57,6 +57,8 @@ #define LOV_OBJID "lov_objid" #define HEALTH_CHECK "health_check" #define CAPA_KEYS "capa_keys" +#define CHANGELOG_CATALOG "changelog_catalog" + /****************** persistent mount data *********************/ @@ -116,7 +118,7 @@ struct lustre_disk_data { __u32 ldd_config_ver; /* config rewrite count - not used */ __u32 ldd_flags; /* LDD_SV_TYPE */ - __u32 ldd_svindex; /* server index (0001), must match + __u32 ldd_svindex; /* server index (0001), must match svname */ __u32 ldd_mount_type; /* target fs type LDD_MT_* */ char ldd_fsname[64]; /* filesystem this server is part of */ @@ -158,7 +160,7 @@ int server_name2index(char *svname, __u32 *idx, char **endptr); /****************** mount command *********************/ -/* The lmd is only used internally by Lustre; mount simply passes +/* The lmd is only used 
internally by Lustre; mount simply passes everything as string options */ #define LMD_MAGIC 0xbdacbd03 @@ -172,7 +174,7 @@ struct lustre_mount_data { char *lmd_dev; /* device name */ char *lmd_profile; /* client only */ char *lmd_mgssec; /* sptlrpc flavor to mgs */ - char *lmd_opts; /* lustre mount options (as opposed to + char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ }; @@ -180,12 +182,12 @@ struct lustre_mount_data { #define LMD_FLG_SERVER 0x0001 /* Mounting a server */ #define LMD_FLG_CLIENT 0x0002 /* Mounting a client */ #define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */ -#define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers, +#define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers, no other services */ #define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, reusing existing MGS services */ -#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) +#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) /****************** last_rcvd file *********************/ @@ -271,8 +273,9 @@ struct lsd_client_data { }; -#ifdef __KERNEL__ /****************** superblock additional info *********************/ +#ifdef __KERNEL__ + struct ll_sb_info; struct lustre_sb_info { @@ -316,9 +319,9 @@ void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb)); int lustre_common_put_super(struct super_block *sb); -int lustre_process_log(struct super_block *sb, char *logname, +int lustre_process_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); -int lustre_end_log(struct super_block *sb, char *logname, +int lustre_end_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); struct lustre_mount_info *server_get_mount(const char *name); struct lustre_mount_info *server_get_mount_2(const char *name); diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 
ba36693b475b4ea16bcb4c461c865586298973a0..73d45d3f4e88b493328513b05774ff87c102a909 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -91,7 +91,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req); #define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */ void target_cancel_recovery_timer(struct obd_device *obd); -int target_start_recovery_thread(struct obd_device *obd, +int target_start_recovery_thread(struct obd_device *obd, svc_handler_t handler); void target_stop_recovery_thread(struct obd_device *obd); void target_cleanup_recovery(struct obd_device *obd); @@ -337,7 +337,7 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) ENTRY; err = copy_from_user(&hdr, (void *)arg, sizeof(hdr)); - if (err) + if (err) RETURN(err); if (hdr.ioc_version != OBD_IOCTL_VERSION) { @@ -431,8 +431,8 @@ static inline void obd_ioctl_freedata(char *buf, int len) * arg will be treated as a pointer, bsd will call * copyin(buf, arg, sizeof(long)) * - * To make BSD ioctl handles argument correctly and simplely, - * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data + * To make BSD ioctl handles argument correctly and simplely, + * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data * for us. Does this change affect Linux? 
(XXX Liang) */ #define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) @@ -478,6 +478,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) +#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) #define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) #define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index 402e33e917e07da0607291a1ef15dbebd82f8fe0..167b366b8f1e047a073f907fd3d21b06f62d2c6e 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -124,6 +124,11 @@ struct llog_process_data { * in catalog. */ llog_cb_t lpd_cb; + /** + * Start processing the catalog from startcat/startidx + */ + int lpd_startcat; + int lpd_startidx; }; struct llog_process_cat_data { @@ -157,14 +162,19 @@ int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, struct llog_cookie *reccookie, void *buf); int llog_cat_cancel_records(struct llog_handle *cathandle, int count, struct llog_cookie *cookies); -int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); +int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, + int startcat, int startidx); int llog_cat_process_thread(void *data); int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); /* llog_obd.c */ +int llog_setup_named(struct obd_device *obd, struct obd_llog_group *olg, + int index, struct obd_device *disk_obd, int count, + struct llog_logid *logid, const char *logname, + struct llog_operations *op); int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, - struct obd_device *disk_obd, int count, struct llog_logid *logid, + struct obd_device *disk_obd, int count, struct llog_logid *logid, struct 
llog_operations *op); int __llog_ctxt_put(struct llog_ctxt *ctxt); int llog_cleanup(struct llog_ctxt *); @@ -177,7 +187,7 @@ int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm, int llog_obd_origin_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, struct obd_device *disk_obd, int count, - struct llog_logid *logid); + struct llog_logid *logid, const char *name); int llog_obd_origin_cleanup(struct llog_ctxt *ctxt); int llog_obd_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, @@ -228,9 +238,9 @@ struct llog_operations { int (*lop_close)(struct llog_handle *handle); int (*lop_read_header)(struct llog_handle *handle); - int (*lop_setup)(struct obd_device *obd, struct obd_llog_group *olg, + int (*lop_setup)(struct obd_device *obd, struct obd_llog_group *olg, int ctxt_idx, struct obd_device *disk_obd, int count, - struct llog_logid *logid); + struct llog_logid *logid, const char *name); int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); int (*lop_cleanup)(struct llog_ctxt *ctxt); int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, @@ -335,10 +345,10 @@ struct llog_canceld_ctxt { /* ptlrpc/recov_thread.c */ extern struct llog_commit_master *llog_recov_thread_init(char *name); -extern void llog_recov_thread_fini(struct llog_commit_master *lcm, +extern void llog_recov_thread_fini(struct llog_commit_master *lcm, int force); extern int llog_recov_thread_start(struct llog_commit_master *lcm); -extern void llog_recov_thread_stop(struct llog_commit_master *lcm, +extern void llog_recov_thread_stop(struct llog_commit_master *lcm, int force); static inline void llog_gen_init(struct llog_ctxt *ctxt) @@ -425,7 +435,7 @@ static inline void llog_group_set_export(struct obd_llog_group *olg, struct obd_export *exp) { LASSERT(exp != NULL); - + spin_lock(&olg->olg_lock); if (olg->olg_exp != NULL && olg->olg_exp != exp) CWARN("%s: export for group %d is changed: 0x%p -> 0x%p\n", @@ -440,7 +450,7 
@@ static inline int llog_group_set_ctxt(struct obd_llog_group *olg, { LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - spin_lock(&olg->olg_lock); + spin_lock(&olg->olg_lock); if (olg->olg_ctxts[index] != NULL) { spin_unlock(&olg->olg_lock); return -EEXIST; @@ -457,7 +467,7 @@ static inline struct llog_ctxt *llog_group_get_ctxt(struct obd_llog_group *olg, LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - spin_lock(&olg->olg_lock); + spin_lock(&olg->olg_lock); if (olg->olg_ctxts[index] == NULL) { ctxt = NULL; } else { @@ -508,10 +518,10 @@ static inline int llog_write_rec(struct llog_handle *handle, raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE); if (!raised) - cfs_cap_raise(CFS_CAP_SYS_RESOURCE); + cfs_cap_raise(CFS_CAP_SYS_RESOURCE); rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); if (!raised) - cfs_cap_lower(CFS_CAP_SYS_RESOURCE); + cfs_cap_lower(CFS_CAP_SYS_RESOURCE); RETURN(rc); } diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h index fb63c752296bd5e4d5cba026b18f85916ab194eb..9bf9437932bcd6d27006b434b9898adf2b297de1 100644 --- a/lustre/include/lustre_mds.h +++ b/lustre/include/lustre_mds.h @@ -71,11 +71,12 @@ int mds_lov_prepare_objids(struct obd_device *obd, struct lov_mds_md *lmm); void mds_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm); -#define MDS_LOV_MD_NAME "trusted.lov" -#define MDS_LMV_MD_NAME "trusted.lmv" -#define MDD_OBD_NAME "mdd_obd" -#define MDD_OBD_UUID "mdd_obd_uuid" -#define MDD_OBD_TYPE "mds" +#define MDS_LOV_MD_NAME "trusted.lov" +#define MDS_LMV_MD_NAME "trusted.lmv" +#define MDS_LINK_EA_NAME "trusted.link" +#define MDD_OBD_NAME "mdd_obd" +#define MDD_OBD_UUID "mdd_obd_uuid" +#define MDD_OBD_TYPE "mds" static inline int md_should_create(__u32 flags) { diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index d106fa110e3ceb7026373107fb8867e364b8c33f..0b7caf65b4e1f5c492f6c32e69259c52b113bdec 100644 --- a/lustre/include/lustre_param.h +++ 
b/lustre/include/lustre_param.h @@ -55,12 +55,12 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, /****************** User-settable parameter keys *********************/ -/* e.g. +/* e.g. tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0 ... testfs-MDT0000.lov.stripesize=4M ... testfs-OST0000.ost.client_cache_seconds=15 - ... testfs.sys.timeout=<secs> + ... testfs.sys.timeout=<secs> ... testfs.llite.max_read_ahead_mb=16 */ diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 640ad6d0a918f4f2ea7951f283a3e8272c55f270..985550b12896d0cd1a9de070337c8eae77d53543 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -84,8 +84,8 @@ struct md_ucred { __u32 mu_suppgids[2]; cfs_cap_t mu_cap; __u32 mu_umask; - struct group_info *mu_ginfo; - struct md_identity *mu_identity; + struct group_info *mu_ginfo; + struct md_identity *mu_identity; }; enum { @@ -244,7 +244,11 @@ struct md_object_operations { int (*moo_capa_get)(const struct lu_env *, struct md_object *, struct lustre_capa *, int renewal); + int (*moo_object_sync)(const struct lu_env *, struct md_object *); + + int (*moo_path)(const struct lu_env *env, struct md_object *obj, + char *path, int pathlen, __u64 recno, int *linkno); }; /** @@ -680,6 +684,13 @@ static inline int mo_capa_get(const struct lu_env *env, return m->mo_ops->moo_capa_get(env, m, c, renewal); } +static inline int mo_path(const struct lu_env *env, struct md_object *m, + char *path, int pathlen, __u64 recno, int *linkno) +{ + LASSERT(m->mo_ops->moo_path); + return m->mo_ops->moo_path(env, m, path, pathlen, recno, linkno); +} + static inline int mo_object_sync(const struct lu_env *env, struct md_object *m) { LASSERT(m->mo_ops->moo_object_sync); diff --git a/lustre/include/obd.h b/lustre/include/obd.h index badcf8f325aa2e5071591191a072d01ce88997a8..3aa9f0ee8e839516281d33019cb124812c4d5575 100644 --- a/lustre/include/obd.h +++ 
b/lustre/include/obd.h @@ -266,6 +266,7 @@ enum llog_ctxt_id { LLOG_TEST_REPL_CTXT, LLOG_LOVEA_ORIG_CTXT, LLOG_LOVEA_REPL_CTXT, + LLOG_CHANGELOG_ORIG_CTXT, /**< changelog context */ LLOG_MAX_CTXTS }; diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 334ecfe863846dfffdc85c120029bba377877972..85060223c3e5d8794a1973aa4b04016759ccabb4 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -1214,6 +1214,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, RETURN(0); } + case LL_IOC_PATH2FID: + if (copy_to_user((void *)arg, &ll_i2info(inode)->lli_fid, + sizeof(struct lu_fid))) + RETURN(-EFAULT); + RETURN(0); default: RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg)); } diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 223234be07150664f9f342274cbed55c91b286cd..9e4fc79a41edc6623d7f5384b17698227885c259 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -69,7 +69,7 @@ * the right cookies are passed back to the right OSTs at the client side. * Unset cookies should be all-zero (which will never occur naturally). 
*/ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, + struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies) { struct obd_device *obd = ctxt->loc_obd; @@ -77,14 +77,14 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, int i, rc = 0, rc1; ENTRY; - LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count, + LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count, "logcookies %p, numcookies %d lsm->lsm_stripe_count %d \n", logcookies, numcookies, lsm->lsm_stripe_count); for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - struct obd_device *child = - lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd; + struct obd_device *child = + lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd; struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); /* fill mds unlink/setattr log record */ @@ -136,7 +136,7 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, for (i = 0; i < lov->desc.ld_tgt_count; i++) { struct obd_device *child; struct llog_ctxt *cctxt; - + if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid)) @@ -146,10 +146,10 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, cctxt = llog_get_context(child, ctxt->loc_idx); rc = llog_connect(cctxt, logid, gen, uuid); llog_ctxt_put(cctxt); - + if (rc) { CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc); - if (!err) + if (!err) err = rc; } } @@ -174,9 +174,9 @@ static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls lov_getref(obd); for (i = 0; i < count; i++, cookies++) { struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - struct obd_device *child = + struct obd_device *child = lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd; - struct llog_ctxt *cctxt = + struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); int err; @@ -247,7 +247,7 
@@ int lov_llog_init(struct obd_device *obd, struct obd_llog_group *olg, GOTO(err_cleanup, rc); err_cleanup: if (rc) { - struct llog_ctxt *ctxt = + struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_SIZE_REPL_CTXT); if (ctxt) llog_cleanup(ctxt); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index ff4cc210797c72050a0ca5295131b9a248cbe35a..8d184b0484f48b2aca868578d855db4ea56dff3b 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1659,8 +1659,8 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, LASSERT(olg == &obd->obd_olg); - rc = llog_setup(obd, olg, LLOG_LOVEA_REPL_CTXT, tgt, 0, - NULL, &llog_client_ops); + rc = llog_setup(obd, olg, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, + &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); llog_initiator_connect(ctxt); diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 76ef1c60b398a887a41d4d43e98ff4284293ff83..e9bfcfcc1de676aad6ee620bd20363b54a18fc26 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -58,6 +58,7 @@ #include <linux/ldiskfs_fs.h> #include <lustre_mds.h> #include <lustre/lustre_idl.h> +#include <lustre_disk.h> /* for changelogs */ #include <lustre_param.h> #include "mdd_internal.h" @@ -102,10 +103,14 @@ static struct lu_device *mdd_device_fini(const struct lu_env *env, return next; } +static void mdd_changelog_fini(const struct lu_env *env, + struct mdd_device *mdd); + static void mdd_device_shutdown(const struct lu_env *env, struct mdd_device *m, struct lustre_cfg *cfg) { ENTRY; + mdd_changelog_fini(env, m); dt_txn_callback_del(m->mdd_child, &m->mdd_txn_cb); if (m->mdd_obd_dev) mdd_fini_obd(env, m, cfg); @@ -115,6 +120,186 @@ static void mdd_device_shutdown(const struct lu_env *env, EXIT; } +static int changelog_init_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, + void *data) +{ + struct mdd_device *mdd = (struct mdd_device *)data; + struct llog_changelog_rec *rec = 
(struct llog_changelog_rec *)hdr; + ENTRY; + + if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) { + CERROR("log is not plain\n"); + RETURN(-EINVAL); + } + if (rec->cr_hdr.lrh_type != CHANGELOG_REC) { + CERROR("Not a changelog rec? %d\n", rec->cr_hdr.lrh_type); + RETURN(-EINVAL); + } + + CDEBUG(D_INODE, + "seeing record at index %d/%d/"LPU64" t=%x %.*s in log "LPX64"\n", + hdr->lrh_index, rec->cr_hdr.lrh_index, rec->cr_index, + rec->cr_type, rec->cr_namelen, rec->cr_name, + llh->lgh_id.lgl_oid); + + mdd->mdd_cl.mc_index = rec->cr_index; + RETURN(LLOG_PROC_BREAK); +} + +static int mdd_changelog_llog_init(struct mdd_device *mdd) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + struct llog_ctxt *ctxt; + int rc; + + ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); + if (ctxt == NULL) { + CERROR("no context\n"); + return -EINVAL; + } + if (!ctxt->loc_handle) { + CERROR("no handle\n"); + llog_ctxt_put(ctxt); /* balance llog_get_context() */ + return -EINVAL; + } + rc = llog_cat_reverse_process(ctxt->loc_handle, changelog_init_cb, mdd); + llog_ctxt_put(ctxt); + if (rc < 0) + CERROR("changelog init failed: %d\n", rc); + else + rc = 0; /* llog_proc_break is ok */ + + CDEBUG(D_INODE, "changelog_init index="LPU64"\n", mdd->mdd_cl.mc_index); + + return rc; +} + +static int mdd_changelog_init(const struct lu_env *env, struct mdd_device *mdd) +{ + int rc; + + mdd->mdd_cl.mc_index = 0; + spin_lock_init(&mdd->mdd_cl.mc_lock); + cfs_waitq_init(&mdd->mdd_cl.mc_waitq); + + mdd->mdd_cl.mc_starttime = cfs_time_current_64(); + mdd->mdd_cl.mc_flags = 0; /* off by default */ + mdd->mdd_cl.mc_mask = CL_DEFMASK; + rc = mdd_changelog_llog_init(mdd); + if (rc) { + CERROR("Changelog setup during init failed %d\n", rc); + mdd->mdd_cl.mc_flags |= CLM_ERR; + } + return rc; +} + +static void mdd_changelog_fini(const struct lu_env *env, struct mdd_device *mdd) +{ + mdd->mdd_cl.mc_flags = 0; +} + +/** Add a changelog entry \a rec to the changelog llog + * \param mdd + * \param rec + * \param handle - currently ignored since llogs start their 
own transaction; + * this will hopefully be fixed in llog rewrite + * \retval 0 ok + */ +int mdd_changelog_llog_write(struct mdd_device *mdd, + struct llog_changelog_rec *rec, + struct thandle *handle) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + struct llog_ctxt *ctxt; + int rc; + + if ((mdd->mdd_cl.mc_mask & (1 << rec->cr_type)) == 0) + return 0; + + rec->cr_hdr.lrh_len = llog_data_len(sizeof(*rec) + rec->cr_namelen); + /* llog_lvfs_write_rec sets the llog tail len */ + rec->cr_hdr.lrh_type = CHANGELOG_REC; + rec->cr_time = cfs_time_current_64(); + spin_lock(&mdd->mdd_cl.mc_lock); + /* NB: I suppose it's possible llog_add adds out of order wrt cr_index, + but as long as the MDD transactions are ordered correctly for e.g. + rename conflicts, I don't think this should matter. */ + rec->cr_index = ++mdd->mdd_cl.mc_index; + spin_unlock(&mdd->mdd_cl.mc_lock); + ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); + if (ctxt == NULL) + return -ENXIO; + + /* nested journal transaction */ + rc = llog_add(ctxt, &rec->cr_hdr, NULL, NULL, 0); + llog_ctxt_put(ctxt); + + cfs_waitq_signal(&mdd->mdd_cl.mc_waitq); + + return rc; +} + +/** Remove entries with indices up to and including \a endrec from the + * changelog + * \param mdd + * \param endrec + * \retval 0 ok + */ +int mdd_changelog_llog_cancel(struct mdd_device *mdd, long long endrec) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + struct llog_ctxt *ctxt; + int rc; + + ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); + if (ctxt == NULL) + return -ENXIO; + + /* Some records purged; reset repeat-access time */ + mdd->mdd_cl.mc_starttime = cfs_time_current_64(); + + rc = llog_cancel(ctxt, NULL, 1, (struct llog_cookie *)&endrec, 0); + + llog_ctxt_put(ctxt); + + return rc; +} + +/** Add a CL_MARK record to the changelog + * \param mdd + * \param markerflags - CLM_* + * \retval 0 ok + */ +int mdd_changelog_write_header(struct mdd_device *mdd, int markerflags) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + 
struct llog_changelog_rec *rec; + int reclen; + int len = strlen(obd->obd_name); + int rc; + ENTRY; + + reclen = llog_data_len(sizeof(*rec) + len); + OBD_ALLOC(rec, reclen); + if (rec == NULL) + RETURN(-ENOMEM); + + rec->cr_flags = CLF_VERSION; + rec->cr_type = CL_MARK; + rec->cr_namelen = len; + memcpy(rec->cr_name, obd->obd_name, rec->cr_namelen); + /* Status and action flags */ + rec->cr_markerflags = mdd->mdd_cl.mc_flags | markerflags; + + rc = mdd_changelog_llog_write(mdd, rec, NULL); + + /* assume on or off event; reset repeat-access time */ + mdd->mdd_cl.mc_starttime = rec->cr_time; + + OBD_FREE(rec, reclen); + RETURN(rc); +} + static int mdd_process_config(const struct lu_env *env, struct lu_device *d, struct lustre_cfg *cfg) { @@ -147,6 +332,10 @@ static int mdd_process_config(const struct lu_env *env, GOTO(out, rc); } rc = mdd_txn_init_credits(env, m); + if (rc) + break; + + mdd_changelog_init(env, m); break; case LCFG_CLEANUP: mdd_device_shutdown(env, m, cfg); @@ -157,6 +346,7 @@ static int mdd_process_config(const struct lu_env *env, out: RETURN(rc); } + #if 0 static int mdd_lov_set_nextid(const struct lu_env *env, struct mdd_device *mdd) @@ -469,6 +659,8 @@ static void mdd_key_fini(const struct lu_context *ctx, OBD_FREE(info->mti_max_lmm, info->mti_max_lmm_size); if (info->mti_max_cookie != NULL) OBD_FREE(info->mti_max_cookie, info->mti_max_cookie_size); + mdd_buf_put(&info->mti_big_buf); + OBD_FREE_PTR(info); } diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index cb8423901c52020c9b605656e9d5f73f16477eb4..309025bbb44b8f28848ccca26e159e5e55bf8b67 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -71,6 +71,19 @@ static struct lu_name lname_dotdot = { static int __mdd_lookup(const struct lu_env *env, struct md_object *pobj, const struct lu_name *lname, struct lu_fid* fid, int mask); +static int mdd_links_add(const struct lu_env *env, + struct mdd_object *mdd_obj, + const struct lu_fid *pfid, + const struct lu_name *lname, + struct 
thandle *handle); +static int mdd_links_rename(const struct lu_env *env, + struct mdd_object *mdd_obj, + const struct lu_fid *oldpfid, + const struct lu_name *oldlname, + const struct lu_fid *newpfid, + const struct lu_name *newlname, + struct thandle *handle); + static int __mdd_lookup_locked(const struct lu_env *env, struct md_object *pobj, const struct lu_name *lname, struct lu_fid* fid, int mask) @@ -89,9 +102,9 @@ __mdd_lookup_locked(const struct lu_env *env, struct md_object *pobj, return rc; } -static int mdd_lookup(const struct lu_env *env, - struct md_object *pobj, const struct lu_name *lname, - struct lu_fid* fid, struct md_op_spec *spec) +int mdd_lookup(const struct lu_env *env, + struct md_object *pobj, const struct lu_name *lname, + struct lu_fid* fid, struct md_op_spec *spec) { int rc; ENTRY; @@ -99,7 +112,6 @@ static int mdd_lookup(const struct lu_env *env, RETURN(rc); } - static int mdd_parent_fid(const struct lu_env *env, struct mdd_object *obj, struct lu_fid *fid) { @@ -107,10 +119,10 @@ static int mdd_parent_fid(const struct lu_env *env, struct mdd_object *obj, } /* - * For root fid use special function, whcih does not compare version component - * of fid. Vresion component is different for root fids on all MDTs. + * For root fid use special function, which does not compare version component + * of fid. Version component is different for root fids on all MDTs. */ -static int mdd_is_root(struct mdd_device *mdd, const struct lu_fid *fid) +int mdd_is_root(struct mdd_device *mdd, const struct lu_fid *fid) { return fid_seq(&mdd->mdd_root_fid) == fid_seq(fid) && fid_oid(&mdd->mdd_root_fid) == fid_oid(fid); @@ -577,6 +589,66 @@ __mdd_index_insert_only(const struct lu_env *env, struct mdd_object *pobj, RETURN(rc); } +/** Store a namespace change changelog record + * If this fails, we must fail the whole transaction; we don't + * want the change to commit without the log entry. 
+ * \param target - mdd_object of change + * \param parent - parent dir/object + * \param tf - target lu_fid, overrides fid of \a target if this is non-null + * \param tname - target name string + * \param handle - transaction handle + */ +static int mdd_changelog_ns_store(const struct lu_env *env, + struct mdd_device *mdd, + enum changelog_rec_type type, + struct mdd_object *target, + struct mdd_object *parent, + const struct lu_fid *tf, + const struct lu_name *tname, + struct thandle *handle) +{ + const struct lu_fid *tfid; + const struct lu_fid *tpfid = mdo2fid(parent); + struct llog_changelog_rec *rec; + struct lu_buf *buf; + int reclen; + int rc; + ENTRY; + + if (!(mdd->mdd_cl.mc_flags & CLM_ON)) + RETURN(0); + + LASSERT(parent != NULL); + LASSERT(tname != NULL); + LASSERT(handle != NULL); + + /* target */ + reclen = llog_data_len(sizeof(*rec) + tname->ln_namelen); + buf = mdd_buf_alloc(env, reclen); + if (buf->lb_buf == NULL) + RETURN(-ENOMEM); + rec = (struct llog_changelog_rec *)buf->lb_buf; + + rec->cr_flags = CLF_VERSION; + rec->cr_type = (__u32)type; + tfid = tf ? 
tf : mdo2fid(target); + rec->cr_tfid = *tfid; + rec->cr_pfid = *tpfid; + rec->cr_namelen = tname->ln_namelen; + memcpy(rec->cr_name, tname->ln_name, rec->cr_namelen); + if (likely(target)) + target->mod_cltime = cfs_time_current_64(); + + rc = mdd_changelog_llog_write(mdd, rec, handle); + if (rc < 0) { + CERROR("changelog failed: rc=%d, op%d %s c"DFID" p"DFID"\n", + rc, type, tname->ln_name, PFID(tfid), PFID(tpfid)); + return -EFAULT; + } + + return 0; +} + static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, struct md_object *src_obj, const struct lu_name *lname, struct md_attr *ma) @@ -645,11 +717,17 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, la->la_valid = LA_CTIME; rc = mdd_attr_check_set_internal(env, mdd_sobj, la, handle, 0); + if (rc == 0) + mdd_links_add(env, mdd_sobj, mdo2fid(mdd_tobj), lname, handle); + EXIT; out_unlock: mdd_write_unlock(env, mdd_sobj); mdd_pdo_write_unlock(env, mdd_tobj, dlh); out_trans: + if (rc == 0) + rc = mdd_changelog_ns_store(env, mdd, CL_HARDLINK, mdd_sobj, + mdd_tobj, NULL, lname, handle); mdd_trans_stop(env, mdd, rc, handle); out_pending: #ifdef HAVE_QUOTA_SUPPORT @@ -733,7 +811,8 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, unsigned int qpids[MAXQUOTAS] = { 0, 0 }; int quota_opc = 0; #endif - int rc, is_dir; + int is_dir = S_ISDIR(ma->ma_attr.la_mode); + int rc; ENTRY; LASSERTF(mdd_object_exists(mdd_cobj) > 0, "FID is "DFID"\n", @@ -747,13 +826,11 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, if (IS_ERR(handle)) RETURN(PTR_ERR(handle)); - dlh = mdd_pdo_write_lock(env, mdd_pobj, name, MOR_TGT_PARENT); if (dlh == NULL) GOTO(out_trans, rc = -ENOMEM); mdd_write_lock(env, mdd_cobj, MOR_TGT_CHILD); - is_dir = S_ISDIR(ma->ma_attr.la_mode); rc = mdd_unlink_sanity_check(env, mdd_pobj, mdd_cobj, ma); if (rc) GOTO(cleanup, rc); @@ -804,11 +881,22 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, 
obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp, sizeof(KEY_UNLINKED), KEY_UNLINKED, 0, NULL, NULL); + if (!is_dir) + /* old files may not have link ea; ignore errors */ + mdd_links_rename(env, mdd_cobj, mdo2fid(mdd_pobj), + lname, NULL, NULL, handle); + EXIT; cleanup: mdd_write_unlock(env, mdd_cobj); mdd_pdo_write_unlock(env, mdd_pobj, dlh); out_trans: + if (rc == 0) + rc = mdd_changelog_ns_store(env, mdd, + is_dir ? CL_RMDIR : CL_UNLINK, + mdd_cobj, mdd_pobj, NULL, lname, + handle); + mdd_trans_stop(env, mdd, rc, handle); #ifdef HAVE_QUOTA_SUPPORT if (quota_opc) @@ -1065,6 +1153,7 @@ static int mdd_rt_sanity_check(const struct lu_env *env, RETURN(rc); } +/* Partial rename op on slave MDD */ static int mdd_rename_tgt(const struct lu_env *env, struct md_object *pobj, struct md_object *tobj, const struct lu_fid *lf, const struct lu_name *lname, @@ -1173,6 +1262,12 @@ cleanup: mdd_write_unlock(env, mdd_tobj); mdd_pdo_write_unlock(env, mdd_tpobj, dlh); out_trans: + if (rc == 0) + /* Bare EXT record with no RENAME in front of it signifies + a partial slave op */ + rc = mdd_changelog_ns_store(env, mdd, CL_EXT, mdd_tobj, + mdd_tpobj, NULL, lname, handle); + mdd_trans_stop(env, mdd, rc, handle); out_pending: #ifdef HAVE_QUOTA_SUPPORT @@ -1272,6 +1367,7 @@ out_free: RETURN(rc); } +/* Get fid from name and parent */ static int __mdd_lookup(const struct lu_env *env, struct md_object *pobj, const struct lu_name *lname, struct lu_fid* fid, int mask) @@ -1308,6 +1404,7 @@ __mdd_lookup(const struct lu_env *env, struct md_object *pobj, if (likely(S_ISDIR(mdd_object_type(mdd_obj)) && dt_try_as_dir(env, dir))) { + rc = dir->do_index_ops->dio_lookup(env, dir, (struct dt_rec *)pack, key, mdd_object_capa(env, mdd_obj)); @@ -1322,8 +1419,9 @@ __mdd_lookup(const struct lu_env *env, struct md_object *pobj, } int mdd_object_initialize(const struct lu_env *env, const struct lu_fid *pfid, - struct mdd_object *child, struct md_attr *ma, - struct thandle *handle, const struct 
md_op_spec *spec) + const struct lu_name *lname, struct mdd_object *child, + struct md_attr *ma, struct thandle *handle, + const struct md_op_spec *spec) { int rc; ENTRY; @@ -1360,6 +1458,9 @@ int mdd_object_initialize(const struct lu_env *env, const struct lu_fid *pfid, } } } + if (rc == 0) + mdd_links_add(env, child, pfid, lname, handle); + RETURN(rc); } @@ -1626,7 +1727,7 @@ static int mdd_create(const struct lu_env *env, } #endif - rc = mdd_object_initialize(env, mdo2fid(mdd_pobj), + rc = mdd_object_initialize(env, mdo2fid(mdd_pobj), lname, son, ma, handle, spec); mdd_write_unlock(env, son); if (rc) @@ -1717,6 +1818,12 @@ cleanup: mdd_pdo_write_unlock(env, mdd_pobj, dlh); out_trans: + if (rc == 0) + rc = mdd_changelog_ns_store(env, mdd, + S_ISDIR(attr->la_mode) ? CL_MKDIR : + S_ISREG(attr->la_mode) ? CL_CREATE : + S_ISLNK(attr->la_mode) ? CL_SOFTLINK : CL_MKNOD, + son, mdd_pobj, NULL, lname, handle); mdd_trans_stop(env, mdd, rc, handle); out_free: /* finis lov_create stuff, free all temporary data */ @@ -1837,10 +1944,10 @@ static int mdd_rename(const struct lu_env *env, const char *sname = lsname->ln_name; const char *tname = ltname->ln_name; struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; - struct mdd_object *mdd_spobj = md2mdd_obj(src_pobj); + struct mdd_object *mdd_spobj = md2mdd_obj(src_pobj); /* source parent */ struct mdd_object *mdd_tpobj = md2mdd_obj(tgt_pobj); struct mdd_device *mdd = mdo2mdd(src_pobj); - struct mdd_object *mdd_sobj = NULL; + struct mdd_object *mdd_sobj = NULL; /* source object */ struct mdd_object *mdd_tobj = NULL; struct dynlock_handle *sdlh, *tdlh; struct thandle *handle; @@ -1924,6 +2031,7 @@ static int mdd_rename(const struct lu_env *env, if (rc) GOTO(cleanup, rc); + /* Remove source name from source directory */ rc = __mdd_index_delete(env, mdd_spobj, sname, is_dir, handle, mdd_object_capa(env, mdd_spobj)); if (rc) @@ -1943,7 +2051,7 @@ static int mdd_rename(const struct lu_env *env, GOTO(cleanup, rc); } - /* + /* 
Remove target name from target directory * Here tobj can be remote one, so we do index_delete unconditionally * and -ENOENT is allowed. */ @@ -1952,6 +2060,7 @@ static int mdd_rename(const struct lu_env *env, if (rc != 0 && rc != -ENOENT) GOTO(cleanup, rc); + /* Insert new fid with target name into target dir */ rc = __mdd_index_insert(env, mdd_tpobj, lf, tname, is_dir, handle, mdd_object_capa(env, mdd_tpobj)); if (rc) @@ -1969,7 +2078,7 @@ static int mdd_rename(const struct lu_env *env, GOTO(cleanup, rc); } - /* + /* Remove old target object * For tobj is remote case cmm layer has processed * and set tobj to NULL then. So when tobj is NOT NULL, * it must be local one. @@ -2012,6 +2121,20 @@ static int mdd_rename(const struct lu_env *env, handle, 0); } + if (rc == 0 && mdd_sobj) { + mdd_write_lock(env, mdd_sobj, MOR_SRC_CHILD); + rc = mdd_links_rename(env, mdd_sobj, mdo2fid(mdd_spobj), lsname, + mdo2fid(mdd_tpobj), ltname, handle); + if (rc == -ENOENT) + /* Old files might not have EA entry */ + mdd_links_add(env, mdd_sobj, mdo2fid(mdd_spobj), + lsname, handle); + mdd_write_unlock(env, mdd_sobj); + /* We don't fail the transaction if the link ea can't be + updated -- fid2path will use alternate lookup method. */ + rc = 0; + } + EXIT; cleanup: if (likely(tdlh) && sdlh != tdlh) @@ -2019,6 +2142,13 @@ cleanup: if (likely(sdlh)) mdd_pdo_write_unlock(env, mdd_spobj, sdlh); cleanup_unlocked: + if (rc == 0) + rc = mdd_changelog_ns_store(env, mdd, CL_RENAME, mdd_tobj, + mdd_spobj, lf, lsname, handle); + if (rc == 0) + rc = mdd_changelog_ns_store(env, mdd, CL_EXT, mdd_tobj, + mdd_tpobj, lf, ltname, handle); + mdd_trans_stop(env, mdd, rc, handle); if (mdd_sobj) mdd_object_put(env, mdd_sobj); @@ -2046,6 +2176,260 @@ out_pending: return rc; } +/** enable/disable storing of hardlink info */ +int mdd_linkea_enable = 1; +CFS_MODULE_PARM(mdd_linkea_enable, "d", int, 0644, + "record hardlink info in EAs"); + +/** Read the link EA into a temp buffer. 
+ * Uses the name_buf since it is generally large. + * \retval IS_ERR err + * \retval ptr to \a lu_buf (always \a mti_big_buf) + */ +struct lu_buf *mdd_links_get(const struct lu_env *env, + struct mdd_object *mdd_obj) +{ + struct lu_buf *buf; + struct lustre_capa *capa; + struct link_ea_header *leh; + int rc; + + /* First try a small buf */ + buf = mdd_buf_alloc(env, CFS_PAGE_SIZE); + if (buf->lb_buf == NULL) + return ERR_PTR(-ENOMEM); + + capa = mdd_object_capa(env, mdd_obj); + rc = mdo_xattr_get(env, mdd_obj, buf, MDS_LINK_EA_NAME, capa); + if (rc == -ERANGE) { + /* Buf was too small, figure out what we need. */ + buf->lb_buf = NULL; + buf->lb_len = 0; + rc = mdo_xattr_get(env, mdd_obj, buf, MDS_LINK_EA_NAME, capa); + if (rc < 0) + return ERR_PTR(rc); + buf = mdd_buf_alloc(env, rc); + if (buf->lb_buf == NULL) + return ERR_PTR(-ENOMEM); + rc = mdo_xattr_get(env, mdd_obj, buf, MDS_LINK_EA_NAME, capa); + } + if (rc < 0) + return ERR_PTR(rc); + + leh = buf->lb_buf; + if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) { + leh->leh_magic = LINK_EA_MAGIC; + leh->leh_reccount = __swab32(leh->leh_reccount); + leh->leh_len = __swab64(leh->leh_len); + /* entries are swabbed by mdd_lee_unpack */ + } + if (leh->leh_magic != LINK_EA_MAGIC) + return ERR_PTR(-EINVAL); + + return buf; +} + +/** Pack a link_ea_entry. + * All elements are stored as chars to avoid alignment issues. 
+ * Numbers are always big-endian + * \param packbuf is a temp fid buffer + * \retval record length + */ +static int mdd_lee_pack(struct link_ea_entry *lee, const struct lu_name *lname, + const struct lu_fid *pfid, struct lu_fid* packbuf) +{ + char *ptr; + int reclen; + + fid_pack(&lee->lee_parent_fid, pfid, packbuf); + ptr = (char *)&lee->lee_parent_fid + lee->lee_parent_fid.fp_len; + strncpy(ptr, lname->ln_name, lname->ln_namelen); + reclen = lee->lee_parent_fid.fp_len + lname->ln_namelen + + sizeof(lee->lee_reclen); + lee->lee_reclen[0] = (reclen >> 8) & 0xff; + lee->lee_reclen[1] = reclen & 0xff; + return reclen; +} + +void mdd_lee_unpack(const struct link_ea_entry *lee, int *reclen, + struct lu_name *lname, struct lu_fid *pfid) +{ + *reclen = (lee->lee_reclen[0] << 8) | lee->lee_reclen[1]; + fid_unpack(&lee->lee_parent_fid, pfid); + lname->ln_name = (char *)&lee->lee_parent_fid + + lee->lee_parent_fid.fp_len; + lname->ln_namelen = *reclen - lee->lee_parent_fid.fp_len - + sizeof(lee->lee_reclen); +} + +/** Add a record to the end of link ea buf */ +static int __mdd_links_add(const struct lu_env *env, struct lu_buf *buf, + const struct lu_fid *pfid, + const struct lu_name *lname) +{ + struct link_ea_header *leh; + struct link_ea_entry *lee; + int reclen; + + /* Make sure our buf is big enough for the new one */ + leh = buf->lb_buf; + reclen = lname->ln_namelen + sizeof(struct link_ea_entry); + if (leh->leh_len + reclen > buf->lb_len) { + mdd_buf_grow(env, leh->leh_len + reclen); + if (buf->lb_buf == NULL) + return -ENOMEM; + } + + leh = buf->lb_buf; + lee = buf->lb_buf + leh->leh_len; + reclen = mdd_lee_pack(lee, lname, pfid, &mdd_env_info(env)->mti_fid2); + leh->leh_len += reclen; + leh->leh_reccount++; + return 0; +} + +/* For pathologic linkers, we don't want to spend lots of time scanning the + * link ea. Limit ourselves to something reasonable; links not in the EA + * can be looked up via (slower) parent lookup. 
+ */ +#define LINKEA_MAX_COUNT 128 + +static int mdd_links_add(const struct lu_env *env, + struct mdd_object *mdd_obj, + const struct lu_fid *pfid, + const struct lu_name *lname, + struct thandle *handle) +{ + struct lu_buf *buf; + struct link_ea_header *leh; + int rc; + ENTRY; + + if (!mdd_linkea_enable) + RETURN(0); + + buf = mdd_links_get(env, mdd_obj); + if (IS_ERR(buf)) { + rc = PTR_ERR(buf); + if (rc != -ENODATA) { + CERROR("link_ea read failed %d "DFID"\n", rc, + PFID(mdd_object_fid(mdd_obj))); + RETURN (rc); + } + /* empty EA; start one */ + buf = mdd_buf_alloc(env, CFS_PAGE_SIZE); + if (buf->lb_buf == NULL) + RETURN(-ENOMEM); + leh = buf->lb_buf; + leh->leh_magic = LINK_EA_MAGIC; + leh->leh_len = sizeof(struct link_ea_header); + leh->leh_reccount = 0; + } + + leh = buf->lb_buf; + if (leh->leh_reccount > LINKEA_MAX_COUNT) + RETURN(-EOVERFLOW); + + rc = __mdd_links_add(env, buf, pfid, lname); + if (rc) + RETURN(rc); + + leh = buf->lb_buf; + rc = __mdd_xattr_set(env, mdd_obj, + mdd_buf_get_const(env, buf->lb_buf, leh->leh_len), + MDS_LINK_EA_NAME, 0, handle); + if (rc) + CERROR("link_ea add failed %d "DFID"\n", rc, + PFID(mdd_object_fid(mdd_obj))); + + if (buf->lb_vmalloc) + /* if we vmalloced a large buffer drop it */ + mdd_buf_put(buf); + + RETURN (rc); +} + +static int mdd_links_rename(const struct lu_env *env, + struct mdd_object *mdd_obj, + const struct lu_fid *oldpfid, + const struct lu_name *oldlname, + const struct lu_fid *newpfid, + const struct lu_name *newlname, + struct thandle *handle) +{ + struct lu_buf *buf; + struct link_ea_header *leh; + struct link_ea_entry *lee; + struct lu_name *tmpname = &mdd_env_info(env)->mti_name; + struct lu_fid *tmpfid = &mdd_env_info(env)->mti_fid; + int reclen = 0; + int rc, count; + ENTRY; + + if (!mdd_linkea_enable) + RETURN(0); + + if (mdd_obj->mod_flags & DEAD_OBJ) + /* No more links, don't bother */ + RETURN(0); + + buf = mdd_links_get(env, mdd_obj); + if (IS_ERR(buf)) { + rc = PTR_ERR(buf); + CERROR("link_ea 
read failed %d "DFID"\n", + rc, PFID(mdd_object_fid(mdd_obj))); + RETURN(rc); + } + leh = buf->lb_buf; + lee = (struct link_ea_entry *)(leh + 1); /* link #0 */ + + /* Find the old record; valid entries are 0 .. leh_reccount - 1 */ + for(count = 0; count < leh->leh_reccount; count++) { + mdd_lee_unpack(lee, &reclen, tmpname, tmpfid); + if (tmpname->ln_namelen == oldlname->ln_namelen && + lu_fid_eq(tmpfid, oldpfid) && + (strncmp(tmpname->ln_name, oldlname->ln_name, + tmpname->ln_namelen) == 0)) + break; + lee = (struct link_ea_entry *)((char *)lee + reclen); + } + if (count >= leh->leh_reccount) { + CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n", + oldlname->ln_namelen, oldlname->ln_name); + GOTO(out, rc = -ENOENT); + } + + /* Remove the old record */ + leh->leh_reccount--; + leh->leh_len -= reclen; + memmove(lee, (char *)lee + reclen, (char *)leh + leh->leh_len - + (char *)lee); + + /* If renaming, add the new record */ + if (newpfid != NULL) { + rc = __mdd_links_add(env, buf, newpfid, newlname); + if (rc) + GOTO(out, rc); + leh = buf->lb_buf; + } + + rc = __mdd_xattr_set(env, mdd_obj, + mdd_buf_get_const(env, buf->lb_buf, leh->leh_len), + MDS_LINK_EA_NAME, 0, handle); + +out: + if (rc) + CDEBUG(D_INODE, "link_ea mv/unlink '%.*s' failed %d "DFID"\n", + oldlname->ln_namelen, oldlname->ln_name, rc, + PFID(mdd_object_fid(mdd_obj))); + + if (buf->lb_vmalloc) + /* if we vmalloced a large buffer drop it */ + mdd_buf_put(buf); + + RETURN (rc); +} + const struct md_dir_operations mdd_dir_ops = { .mdo_is_subdir = mdd_is_subdir, .mdo_lookup = mdd_lookup, diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 746047e01f955c0f35b271ffcbd30ca7c4aa876f..c5a0e64cb29bb83931e258c4530a219f73917f73 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -88,6 +88,28 @@ struct mdd_txn_op_descr { unsigned int mod_credits; }; +/* Changelog flags */ +/** changelog is recording */ +#define CLM_ON 0x00001 +/** internal error prevented changelogs from starting */ +#define CLM_ERR 0x00002 
+/* Marker flags */ +/** changelogs turned on */ +#define CLM_START 0x10000 +/** changelogs turned off */ +#define CLM_FINI 0x20000 +/** some changelog records purged */ +#define CLM_PURGE 0x40000 + +struct mdd_changelog { + spinlock_t mc_lock; /* for index */ + cfs_waitq_t mc_waitq; + int mc_flags; + int mc_mask; + __u64 mc_index; + __u64 mc_starttime; +}; + struct mdd_device { struct md_device mdd_md_dev; struct dt_device *mdd_child; @@ -99,6 +121,7 @@ struct mdd_device { cfs_proc_dir_entry_t *mdd_proc_entry; struct lprocfs_stats *mdd_stats; struct mdd_txn_op_descr mdd_tod[MDD_TXN_LAST_OP]; + struct mdd_changelog mdd_cl; unsigned long mdd_atime_diff; }; @@ -126,10 +149,11 @@ enum mdd_object_role { struct mdd_object { struct md_object mod_obj; /* open count */ - __u32 mod_count; - __u32 mod_valid; - unsigned long mod_flags; - struct dynlock mod_pdlock; + __u32 mod_count; + __u32 mod_valid; + __u64 mod_cltime; + unsigned long mod_flags; + struct dynlock mod_pdlock; #ifdef CONFIG_LOCKDEP /* "dep_map" name is assumed by lockdep.h macros. 
*/ struct lockdep_map dep_map; @@ -139,16 +163,18 @@ struct mdd_object { struct mdd_thread_info { struct txn_param mti_param; struct lu_fid mti_fid; + struct lu_fid mti_fid2; /* used for be & cpu converting */ struct lu_attr mti_la; - struct md_attr mti_ma; struct lu_attr mti_la_for_fix; + struct md_attr mti_ma; struct obd_info mti_oi; char mti_orph_key[NAME_MAX + 1]; struct obd_trans_info mti_oti; struct lu_buf mti_buf; + struct lu_buf mti_big_buf; /* biggish persistent buf */ + struct lu_name mti_name; struct obdo mti_oa; char mti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE]; - struct lu_fid mti_fid2; /* used for be & cpu converting */ struct lu_fid_pack mti_pack; struct dt_allocation_hint mti_hint; struct lov_mds_md *mti_max_lmm; @@ -173,6 +199,9 @@ int mdd_init_obd(const struct lu_env *env, struct mdd_device *mdd, struct lustre_cfg *cfg); int mdd_fini_obd(const struct lu_env *env, struct mdd_device *mdd, struct lustre_cfg *lcfg); +int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *obj, + const struct lu_buf *buf, const char *name, + int fl, struct thandle *handle); int mdd_xattr_set_txn(const struct lu_env *env, struct mdd_object *obj, const struct lu_buf *buf, const char *name, int fl, struct thandle *txn); @@ -223,6 +252,7 @@ int mdd_attr_check_set_internal_locked(const struct lu_env *env, int needacl); int mdd_lmm_get_locked(const struct lu_env *env, struct mdd_object *mdd_obj, struct md_attr *ma); + /* mdd_lock.c */ void mdd_write_lock(const struct lu_env *env, struct mdd_object *obj, enum mdd_object_role role); @@ -262,10 +292,20 @@ int mdd_unlink_sanity_check(const struct lu_env *env, struct mdd_object *pobj, int mdd_finish_unlink(const struct lu_env *env, struct mdd_object *obj, struct md_attr *ma, struct thandle *th); int mdd_object_initialize(const struct lu_env *env, const struct lu_fid *pfid, - struct mdd_object *child, struct md_attr *ma, - struct thandle *handle, const struct md_op_spec *spec); + const struct lu_name *lname, struct mdd_object 
*child, + struct md_attr *ma, struct thandle *handle, + const struct md_op_spec *spec); int mdd_link_sanity_check(const struct lu_env *env, struct mdd_object *tgt_obj, const struct lu_name *lname, struct mdd_object *src_obj); +int mdd_is_root(struct mdd_device *mdd, const struct lu_fid *fid); +int mdd_lookup(const struct lu_env *env, + struct md_object *pobj, const struct lu_name *lname, + struct lu_fid* fid, struct md_op_spec *spec); +struct lu_buf *mdd_links_get(const struct lu_env *env, + struct mdd_object *mdd_obj); +void mdd_lee_unpack(const struct link_ea_entry *lee, int *reclen, + struct lu_name *lname, struct lu_fid *pfid); + /* mdd_lov.c */ int mdd_unlink_log(const struct lu_env *env, struct mdd_device *mdd, struct mdd_object *mdd_cobj, struct md_attr *ma); @@ -305,11 +345,23 @@ void mdd_lprocfs_time_start(const struct lu_env *env); void mdd_lprocfs_time_end(const struct lu_env *env, struct mdd_device *mdd, int op); +/* mdd_object.c */ int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj); +struct lu_buf *mdd_buf_alloc(const struct lu_env *env, ssize_t len); +int mdd_buf_grow(const struct lu_env *env, ssize_t len); +void mdd_buf_put(struct lu_buf *buf); extern const struct md_dir_operations mdd_dir_ops; extern const struct md_object_operations mdd_obj_ops; +int accmode(const struct lu_env *env, struct lu_attr *la, int flags); +extern struct lu_context_key mdd_thread_key; +extern const struct lu_device_operations mdd_lu_ops; + +struct mdd_object *mdd_object_find(const struct lu_env *env, + struct mdd_device *d, + const struct lu_fid *f); + /* mdd_quota.c*/ #ifdef HAVE_QUOTA_SUPPORT int mdd_quota_notify(const struct lu_env *env, struct md_device *m); @@ -376,15 +428,12 @@ int mdd_txn_commit_cb(const struct lu_env *env, struct thandle *txn, struct lu_object *mdd_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *d); - -/* mdd_object.c */ -int accmode(const struct lu_env *env, struct lu_attr *la, int 
flags); -extern struct lu_context_key mdd_thread_key; -extern const struct lu_device_operations mdd_lu_ops; - -struct mdd_object *mdd_object_find(const struct lu_env *env, - struct mdd_device *d, - const struct lu_fid *f); +struct llog_changelog_rec; +int mdd_changelog_llog_write(struct mdd_device *mdd, + struct llog_changelog_rec *rec, + struct thandle *handle); +int mdd_changelog_llog_cancel(struct mdd_device *mdd, long long endrec); +int mdd_changelog_write_header(struct mdd_device *mdd, int markerflags); /* mdd_permission.c */ #define mdd_cap_t(x) (x) @@ -417,24 +466,24 @@ int mdd_capa_get(const struct lu_env *env, struct md_object *obj, static inline int lu_device_is_mdd(struct lu_device *d) { - return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &mdd_lu_ops); + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &mdd_lu_ops); } static inline struct mdd_device* lu2mdd_dev(struct lu_device *d) { - LASSERT(lu_device_is_mdd(d)); - return container_of0(d, struct mdd_device, mdd_md_dev.md_lu_dev); + LASSERT(lu_device_is_mdd(d)); + return container_of0(d, struct mdd_device, mdd_md_dev.md_lu_dev); } static inline struct lu_device *mdd2lu_dev(struct mdd_device *d) { - return (&d->mdd_md_dev.md_lu_dev); + return (&d->mdd_md_dev.md_lu_dev); } static inline struct mdd_object *lu2mdd_obj(struct lu_object *o) { - LASSERT(ergo(o != NULL, lu_device_is_mdd(o->lo_dev))); - return container_of0(o, struct mdd_object, mod_obj.mo_lu); + LASSERT(ergo(o != NULL, lu_device_is_mdd(o->lo_dev))); + return container_of0(o, struct mdd_object, mod_obj.mo_lu); } static inline struct mdd_device* mdo2mdd(struct md_object *mdo) diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index 9178114df2fbf8bfec2ca623191bc0cd1db046e1..d675478c55e87aaafe827e1fa6f2a08e159d5bea 100644 --- a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -52,14 +52,23 @@ #include <obd_support.h> #include <lprocfs_status.h> #include <lu_time.h> - +#include <lustre_log.h> #include 
<lustre/lustre_idl.h> +#include <libcfs/libcfs_string.h> #include "mdd_internal.h" +#ifndef SEEK_CUR /* SLES10 needs this */ +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + static const char *mdd_counter_names[LPROC_MDD_NR] = { }; +/* from LPROC_SEQ_FOPS(mdd_changelog) below */ +extern struct file_operations mdd_changelog_fops; + int mdd_procfs_init(struct mdd_device *mdd, const char *name) { struct lprocfs_static_vars lvars; @@ -88,11 +97,12 @@ int mdd_procfs_init(struct mdd_device *mdd, const char *name) rc = lu_time_init(&mdd->mdd_stats, mdd->mdd_proc_entry, mdd_counter_names, ARRAY_SIZE(mdd_counter_names)); + EXIT; out: if (rc) mdd_procfs_fini(mdd); - return rc; + return rc; } int mdd_procfs_fini(struct mdd_device *mdd) @@ -150,6 +160,456 @@ static int lprocfs_rd_atime_diff(char *page, char **start, off_t off, return snprintf(page, count, "%lu\n", mdd->mdd_atime_diff); } +/* match enum changelog_rec_type */ +static const char *changelog_str[] = {"MARK","CREAT","MKDIR","HLINK","SLINK", + "MKNOD","UNLNK","RMDIR","RNMFM","RNMTO","OPEN","CLOSE","IOCTL", + "TRUNC","SATTR","XATTR"}; + +const char *changelog_bit2str(int bit) +{ + if (bit < CL_LAST) + return changelog_str[bit]; + return NULL; +} + +static int lprocfs_rd_cl_mask(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct mdd_device *mdd = data; + int i = 0, rc = 0; + + *eof = 1; + while (i < CL_LAST) { + if (mdd->mdd_cl.mc_mask & (1 << i)) + rc += snprintf(page + rc, count - rc, "%s ", + changelog_str[i]); + i++; + } + return rc; +} + +static int lprocfs_wr_cl_mask(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct mdd_device *mdd = data; + char *kernbuf; + int rc; + ENTRY; + + if (count >= CFS_PAGE_SIZE) + RETURN(-EINVAL); + OBD_ALLOC(kernbuf, CFS_PAGE_SIZE); + if (kernbuf == NULL) + RETURN(-ENOMEM); + if (copy_from_user(kernbuf, buffer, count)) + GOTO(out, rc = -EFAULT); + kernbuf[count] = 0; + + rc = libcfs_str2mask(kernbuf, 
changelog_bit2str, + &mdd->mdd_cl.mc_mask, CL_MINMASK, CL_ALLMASK); + if (rc == 0) + rc = count; +out: + OBD_FREE(kernbuf, CFS_PAGE_SIZE); + return rc; +} + +/** struct for holding changelog data for seq_file processing */ +struct cl_seq_iter { + struct mdd_device *csi_mdd; + __u64 csi_startrec; + __u64 csi_endrec; + loff_t csi_pos; + int csi_wrote; + int csi_startcat; + int csi_startidx; + int csi_fill:1; +}; + +/* non-seq version for direct calling by class_process_proc_param */ +static int lprocfs_wr_cl(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct mdd_device *mdd = (struct mdd_device *)data; + char kernbuf[32]; + char *end = NULL; /* stays NULL on the "0" shorthand path */ + int rc; + + if (count > (sizeof(kernbuf) - 1)) + goto out_usage; + + count = min_t(unsigned long, count, sizeof(kernbuf)); + if (copy_from_user(kernbuf, buffer, count)) + return -EFAULT; + + kernbuf[count] = '\0'; + /* strip trailing newline from "echo blah"; count may be 0 */ + if (count > 0 && kernbuf[count - 1] == '\n') + kernbuf[count - 1] = '\0'; + + if (strcmp(kernbuf, "on") == 0) { + LCONSOLE_INFO("changelog on\n"); + if (mdd->mdd_cl.mc_flags & CLM_ERR) { + CERROR("Changelogs cannot be enabled due to error " + "condition.\n"); + } else { + spin_lock(&mdd->mdd_cl.mc_lock); + mdd->mdd_cl.mc_flags |= CLM_ON; + spin_unlock(&mdd->mdd_cl.mc_lock); + rc = mdd_changelog_write_header(mdd, CLM_START); + if (rc) + return rc; + } + } else if (strcmp(kernbuf, "off") == 0) { + LCONSOLE_INFO("changelog off\n"); + rc = mdd_changelog_write_header(mdd, CLM_FINI); + if (rc) + return rc; + spin_lock(&mdd->mdd_cl.mc_lock); + mdd->mdd_cl.mc_flags &= ~CLM_ON; + spin_unlock(&mdd->mdd_cl.mc_lock); + } else { + /* purge to an index */ + long long unsigned endrec, cur; + + spin_lock(&mdd->mdd_cl.mc_lock); + cur = (long long)mdd->mdd_cl.mc_index; + spin_unlock(&mdd->mdd_cl.mc_lock); + + if (strcmp(kernbuf, "0") == 0) + /* purge to "0" is shorthand for everything */ + endrec = cur; + else + endrec = (long long)simple_strtoull(kernbuf, &end, 0); + if 
((kernbuf == end) || (endrec == 0)) + goto out_usage; + if (endrec > cur) + endrec = cur; + + /* If purging all records, write a header entry so we + don't have an empty catalog and + we're sure to have a valid starting index next time. In + case of crash, we just restart with old log so we're + all right. */ + if (endrec == cur) { + rc = mdd_changelog_write_header(mdd, CLM_PURGE); + if (rc) + return rc; + } + + LCONSOLE_INFO("changelog purge to %llu\n", endrec); + + rc = mdd_changelog_llog_cancel(mdd, endrec); + if (rc < 0) + return rc; + } + + return count; + +out_usage: + CWARN("changelog write usage: [on|off] | <purge_idx (0=all)>\n"); + return -EINVAL; +} + +static ssize_t mdd_cl_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct cl_seq_iter *csi = seq->private; + struct mdd_device *mdd = csi->csi_mdd; + + return lprocfs_wr_cl(file, buffer, count, mdd); +} + +#define D_CL 0 + +/* How many records per seq_show. Too small, we spawn llog_process threads + too often; too large, we run out of buffer space */ +#define CL_CHUNK_SIZE 100 + +static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, + void *data) +{ + struct seq_file *seq = (struct seq_file *)data; + struct cl_seq_iter *csi = seq->private; + struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; + int rc; + ENTRY; + + if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) || + (rec->cr_type >= CL_LAST)) { + CERROR("Not a changelog rec? %d/%d\n", rec->cr_hdr.lrh_type, + rec->cr_type); + RETURN(-EINVAL); + } + + CDEBUG(D_CL, "rec="LPU64" start="LPU64" cat=%d:%d start=%d:%d\n", + rec->cr_index, csi->csi_startrec, + llh->lgh_hdr->llh_cat_idx, llh->lgh_cur_idx, + csi->csi_startcat, csi->csi_startidx); + + if (rec->cr_index < csi->csi_startrec) + RETURN(0); + if (rec->cr_index == csi->csi_startrec) { + /* Remember where we started, since seq_read will re-read + * the data when it reallocs space. 
Sigh, if only there was + * a way to tell seq_file how big the buf should be in the + * first place... */ + csi->csi_startcat = llh->lgh_hdr->llh_cat_idx; + csi->csi_startidx = rec->cr_hdr.lrh_index - 1; + } + if (csi->csi_wrote > CL_CHUNK_SIZE) { + /* Stop at some point with a reasonable seq_file buffer size. + * Start from here the next time. + */ + csi->csi_endrec = rec->cr_index - 1; + csi->csi_startcat = llh->lgh_hdr->llh_cat_idx; + csi->csi_startidx = rec->cr_hdr.lrh_index - 1; + csi->csi_wrote = 0; + RETURN(LLOG_PROC_BREAK); + } + + rc = seq_printf(seq, LPU64" %02d%-5s "LPU64" 0x%x t="DFID, + rec->cr_index, rec->cr_type, + changelog_str[rec->cr_type], rec->cr_time, + rec->cr_flags & CLF_FLAGMASK, PFID(&rec->cr_tfid)); + + if (rec->cr_namelen) + /* namespace rec includes parent and filename */ + rc += seq_printf(seq, " p="DFID" %.*s\n", PFID(&rec->cr_pfid), + rec->cr_namelen, rec->cr_name); + else + rc += seq_puts(seq, "\n"); + + if (rc < 0) { + /* seq_read will dump the whole buffer and re-seq_start with a + larger one; no point in continuing the llog_process */ + CDEBUG(D_CL, "rec="LPU64" overflow "LPU64"<-"LPU64"\n", + rec->cr_index, csi->csi_startrec, csi->csi_endrec); + csi->csi_endrec = csi->csi_startrec - 1; + csi->csi_wrote = 0; + RETURN(LLOG_PROC_BREAK); + } + + csi->csi_wrote++; + csi->csi_endrec = rec->cr_index; + + RETURN(0); +} + +static int mdd_cl_seq_show(struct seq_file *seq, void *v) +{ + struct cl_seq_iter *csi = seq->private; + struct obd_device *obd = mdd2obd_dev(csi->csi_mdd); + struct llog_ctxt *ctxt; + int rc; + + if (csi->csi_fill) { + /* seq_read wants more data to fill his buffer. But we already + filled the buf as much as we cared to; force seq_read to + accept that. 
*/ + while ((rc = seq_putc(seq, 0)) == 0); + return 0; + } + + ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); + if (ctxt == NULL) + return -ENOENT; + + /* Since we have to restart the llog_cat_process for each chunk of the + seq_ functions, start from where we left off. */ + rc = llog_cat_process(ctxt->loc_handle, changelog_show_cb, seq, + csi->csi_startcat, csi->csi_startidx); + + CDEBUG(D_CL, "seq_show "LPU64"-"LPU64" cat=%d:%d wrote=%d rc=%d\n", + csi->csi_startrec, csi->csi_endrec, csi->csi_startcat, + csi->csi_startidx, csi->csi_wrote, rc); + + llog_ctxt_put(ctxt); + + if (rc == LLOG_PROC_BREAK) + rc = 0; + + return rc; +} + +static int mdd_cl_done(struct cl_seq_iter *csi) +{ + int done = 0; + spin_lock(&csi->csi_mdd->mdd_cl.mc_lock); + done = (csi->csi_endrec >= csi->csi_mdd->mdd_cl.mc_index); + spin_unlock(&csi->csi_mdd->mdd_cl.mc_lock); + return done; +} + + +static void *mdd_cl_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct cl_seq_iter *csi = seq->private; + LASSERT(csi); + + CDEBUG(D_CL, "start "LPU64"-"LPU64" pos="LPU64"\n", + csi->csi_startrec, csi->csi_endrec, *pos); + + csi->csi_fill = 0; + + if (mdd_cl_done(csi)) + /* no more records, seq_read should return 0 if buffer + is empty */ + return NULL; + + if (*pos > csi->csi_pos) { + /* The seq_read implementation sucks. It may call start + multiple times, using pos to indicate advances, if any, + by arbitrarily increasing it by 1. So ignore the actual + value of pos, and just register any increase as + "seq_read wants the next values". 
*/ + csi->csi_startrec = csi->csi_endrec + 1; + csi->csi_pos = *pos; + } + /* else use old startrec/startidx */ + + return csi; +} + +static void mdd_cl_seq_stop(struct seq_file *seq, void *v) +{ + struct cl_seq_iter *csi = seq->private; + + CDEBUG(D_CL, "stop "LPU64"-"LPU64"\n", + csi->csi_startrec, csi->csi_endrec); +} + +static void *mdd_cl_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct cl_seq_iter *csi = seq->private; + + CDEBUG(D_CL, "next "LPU64"-"LPU64" pos="LPU64"\n", + csi->csi_startrec, csi->csi_endrec, *pos); + + csi->csi_fill = 1; + + return csi; +} + +struct seq_operations mdd_cl_sops = { + .start = mdd_cl_seq_start, + .stop = mdd_cl_seq_stop, + .next = mdd_cl_seq_next, + .show = mdd_cl_seq_show, +}; + +static int mdd_cl_seq_open(struct inode *inode, struct file *file) +{ + struct cl_seq_iter *csi; + struct proc_dir_entry *dp = PDE(inode); + struct seq_file *seq; + int rc; + + LPROCFS_ENTRY_AND_CHECK(dp); + + rc = seq_open(file, &mdd_cl_sops); + if (rc) + goto out; + + OBD_ALLOC_PTR(csi); + if (csi == NULL) { + rc = -ENOMEM; + goto out; + } + csi->csi_mdd = dp->data; + seq = file->private_data; + seq->private = csi; + +out: + if (rc) + LPROCFS_EXIT(); + return rc; +} + +static int mdd_cl_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct cl_seq_iter *csi = seq->private; + + OBD_FREE_PTR(csi); + + return lprocfs_seq_release(inode, file); +} + +static loff_t mdd_cl_seq_lseek(struct file *file, loff_t offset, int origin) +{ + struct seq_file *seq = (struct seq_file *)file->private_data; + struct cl_seq_iter *csi = seq->private; + + CDEBUG(D_CL, "seek "LPU64"-"LPU64" off="LPU64":%d fpos="LPU64"\n", + csi->csi_startrec, csi->csi_endrec, offset, origin, file->f_pos); + + LL_SEQ_LOCK(seq); + + switch (origin) { + case SEEK_CUR: + offset += csi->csi_endrec; + break; + case SEEK_END: + spin_lock(&csi->csi_mdd->mdd_cl.mc_lock); + offset += csi->csi_mdd->mdd_cl.mc_index; + 
spin_unlock(&csi->csi_mdd->mdd_cl.mc_lock); + break; + } + + /* SEEK_SET */ + + if (offset < 0) { + LL_SEQ_UNLOCK(seq); + return -EINVAL; + } + + csi->csi_startrec = offset; + csi->csi_endrec = offset ? offset - 1 : 0; + + /* drop whatever is left in sucky seq_read's buffer */ + seq->count = 0; + seq->from = 0; + seq->index++; + LL_SEQ_UNLOCK(seq); + file->f_pos = csi->csi_startrec; + return csi->csi_startrec; +} + +static ssize_t mdd_cl_seq_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = (struct seq_file *)file->private_data; + struct cl_seq_iter *csi = seq->private; + + if ((file->f_flags & O_NONBLOCK) && mdd_cl_done(csi)) + return -EAGAIN; + return seq_read(file, buf, count, ppos); +} + +static unsigned int mdd_cl_seq_poll(struct file *file, poll_table *wait) +{ /* based on kmsg_poll */ + struct seq_file *seq = (struct seq_file *)file->private_data; + struct cl_seq_iter *csi = seq->private; + + poll_wait(file, &csi->csi_mdd->mdd_cl.mc_waitq, wait); + if (!mdd_cl_done(csi)) + return POLLIN | POLLRDNORM; + + return 0; +} + +struct file_operations mdd_changelog_fops = { + .owner = THIS_MODULE, + .open = mdd_cl_seq_open, + .read = mdd_cl_seq_read, + .write = mdd_cl_seq_write, + .llseek = mdd_cl_seq_lseek, + .poll = mdd_cl_seq_poll, + .release = mdd_cl_seq_release, +}; + #ifdef HAVE_QUOTA_SUPPORT static int mdd_lprocfs_quota_rd_type(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -169,6 +629,8 @@ static int mdd_lprocfs_quota_wr_type(struct file *file, const char *buffer, static struct lprocfs_vars lprocfs_mdd_obd_vars[] = { { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, + { "changelog_mask", lprocfs_rd_cl_mask, lprocfs_wr_cl_mask, 0 }, + { "changelog", 0, lprocfs_wr_cl, 0, &mdd_changelog_fops, 0600 }, #ifdef HAVE_QUOTA_SUPPORT { "quota_type", mdd_lprocfs_quota_rd_type, mdd_lprocfs_quota_wr_type, 0 }, @@ -186,3 +648,4 @@ void lprocfs_mdd_init_vars(struct 
lprocfs_static_vars *lvars) lvars->module_vars = lprocfs_mdd_module_vars; lvars->obd_vars = lprocfs_mdd_obd_vars; } + diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index d7a99693790fb5b27908190e9d5b8726628037c0..4ada09927391ebc8b142ff7a6ee7c9365f89cc9e 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -54,6 +54,7 @@ /* fid_be_cpu(), fid_cpu_to_be(). */ #include <lustre_fid.h> +#include <lustre_param.h> #include <linux/ldiskfs_fs.h> #include <lustre_mds.h> #include <lustre/lustre_idl.h> @@ -62,6 +63,10 @@ static const struct lu_object_operations mdd_lu_obj_ops; +static int mdd_xattr_get(const struct lu_env *env, + struct md_object *obj, struct lu_buf *buf, + const char *name); + int mdd_la_get(const struct lu_env *env, struct mdd_object *obj, struct lu_attr *la, struct lustre_capa *capa) { @@ -81,6 +86,15 @@ static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags) obj->mod_flags |= IMMUTE_OBJ; } +struct mdd_thread_info *mdd_env_info(const struct lu_env *env) +{ + struct mdd_thread_info *info; + + info = lu_context_key_get(&env->le_ctx, &mdd_thread_key); + LASSERT(info != NULL); + return info; +} + struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len) { struct lu_buf *buf; @@ -91,6 +105,86 @@ struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len) return buf; } +void mdd_buf_put(struct lu_buf *buf) +{ + if (buf == NULL || buf->lb_buf == NULL) + return; + if (buf->lb_vmalloc) + OBD_VFREE(buf->lb_buf, buf->lb_len); + else + OBD_FREE(buf->lb_buf, buf->lb_len); + buf->lb_buf = NULL; +} + +const struct lu_buf *mdd_buf_get_const(const struct lu_env *env, + const void *area, ssize_t len) +{ + struct lu_buf *buf; + + buf = &mdd_env_info(env)->mti_buf; + buf->lb_buf = (void *)area; + buf->lb_len = len; + return buf; +} + +#define BUF_VMALLOC_SIZE (CFS_PAGE_SIZE<<2) /* 16k */ +struct lu_buf *mdd_buf_alloc(const struct lu_env *env, ssize_t len) +{ + struct lu_buf *buf = 
&mdd_env_info(env)->mti_big_buf; + + if ((len > buf->lb_len) && (buf->lb_buf != NULL)) { + if (buf->lb_vmalloc) + OBD_VFREE(buf->lb_buf, buf->lb_len); + else + OBD_FREE(buf->lb_buf, buf->lb_len); + buf->lb_buf = NULL; + } + if (buf->lb_buf == NULL) { + buf->lb_len = len; + if (buf->lb_len <= BUF_VMALLOC_SIZE) { + OBD_ALLOC(buf->lb_buf, buf->lb_len); + buf->lb_vmalloc = 0; + } + if (buf->lb_buf == NULL) { + OBD_VMALLOC(buf->lb_buf, buf->lb_len); + buf->lb_vmalloc = 1; + } + if (buf->lb_buf == NULL) + buf->lb_len = 0; + } + return buf; +} + +/* preserve old data */ +int mdd_buf_grow(const struct lu_env *env, ssize_t len) +{ + struct lu_buf *oldbuf = &mdd_env_info(env)->mti_big_buf; + struct lu_buf buf; + + LASSERT(len >= oldbuf->lb_len); + if (len > BUF_VMALLOC_SIZE) { + OBD_VMALLOC(buf.lb_buf, len); + buf.lb_vmalloc = 1; + } else { + OBD_ALLOC(buf.lb_buf, len); + buf.lb_vmalloc = 0; + } + if (buf.lb_buf == NULL) + return -ENOMEM; + + buf.lb_len = len; + memcpy(buf.lb_buf, oldbuf->lb_buf, oldbuf->lb_len); + + if (oldbuf->lb_vmalloc) + OBD_VFREE(oldbuf->lb_buf, oldbuf->lb_len); + else + OBD_FREE(oldbuf->lb_buf, oldbuf->lb_len); + + memcpy(oldbuf, &buf, sizeof(buf)); + + return 0; +} + struct llog_cookie *mdd_max_cookie_get(const struct lu_env *env, struct mdd_device *mdd) { @@ -135,26 +229,6 @@ struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env, return mti->mti_max_lmm; } -const struct lu_buf *mdd_buf_get_const(const struct lu_env *env, - const void *area, ssize_t len) -{ - struct lu_buf *buf; - - buf = &mdd_env_info(env)->mti_buf; - buf->lb_buf = (void *)area; - buf->lb_len = len; - return buf; -} - -struct mdd_thread_info *mdd_env_info(const struct lu_env *env) -{ - struct mdd_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &mdd_thread_key); - LASSERT(info != NULL); - return info; -} - struct lu_object *mdd_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *d) @@ -181,17 +255,20 @@ static int 
mdd_object_init(const struct lu_env *env, struct lu_object *o, const struct lu_object_conf *_) { struct mdd_device *d = lu2mdd_dev(o->lo_dev); + struct mdd_object *mdd_obj = lu2mdd_obj(o); struct lu_object *below; struct lu_device *under; ENTRY; + mdd_obj->mod_cltime = 0; under = &d->mdd_child->dd_lu_dev; below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under); - mdd_pdlock_init(lu2mdd_obj(o)); + mdd_pdlock_init(mdd_obj); if (below == NULL) RETURN(-ENOMEM); lu_object_add(o, below); + RETURN(0); } @@ -218,10 +295,10 @@ static int mdd_object_print(const struct lu_env *env, void *cookie, } static const struct lu_object_operations mdd_lu_obj_ops = { - .loo_object_init = mdd_object_init, - .loo_object_start = mdd_object_start, - .loo_object_free = mdd_object_free, - .loo_object_print = mdd_object_print, + .loo_object_init = mdd_object_init, + .loo_object_start = mdd_object_start, + .loo_object_free = mdd_object_free, + .loo_object_print = mdd_object_print, }; struct mdd_object *mdd_object_find(const struct lu_env *env, @@ -231,6 +308,237 @@ struct mdd_object *mdd_object_find(const struct lu_env *env, return md2mdd_obj(md_object_find_slice(env, &d->mdd_md_dev, f)); } +static int mdd_path2fid(const struct lu_env *env, struct mdd_device *mdd, + const char *path, struct lu_fid *fid) +{ + struct lu_buf *buf; + struct lu_fid *f = &mdd_env_info(env)->mti_fid; + struct mdd_object *obj; + struct lu_name *lname = &mdd_env_info(env)->mti_name; + char *name; + int rc = 0; + ENTRY; + + /* temp buffer for path element */ + buf = mdd_buf_alloc(env, PATH_MAX); + if (buf->lb_buf == NULL) + RETURN(-ENOMEM); + + lname->ln_name = name = buf->lb_buf; + lname->ln_namelen = 0; + *f = mdd->mdd_root_fid; + + while(1) { + while (*path == '/') + path++; + if (*path == '\0') + break; + while (*path != '/' && *path != '\0') { + *name = *path; + path++; + name++; + lname->ln_namelen++; + } + + *name = '\0'; + /* find obj corresponding to fid */ + obj = mdd_object_find(env, mdd, f); + if 
(obj == NULL) + GOTO(out, rc = -EREMOTE); + if (IS_ERR(obj)) + GOTO(out, rc = -PTR_ERR(obj)); + /* get child fid from parent and name */ + rc = mdd_lookup(env, &obj->mod_obj, lname, f, NULL); + mdd_object_put(env, obj); + if (rc) + break; + + name = buf->lb_buf; + lname->ln_namelen = 0; + } + + if (!rc) + *fid = *f; +out: + RETURN(rc); +} + +/** The maximum depth that fid2path() will search. + * This is limited only because we want to store the fids for + * historical path lookup purposes. + */ +#define MAX_PATH_DEPTH 100 + +/** mdd_path() lookup structure. */ +struct path_lookup_info { + __u64 pli_recno; /**< history point */ + struct lu_fid pli_fid; + struct lu_fid pli_fids[MAX_PATH_DEPTH]; /**< path, in fids */ + struct mdd_object *pli_mdd_obj; + char *pli_path; /**< full path */ + int pli_pathlen; + int pli_linkno; /**< which hardlink to follow */ + int pli_fidcount; /**< number of \a pli_fids */ +}; + +static int mdd_path_current(const struct lu_env *env, + struct path_lookup_info *pli) +{ + struct mdd_device *mdd = mdo2mdd(&pli->pli_mdd_obj->mod_obj); + struct mdd_object *mdd_obj; + struct lu_buf *buf = NULL; + struct link_ea_header *leh; + struct link_ea_entry *lee; + struct lu_name *tmpname = &mdd_env_info(env)->mti_name; + struct lu_fid *tmpfid = &mdd_env_info(env)->mti_fid; + char *ptr; + int reclen; + int rc; + ENTRY; + + ptr = pli->pli_path + pli->pli_pathlen - 1; + *ptr = 0; + --ptr; + pli->pli_fidcount = 0; + pli->pli_fids[0] = *(struct lu_fid *)mdd_object_fid(pli->pli_mdd_obj); + + while (!mdd_is_root(mdd, &pli->pli_fids[pli->pli_fidcount])) { + mdd_obj = mdd_object_find(env, mdd, + &pli->pli_fids[pli->pli_fidcount]); + if (mdd_obj == NULL) + GOTO(out, rc = -EREMOTE); + if (IS_ERR(mdd_obj)) + GOTO(out, rc = -PTR_ERR(mdd_obj)); + rc = lu_object_exists(&mdd_obj->mod_obj.mo_lu); + if (rc <= 0) { + mdd_object_put(env, mdd_obj); + if (rc == -1) + rc = -EREMOTE; + else if (rc == 0) + /* Do I need to error out here? 
*/ + rc = -ENOENT; + GOTO(out, rc); + } + + /* Get parent fid and object name */ + mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD); + buf = mdd_links_get(env, mdd_obj); + mdd_read_unlock(env, mdd_obj); + mdd_object_put(env, mdd_obj); + if (IS_ERR(buf)) + GOTO(out, rc = PTR_ERR(buf)); + if (rc < 0) + GOTO(out, rc); + + leh = buf->lb_buf; + lee = (struct link_ea_entry *)(leh + 1); /* link #0 */ + mdd_lee_unpack(lee, &reclen, tmpname, tmpfid); + + /* If set, use link #linkno for path lookup, otherwise use + link #0. Only do this for the final path element. */ + if ((pli->pli_fidcount == 0) && + (pli->pli_linkno < leh->leh_reccount)) { + int count; + for (count = 0; count < pli->pli_linkno; count++) { + lee = (struct link_ea_entry *) + ((char *)lee + reclen); + mdd_lee_unpack(lee, &reclen, tmpname, tmpfid); + } + if (pli->pli_linkno < leh->leh_reccount - 1) + /* indicate to user there are more links */ + pli->pli_linkno++; + } + + /* Pack the name in the end of the buffer */ + ptr -= tmpname->ln_namelen; + if (ptr - 1 <= pli->pli_path) + GOTO(out, rc = -EOVERFLOW); + strncpy(ptr, tmpname->ln_name, tmpname->ln_namelen); + *(--ptr) = '/'; + + /* Store the parent fid for historic lookup */ + if (++pli->pli_fidcount >= MAX_PATH_DEPTH) + GOTO(out, rc = -EOVERFLOW); + pli->pli_fids[pli->pli_fidcount] = *tmpfid; + } + + /* Verify that our path hasn't changed since we started the lookup */ + rc = mdd_path2fid(env, mdd, ptr, &pli->pli_fid); + if (rc) { + CDEBUG(D_INFO, "mdd_path2fid(%s) failed %d\n", ptr, rc); + GOTO (out, rc = -EAGAIN); + } + if (!lu_fid_eq(&pli->pli_fids[0], &pli->pli_fid)) { + CDEBUG(D_INFO, "mdd_path2fid(%s) found another FID o="DFID + " n="DFID"\n", ptr, PFID(&pli->pli_fids[0]), + PFID(&pli->pli_fid)); + GOTO(out, rc = -EAGAIN); + } + + memmove(pli->pli_path, ptr, pli->pli_path + pli->pli_pathlen - ptr); + + EXIT; +out: + if (buf && !IS_ERR(buf) && buf->lb_vmalloc) + /* if we vmalloced a large buffer drop it */ + mdd_buf_put(buf); + + return rc; +} + +/* 
Returns the full path to this fid, as of changelog record recno. */ +static int mdd_path(const struct lu_env *env, struct md_object *obj, + char *path, int pathlen, __u64 recno, int *linkno) +{ + struct path_lookup_info *pli; + int tries = 3; + int rc = -EAGAIN; + ENTRY; + + if (pathlen < 3) + RETURN(-EOVERFLOW); + + if (mdd_is_root(mdo2mdd(obj), mdd_object_fid(md2mdd_obj(obj)))) { + path[0] = '/'; + path[1] = '\0'; + RETURN(0); + } + + OBD_ALLOC_PTR(pli); + if (pli == NULL) + RETURN(-ENOMEM); + + pli->pli_mdd_obj = md2mdd_obj(obj); + pli->pli_recno = recno; + pli->pli_path = path; + pli->pli_pathlen = pathlen; + pli->pli_linkno = *linkno; + + /* Retry multiple times in case file is being moved */ + while (tries-- && rc == -EAGAIN) + rc = mdd_path_current(env, pli); + +#if 0 /* We need old path names only for replication */ + /* For historical path lookup, the current links may not have existed + * at "recno" time. We must switch over to earlier links/parents + * by using the changelog records. If the earlier parent doesn't + * exist, we must search back through the changelog to reconstruct + * its parents, then check if it exists, etc. + * We may ignore this problem for the initial implementation and + * state that an "original" hardlink must still exist for us to find + * historic path name. 
*/ + if (pli->pli_recno != -1) + rc = mdd_path_historic(env, pli); +#endif + + /* return next link index to caller */ + *linkno = pli->pli_linkno; + + OBD_FREE_PTR(pli); + + RETURN (rc); +} + int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj) { struct lu_attr *la = &mdd_env_info(env)->mti_la; @@ -595,9 +903,9 @@ int mdd_attr_check_set_internal_locked(const struct lu_env *env, RETURN(rc); } -static int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *obj, - const struct lu_buf *buf, const char *name, - int fl, struct thandle *handle) +int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *obj, + const struct lu_buf *buf, const char *name, + int fl, struct thandle *handle) { struct lustre_capa *capa = mdd_object_capa(env, obj); int rc = -EINVAL; @@ -819,6 +1127,60 @@ static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj, RETURN(0); } +/** Store a data change changelog record + * If this fails, we must fail the whole transaction; we don't + * want the change to commit without the log entry. 
+ * \param mdd_obj - mdd_object of change + * \param handle - transacion handle + */ +static int mdd_changelog_data_store(const struct lu_env *env, + struct mdd_device *mdd, + enum changelog_rec_type type, + struct mdd_object *mdd_obj, + struct thandle *handle) +{ + const struct lu_fid *tfid = mdo2fid(mdd_obj); + struct llog_changelog_rec *rec; + struct lu_buf *buf; + int reclen; + int rc; + + if (!(mdd->mdd_cl.mc_flags & CLM_ON)) + RETURN(0); + + LASSERT(handle != NULL); + LASSERT(mdd_obj != NULL); + + if ((type == CL_SETATTR) && + cfs_time_before_64(mdd->mdd_cl.mc_starttime, mdd_obj->mod_cltime)) { + /* Don't need multiple updates in this log */ + /* Don't check under lock - no big deal if we get an extra + entry */ + RETURN(0); + } + + reclen = llog_data_len(sizeof(*rec)); + buf = mdd_buf_alloc(env, reclen); + if (buf->lb_buf == NULL) + RETURN(-ENOMEM); + rec = (struct llog_changelog_rec *)buf->lb_buf; + + rec->cr_flags = CLF_VERSION; + rec->cr_type = (__u32)type; + rec->cr_tfid = *tfid; + rec->cr_namelen = 0; + mdd_obj->mod_cltime = cfs_time_current_64(); + + rc = mdd_changelog_llog_write(mdd, rec, handle); + if (rc < 0) { + CERROR("changelog failed: rc=%d op%d t"DFID"\n", + rc, type, PFID(tfid)); + return -EFAULT; + } + + return 0; +} + /* set attr and LOV EA at once, return updated attr */ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj, const struct md_attr *ma) @@ -861,7 +1223,7 @@ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj, GOTO(cleanup, rc); } - if (ma->ma_attr.la_valid & (ATTR_MTIME | ATTR_CTIME)) + if (ma->ma_attr.la_valid & (LA_MTIME | LA_CTIME)) CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n", ma->ma_attr.la_mtime, ma->ma_attr.la_ctime); @@ -931,6 +1293,9 @@ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj, } cleanup: + if ((rc == 0) && (ma->ma_attr.la_valid & (LA_MTIME | LA_CTIME))) + rc = mdd_changelog_data_store(env, mdd, CL_SETATTR, mdd_obj, + handle); 
mdd_trans_stop(env, mdd, rc, handle); if (rc == 0 && (lmm != NULL && lmm_size > 0 )) { /*set obd attr, if needed*/ @@ -1017,6 +1382,12 @@ static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj, RETURN(PTR_ERR(handle)); rc = mdd_xattr_set_txn(env, mdd_obj, buf, name, fl, handle); + + /* Only record user xattr changes */ + if ((rc == 0) && (mdd->mdd_cl.mc_flags & CLM_ON) && + (strncmp("user.", name, 5) == 0)) + rc = mdd_changelog_data_store(env, mdd, CL_XATTR, mdd_obj, + handle); mdd_trans_stop(env, mdd, rc, handle); RETURN(rc); @@ -1048,6 +1419,13 @@ int mdd_xattr_del(const struct lu_env *env, struct md_object *obj, rc = mdo_xattr_del(env, mdd_obj, name, handle, mdd_object_capa(env, mdd_obj)); mdd_write_unlock(env, mdd_obj); + + /* Only record user xattr changes */ + if ((rc == 0) && (mdd->mdd_cl.mc_flags & CLM_ON) && + (strncmp("user.", name, 5) == 0)) + rc = mdd_changelog_data_store(env, mdd, CL_XATTR, mdd_obj, + handle); + mdd_trans_stop(env, mdd, rc, handle); RETURN(rc); @@ -1252,7 +1630,8 @@ static int mdd_object_create(const struct lu_env *env, pfid = spec->u.sp_ea.fid; } #endif - rc = mdd_object_initialize(env, pfid, mdd_obj, ma, handle, spec); + rc = mdd_object_initialize(env, pfid, NULL, mdd_obj, ma, handle, + spec); } EXIT; unlock: @@ -1760,4 +2139,5 @@ const struct md_object_operations mdd_obj_ops = { .moo_readlink = mdd_readlink, .moo_capa_get = mdd_capa_get, .moo_object_sync = mdd_object_sync, + .moo_path = mdd_path, }; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 6e5c40a87bda75b2ccac68bd1427ca1bfd5226db..568f69221e8236d255695f68e18918a158c3f4c5 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -194,6 +194,8 @@ static int mds_postsetup(struct obd_device *obd) if (rc) GOTO(err_llog, rc); + mds_changelog_llog_init(obd, obd); + if (mds->mds_profile) { struct lustre_profile *lprof; /* The profile defines which osc and mdc to connect to, for a diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h 
index c98aefa9174890fbfcc118c86d337a6ab15ed3d7..333597b75c282c45d98de96ef1b357d967a40c3f 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -48,6 +48,7 @@ int mds_llog_init(struct obd_device *obd, struct obd_llog_group *olg, struct obd_device *tgt, int count, struct llog_catid *logid, struct obd_uuid *uuid); int mds_llog_finish(struct obd_device *obd, int count); +int mds_changelog_llog_init(struct obd_device *obd, struct obd_device *tgt); /* mds/mds_lov.c */ int mds_lov_connect(struct obd_device *obd, char * lov_name); diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index b582ae1f3f111398e3aff41a199d135cd473efdc..ab925e082979ed7ede6628485beb87923ab52832 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -89,6 +89,11 @@ static int mds_llog_origin_connect(struct llog_ctxt *ctxt, RETURN(rc); } +static struct llog_operations mds_ost_orig_logops = { + lop_add: mds_llog_origin_add, + lop_connect: mds_llog_origin_connect, +}; + static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, int count, struct llog_cookie *cookies, int flags) { @@ -104,17 +109,84 @@ static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls RETURN(rc); } -static struct llog_operations mds_ost_orig_logops = { - lop_add: mds_llog_origin_add, - lop_connect: mds_llog_origin_connect, -}; - static struct llog_operations mds_size_repl_logops = { lop_cancel: mds_llog_repl_cancel, }; +static struct llog_operations changelog_orig_logops; + +static int llog_changelog_cancel_cb(struct llog_handle *llh, + struct llog_rec_hdr *hdr, void *data) +{ + struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; + struct llog_cookie cookie; + long long endrec = *(long long *)data; + int rc; + ENTRY; + + /* This is always a (sub)log, not the catalog */ + LASSERT(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN); + + if (rec->cr_index > endrec) + /* records are in order, so we're done */ + RETURN(LLOG_PROC_BREAK); + + 
cookie.lgc_lgl = llh->lgh_id; + cookie.lgc_index = hdr->lrh_index; + + /* cancel them one at a time. I suppose we could store up the cookies + and cancel them all at once; probably more efficient, but this is + done as a user call, so who cares... */ + rc = llog_cat_cancel_records(llh->u.phd.phd_cat_handle, 1, &cookie); + RETURN(rc < 0 ? rc : 0); +} + +static int llog_changelog_cancel(struct llog_ctxt *ctxt, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags) +{ + struct llog_handle *cathandle = ctxt->loc_handle; + int rc; + ENTRY; + + /* This should only be called with the catalog handle */ + LASSERT(cathandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT); + + rc = llog_cat_process(cathandle, llog_changelog_cancel_cb, + (void *)cookies, 0, 0); + if (rc >= 0) + /* 0 or 1 means we're done */ + rc = 0; + else + CERROR("cancel idx %u of catalog "LPX64" rc=%d\n", + cathandle->lgh_last_idx, cathandle->lgh_id.lgl_oid, rc); + + RETURN(rc); +} + +int mds_changelog_llog_init(struct obd_device *obd, struct obd_device *tgt) +{ + int rc; + + /* see osc_llog_init */ + changelog_orig_logops = llog_lvfs_ops; + changelog_orig_logops.lop_setup = llog_obd_origin_setup; + changelog_orig_logops.lop_cleanup = llog_obd_origin_cleanup; + changelog_orig_logops.lop_add = llog_obd_origin_add; + changelog_orig_logops.lop_cancel = llog_changelog_cancel; + + rc = llog_setup_named(obd, &obd->obd_olg, LLOG_CHANGELOG_ORIG_CTXT, + tgt, 1, NULL, CHANGELOG_CATALOG, + &changelog_orig_logops); + if (rc) + CERROR("changelog llog setup failed %d\n", rc); + + RETURN(rc); +} +EXPORT_SYMBOL(mds_changelog_llog_init); + int mds_llog_init(struct obd_device *obd, struct obd_llog_group *olg, - struct obd_device *tgt, int count, struct llog_catid *logid, + struct obd_device *tgt, int count, struct llog_catid *logid, struct obd_uuid *uuid) { struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -123,13 +195,13 @@ int mds_llog_init(struct obd_device *obd, struct obd_llog_group *olg, ENTRY; 
LASSERT(olg == &obd->obd_olg); - rc = llog_setup(obd, &obd->obd_olg, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, - &mds_ost_orig_logops); + rc = llog_setup(obd, &obd->obd_olg, LLOG_MDS_OST_ORIG_CTXT, tgt, + 0, NULL, &mds_ost_orig_logops); if (rc) RETURN(rc); - rc = llog_setup(obd, &obd->obd_olg, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, - &mds_size_repl_logops); + rc = llog_setup(obd, &obd->obd_olg, LLOG_SIZE_REPL_CTXT, tgt, + 0, NULL, &mds_size_repl_logops); if (rc) GOTO(err_llog, rc); @@ -167,5 +239,11 @@ int mds_llog_finish(struct obd_device *obd, int count) if (!rc) rc = rc2; + ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); + if (ctxt) + rc2 = llog_cleanup(ctxt); + if (!rc) + rc = rc2; + RETURN(rc); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index ea8e45a68323190dff0ac5c9de2433b925887588..4a7dbe52765c63aa7c602b04d016455dfc8225f1 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5140,11 +5140,87 @@ static int mdt_obd_notify(struct obd_device *host, RETURN(0); } +static int mdt_ioc_fid2path(struct lu_env *env, struct mdt_device *mdt, + struct obd_ioctl_data *data) +{ + struct lu_context ioctl_session; + struct mdt_object *obj; + struct lu_fid *fid; + char *path = NULL; + __u64 recno; + int pathlen = data->ioc_plen1; + int linkno; + int rc; + ENTRY; + + + fid = (struct lu_fid *)data->ioc_inlbuf1; + memcpy(&recno, data->ioc_inlbuf2, sizeof(recno)); + memcpy(&linkno, data->ioc_inlbuf3, sizeof(linkno)); + CDEBUG(D_IOCTL, "path get "DFID" from "LPU64" #%d\n", + PFID(fid), recno, linkno); + + if (!fid_is_sane(fid)) + RETURN(-EINVAL); + + if (pathlen < 3) + RETURN(-EOVERFLOW); + + rc = lu_context_init(&ioctl_session, LCT_SESSION); + if (rc) + RETURN(rc); + ioctl_session.lc_thread = (struct ptlrpc_thread *)cfs_current(); + lu_context_enter(&ioctl_session); + env->le_ses = &ioctl_session; + + OBD_ALLOC(path, pathlen); + if (path == NULL) + GOTO(out_context, rc = -ENOMEM); + + obj = mdt_object_find(env, mdt, fid); + if (obj == 
NULL || IS_ERR(obj)) { + CDEBUG(D_IOCTL, "no object "DFID": %ld\n", PFID(fid), + PTR_ERR(obj)); + GOTO(out_free, rc = -EINVAL); + } + + rc = lu_object_exists(&obj->mot_obj.mo_lu); + if (rc <= 0) { + if (rc == -1) + rc = -EREMOTE; + else + rc = -ENOENT; + mdt_object_put(env, obj); + CDEBUG(D_IOCTL, "nonlocal object "DFID": %d\n", PFID(fid), + rc); + GOTO(out_free, rc); + } + + rc = mo_path(env, md_object_next(&obj->mot_obj), path, pathlen, recno, + &linkno); + mdt_object_put(env, obj); + if (rc) + GOTO(out_free, rc); + + if (copy_to_user(data->ioc_pbuf1, path, pathlen)) + rc = -EFAULT; + + memcpy(data->ioc_inlbuf3, &linkno, sizeof(linkno)); + + EXIT; +out_free: + OBD_FREE(path, pathlen); +out_context: + lu_context_exit(&ioctl_session); + lu_context_fini(&ioctl_session); + return rc; +} + static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { struct lu_env env; - struct obd_device *obd= exp->exp_obd; + struct obd_device *obd = exp->exp_obd; struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); struct dt_device *dt = mdt->mdt_bottom; int rc; @@ -5167,6 +5243,9 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, target_stop_recovery_thread(obd); rc = 0; break; + case OBD_IOC_FID2PATH: + rc = mdt_ioc_fid2path(&env, mdt, karg); + break; default: CERROR("Not supported cmd = %d for device %s\n", cmd, obd->obd_name); diff --git a/lustre/mgc/libmgc.c b/lustre/mgc/libmgc.c index a9f6737ecf7340b345c2edd6b8aebde8fc481262..9742e8c29e647ea2e25778cd58a9bc677fc4cde9 100644 --- a/lustre/mgc/libmgc.c +++ b/lustre/mgc/libmgc.c @@ -41,7 +41,7 @@ /* Minimal MGC for liblustre: only used to read the config log from the MGS at setup time, no updates. 
*/ - + #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif @@ -88,7 +88,7 @@ static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) ENTRY; switch (stage) { - case OBD_CLEANUP_EARLY: + case OBD_CLEANUP_EARLY: case OBD_CLEANUP_EXPORTS: rc = obd_llog_finish(obd, 0); if (rc != 0) @@ -113,7 +113,7 @@ static int mgc_cleanup(struct obd_device *obd) } static int mgc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, - struct obd_device *tgt, int count, + struct obd_device *tgt, int count, struct llog_catid *logid, struct obd_uuid *uuid) { struct llog_ctxt *ctxt; @@ -157,6 +157,6 @@ struct obd_ops mgc_obd_ops = { int __init mgc_init(void) { - return class_register_type(&mgc_obd_ops, NULL, + return class_register_type(&mgc_obd_ops, NULL, NULL, LUSTRE_MGC_NAME, NULL); } diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 4c63f6fa91a639b2db5dcf93c37c9ef7f61899a5..31d6ba77e806716732e8f9aaf4600a77b88c3512 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -323,10 +323,6 @@ static int llog_process_thread(void *arg) lpi->lpi_cbdata); last_called_index = index; if (rc == LLOG_PROC_BREAK) { - CDEBUG(D_HA, "recovery from log: "LPX64 - ":%x stopped\n", - loghandle->lgh_id.lgl_oid, - loghandle->lgh_id.lgl_ogen); GOTO(out, rc); } else if (rc == LLOG_DEL_RECORD) { llog_cancel_rec(loghandle, @@ -462,10 +458,6 @@ int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, if (ext2_test_bit(index, llh->llh_bitmap)) { rc = cb(loghandle, rec, data); if (rc == LLOG_PROC_BREAK) { - CWARN("recovery from log: "LPX64":%x" - " stopped\n", - loghandle->lgh_id.lgl_oid, - loghandle->lgh_id.lgl_ogen); GOTO(out, rc); } if (rc) diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index a5c568bbbceafcf896bfbc20780a5d0b6daf70e1..edf822b5af402fe20055dec0b74b9cdfad882c20 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -293,6 +293,8 @@ int llog_cat_add_rec(struct llog_handle *cathandle, 
struct llog_rec_hdr *rec, RETURN(PTR_ERR(loghandle)); /* loghandle is already locked by llog_cat_current_log() for us */ rc = llog_write_rec(loghandle, rec, reccookie, 1, buf, -1); + if (rc < 0) + CERROR("llog_write_rec %d: lh=%p\n", rc, loghandle); up_write(&loghandle->lgh_lock); if (rc == -ENOSPC) { /* to create a new plain log */ @@ -382,14 +384,29 @@ int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, RETURN(rc); } - rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL); + if (rec->lrh_index < d->lpd_startcat) + /* Skip processing of the logs until startcat */ + RETURN(0); + + if (d->lpd_startidx > 0) { + struct llog_process_cat_data cd; + + cd.lpcd_first_idx = d->lpd_startidx; + cd.lpcd_last_idx = 0; + rc = llog_process(llh, d->lpd_cb, d->lpd_data, &cd); + /* Continue processing the next log from idx 0 */ + d->lpd_startidx = 0; + } else { + rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL); + } + RETURN(rc); } -int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data) +int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, + int startcat, int startidx) { struct llog_process_data d; - struct llog_process_cat_data cd; struct llog_log_hdr *llh = cat_llh->lgh_hdr; int rc; ENTRY; @@ -397,8 +414,12 @@ int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data) LASSERT(llh->llh_flags & LLOG_F_IS_CAT); d.lpd_data = data; d.lpd_cb = cb; + d.lpd_startcat = startcat; + d.lpd_startidx = startidx; if (llh->llh_cat_idx > cat_llh->lgh_last_idx) { + struct llog_process_cat_data cd; + CWARN("catlog "LPX64" crosses index zero\n", cat_llh->lgh_id.lgl_oid); @@ -445,7 +466,7 @@ int llog_cat_process_thread(void *data) } if (cb) { - rc = llog_cat_process(llh, (llog_cb_t)cb, NULL); + rc = llog_cat_process(llh, (llog_cb_t)cb, NULL, 0, 0); if (rc != LLOG_PROC_BREAK && rc != 0) CERROR("llog_cat_process() failed %d\n", rc); } else { diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c 
index 60ee61c389a68f18b301468522aba02e4b8bed97..4f0e45be2ce6195036198ad1563b52e7db8833d1 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -637,16 +637,8 @@ static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, handle->lgh_id = *logid; } else if (name) { - /* COMPAT_146 */ - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) { - handle->lgh_file = llog_filp_open(MDT_LOGS_DIR, name, - open_flags, 0644); - } else { - /* end COMPAT_146 */ - handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR, - name, open_flags, - 0644); - } + handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR, + name, open_flags, 0644); if (IS_ERR(handle->lgh_file)) GOTO(cleanup, rc = PTR_ERR(handle->lgh_file)); @@ -718,12 +710,7 @@ static int llog_lvfs_destroy(struct llog_handle *handle) int rc; ENTRY; - /* COMPAT_146 */ - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) - dir = MDT_LOGS_DIR; - else - /* end COMPAT_146 */ - dir = MOUNT_CONFIGS_DIR; + dir = MOUNT_CONFIGS_DIR; fdentry = handle->lgh_file->f_dentry; if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) { diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 13a1e9a448f13daec6ef7f6b9659da236430f02a..7ff434a98bce408e0f52144488085b8abcc84ab6 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -97,9 +97,9 @@ int __llog_ctxt_put(struct llog_ctxt *ctxt) /* obd->obd_starting is needed for the case of cleanup * in error case while obd is starting up. 
*/ - LASSERTF(obd->obd_starting == 1 || + LASSERTF(obd->obd_starting == 1 || obd->obd_stopping == 1 || obd->obd_set_up == 0, - "wrong obd state: %d/%d/%d\n", !!obd->obd_starting, + "wrong obd state: %d/%d/%d\n", !!obd->obd_starting, !!obd->obd_stopping, !!obd->obd_set_up); /* cleanup the llog ctxt here */ @@ -144,9 +144,10 @@ int llog_cleanup(struct llog_ctxt *ctxt) } EXPORT_SYMBOL(llog_cleanup); -int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, - struct obd_device *disk_obd, int count, struct llog_logid *logid, - struct llog_operations *op) +int llog_setup_named(struct obd_device *obd, struct obd_llog_group *olg, + int index, struct obd_device *disk_obd, int count, + struct llog_logid *logid, const char *logname, + struct llog_operations *op) { int rc = 0; struct llog_ctxt *ctxt; @@ -190,15 +191,26 @@ int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, GOTO(out, rc); } - if (op->lop_setup) - rc = op->lop_setup(obd, olg, index, disk_obd, count, logid); - - if (rc) { - llog_ctxt_destroy(ctxt); + if (op->lop_setup) { + rc = op->lop_setup(obd, olg, index, disk_obd, count, logid, + logname); + if (rc) { + CERROR("obd %s ctxt %d lop_setup=%p failed %d\n", + obd->obd_name, index, op->lop_setup, rc); + llog_ctxt_put(ctxt); + } } out: RETURN(rc); } +EXPORT_SYMBOL(llog_setup_named); + +int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, + int index, struct obd_device *disk_obd, int count, + struct llog_logid *logid, struct llog_operations *op) +{ + return llog_setup_named(obd,olg,index,disk_obd,count,logid,NULL,op); +} EXPORT_SYMBOL(llog_setup); int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) @@ -308,7 +320,7 @@ static int cat_cancel_cb(struct llog_handle *cathandle, // XXX how to set exports int llog_obd_origin_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, struct obd_device *disk_obd, int count, - struct llog_logid *logid) + struct llog_logid *logid, const char *name) 
{ struct llog_ctxt *ctxt; struct llog_handle *handle; @@ -327,11 +339,11 @@ int llog_obd_origin_setup(struct obd_device *obd, struct obd_llog_group *olg, LASSERT(ctxt); llog_gen_init(ctxt); - if (logid->lgl_oid) + if (logid && logid->lgl_oid) { rc = llog_create(ctxt, &handle, logid, NULL); - else { - rc = llog_create(ctxt, &handle, NULL, NULL); - if (!rc) + } else { + rc = llog_create(ctxt, &handle, NULL, (char *)name); + if (!rc && logid) *logid = handle->lgh_id; } if (rc) @@ -407,7 +419,7 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt, cathandle = ctxt->loc_handle; LASSERT(cathandle != NULL); rc = llog_cat_add_rec(cathandle, rec, logcookies, NULL); - if (rc != 1) + if ((rc < 0) || (!logcookies && rc)) CERROR("write one catalog record failed: %d\n", rc); RETURN(rc); } diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index b76cca8a342fa1fa591c175dddf7f7c8423ca356..4ccb6bae17dd191363433714c360640d8d654ccc 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -128,7 +128,6 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) lustre_swab_ll_fid(&lsc->lsc_fid); __swab32s(&lsc->lsc_ioepoch); - break; } @@ -140,7 +139,6 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) __swab64s(&lur->lur_oid); __swab32s(&lur->lur_ogen); - break; } @@ -151,7 +149,20 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) __swab32s(&lsr->lsr_ogen); __swab32s(&lsr->lsr_uid); __swab32s(&lsr->lsr_gid); + break; + } + case CHANGELOG_REC: { + struct llog_changelog_rec *cr = (struct llog_changelog_rec*)rec; + + __swab16s(&cr->cr_flags); + __swab16s(&cr->cr_namelen); + __swab32s(&cr->cr_type); + __swab64s(&cr->cr_index); + __swab64s(&cr->cr_prev); + __swab64s(&cr->cr_time); + lustre_swab_lu_fid(&cr->cr_tfid); + lustre_swab_lu_fid(&cr->cr_pfid); break; } @@ -167,7 +178,6 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) } case 
OBD_CFG_REC: - case PTL_CFG_REC: /* obsolete */ /* these are swabbed as they are consumed */ break; @@ -184,7 +194,6 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) __swab32s(&llh->llh_tail.lrt_index); __swab32s(&llh->llh_tail.lrt_len); } - break; } @@ -198,8 +207,6 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) } case LLOG_JOIN_REC: case LLOG_PAD_MAGIC: - /* ignore old pad records of type 0 */ - case 0: break; default: diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index 80716fff7ed59b8bcaddcf9c3dbeae491b7c0ad8..21be99b07fccb732270cc8035ad7eed84d5d862e 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -184,7 +184,7 @@ static int llog_test_3(struct obd_device *obd, struct llog_handle *llh) struct llog_create_rec lcr; int rc, i; int num_recs = 1; /* 1 for the header */ - ENTRY; + ENTRY; lcr.lcr_hdr.lrh_len = lcr.lcr_tail.lrt_len = sizeof(lcr); lcr.lcr_hdr.lrh_type = OST_SZ_REC; @@ -258,9 +258,9 @@ static int llog_test_3(struct obd_device *obd, struct llog_handle *llh) if (rc == -ENOSPC) { break; } else { - CERROR("3c: write recs failed at #%d: %d\n", - i + 1, rc); - RETURN(rc); + CERROR("3c: write recs failed at #%d: %d\n", + i + 1, rc); + RETURN(rc); } } num_recs++; @@ -449,7 +449,7 @@ static int llog_test_5(struct obd_device *obd) } CWARN("5c: Cancel 40000 records, see one log zapped\n"); - rc = llog_cat_process(llh, llog_cancel_rec_cb, "foobar"); + rc = llog_cat_process(llh, llog_cancel_rec_cb, "foobar", 0, 0); if (rc != -4711) { CERROR("5c: process with cat_cancel_cb failed: %d\n", rc); GOTO(out, rc); @@ -471,7 +471,7 @@ static int llog_test_5(struct obd_device *obd) } CWARN("5e: print plain log entries.. 
expect 6\n"); - rc = llog_cat_process(llh, plain_print_cb, "foobar"); + rc = llog_cat_process(llh, plain_print_cb, "foobar", 0, 0); if (rc) { CERROR("5e: process with plain_print_cb failed: %d\n", rc); GOTO(out, rc); diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index d6e72220c77c8a1889b5b7d57bc409adc4a325f0..71ea5691d6db5e0881f0b8a0d546d1f4839cfbe4 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1673,7 +1673,7 @@ int server_name2index(char *svname, __u32 *idx, char **endptr) { unsigned long index; int rc; - char *dash = strchr(svname, '-'); + char *dash = strrchr(svname, '-'); if (!dash) return(-EINVAL); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 8b3fd41e4590198b816e44ef4b835f096e93a7ec..d1678d732c726d384611d776b446f59fe36415ce 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -4453,8 +4453,8 @@ static int filter_process_config(struct obd_device *obd, obd_count len, rc = class_process_proc_param(PARAM_OST, lvars.obd_vars, lcfg, obd); - if (rc > 0) - rc = 0; + if (rc > 0) + rc = 0; break; } diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index d35ec0d9794b95fece29b9f23d027dc1b9d2982e..d1f4475f21aeae9df4c0bbb8766be0429a02e6f2 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -61,13 +61,12 @@ #include <obd_class.h> #include <lustre_net.h> #include <lustre_disk.h> - void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' * (make -C lustre/utils newwiretest) - * running on Linux vb1 2.6.18-build.1 #1 SMP Thu Mar 27 14:34:21 MDT 2008 i686 i686 i386 GNU - * with gcc version 4.1.2 20070626 (Red Hat 4.1.2-14) */ + * running on Linux lin2 2.6.18-92.1.17-prep #3 Sun Nov 23 14:29:36 IST 2008 i686 i686 i386 G + * with gcc version 3.4.6 20060404 (Red Hat 3.4.6-10) */ /* Constants... 
*/ @@ -1025,6 +1024,18 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, " found %lld\n", (long long)(int)sizeof(((struct ll_fid *)0)->f_type)); + /* Checks for struct lu_fid_pack */ + LASSERTF((int)sizeof(struct lu_fid_pack) == 17, " found %lld\n", + (long long)(int)sizeof(struct lu_fid_pack)); + LASSERTF((int)offsetof(struct lu_fid_pack, fp_len) == 0, " found %lld\n", + (long long)(int)offsetof(struct lu_fid_pack, fp_len)); + LASSERTF((int)sizeof(((struct lu_fid_pack *)0)->fp_len) == 1, " found %lld\n", + (long long)(int)sizeof(((struct lu_fid_pack *)0)->fp_len)); + LASSERTF((int)offsetof(struct lu_fid_pack, fp_area) == 1, " found %lld\n", + (long long)(int)offsetof(struct lu_fid_pack, fp_area)); + LASSERTF((int)sizeof(((struct lu_fid_pack *)0)->fp_area) == 16, " found %lld\n", + (long long)(int)sizeof(((struct lu_fid_pack *)0)->fp_area)); + /* Checks for struct mds_status_req */ LASSERTF((int)sizeof(struct mds_status_req) == 8, " found %lld\n", (long long)(int)sizeof(struct mds_status_req)); @@ -1961,6 +1972,50 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, " found %lld\n", (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail)); + /* Checks for struct llog_changelog_rec */ + LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, " found %lld\n", + (long long)(int)sizeof(struct llog_changelog_rec)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_flags) == 16, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_flags)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_flags) == 2, " found 
%lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_flags)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_namelen) == 18, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_namelen)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_namelen) == 2, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_namelen)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_type) == 20, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_type)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_type) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_type)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_index) == 24, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_index)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_index) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_index)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_prev) == 32, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_prev)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_prev) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_prev)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_time) == 40, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_time)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_time) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_time)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tfid) == 48, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_tfid)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tfid) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tfid)); + LASSERTF((int)offsetof(struct 
llog_changelog_rec, cr_pfid) == 64, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_pfid)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_pfid) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_pfid)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_tail)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail)); + /* Checks for struct llog_gen */ LASSERTF((int)sizeof(struct llog_gen) == 16, " found %lld\n", (long long)(int)sizeof(struct llog_gen)); @@ -2399,5 +2454,45 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((xattr_acl_header *)0)->a_entries) == 0, " found %lld\n", (long long)(int)sizeof(((xattr_acl_header *)0)->a_entries)); #endif + + /* Checks for struct link_ea_header */ + LASSERTF((int)sizeof(struct link_ea_header) == 24, " found %lld\n", + (long long)(int)sizeof(struct link_ea_header)); + LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_magic)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic)); + LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_reccount)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount)); + LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_len)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header 
*)0)->leh_len)); + LASSERTF((int)offsetof(struct link_ea_header, padding1) == 16, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, padding1)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->padding1)); + LASSERTF((int)offsetof(struct link_ea_header, padding2) == 20, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, padding2)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->padding2)); + + /* Checks for struct link_ea_entry */ + LASSERTF((int)sizeof(struct link_ea_entry) == 19, " found %lld\n", + (long long)(int)sizeof(struct link_ea_entry)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, " found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_reclen)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, " found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 17, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 19, " found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_name)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name)); } diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index a0239b8b7990056cb358879ee7b22552510cb6cb..f35408e0b355b05b7e239a7566908c29df7d81dd 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -1,3 +1,4 @@ +.tmp_versions .Xrefs config.log config.status diff --git 
a/lustre/tests/multiop.c b/lustre/tests/multiop.c index c74354f8b5b74d8bde6589edb62718631a36e176..1152dc0df4c0bdff434d9eec15115e251e990283 100755 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -57,7 +57,7 @@ char *buf, *buf_align; int bufsize = 0; #define ALIGN 65535 -char usage[] = +char usage[] = "Usage: %s filename command-sequence\n" " command-sequence items:\n" " c close\n" @@ -79,6 +79,7 @@ char usage[] = " T[num] ftruncate [optional position, default 0]\n" " u unlink\n" " U munmap\n" +" v verbose\n" " w[num] write optional length\n" " W write entire mmap-ed region\n" " y fsync\n" @@ -327,6 +328,8 @@ int main(int argc, char **argv) fprintf(stderr, "short read: %u/%u\n", rc, len); len -= rc; + if (verbose >= 2) + printf("%.*s\n", rc, buf_align); } break; case 'R': @@ -378,7 +381,7 @@ int main(int argc, char **argv) } break; case 'v': - verbose = 1; + verbose++; break; case 'w': len = atoi(commands+1); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index bae9793d3722fe403cec12fe4b44d8e579903478..d1be1e39fa6cc0a57fcbc9e09b4251810ae466ed 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -971,6 +971,51 @@ test_59() { # bug 10589 } run_test 59 "Read cancel race on client eviction" +test_60() { + remote_mds && { skip "remote MDS" && return 0; } + + NUM_FILES=15000 + mkdir -p $DIR/$tdir + + # Enable and clear changelog + $LCTL conf_param ${mds1_svc}.mdd.changelog=on + $LCTL set_param -n mdd.*.changelog on + $LFS changelog_clear $FSNAME 0 + + # Create NUM_FILES in the background + createmany -o $DIR/$tdir/$tfile $NUM_FILES + sync + sleep 5 + + # Unlink files in the background + unlinkmany $DIR/$tdir/$tfile $NUM_FILES & + CLIENT_PID=$! 
+ sleep 1 + + # Failover the MDS while creates are happening + facet_failover $SINGLEMDS + + # Wait for unlinkmany to finish + wait $CLIENT_PID + + # Check if NUM_FILES create/unlink events were recorded + # in the changelog + $LFS changelog $FSNAME >> $DIR/$tdir/changelog + local cl_count=$(grep UNLNK $DIR/$tdir/changelog | wc -l) + echo "$cl_count unlinks in changelog" + + [ $cl_count -eq $NUM_FILES ] || error "Recorded ${cl_count} unlinks out +of $NUM_FILES" + + # Also make sure we can clear large changelogs + lctl set_param -n mdd.*.changelog off + $LFS changelog_clear $FSNAME 0 + + cl_count=$($LFS changelog $FSNAME | wc -l) + [ $cl_count -eq 1 ] || error "Changelog not empty: $cl_count entries" +} +run_test 60 "Add Changelog entries during MDS failover" + equals_msg `basename $0`: test complete, cleaning up check_and_cleanup_lustre [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 3d052139dacc2b78ab2a46b55c061da86a30c94e..8e2fd534ef3ce368ae90b5284970d645fb21d175 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -3629,6 +3629,8 @@ test_80() { # bug 10718 run_test 80 "Page eviction is equally fast at high offsets too ====" test_99a() { + [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && \ + return mkdir -p $DIR/d99cvsroot chown $RUNAS_ID $DIR/d99cvsroot local oldPWD=$PWD # bug 13584, use $TMP as working dir @@ -5839,6 +5841,138 @@ test_152() { } run_test 152 "test read/write with enomem ============================" +#Changelogs +test_160() { + remote_mds && skip "remote MDS" && return + lctl set_param -n mdd.*.changelog on + $LFS changelog_clear $FSNAME 0 + + # change something + mkdir -p $DIR/$tdir/pics/2008/zachy + touch $DIR/$tdir/pics/2008/zachy/timestamp + cp /etc/hosts $DIR/$tdir/pics/2008/zachy/pic1.jpg + mv $DIR/$tdir/pics/2008/zachy $DIR/$tdir/pics/zach + ln $DIR/$tdir/pics/zach/pic1.jpg 
$DIR/$tdir/pics/2008/portland.jpg + ln -s $DIR/$tdir/pics/2008/portland.jpg $DIR/$tdir/pics/desktop.jpg + rm $DIR/$tdir/pics/desktop.jpg + + # verify contents + $LFS changelog $FSNAME + # check target fid + fidc=$($LFS changelog $FSNAME | grep timestamp | grep "CREAT" | tail -1 | \ + awk '{print $5}') + fidf=$($LFS path2fid $DIR/$tdir/pics/zach/timestamp) + [ "$fidc" == "t=$fidf" ] || \ + error "fid in changelog $fidc != file fid $fidf" + # check parent fid + fidc=$($LFS changelog $FSNAME | grep timestamp | grep "CREAT" | tail -1 | \ + awk '{print $6}') + fidf=$($LFS path2fid $DIR/$tdir/pics/zach) + [ "$fidc" == "p=$fidf" ] || \ + error "pfid in changelog $fidc != dir fid $fidf" + + # verify purge + FIRST_REC=$($LFS changelog $FSNAME | head -1 | awk '{print $1}') + $LFS changelog_clear $FSNAME $(($FIRST_REC + 5)) + PURGE_REC=$($LFS changelog $FSNAME | head -1 | awk '{print $1}') + [ $PURGE_REC == $(($FIRST_REC + 6)) ] || \ + error "first rec after purge should be $(($FIRST_REC + 6)); is $PURGE_REC" + # purge all + $LFS changelog_clear $FSNAME 0 + lctl set_param -n mdd.*.changelog off +} +run_test 160 "changelog sanity" + +test_161() { + # need local MDT for fid2path + remote_mds && skip "remote MDS" && return + + mkdir -p $DIR/$tdir + cp /etc/hosts $DIR/$tdir/$tfile + mkdir $DIR/$tdir/foo1 + mkdir $DIR/$tdir/foo2 + ln $DIR/$tdir/$tfile $DIR/$tdir/foo1/sofia + ln $DIR/$tdir/$tfile $DIR/$tdir/foo2/zachary + ln $DIR/$tdir/$tfile $DIR/$tdir/foo1/luna + ln $DIR/$tdir/$tfile $DIR/$tdir/foo2/thor + local FID=$($LFS path2fid $DIR/$tdir/$tfile) + if [ "$($LFS fid2path ${mds1_svc} $FID | wc -l)" != "5" ]; then + $LFS fid2path ${mds1_svc} $FID + error "bad link ea" + fi + # middle + rm $DIR/$tdir/foo2/zachary + # last + rm $DIR/$tdir/foo2/thor + # first + rm $DIR/$tdir/$tfile + # rename + mv $DIR/$tdir/foo1/sofia $DIR/$tdir/foo2/maggie + if [ "$($LFS fid2path ${mds1_svc} --link 1 $FID)" != "/$tdir/foo2/maggie" ] + then + $LFS fid2path ${mds1_svc} $FID + error "bad link 
# Resolve a FID to a path with "lfs fid2path" and compare it to the
# expected path.
#
# usage: check_path <expected_path> <mdtname> <fid> [fid2path options...]
#
# $1 is the expected path; every remaining argument is handed to
# "$LFS fid2path" unchanged.  Calls error() and returns non-zero when the
# lookup fails (the fid2path exit status) or when the resolved path differs
# from the expected one (2).
check_path() {
    local expected=$1
    shift
    # after the shift: $1 is the MDT name, $2 is the FID
    local fid=$2
    local path

    # Assign separately from the "local" declaration: "local var=$(cmd)"
    # yields the exit status of "local" (always 0), which would hide a
    # failing fid2path.
    path=$(${LFS} fid2path "$@")
    RC=$?

    if [ $RC -ne 0 ]; then
        error "path looked up of $expected failed. Error $RC"
        return $RC
    elif [ "${path}" != "${expected}" ]; then
        error "path looked up \"${path}\" instead of \"${expected}\""
        return 2
    fi
    echo "fid $fid resolves to path $path"
}
b/lustre/tests/test-framework.sh index a2234be6e3afbabd54bf36ad3eb6793679443a9b..437682f7645af20068a5d0c57fea97965e182c7c 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -259,9 +259,9 @@ load_modules() { load_module llite/lustre load_module llite/llite_lloop + [ -d /r ] && OGDB=${OGDB:-"/r/tmp"} OGDB=${OGDB:-$TMP} rm -f $OGDB/ogdb-$HOSTNAME - [ -d /r ] && OGDB="/r/tmp" $LCTL modules > $OGDB/ogdb-$HOSTNAME # 'mount' doesn't look in $PATH, just sbin diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 6b2c150069069df73b0c7b6c80d78bf14de70399..51b9803912a2ae3d21dcd1af03de82bc1e0a3d81 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -62,7 +62,9 @@ llverfs_LDADD := $(EXT2FSLIB) $(E2PLIB) llverdev_LDADD := $(EXT2FSLIB) $(BLKIDLIB) -liblustreapi_a_SOURCES = liblustreapi.c +L_IOCTL := $(top_builddir)/libcfs/libcfs/util/l_ioctl.c +liblustreapi_a_SOURCES = liblustreapi.c $(L_IOCTL) + libiam_a_SOURCES = libiam.c wirecheck_SOURCES = wirecheck.c diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 6a3bf18f9f4ee8536ca1b9a6325c33327d1a2b49..c8fcc6ee201b3993aa3592d8b29b7cb4ba587f14 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -65,6 +65,7 @@ /* For dirname() */ #include <libgen.h> +#include <poll.h> #include <lnet/api-support.h> #include <lnet/lnetctl.h> @@ -105,6 +106,9 @@ static int lfs_rgetfacl(int argc, char **argv); static int lfs_cp(int argc, char **argv); static int lfs_ls(int argc, char **argv); static int lfs_poollist(int argc, char **argv); +static int lfs_changelog(int argc, char **argv); +static int lfs_changelog_clear(int argc, char **argv); +static int lfs_fid2path(int argc, char **argv); static int lfs_path2fid(int argc, char **argv); /* all avaialable commands */ @@ -214,6 +218,18 @@ command_t cmdlist[] = { {"ls", lfs_ls, 0, "Remote user list directory contents.\n" "usage: ls [OPTION]... 
/**
 * Return the next complete, newline-terminated line read from \a fd.
 *
 * Data is buffered in a static buffer, so the function is not reentrant and
 * serves one stream at a time.  Passing *ptr == NULL (as on the first call)
 * discards any buffered data and starts over.
 *
 * \param fd   descriptor to read from
 * \param ptr  on success, *ptr points at the NUL-terminated line (the
 *             newline is replaced by '\0'); the storage is only valid until
 *             the next call
 *
 * \retval >0  length of the line stored in *ptr
 * \retval 0   read() reported end of file / no data (an empty line also
 *             yields 0, with *ptr updated)
 * \retval <0  read() failed, or the line does not fit in the buffer
 *             (errno = EOVERFLOW)
 */
static int get_next_full_line(int fd, char **ptr)
{
        static char buf[8192]; /* bigger than MAX_PATH_LENGTH */
        static size_t head;    /* offset of the first unconsumed byte */
        static size_t fill;    /* number of valid bytes in buf */
        char *start;
        char *nl;
        ssize_t got;

        if (*ptr == NULL) /* first time */
                head = fill = 0;

        for (;;) {
                /* Look for a full line in what is already buffered.  Every
                 * valid byte is examined, and nothing beyond buf + fill. */
                start = buf + head;
                nl = memchr(start, '\n', fill - head);
                if (nl != NULL) {
                        *nl = '\0';
                        *ptr = start;
                        head = (size_t)(nl - buf) + 1;
                        return (int)(nl - start);
                }

                /* Partial line: move it to the front of buf.  The regions
                 * may overlap, hence memmove() rather than memcpy(). */
                if (head > 0) {
                        memmove(buf, start, fill - head);
                        fill -= head;
                        head = 0;
                }

                if (fill == sizeof(buf)) {
                        /* A 0-byte read() would be mistaken for EOF. */
                        errno = EOVERFLOW;
                        return -1;
                }

                /* Keep the read() result separate from the fill level so a
                 * buffered partial line survives EOF or an error. */
                got = read(fd, buf + fill, sizeof(buf) - fill);
                if (got <= 0)
                        return (int)got;
                fill += (size_t)got;
        }
}
2 02MKDIR 4405821890 t=[0x100000400/0x5] p=[0x100000400/0x4] pics */ + sscanf(ptr, "%lld *", &recnum); + if (endrec && recnum > endrec) + break; + if (recnum < startrec) + continue; + printf("%.*s\n", len, ptr); + } + + close(fd); + + if (len < 0) { + printf("read err %d\n", errno); + return -errno; + } + + return 0; +} + +static int lfs_changelog_clear(int argc, char **argv) +{ + long long endrec; + + if (argc != 3) + return CMD_HELP; + + endrec = strtoll(argv[2], NULL, 10); + + return(llapi_changelog_clear(argv[1], endrec)); +} + +static int lfs_fid2path(int argc, char **argv) +{ + struct option long_opts[] = { + {"link", 1, 0, 'l'}, + {"rec", 1, 0, 'r'}, + {0, 0, 0, 0} + }; + char c, short_opts[] = "l:r:"; + char *device, *fid, *path; + long long recno = -1; + int linkno = -1; + int lnktmp; + int rc; + + optind = 0; + while ((c = getopt_long(argc, argv, short_opts, + long_opts, NULL)) != -1) { + switch (c) { + case 'l': + linkno = strtol(optarg, NULL, 10); + break; + case 'r': + recno = strtoll(optarg, NULL, 10); + break; + case '?': + return CMD_HELP; + default: + fprintf(stderr, "error: %s: option '%s' unrecognized\n", + argv[0], argv[optind - 1]); + return CMD_HELP; + } + } + + device = argv[optind++]; + fid = argv[optind++]; + if (optind != argc) + return CMD_HELP; + + path = calloc(1, PATH_MAX); + + lnktmp = (linkno >= 0) ? 
linkno : 0; + while (1) { + int oldtmp = lnktmp; + rc = llapi_fid2path(device, fid, path, PATH_MAX, recno, + &lnktmp); + if (rc < 0) { + fprintf(stderr, "%s error: %s\n", argv[0], + strerror(errno = -rc)); + break; + } else { + fprintf(stdout, "%s\n", path); + } + if (linkno >= 0) + /* specified linkno */ + break; + if (oldtmp == lnktmp) + /* no more links */ + break; + } + + free(path); + return rc; +} + +static int lfs_path2fid(int argc, char **argv) +{ + char *path; + unsigned long long seq; + unsigned long oid, ver; + int rc; + + if (argc != 2) + return CMD_HELP; + + path = argv[1]; + rc = llapi_path2fid(path, &seq, &oid, &ver); + if (rc) { + fprintf(stderr, "can't get fid for %s: %s\n", path, + strerror(errno = -rc)); + return rc; + } + + printf(DFID"\n", seq, (unsigned int)oid, (unsigned int)ver); + + return 0; +} + int main(int argc, char **argv) { int rc; @@ -2351,3 +2554,4 @@ int main(int argc, char **argv) obd_finalize(argc, argv); return rc; } + diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 38569478eea3ee2dda362c8a32a884326efc6dc4..83d6990f3169b8de14a737086cc2c6d0914a4f8a 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -66,6 +66,7 @@ #include <unistd.h> #endif +#include <libcfs/libcfsutil.h> /* l_ioctl */ #include <liblustre.h> #include <lnet/lnetctl.h> #include <obd.h> @@ -429,7 +430,6 @@ static int search_fsname(char *pathname, char *fsname) } endmntent(fp); return -ENOENT; - } /* @@ -661,7 +661,7 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count) int llapi_get_obd_count(char *mnt, int *count, int is_mdt) { DIR *root; - int rc; + int rc; root = opendir(mnt); if (!root) { @@ -673,7 +673,7 @@ int llapi_get_obd_count(char *mnt, int *count, int is_mdt) rc = ioctl(dirfd(root), LL_IOC_GETOBDCOUNT, count); closedir(root); - return rc; + return rc; } /* Here, param->obduuid points to a single obduuid, the index of which is @@ -1824,8 +1824,6 @@ int llapi_target_iterate(int 
type_num, char **obd_type,void *args,llapi_cb_t cb) char *obd_type_name = NULL; char *obd_name = NULL; char *obd_uuid = NULL; - char rawbuf[OBD_MAX_IOCTL_BUFFER]; - char *bufl = rawbuf; char *bufp = buf; struct obd_ioctl_data datal = { 0, }; struct obd_statfs osfs_buffer; @@ -1841,7 +1839,6 @@ int llapi_target_iterate(int type_num, char **obd_type,void *args,llapi_cb_t cb) memset(&osfs_buffer, 0, sizeof (osfs_buffer)); - memset(bufl, 0, sizeof(rawbuf)); datal.ioc_pbuf1 = (char *)&osfs_buffer; datal.ioc_plen1 = sizeof(osfs_buffer); @@ -2123,7 +2120,7 @@ static int rmtacl_notify(int ops) if (rc < 0) { perror("ioctl"); return -1; - } + } found++; } @@ -2377,6 +2374,164 @@ int llapi_ls(int argc, char *argv[]) exit(execvp(argv[0], argv)); } +/* format must have %s%s, buf must be > 16 */ +static int get_mdtname(const char *name, char *format, char *buf) +{ + char suffix[]="-MDT0000"; + int len = strlen(name); + + if (len > 16) { + llapi_err(LLAPI_MSG_ERROR, "bad MDT name |%s|\n", name); + return -EINVAL; + } + + if ((len > 8) && (strncmp(name + len - 8, "-MDT", 4) == 0)) + suffix[0] = '\0'; + + return sprintf(buf, format, name, suffix); +} + +#define CHANGELOG_FILE "/proc/fs/lustre/mdd/%s%s/changelog" + +/* return a file desc to readable changelog */ +int llapi_changelog_open(const char *mdtname, long long startrec) +{ + char path[256]; + int rc, fd; + + if (get_mdtname(mdtname, CHANGELOG_FILE, path) <0) + return -EINVAL; + + if ((fd = open(path, O_RDONLY)) < 0) { + llapi_err(LLAPI_MSG_ERROR, "error: can't open |%s|\n", path); + return -errno; + } + + rc = lseek(fd, (off_t)startrec, SEEK_SET); + if (rc < 0) { + llapi_err(LLAPI_MSG_ERROR, "can't seek rc=%d\n", rc); + return -errno; + } + + return fd; +} + +int llapi_changelog_clear(const char *mdtname, long long endrec) +{ + char path[256]; + char val[20]; + int fd, len; + + if (endrec < 0) { + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "can't purge negative records\n"); + return -EINVAL; + } + + if 
(get_mdtname(mdtname, CHANGELOG_FILE, path) <0) + return -EINVAL; + + if ((fd = open(path, O_WRONLY)) < 0) { + llapi_err(LLAPI_MSG_ERROR, "error: can't open |%s|\n", path); + return errno; + } + + snprintf(val, sizeof(val), "%llu", endrec); + len = write(fd, val, strlen(val)); + close(fd); + if (len != strlen(val)) { + llapi_err(LLAPI_MSG_ERROR, "purge err\n"); + return errno; + } + + return 0; +} + +static int dev_ioctl(struct obd_ioctl_data *data, int dev, int cmd) +{ + int rc; + static char rawbuf[8192]; + static char *buf = rawbuf; + + data->ioc_dev = dev; + memset(buf, 0, sizeof(rawbuf)); + + if ((rc = obd_ioctl_pack(data, &buf, sizeof(rawbuf)))) { + llapi_err(LLAPI_MSG_ERROR, + "error: ioctl pack (%d) failed: rc %d", cmd, rc); + return rc; + } + + rc = l_ioctl(OBD_DEV_ID, cmd, buf); + if (rc < 0) { + /* ioctl returns -1 with errno set */ + rc = -errno; + return rc; + } + + if (obd_ioctl_unpack(data, buf, sizeof(rawbuf))) { + llapi_err(LLAPI_MSG_ERROR, + "error: invalid reply\n"); + return -EPROTO; + } + return rc; +} + +/* should we just grep it from proc? 
*/ +static int dev_name2dev(char *name) +{ + struct obd_ioctl_data data; + int rc; + + memset(&data, 0, sizeof(data)); + data.ioc_inllen1 = strlen(name) + 1; + data.ioc_inlbuf1 = name; + rc = dev_ioctl(&data, -1, OBD_IOC_NAME2DEV); + + if (rc < 0) { + llapi_err(LLAPI_MSG_ERROR, "Device %s not found %d\n", name,rc); + return rc; + } + return data.ioc_dev; +} + +int llapi_fid2path(char *device, char *fidstr, char *buf, int buflen, + __u64 recno, int *linkno) +{ + struct lu_fid fid; + struct obd_ioctl_data data; + int dev, rc; + + while (*fidstr == '[') + fidstr++; + + sscanf(fidstr, "0x%llx:0x%x:0x%x", &(fid.f_seq), &(fid.f_oid), + &(fid.f_ver)); + if (!fid_is_sane(&fid)) { + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "bad FID format [%s], should be "DFID"\n", + fidstr, (__u64)1, 2, 0); + return -EINVAL; + } + + dev = dev_name2dev(device); + if (dev < 0) + return dev; + + memset(&data, 0, sizeof(data)); + data.ioc_inlbuf1 = (char *)&fid; + data.ioc_inllen1 = sizeof(fid); + data.ioc_inlbuf2 = (char *)&recno; + data.ioc_inllen2 = sizeof(__u64); + data.ioc_inlbuf3 = (char *)linkno; + data.ioc_inllen3 = sizeof(int); + data.ioc_plen1 = buflen; + data.ioc_pbuf1 = buf; + rc = dev_ioctl(&data, dev, OBD_IOC_FID2PATH); + + return rc; +} + int llapi_path2fid(const char *path, unsigned long long *seq, unsigned long *oid, unsigned long *ver) { @@ -2395,3 +2550,4 @@ int llapi_path2fid(const char *path, unsigned long long *seq, close(fd); return rc; } + diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index 712dc7fbcbb40ef5f75b6b3ccd9f789a7e1c37d6..69f734b6cf98025974e7427add4fbc75bc530fe0 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -82,7 +82,7 @@ int lcfg_set_devname(char *name) /* quietly strip the unnecessary '$' */ if (*name == '$' || *name == '%') name++; - if (isdigit(*name)) { + if (isdigit(*name)) { /* We can't translate from dev # to name */ lcfg_devname = NULL; } else { @@ -90,7 +90,7 @@ int lcfg_set_devname(char 
*name) } } else { lcfg_devname = NULL; - } + } return 0; } @@ -154,8 +154,8 @@ int jt_lcfg_setup(int argc, char **argv) if (lcfg_devname == NULL) { fprintf(stderr, "%s: please use 'device name' to set the " - "device name for config commands.\n", - jt_cmdname(argv[0])); + "device name for config commands.\n", + jt_cmdname(argv[0])); return -EINVAL; } @@ -186,8 +186,8 @@ int jt_obd_detach(int argc, char **argv) if (lcfg_devname == NULL) { fprintf(stderr, "%s: please use 'device name' to set the " - "device name for config commands.\n", - jt_cmdname(argv[0])); + "device name for config commands.\n", + jt_cmdname(argv[0])); return -EINVAL; } @@ -218,8 +218,8 @@ int jt_obd_cleanup(int argc, char **argv) if (lcfg_devname == NULL) { fprintf(stderr, "%s: please use 'device name' to set the " - "device name for config commands.\n", - jt_cmdname(argv[0])); + "device name for config commands.\n", + jt_cmdname(argv[0])); return -EINVAL; } @@ -256,8 +256,8 @@ int jt_obd_cleanup(int argc, char **argv) return rc; } -static -int do_add_uuid(char * func, char *uuid, lnet_nid_t nid) +static +int do_add_uuid(char * func, char *uuid, lnet_nid_t nid) { int rc; struct lustre_cfg_bufs bufs; @@ -269,7 +269,7 @@ int do_add_uuid(char * func, char *uuid, lnet_nid_t nid) lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); lcfg->lcfg_nid = nid; - /* Poison NAL -- pre 1.4.6 will LASSERT on 0 NAL, this way it + /* Poison NAL -- pre 1.4.6 will LASSERT on 0 NAL, this way it doesn't work without crashing (bz 10130) */ lcfg->lcfg_nal = 0x5a; @@ -292,8 +292,8 @@ int do_add_uuid(char * func, char *uuid, lnet_nid_t nid) int jt_lcfg_add_uuid(int argc, char **argv) { lnet_nid_t nid; - - if (argc != 3) { + + if (argc != 3) { return CMD_HELP; } @@ -325,7 +325,7 @@ int jt_lcfg_del_uuid(int argc, char **argv) lustre_cfg_bufs_reset(&bufs, lcfg_devname); if (strcmp (argv[1], "_all_")) lustre_cfg_bufs_set_string(&bufs, 1, argv[1]); - + lcfg = lustre_cfg_new(LCFG_DEL_UUID, &bufs); rc = lcfg_ioctl(argv[0], OBD_DEV_ID, 
lcfg); lustre_cfg_free(lcfg); @@ -382,7 +382,7 @@ int jt_lcfg_set_timeout(int argc, char **argv) lustre_cfg_bufs_reset(&bufs, lcfg_devname); lcfg = lustre_cfg_new(LCFG_SET_TIMEOUT, &bufs); lcfg->lcfg_num = atoi(argv[1]); - + rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg); //rc = lcfg_mgs_ioctl(argv[0], OBD_DEV_ID, lcfg); @@ -412,8 +412,8 @@ int jt_lcfg_add_conn(int argc, char **argv) if (lcfg_devname == NULL) { fprintf(stderr, "%s: please use 'device name' to set the " - "device name for config commands.\n", - jt_cmdname(argv[0])); + "device name for config commands.\n", + jt_cmdname(argv[0])); return -EINVAL; } @@ -445,8 +445,8 @@ int jt_lcfg_del_conn(int argc, char **argv) if (lcfg_devname == NULL) { fprintf(stderr, "%s: please use 'device name' to set the " - "device name for config commands.\n", - jt_cmdname(argv[0])); + "device name for config commands.\n", + jt_cmdname(argv[0])); return -EINVAL; } @@ -484,7 +484,7 @@ int jt_lcfg_param(int argc, char **argv) } lcfg = lustre_cfg_new(LCFG_PARAM, &bufs); - + rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg); lustre_cfg_free(lcfg); if (rc < 0) { @@ -519,7 +519,7 @@ int jt_lcfg_mgsparam(int argc, char **argv) fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]), strerror(rc = errno)); } - + return rc; } diff --git a/lustre/utils/module_setup.sh b/lustre/utils/module_setup.sh index 5727b4d46f8a6e53c956a003e12464ae817bed12..9f9792f77ca90251f39e9ee7388bcc9e5d456ace 100755 --- a/lustre/utils/module_setup.sh +++ b/lustre/utils/module_setup.sh @@ -1,23 +1,15 @@ -#!/bin/sh +#!/bin/bash MDIR=/lib/modules/`uname -r`/lustre /bin/rm -rf $MDIR mkdir -p $MDIR -KVER=26 EXT=ko -FSFLT=fsfilt_ldiskfs if [ -d /etc/modprobe.d ]; then MODFILE="/etc/modprobe.d/Lustre" else MODFILE="/etc/modprobe.conf" fi -if [ `uname -r | cut -c 3` -eq 4 ]; then - KVER=24 - EXT=o - FSFLT=fsfilt_ext3 - MODFILE="/etc/modules.conf" -fi echo "Copying modules from local build dir to "$MDIR diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 
e7abfce90131559c6197f7b2e6766eec1c136ffa..a7deb160b77f02b9429ffe10eed5872c95d180d2 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -1911,11 +1911,10 @@ int jt_cfg_dump_log(int argc, char **argv) struct obd_ioctl_data data; int rc; - IOC_INIT(data); - if (argc != 2) return CMD_HELP; + IOC_INIT(data); data.ioc_inllen1 = strlen(argv[1]) + 1; data.ioc_inlbuf1 = argv[1]; diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 55d026ba64a5a087f7f596f0a2f621e1134f579a..b8e5d06481b6e55372cc46d1ac9c404b31440d03 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -252,7 +252,7 @@ check_obdo(void) CHECK_MEMBER(obdo, o_padding_3); CHECK_MEMBER(obdo, o_padding_4); CHECK_MEMBER(obdo, o_padding_5); - CHECK_MEMBER(obdo, o_padding_6); + CHECK_MEMBER(obdo, o_padding_6); CHECK_CDEFINE(OBD_MD_FLID); CHECK_CDEFINE(OBD_MD_FLATIME); @@ -478,6 +478,15 @@ check_ll_fid(void) CHECK_MEMBER(ll_fid, f_type); } +static void +check_lu_fid_pack(void) +{ + BLANK_LINE(); + CHECK_STRUCT(lu_fid_pack); + CHECK_MEMBER(lu_fid_pack, fp_len); + CHECK_MEMBER(lu_fid_pack, fp_area); +} + static void check_mds_status_req(void) { @@ -941,6 +950,23 @@ check_llog_size_change_rec(void) CHECK_MEMBER(llog_size_change_rec, lsc_tail); } +static void +check_llog_changelog_rec(void) +{ + BLANK_LINE(); + CHECK_STRUCT(llog_changelog_rec); + CHECK_MEMBER(llog_changelog_rec, cr_hdr); + CHECK_MEMBER(llog_changelog_rec, cr_flags); + CHECK_MEMBER(llog_changelog_rec, cr_namelen); + CHECK_MEMBER(llog_changelog_rec, cr_type); + CHECK_MEMBER(llog_changelog_rec, cr_index); + CHECK_MEMBER(llog_changelog_rec, cr_prev); + CHECK_MEMBER(llog_changelog_rec, cr_time); + CHECK_MEMBER(llog_changelog_rec, cr_tfid); + CHECK_MEMBER(llog_changelog_rec, cr_pfid); + CHECK_MEMBER(llog_changelog_rec, cr_tail); +} + static void check_llog_gen(void) { @@ -1168,6 +1194,29 @@ check_ll_fiemap_extent(void) CHECK_CDEFINE(FIEMAP_EXTENT_MERGED); } +static void +check_link_ea_header(void) +{ + BLANK_LINE(); + 
CHECK_STRUCT(link_ea_header); + CHECK_MEMBER(link_ea_header, leh_magic); + CHECK_MEMBER(link_ea_header, leh_reccount); + CHECK_MEMBER(link_ea_header, leh_len); + CHECK_MEMBER(link_ea_header, padding1); + CHECK_MEMBER(link_ea_header, padding2); +} + +static void +check_link_ea_entry(void) +{ + BLANK_LINE(); + CHECK_STRUCT(link_ea_entry); + CHECK_MEMBER(link_ea_entry, lee_reclen); + CHECK_MEMBER(link_ea_entry, lee_parent_fid); + CHECK_MEMBER(link_ea_entry, lee_name); +} + + static void system_string (char *cmdline, char *str, int len) { @@ -1353,7 +1402,7 @@ main(int argc, char **argv) CHECK_VALUE(MGS_CONNECT); CHECK_VALUE(MGS_DISCONNECT); - CHECK_VALUE(MGS_EXCEPTION); + CHECK_VALUE(MGS_EXCEPTION); CHECK_VALUE(MGS_TARGET_REG); CHECK_VALUE(MGS_TARGET_DEL); CHECK_VALUE(MGS_SET_INFO); @@ -1375,6 +1424,7 @@ main(int argc, char **argv) check_niobuf_remote(); check_ost_body(); check_ll_fid(); + check_lu_fid_pack(); check_mds_status_req(); check_mds_body(); check_mds_rec_setattr(); @@ -1406,6 +1456,7 @@ main(int argc, char **argv) check_llog_setattr_rec(); check_llog_setattr64_rec(); check_llog_size_change_rec(); + check_llog_changelog_rec(); check_llog_gen(); check_llog_gen_rec(); check_llog_log_hdr(); @@ -1427,7 +1478,8 @@ main(int argc, char **argv) check_posix_acl_xattr_entry(); check_posix_acl_xattr_header(); printf("#endif\n"); - + check_link_ea_header(); + check_link_ea_entry(); printf("}\n\n"); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index f881c822d433781981d07f4c55c9b63c1f185034..b0a750eb25c78bc8d1eb01dba5d9030c421dc14e 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -58,13 +58,12 @@ int main() return ret; } - void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' * (make -C lustre/utils newwiretest) - * running on Linux vb1 2.6.18-build.1 #1 SMP Thu Mar 27 14:34:21 MDT 2008 i686 i686 i386 GNU - * with gcc version 4.1.2 20070626 (Red Hat 4.1.2-14) */ + * running on Linux lin2 
2.6.18-92.1.17-prep #3 Sun Nov 23 14:29:36 IST 2008 i686 i686 i386 G + * with gcc version 3.4.6 20060404 (Red Hat 3.4.6-10) */ /* Constants... */ @@ -1022,6 +1021,18 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, " found %lld\n", (long long)(int)sizeof(((struct ll_fid *)0)->f_type)); + /* Checks for struct lu_fid_pack */ + LASSERTF((int)sizeof(struct lu_fid_pack) == 17, " found %lld\n", + (long long)(int)sizeof(struct lu_fid_pack)); + LASSERTF((int)offsetof(struct lu_fid_pack, fp_len) == 0, " found %lld\n", + (long long)(int)offsetof(struct lu_fid_pack, fp_len)); + LASSERTF((int)sizeof(((struct lu_fid_pack *)0)->fp_len) == 1, " found %lld\n", + (long long)(int)sizeof(((struct lu_fid_pack *)0)->fp_len)); + LASSERTF((int)offsetof(struct lu_fid_pack, fp_area) == 1, " found %lld\n", + (long long)(int)offsetof(struct lu_fid_pack, fp_area)); + LASSERTF((int)sizeof(((struct lu_fid_pack *)0)->fp_area) == 16, " found %lld\n", + (long long)(int)sizeof(((struct lu_fid_pack *)0)->fp_area)); + /* Checks for struct mds_status_req */ LASSERTF((int)sizeof(struct mds_status_req) == 8, " found %lld\n", (long long)(int)sizeof(struct mds_status_req)); @@ -1958,6 +1969,50 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, " found %lld\n", (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail)); + /* Checks for struct llog_changelog_rec */ + LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, " found %lld\n", + (long long)(int)sizeof(struct llog_changelog_rec)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_flags) == 16, " found %lld\n", + (long 
long)(int)offsetof(struct llog_changelog_rec, cr_flags)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_flags) == 2, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_flags)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_namelen) == 18, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_namelen)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_namelen) == 2, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_namelen)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_type) == 20, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_type)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_type) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_type)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_index) == 24, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_index)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_index) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_index)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_prev) == 32, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_prev)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_prev) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_prev)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_time) == 40, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_time)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_time) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_time)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tfid) == 48, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_tfid)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tfid) == 
16, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tfid)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_pfid) == 64, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_pfid)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_pfid) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_pfid)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, " found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_tail)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail)); + /* Checks for struct llog_gen */ LASSERTF((int)sizeof(struct llog_gen) == 16, " found %lld\n", (long long)(int)sizeof(struct llog_gen)); @@ -2397,17 +2452,44 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((xattr_acl_header *)0)->a_entries)); #endif - /* check fid range */ - LASSERTF((int)sizeof(struct lu_seq_range) == 24, " found %lld\n", - (long long)(int)sizeof(struct lu_seq_range)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, " found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_start)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, " found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_end)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_mdt) == 16, " found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_mdt)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_padding) == 20, " found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_padding)); - + /* Checks for struct link_ea_header */ + LASSERTF((int)sizeof(struct link_ea_header) == 24, " found %lld\n", + (long long)(int)sizeof(struct link_ea_header)); + LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_magic)); + 
LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic)); + LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_reccount)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount)); + LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_len)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len)); + LASSERTF((int)offsetof(struct link_ea_header, padding1) == 16, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, padding1)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->padding1)); + LASSERTF((int)offsetof(struct link_ea_header, padding2) == 20, " found %lld\n", + (long long)(int)offsetof(struct link_ea_header, padding2)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->padding2)); + + /* Checks for struct link_ea_entry */ + LASSERTF((int)sizeof(struct link_ea_entry) == 19, " found %lld\n", + (long long)(int)sizeof(struct link_ea_entry)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, " found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_reclen)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, " found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid)); + LASSERTF((int)sizeof(((struct 
link_ea_entry *)0)->lee_parent_fid) == 17, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 19, " found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_name)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name)); }