diff --git a/lustre/Makefile.am b/lustre/Makefile.am index 90726705a453f0fdb4dc41501be7c689de52a647..e1e25cb3ed2a73c8648ccd48aa3e6efdd5ebc646 100644 --- a/lustre/Makefile.am +++ b/lustre/Makefile.am @@ -4,8 +4,10 @@ # See the file COPYING in this distribution AUTOMAKE_OPTIONS = foreign -SUBDIRS = ptlrpc llite lib ldlm obdecho mdc osc extN mds ost utils tests obdfilter -SUBDIRS+= obdclass obdext2 obdfs demos doc scripts + +# NOTE: keep extN before mds +SUBDIRS = ptlrpc llite lib ldlm obdecho mdc osc extN mds ost +SUBDIRS+= utils tests obdfilter obdclass obdfs demos doc scripts EXTRA_DIST = BUGS FDL Rules include patches # We get the version from the spec file. diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 46eddde23c8563d975444125502ee415acce23ae..af77aa375bddbfd14f640ecaad9bdb00e5d435b8 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -8,7 +8,7 @@ LYX2HTML = lyx --export html SUFFIXES = .lyx .pdf .sgml .html .txt .fig .eps DOCS = master.pdf lustre-HOWTO.txt -IMAGES = bigpicture.eps intermezzo.eps mds.eps portals-lib.eps client.eps layering.eps metadata.eps sb.eps cow.eps lockacq.eps obdfs.eps snapsetup.eps dirbodyapi.eps loraid.eps ost.eps updates.eps hotmigrate.eps lustreclusters.eps osthw.eps portals-lib.eps +IMAGES = networklayer.fig bigpicture.eps intermezzo.eps mds.eps portals.eps client.eps layering.eps metadata.eps sb.eps cow.eps lockacq.eps obdfs.eps snapsetup.eps dirbodyapi.eps loraid.eps ost.eps updates.eps hotmigrate.eps lustreclusters.eps osthw.eps portals-lib.eps lockqueues.eps lockexample1.eps lockexample2.eps lockexample3.eps lockexample4.eps lockseverity.eps CLEANFILES = $(IMAGES) $(DOCS) EXTRA_DIST = $(DOCS) @@ -29,6 +29,10 @@ all: $(DOCS) master.pdf: master.lyx $(IMAGES) +syncweb: master.pdf + cp master.pdf /usr/src/www/content/lustre/docs/master.pdf + ( cd /usr/src/www ; make lustre ; make synclustre ) + dist-hook: rm -rf $(distdir)/figs/CVS diff --git a/lustre/doc/VERSIONING b/lustre/doc/VERSIONING index 
56a525bf8715aebcdf854559f9539bcafcfc146e..a1a8b62265db21b7cb1f4d429de1430ff918691a 100644 --- a/lustre/doc/VERSIONING +++ b/lustre/doc/VERSIONING @@ -3,9 +3,38 @@ Lustre versioning 0.0.1 2/19/2002 <braam@clusterfs.com> 0.0.2 3/14/2002 <braam@clusterfs.com> describe branches / stable tag +0.0.3 6/10/2002 <braam@clusterfs.com> describe release mechanisms This document describes versioning of source and binaries for Lustre. +Packages +======== + +RPMs that you build should get 3 digit versions; CVS versions will +be 4 digits, can correspond to test RPMs, and lead up to the +package version. So let's plan on releasing 0.1.0 as follows. + +You'd build 2 sets of test RPMs this week: + +0.0.9.1 +0.0.9.2 + +We decide it's fine, and then we release + +0.1.0 + +We go on developing with + +0.1.0.{1,2,3,4,...} + +as test releases and then we release: + +0.1.1 + +The 0.1 sequence is an unstable sequence, as 2.5 is for the kernel. +So we expect lots of 0.1.X releases leading up to a stable 0.2 (or +1.0) at the time of deployment. + CVS === @@ -38,7 +67,8 @@ New numbers are used as follows: What will run, what won't ? --------------------------- -1. If the test level is non-zero no guarantees of any kind are made. +1. If the test level is non-zero, i.e. there are 4 digits in the + version, no guarantees of any kind are made. 2. For three digit releases/tags the code should perform according to the announcement. @@ -46,7 +76,7 @@ What will run, what won't ? Moving tags ----------- -The last stable release will be tagged: CVS tag "stable" +The last stable release will be tagged: CVS tag "t_last_stable" The last operational development snapshot will be CVS tag "dstable" Branches @@ -64,4 +94,4 @@ removing the sticky tags: cvs update -A -fixing any conflicts and then committing. \ No newline at end of file +fixing any conflicts and then committing. 
diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 42621d914cbe9bf2e7a6a739d4561c2a7e6bc7f9..85c4638a2e8b0688cce39463957852ffa8982059 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -12,23 +12,26 @@ #define OBD_LDLM_DEVICENAME "ldlm" -typedef int cluster_host; -typedef int cluster_pid; - typedef enum { ELDLM_OK = 0, ELDLM_LOCK_CHANGED = 300, + ELDLM_LOCK_ABORTED = 301, + ELDLM_RESOURCE_FREED = 302, ELDLM_NAMESPACE_EXISTS = 400, ELDLM_BAD_NAMESPACE = 401 } ldlm_error_t; +#define LDLM_NAMESPACE_SERVER 0 +#define LDLM_NAMESPACE_CLIENT 1 + #define LDLM_FL_LOCK_CHANGED (1 << 0) #define LDLM_FL_BLOCK_GRANTED (1 << 1) #define LDLM_FL_BLOCK_CONV (1 << 2) #define LDLM_FL_BLOCK_WAIT (1 << 3) #define LDLM_FL_DYING (1 << 4) +#define LDLM_FL_AST_SENT (1 << 5) #define L2B(c) (1 << c) @@ -75,12 +78,12 @@ static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new) */ struct ldlm_namespace { - struct obd_device *ns_obddev; - __u32 ns_local; /* is this a local lock tree? */ - struct list_head *ns_hash; /* hash table for ns */ - __u32 ns_refcount; /* count of resources in the hash */ - struct list_head ns_root_list; /* all root resources in ns */ - spinlock_t ns_lock; /* protects hash, refcount, list */ + struct ptlrpc_client ns_client; /* used for revocation callbacks */ + __u32 ns_local; /* is this a local lock tree? 
*/ + struct list_head *ns_hash; /* hash table for ns */ + __u32 ns_refcount; /* count of resources in the hash */ + struct list_head ns_root_list; /* all root resources in ns */ + spinlock_t ns_lock; /* protects hash, refcount, list */ }; /* @@ -114,12 +117,12 @@ struct ldlm_lock { struct ptlrpc_connection *l_connection; struct ptlrpc_client *l_client; __u32 l_flags; - struct ldlm_handle l_remote_handle; + struct lustre_handle l_remote_handle; void *l_data; __u32 l_data_len; + void *l_cookie; + int l_cookie_len; struct ldlm_extent l_extent; - //void *l_event; - //XXX cluster_host l_holder; __u32 l_version[RES_VERSION_SIZE]; __u32 l_readers; @@ -133,9 +136,7 @@ struct ldlm_lock { }; typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new); -typedef int (*ldlm_res_policy)(struct ldlm_resource *parent, - struct ldlm_extent *req_ex, - struct ldlm_extent *new_ex, +typedef int (*ldlm_res_policy)(struct ldlm_lock *lock, void *req_cookie, ldlm_mode_t mode, void *data); #define LDLM_PLAIN 0 @@ -161,7 +162,6 @@ struct ldlm_resource { ldlm_mode_t lr_most_restr; __u32 lr_type; /* PLAIN, EXTENT, or MDSINTENT */ struct ldlm_resource *lr_root; - //XXX cluster_host lr_master; __u64 lr_name[RES_NAME_SIZE]; __u32 lr_version[RES_VERSION_SIZE]; __u32 lr_refcount; @@ -173,24 +173,11 @@ static inline struct ldlm_extent *ldlm_res2extent(struct ldlm_resource *res) return (struct ldlm_extent *)(res->lr_name); } -static inline void *ldlm_handle2object(struct ldlm_handle *handle) -{ - if (handle) - return (void *)(unsigned long)(handle->addr); - return NULL; -} - -static inline void ldlm_object2handle(void *object, struct ldlm_handle *handle) -{ - handle->addr = (__u64)(unsigned long)object; -} - extern struct obd_ops ldlm_obd_ops; /* ldlm_extent.c */ int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *); -int ldlm_extent_policy(struct ldlm_resource *, struct ldlm_extent *, - struct ldlm_extent *, ldlm_mode_t, void *); +int ldlm_extent_policy(struct ldlm_lock 
*, void *, ldlm_mode_t, void *); /* ldlm_lock.c */ void ldlm_lock_free(struct ldlm_lock *lock); @@ -199,21 +186,21 @@ void ldlm_lock_addref(struct ldlm_lock *lock, __u32 mode); void ldlm_lock_decref(struct ldlm_lock *lock, __u32 mode); void ldlm_grant_lock(struct ldlm_resource *res, struct ldlm_lock *lock); int ldlm_local_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type, - struct ldlm_extent *extent, ldlm_mode_t mode, - struct ldlm_handle *lockh); + void *cookie, int cookielen, ldlm_mode_t mode, + struct lustre_handle *lockh); ldlm_error_t ldlm_local_lock_create(struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock_handle, + struct lustre_handle *parent_lock_handle, __u64 *res_id, __u32 type, ldlm_mode_t mode, void *data, __u32 data_len, - struct ldlm_handle *lockh); -ldlm_error_t ldlm_local_lock_enqueue(struct ldlm_handle *lockh, - struct ldlm_extent *req_ex, + struct lustre_handle *lockh); +ldlm_error_t ldlm_local_lock_enqueue(struct lustre_handle *lockh, + void *cookie, int cookie_len, int *flags, ldlm_lock_callback completion, ldlm_lock_callback blocking); -struct ldlm_resource *ldlm_local_lock_convert(struct ldlm_handle *lockh, +struct ldlm_resource *ldlm_local_lock_convert(struct lustre_handle *lockh, int new_mode, int *flags); struct ldlm_resource *ldlm_local_lock_cancel(struct ldlm_lock *lock); void ldlm_reprocess_all(struct ldlm_resource *res); @@ -223,8 +210,10 @@ void ldlm_lock_dump(struct ldlm_lock *lock); int ldlm_test(struct obd_device *device, struct ptlrpc_connection *conn); /* resource.c */ -struct ldlm_namespace *ldlm_namespace_new(struct obd_device *, __u32 local); +struct ldlm_namespace *ldlm_namespace_new(__u32 local); int ldlm_namespace_free(struct ldlm_namespace *ns); + +/* resource.c - internal */ struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, __u64 *name, __u32 type, int create); @@ -236,20 +225,23 @@ void ldlm_res2desc(struct ldlm_resource *res, struct 
ldlm_resource_desc *desc); void ldlm_resource_dump(struct ldlm_resource *res); /* ldlm_request.c */ -int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *peer, +int ldlm_cli_enqueue(struct ptlrpc_client *cl, + struct ptlrpc_connection *peer, + struct ptlrpc_request *req, struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock_handle, + struct lustre_handle *parent_lock_handle, __u64 *res_id, __u32 type, - struct ldlm_extent *req_ex, + void *cookie, int cookielen, ldlm_mode_t mode, int *flags, + ldlm_lock_callback callback, void *data, __u32 data_len, - struct ldlm_handle *lockh); + struct lustre_handle *lockh); int ldlm_cli_callback(struct ldlm_lock *lock, struct ldlm_lock *new, void *data, __u32 data_len); -int ldlm_cli_convert(struct ptlrpc_client *, struct ldlm_handle *, +int ldlm_cli_convert(struct ptlrpc_client *, struct lustre_handle *, int new_mode, int *flags); int ldlm_cli_cancel(struct ptlrpc_client *, struct ldlm_lock *); diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 41216ad7e58f61de123ee2f4d3e1871196f8d2c4..be8348f61185e70e1a7d6485d4d7aab3f26b2031 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -45,18 +45,22 @@ #define PTL_RPC_MSG_REQUEST 4711 #define PTL_RPC_MSG_ERR 4712 +struct lustre_handle { + __u64 addr; + __u64 cookie; +}; + struct lustre_msg { - __u64 conn; - __u64 token; + __u64 conn; /* pointer to ptlrpc_connection */ + __u64 token; /* security token */ - __u32 opc; - __u32 xid; __u64 last_rcvd; __u64 last_committed; __u64 transno; + __u32 opc; __u32 status; __u32 type; - __u32 connid; + __u32 target_id; __u32 bufcount; __u32 buflens[0]; }; @@ -98,6 +102,7 @@ typedef uint32_t obd_count; #define OBD_FL_INLINEDATA (0x00000001) #define OBD_FL_OBDMDEXISTS (0x00000002) +#define OBD_FL_CREATEONOPEN (0x00000004) #define OBD_INLINESZ 60 #define OBD_OBDMDSZ 60 @@ -194,7 +199,9 @@ struct ost_body { #define MDS_REINT 4 #define MDS_READPAGE 6 
#define MDS_CONNECT 7 -#define MDS_STATFS 8 +#define MDS_DISCONNECT 8 +#define MDS_GETSTATUS 9 +#define MDS_STATFS 10 #define REINT_SETATTR 1 #define REINT_CREATE 2 @@ -302,16 +309,15 @@ typedef enum { LCK_NL } ldlm_mode_t; -struct ldlm_handle { - __u64 addr; - __u64 cookie; -}; - struct ldlm_extent { __u64 start; __u64 end; }; +struct ldlm_intent { + __u64 opc; +}; + struct ldlm_resource_desc { __u32 lr_type; __u64 lr_name[RES_NAME_SIZE]; @@ -327,23 +333,24 @@ struct ldlm_lock_desc { }; struct ldlm_request { - __u32 flags; + __u32 lock_flags; struct ldlm_lock_desc lock_desc; - struct ldlm_handle lock_handle1; - struct ldlm_handle lock_handle2; + struct lustre_handle lock_handle1; + struct lustre_handle lock_handle2; }; struct ldlm_reply { - __u32 flags; - struct ldlm_handle lock_handle; - struct ldlm_extent lock_extent; + __u32 lock_flags; + __u64 lock_resource_name[3]; + struct lustre_handle lock_handle; + struct ldlm_extent lock_extent; /* XXX make this policy 1 &2 */ + __u64 lock_policy_res1; + __u64 lock_policy_res2; }; /* * OBD IOCTLS */ - - #define OBD_IOCTL_VERSION 0x00010001 struct obd_ioctl_data { diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index e7ae204038c748109ae181c13afedc2f17f18fb8..a24811ba858fa5caf6df9b5cdbecc454fa9cdddb 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -31,6 +31,9 @@ # include <string.h> #endif +#include <linux/portals_lib.h> +#include <linux/lustre_idl.h> + #ifdef __KERNEL__ /* page.c */ inline void lustre_put_page(struct page *page); @@ -65,6 +68,20 @@ static inline void ll_sleep(int t) } #endif +/* FIXME: This needs to validate pointers and cookies */ +static inline void *lustre_handle2object(struct lustre_handle *handle) +{ + if (handle) + return (void *)(unsigned long)(handle->addr); + return NULL; +} + +static inline void ldlm_object2handle(void *object, struct lustre_handle *handle) +{ + handle->addr = (__u64)(unsigned long)object; +} + + 
#include <linux/portals_lib.h> #endif /* _LUSTRE_LIB_H */ diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index bdb9ed1c6eda04b6a51bc273448f7d094e3fbf30..3153e6080e0ea7b513f6a89c043d1d0602527192 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -46,6 +46,7 @@ struct ll_inode_info { struct obdo *lli_obdo; char *lli_symlink_name; char lli_inline[LL_INLINESZ]; + struct lustre_handle lli_intent_lock_handle; }; #define LL_SUPER_MAGIC 0x0BD00BD0 @@ -55,16 +56,14 @@ struct ll_inode_info { #define LL_COMMITCBD_RUNNING 0x4 struct ll_sb_info { - struct obd_conn ll_conn; + struct obd_conn ll_mdc_conn; + struct obd_conn ll_osc_conn; ino_t ll_rootino; /* number of root inode */ - struct ptlrpc_client ll_mds_client; - struct ptlrpc_connection *ll_mds_conn; - struct ldlm_namespace *ll_namespace; wait_queue_head_t ll_commitcbd_waitq; wait_queue_head_t ll_commitcbd_ctl_waitq; int ll_commitcbd_flags; - struct task_struct *ll_commitcbd_thread; + struct task_struct *ll_commitcbd_thread; time_t ll_commitcbd_waketime; time_t ll_commitcbd_timeout; spinlock_t ll_commitcbd_lock; @@ -78,7 +77,13 @@ static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb) static inline struct obd_conn *ll_s2obdconn(struct super_block *sb) { - return &(ll_s2sbi(sb))->ll_conn; + return &(ll_s2sbi(sb))->ll_osc_conn; +} + +static inline struct mdc_obd *sbi2mdc(struct ll_sb_info *sbi) +{ + struct obd_device *obd = sbi->ll_mdc_conn.oc_dev; + return &obd->u.mdc; } static inline struct ll_sb_info *ll_i2sbi(struct inode *inode) @@ -116,6 +121,13 @@ static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode) inode->i_mode & S_IFMT); } +/* namei.c */ +int ll_lock(struct inode *dir, struct dentry *dentry, + struct lookup_intent *it, struct lustre_handle *lockh); +int ll_unlock(__u32 mode, struct lustre_handle *lockh); + + + /* dir.c */ extern struct file_operations ll_dir_operations; extern struct inode_operations 
ll_dir_inode_operations; diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 222c09ab0eefa4f3bd6b662de79016a9b7c4f673..b96207ac1a6edc3de17011b3cc6ead246556d440 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -31,9 +31,11 @@ #include <linux/obd_class.h> #include <linux/lustre_idl.h> #include <linux/lustre_net.h> +#include <linux/lustre_dlm.h> #include <linux/obd_lov.h> /* for lov_md */ #define LUSTRE_MDS_NAME "mds" +#define LUSTRE_MDC_NAME "mdc" struct mds_update_record { __u32 ur_opcode; @@ -99,55 +101,79 @@ struct mds_file_data { }; /* mds/mds_reint.c */ -int mds_reint_rec(struct mds_update_record *r, struct ptlrpc_request *req); +int mds_reint_rec(struct mds_update_record *r, int offset, + struct ptlrpc_request *req); struct mds_client_info *mds_uuid_to_mci(struct mds_obd *mds, __u8 *uuid); /* lib/mds_updates.c */ +void mds_unpack_body(struct mds_body *b); void mds_pack_req_body(struct ptlrpc_request *); void mds_pack_rep_body(struct ptlrpc_request *); -void mds_unpack_req_body(struct ptlrpc_request *); -void mds_unpack_rep_body(struct ptlrpc_request *); -int mds_update_unpack(struct ptlrpc_request *, struct mds_update_record *); - -void mds_setattr_pack(struct mds_rec_setattr *, struct inode *, struct iattr *); -void mds_create_pack(struct mds_rec_create *, struct inode *, __u32 mode, - __u64 id, __u32 uid, __u32 gid, __u64 time); -void mds_unlink_pack(struct mds_rec_unlink *, struct inode *inode, - struct inode *child); -void mds_link_pack(struct mds_rec_link *, struct inode *ino, struct inode *dir); -void mds_rename_pack(struct mds_rec_rename *, struct inode *srcdir, - struct inode *tgtdir); +int mds_update_unpack(struct ptlrpc_request *, int offset, + struct mds_update_record *); + +void mds_getattr_pack(struct ptlrpc_request *req, int offset, + struct inode *inode, + const char *name, int namelen); +void mds_setattr_pack(struct ptlrpc_request *, int offset, struct inode *, + 
struct iattr *, const char *name, int namelen); +void mds_create_pack(struct ptlrpc_request *, int offset, struct inode *, + __u32 mode, __u64 id, __u32 uid, __u32 gid, __u64 time, + const char *name, int namelen, const char *tgt, + int tgtlen); +void mds_unlink_pack(struct ptlrpc_request *, int offset, struct inode *inode, + struct inode *child, const char *name, int namelen); +void mds_link_pack(struct ptlrpc_request *, int offset, struct inode *ino, + struct inode *dir, const char *name, int namelen); +void mds_rename_pack(struct ptlrpc_request *, int offset, struct inode *srcdir, + struct inode *tgtdir, const char *name, int namelen, + const char *tgt, int tgtlen); +void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode); +void mds_pack_inode2body(struct mds_body *body, struct inode *inode); /* mds/handler.c */ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, struct vfsmount **mnt); +int mds_lock_callback(struct ldlm_lock *lock, struct ldlm_lock *new, + void *data, int data_len); +int mds_reint(int offset, struct ptlrpc_request *req); /* mdc/mdc_request.c */ -int mdc_connect(struct ptlrpc_client *, struct ptlrpc_connection *, +static inline struct mdc_obd *mdc_conn2mdc(struct obd_conn *conn) +{ + return &conn->oc_dev->u.mdc; +} + +int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, + int lock_mode, struct inode *dir, struct dentry *de, + struct lustre_handle *h, __u64 id, char *tgt, int tgtlen, + void *data, int datalen); +int mdc_getstatus(struct obd_conn *conn, struct ll_fid *rootfid, __u64 *last_committed, __u64 *last_rcvd, __u32 *last_xid, struct ptlrpc_request **); -int mdc_getattr(struct ptlrpc_client *, struct ptlrpc_connection *, ino_t ino, - int type, unsigned long valid, size_t ea_size, - struct ptlrpc_request **); -int mdc_setattr(struct ptlrpc_client *, struct ptlrpc_connection *, +int mdc_getattr(struct obd_conn *conn, + ino_t ino, int type, unsigned long valid, size_t ea_size, + struct 
ptlrpc_request **request); +int mdc_setattr(struct obd_conn *conn, struct inode *, struct iattr *iattr, struct ptlrpc_request **); -int mdc_open(struct ptlrpc_client *, struct ptlrpc_connection *, ino_t ino, - int type, int flags, __u64 cookie, __u64 *fh, struct ptlrpc_request **req); -int mdc_close(struct ptlrpc_client *cl, struct ptlrpc_connection *peer, +int mdc_open(struct obd_conn *conn, + ino_t ino, int type, int flags, __u64 objid, __u64 cookie, + __u64 *fh, struct ptlrpc_request **request); +int mdc_close(struct obd_conn *conn, ino_t ino, int type, __u64 fh, struct ptlrpc_request **req); -int mdc_readpage(struct ptlrpc_client *, struct ptlrpc_connection *, ino_t ino, +int mdc_readpage(struct obd_conn *conn, ino_t ino, int type, __u64 offset, char *addr, struct ptlrpc_request **); -int mdc_create(struct ptlrpc_client *, struct ptlrpc_connection *, +int mdc_create(struct obd_conn *conn, struct inode *dir, const char *name, int namelen, const char *tgt, int tgtlen, int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev, struct obdo *obdo, struct ptlrpc_request **); -int mdc_unlink(struct ptlrpc_client *, struct ptlrpc_connection *, +int mdc_unlink(struct obd_conn *conn, struct inode *dir, struct inode *child, const char *name, int namelen, struct ptlrpc_request **); -int mdc_link(struct ptlrpc_client *, struct ptlrpc_connection *, +int mdc_link(struct obd_conn *conn, struct dentry *src, struct inode *dir, const char *name, int namelen, struct ptlrpc_request **); -int mdc_rename(struct ptlrpc_client *, struct ptlrpc_connection *, +int mdc_rename(struct obd_conn *conn, struct inode *src, struct inode *tgt, const char *old, int oldlen, const char *new, int newlen, struct ptlrpc_request **); diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index ae09030f3a3cdcc18ff5207dc7e9570fcbec15f8..75470a471ce503e72863c1f85b2334c73911a30f 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -36,11 +36,11 
@@ #define CONNMGR_REQUEST_PORTAL 1 #define CONNMGR_REPLY_PORTAL 2 -#define OSC_REQUEST_PORTAL 3 +//#define OSC_REQUEST_PORTAL 3 #define OSC_REPLY_PORTAL 4 #define OSC_BULK_PORTAL 5 #define OST_REQUEST_PORTAL 6 -#define OST_REPLY_PORTAL 7 +//#define OST_REPLY_PORTAL 7 #define OST_BULK_PORTAL 8 #define MDC_REQUEST_PORTAL 9 #define MDC_REPLY_PORTAL 10 @@ -103,6 +103,7 @@ struct ptlrpc_client { __u32 cli_reply_portal; __u64 cli_last_rcvd; __u64 cli_last_committed; + __u32 cli_target_devno; void *cli_data; struct semaphore cli_rpc_sem; /* limits outstanding requests */ @@ -204,7 +205,7 @@ struct ptlrpc_bulk_desc { wait_queue_head_t b_waitq; struct list_head b_page_list; __u32 b_page_count; - atomic_t b_finished_count; + atomic_t b_pages_remaining; void *b_desc_private; }; diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index b7b201415c9e8f58956ac10aeef1cbfc5c966b7c..ec30611465f56fc19473db5c3d71ca3c5f7f1cbd 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -69,8 +69,18 @@ struct filter_obd { struct mds_client_info; struct mds_server_data; +struct mdc_obd { + struct ptlrpc_client *mdc_client; + struct ptlrpc_client *mdc_ldlm_client; + struct ptlrpc_connection *mdc_conn; + __u8 mdc_target_uuid[37]; +}; + struct mds_obd { + struct ldlm_namespace *mds_local_namespace; struct ptlrpc_service *mds_service; + struct ptlrpc_client *mds_ldlm_client; /* to be an LDLM client */ + struct ptlrpc_connection *mds_ldlm_conn; /* to be an LDLM client */ char *mds_fstype; struct super_block *mds_sb; @@ -147,9 +157,8 @@ struct raid1_obd { struct ost_obd { struct ptlrpc_service *ost_service; - - struct obd_device *ost_tgt; - struct obd_conn ost_conn; + struct obd_device *ost_tgt; /* the exported OBD */ + struct obd_conn ost_conn; /* the local connection to the OBD */ }; struct osc_obd { @@ -157,6 +166,7 @@ struct osc_obd { struct ptlrpc_client *osc_client; struct ptlrpc_client *osc_ldlm_client; struct ptlrpc_connection *osc_conn; + __u8 
osc_target_uuid[37]; }; typedef __u8 uuid_t[37]; @@ -197,6 +207,7 @@ struct obd_device { struct ext2_obd ext2; struct filter_obd filter; struct mds_obd mds; + struct mdc_obd mdc; struct ost_obd ost; struct osc_obd osc; struct ldlm_obd ldlm; @@ -261,13 +272,12 @@ struct obd_ops { int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, void *desc_private); - - int (*o_enqueue)(struct obd_conn *conn, struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock, __u64 *res_id, - __u32 type, struct ldlm_extent *, __u32 mode, - int *flags, void *data, int datalen, - struct ldlm_handle *lockh); - int (*o_cancel)(struct obd_conn *, __u32 mode, struct ldlm_handle *); + int (*o_enqueue)(struct obd_conn *conn, + struct lustre_handle *parent_lock, __u64 *res_id, + __u32 type, void *cookie, int cookielen, __u32 mode, + int *flags, void *cb, void *data, int datalen, + struct lustre_handle *lockh); + int (*o_cancel)(struct obd_conn *, __u32 mode, struct lustre_handle *); }; #endif diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 7054978e155541541a3aa3670f0a6fbdf45e2356..dc0f6c66de41eaeba6bfe1457cc2f97b95c99490 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -44,7 +44,7 @@ /* * ======== OBD Device Declarations =========== */ -#define MAX_OBD_DEVICES 8 +#define MAX_OBD_DEVICES 32 extern struct obd_device obd_dev[MAX_OBD_DEVICES]; #define OBD_ATTACHED 0x1 @@ -345,24 +345,24 @@ static inline int obd_iocontrol(int cmd, struct obd_conn *conn, RETURN(rc); } -static inline int obd_enqueue(struct obd_conn *conn, struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock, __u64 *res_id, - __u32 type, struct ldlm_extent *extent, - __u32 mode, int *flags, void *data, int datalen, - struct ldlm_handle *lockh) +static inline int obd_enqueue(struct obd_conn *conn, + struct lustre_handle *parent_lock, __u64 *res_id, + __u32 type, void *cookie, int cookielen, + __u32 mode, int *flags, void *cb, 
void *data, + int datalen, struct lustre_handle *lockh) { int rc; OBD_CHECK_SETUP(conn); OBD_CHECK_OP(conn, enqueue); - - rc = OBP(conn->oc_dev, enqueue)(conn, ns, parent_lock, res_id, type, - extent, mode, flags, data, datalen, - lockh); + + rc = OBP(conn->oc_dev, enqueue)(conn, parent_lock, res_id, type, + cookie, cookielen, mode, flags, cb, + data, datalen, lockh); RETURN(rc); } static inline int obd_cancel(struct obd_conn *conn, __u32 mode, - struct ldlm_handle *lockh) + struct lustre_handle *lockh) { int rc; OBD_CHECK_SETUP(conn); @@ -675,6 +675,7 @@ static __inline__ int obdo_cmp_md(struct obdo *dst, struct obdo *src, #ifdef __KERNEL__ int obd_register_type(struct obd_ops *ops, char *nm); int obd_unregister_type(char *nm); +int obd_class_name2dev(char *name); struct obd_client { struct list_head cli_chain; diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 0112ac70f1c2250d4748432afceb434809109256..e3e4020cbd012c6cb0695d4a821d707816b5d0c4 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -57,6 +57,9 @@ extern unsigned long obd_fail_loc; #define OBD_FAIL_MDS_CONNECT_NET 0x117 #define OBD_FAIL_MDS_CONNECT_PACK 0x118 #define OBD_FAIL_MDS_REINT_NET_REP 0x119 +#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a +#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b +#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index 3e7c7232c396384800073178cee0a31db830543b..4f7f9eb53d073e8940fa00363690acb86cfa62d6 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -8,7 +8,7 @@ * * by Cluster File Systems, Inc. 
* authors, Peter Braam <braam@clusterfs.com> & - * Phil Schwan <phil@clusterfs.com> + * Phil Schwan <phil@clusterfs.com> */ #define DEBUG_SUBSYSTEM S_LDLM @@ -30,6 +30,14 @@ int ldlm_extent_compat(struct ldlm_lock *a, struct ldlm_lock *b) RETURN(1); } +/* The purpose of this function is to return: + * - the maximum extent + * - containing the requested extent + * - and not overlapping existing extents outside the requested one + * + * An alternative policy is to not shrink the new extent when conflicts exist. + * + * To reconstruct our formulas, take a deep breath. */ static void policy_internal(struct list_head *queue, struct ldlm_extent *req_ex, struct ldlm_extent *new_ex, ldlm_mode_t mode) { @@ -58,28 +66,27 @@ static void policy_internal(struct list_head *queue, struct ldlm_extent *req_ex, } } -/* The purpose of this function is to return: - * - the maximum extent - * - containing the requested extent - * - and not overlapping existing extents outside the requested one - * - * An alternative policy is to not shrink the new extent when conflicts exist. - * - * To reconstruct our formulas, take a deep breath. 
*/ -int ldlm_extent_policy(struct ldlm_resource *res, struct ldlm_extent *req_ex, - struct ldlm_extent *new_ex, ldlm_mode_t mode, void *data) +/* apply the internal policy by walking all the lists */ +int ldlm_extent_policy(struct ldlm_lock *lock, void *req_cookie, + ldlm_mode_t mode, void *data) { - new_ex->start = 0; - new_ex->end = ~0; + struct ldlm_resource *res = lock->l_resource; + struct ldlm_extent *req_ex = req_cookie; + struct ldlm_extent new_ex; + new_ex.start = 0; + new_ex.end = ~0; if (!res) LBUG(); - policy_internal(&res->lr_granted, req_ex, new_ex, mode); - policy_internal(&res->lr_converting, req_ex, new_ex, mode); - policy_internal(&res->lr_waiting, req_ex, new_ex, mode); + policy_internal(&res->lr_granted, req_ex, &new_ex, mode); + policy_internal(&res->lr_converting, req_ex, &new_ex, mode); + policy_internal(&res->lr_waiting, req_ex, &new_ex, mode); + + memcpy(&lock->l_extent, &new_ex, sizeof(new_ex)); - if (new_ex->end != req_ex->end || new_ex->start != req_ex->start) + if (new_ex.end != req_ex->end || new_ex.start != req_ex->start) return ELDLM_LOCK_CHANGED; - return 0; + else + return 0; } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 30a99c0c7cda0555e522e39bcac1a1ca4ca867e5..3434c5b17062e1f51b1e6119bd2efcfff027b5a3 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -7,41 +7,169 @@ * See the file COPYING in this distribution * * by Cluster File Systems, Inc. 
- * authors, Peter Braam <braam@clusterfs.com> & + * authors, Peter Braam <braam@clusterfs.com> & * Phil Schwan <phil@clusterfs.com> */ #define DEBUG_SUBSYSTEM S_LDLM #include <linux/slab.h> +#include <linux/module.h> #include <linux/lustre_dlm.h> +#include <linux/lustre_mds.h> extern kmem_cache_t *ldlm_lock_slab; +int (*mds_reint_p)(int offset, struct ptlrpc_request *req) = NULL; +int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req) = NULL; static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b); -static int ldlm_intent_compat(struct ldlm_lock *a, struct ldlm_lock *b); +static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie, + ldlm_mode_t mode, void *data); ldlm_res_compat ldlm_res_compat_table [] = { [LDLM_PLAIN] ldlm_plain_compat, [LDLM_EXTENT] ldlm_extent_compat, - [LDLM_MDSINTENT] ldlm_intent_compat + [LDLM_MDSINTENT] ldlm_plain_compat }; ldlm_res_policy ldlm_res_policy_table [] = { [LDLM_PLAIN] NULL, [LDLM_EXTENT] ldlm_extent_policy, - [LDLM_MDSINTENT] NULL + [LDLM_MDSINTENT] ldlm_intent_policy }; -static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b) +static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie, + ldlm_mode_t mode, void *data) { - return lockmode_compat(a->l_req_mode, b->l_req_mode); + struct ptlrpc_request *req = req_cookie; + int rc = 0; + ENTRY; + + if (!req_cookie) + RETURN(0); + + if (req->rq_reqmsg->bufcount > 1) { + /* an intent needs to be considered */ + struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1); + struct mds_body *mds_rep; + struct ldlm_reply *rep; + struct ldlm_namespace *ns = lock->l_resource->lr_namespace; + __u32 type = lock->l_resource->lr_type; + __u64 new_resid[3] = {0, 0, 0}; + int bufcount, rc, size[3] = {sizeof(struct ldlm_reply), + sizeof(struct mds_body), + sizeof(struct obdo)}; + + it->opc = NTOH__u64(it->opc); + + switch(it->opc) { + case IT_GETATTR: + /* Note that in the negative case you may be returning + * a file and its obdo 
*/ + case IT_CREAT: + case IT_CREAT|IT_OPEN: + case IT_MKDIR: + case IT_SYMLINK: + case IT_MKNOD: + case IT_LINK: + case IT_OPEN: + case IT_RENAME: + bufcount = 3; + break; + default: + bufcount = 2; + } + + rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen, + &req->rq_repmsg); + if (rc) { + rc = req->rq_status = -ENOMEM; + RETURN(rc); + } + + rep = lustre_msg_buf(req->rq_repmsg, 0); + rep->lock_policy_res1 = 1; + switch ( it->opc ) { + case IT_CREAT: + case IT_CREAT|IT_OPEN: + case IT_MKDIR: + case IT_SETATTR: + case IT_SYMLINK: + case IT_MKNOD: + case IT_LINK: + case IT_RENAME2: + if (mds_reint_p == NULL) + mds_reint_p = + inter_module_get_request + ("mds_reint", "mds"); + if (!mds_reint_p || IS_ERR(mds_reint_p)) { /* a NULL handler must not be called either */ + CERROR("MDSINTENT locks require the MDS " + "module.\n"); + LBUG(); + RETURN(-EINVAL); + } + rc = mds_reint_p(2, req); + if (rc) + LBUG(); + break; + case IT_GETATTR: + case IT_READDIR: + case IT_RENAME: + case IT_OPEN: + if (mds_getattr_name_p == NULL) + mds_getattr_name_p = + inter_module_get_request + ("mds_getattr_name", "mds"); + if (!mds_getattr_name_p || IS_ERR(mds_getattr_name_p)) { /* a NULL handler must not be called either */ + CERROR("MDSINTENT locks require the MDS " + "module.\n"); + LBUG(); + RETURN(-EINVAL); + } + rc = mds_getattr_name_p(2, req); + if (rc) + LBUG(); + break; + case IT_READDIR|IT_OPEN: + LBUG(); + break; + default: + CERROR("Unhandled intent\n"); + LBUG(); + } + + mds_rep = lustre_msg_buf(req->rq_repmsg, 1); + rep->lock_policy_res2 = req->rq_status; + new_resid[0] = mds_rep->ino; + + CDEBUG(D_INFO, "remote intent: locking %d instead of " + "%ld\n", mds_rep->ino, + (long)lock->l_resource->lr_name[0]); + ldlm_resource_put(lock->l_resource); + + lock->l_resource = + ldlm_resource_get(ns, NULL, new_resid, type, 1); + if (lock->l_resource == NULL) { + LBUG(); + RETURN(-ENOMEM); + } + RETURN(ELDLM_LOCK_CHANGED); + } else { + int size = sizeof(struct ldlm_reply); + rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, + &req->rq_repmsg); + if (rc) { + CERROR("out of memory\n"); + LBUG(); + 
RETURN(-ENOMEM); + } + } + RETURN(rc); } -static int ldlm_intent_compat(struct ldlm_lock *a, struct ldlm_lock *b) +static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b) { - LBUG(); - return 0; + return lockmode_compat(a->l_req_mode, b->l_req_mode); } /* Args: referenced, unlocked parent (or NULL) @@ -117,10 +245,30 @@ void ldlm_lock_addref(struct ldlm_lock *lock, __u32 mode) spin_unlock(&lock->l_lock); } +void ldlm_send_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock *new) +{ + ENTRY; + + spin_lock(&lock->l_lock); + if (lock->l_flags & LDLM_FL_AST_SENT) { + EXIT; + return; + } + + lock->l_flags |= LDLM_FL_AST_SENT; + spin_unlock(&lock->l_lock); + + lock->l_blocking_ast(lock, new, lock->l_data, lock->l_data_len); + EXIT; +} + /* Args: unlocked lock */ void ldlm_lock_decref(struct ldlm_lock *lock, __u32 mode) { - int rc; + ENTRY; + + if (lock == NULL) + LBUG(); spin_lock(&lock->l_lock); if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR) @@ -136,15 +284,13 @@ void ldlm_lock_decref(struct ldlm_lock *lock, __u32 mode) } CDEBUG(D_INFO, "final decref done on dying lock, " - "cancelling.\n"); + "calling callback.\n"); spin_unlock(&lock->l_lock); - rc = ldlm_cli_cancel(lock->l_client, lock); - if (rc) { - /* FIXME: do something more dramatic */ - CERROR("ldlm_cli_cancel: %d\n", rc); - } + lock->l_blocking_ast(lock, NULL, lock->l_data, + lock->l_data_len); } else spin_unlock(&lock->l_lock); + EXIT; } /* Args: locked lock */ @@ -174,11 +320,11 @@ static int _ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs, rc = 1; - CDEBUG(D_OTHER, "compat function failed and lock modes are " - "incompatible; sending blocking AST.\n"); - if (send_cbs && child->l_blocking_ast != NULL) - child->l_blocking_ast(child, lock, child->l_data, - child->l_data_len); + CDEBUG(D_OTHER, "compat function failed and lock modes incompat\n"); + if (send_cbs && child->l_blocking_ast != NULL) { + CDEBUG(D_OTHER, "incompatible; sending blocking AST.\n"); + 
ldlm_send_blocking_ast(child, lock); + } } return rc; @@ -191,6 +337,7 @@ static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs) ENTRY; rc = _ldlm_lock_compat(lock, send_cbs, &lock->l_resource->lr_granted); + /* FIXME: should we be sending ASTs to converting? */ rc |= _ldlm_lock_compat(lock, send_cbs, &lock->l_resource->lr_converting); @@ -215,11 +362,11 @@ void ldlm_grant_lock(struct ldlm_resource *res, struct ldlm_lock *lock) } static int search_queue(struct list_head *queue, ldlm_mode_t mode, - struct ldlm_extent *extent, struct ldlm_handle *lockh) + struct ldlm_extent *extent, struct lustre_handle *lockh) { struct list_head *tmp; - list_for_each(tmp, queue) { + list_for_each(tmp, queue) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); @@ -227,7 +374,7 @@ static int search_queue(struct list_head *queue, ldlm_mode_t mode, continue; /* lock_convert() takes the resource lock, so we're sure that - * req_mode, lr_type, and l_extent won't change beneath us */ + * req_mode, lr_type, and l_cookie won't change beneath us */ if (lock->l_req_mode != mode) continue; @@ -249,8 +396,8 @@ static int search_queue(struct list_head *queue, ldlm_mode_t mode, * Returns 1 if it finds an already-existing lock that is compatible; in this * case, lockh is filled in with a addref()ed lock */ int ldlm_local_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type, - struct ldlm_extent *extent, ldlm_mode_t mode, - struct ldlm_handle *lockh) + void *cookie, int cookielen, ldlm_mode_t mode, + struct lustre_handle *lockh) { struct ldlm_resource *res; int rc = 0; @@ -261,11 +408,11 @@ int ldlm_local_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type, RETURN(0); spin_lock(&res->lr_lock); - if (search_queue(&res->lr_granted, mode, extent, lockh)) + if (search_queue(&res->lr_granted, mode, cookie, lockh)) GOTO(out, rc = 1); - if (search_queue(&res->lr_converting, mode, extent, lockh)) + if (search_queue(&res->lr_converting, mode, 
cookie, lockh)) GOTO(out, rc = 1); - if (search_queue(&res->lr_waiting, mode, extent, lockh)) + if (search_queue(&res->lr_waiting, mode, cookie, lockh)) GOTO(out, rc = 1); EXIT; @@ -278,17 +425,17 @@ int ldlm_local_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type, /* Must be called without the resource lock held. Returns a referenced, * unlocked ldlm_lock. */ ldlm_error_t ldlm_local_lock_create(struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock_handle, + struct lustre_handle *parent_lock_handle, __u64 *res_id, __u32 type, ldlm_mode_t mode, void *data, __u32 data_len, - struct ldlm_handle *lockh) + struct lustre_handle *lockh) { struct ldlm_resource *res, *parent_res = NULL; struct ldlm_lock *lock, *parent_lock; - parent_lock = ldlm_handle2object(parent_lock_handle); + parent_lock = lustre_handle2object(parent_lock_handle); if (parent_lock) parent_res = parent_lock->l_resource; @@ -314,8 +461,8 @@ ldlm_error_t ldlm_local_lock_create(struct ldlm_namespace *ns, } /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */ -ldlm_error_t ldlm_local_lock_enqueue(struct ldlm_handle *lockh, - struct ldlm_extent *req_ex, +ldlm_error_t ldlm_local_lock_enqueue(struct lustre_handle *lockh, + void *cookie, int cookie_len, int *flags, ldlm_lock_callback completion, ldlm_lock_callback blocking) @@ -326,28 +473,27 @@ ldlm_error_t ldlm_local_lock_enqueue(struct ldlm_handle *lockh, ldlm_res_policy policy; ENTRY; - lock = ldlm_handle2object(lockh); + lock = lustre_handle2object(lockh); res = lock->l_resource; local = res->lr_namespace->ns_local; spin_lock(&res->lr_lock); lock->l_blocking_ast = blocking; - if ((res->lr_type == LDLM_EXTENT && !req_ex) || - (res->lr_type != LDLM_EXTENT && req_ex)) - LBUG(); + if (res->lr_type == LDLM_EXTENT) + memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent)); - if ((policy = ldlm_res_policy_table[res->lr_type])) { - struct ldlm_extent new_ex; - int rc = policy(res, req_ex, &new_ex, lock->l_req_mode, 
NULL); + /* policies are not executed on the client */ + if (!local && (policy = ldlm_res_policy_table[res->lr_type])) { + int rc = policy(lock, cookie, lock->l_req_mode, NULL); if (rc == ELDLM_LOCK_CHANGED) { + res = lock->l_resource; *flags |= LDLM_FL_LOCK_CHANGED; - memcpy(req_ex, &new_ex, sizeof(new_ex)); } } - if (req_ex) - memcpy(&lock->l_extent, req_ex, sizeof(*req_ex)); + lock->l_cookie = cookie; + lock->l_cookie_len = cookie_len; if (local && lock->l_req_mode == lock->l_granted_mode) { /* The server returned a blocked lock, but it was granted before @@ -357,6 +503,7 @@ ldlm_error_t ldlm_local_lock_enqueue(struct ldlm_handle *lockh, } /* If this is a local resource, put it on the appropriate list. */ + list_del_init(&lock->l_res_link); if (local) { if (*flags & LDLM_FL_BLOCK_CONV) ldlm_resource_add_lock(res, res->lr_converting.prev, @@ -403,7 +550,7 @@ static int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *tmp, *pos; ENTRY; - list_for_each_safe(tmp, pos, converting) { + list_for_each_safe(tmp, pos, converting) { struct ldlm_lock *pending; pending = list_entry(tmp, struct ldlm_lock, l_res_link); @@ -411,7 +558,7 @@ static int ldlm_reprocess_queue(struct ldlm_resource *res, if (ldlm_lock_compat(pending, 1)) RETURN(1); - list_del_init(&pending->l_res_link); + list_del_init(&pending->l_res_link); ldlm_grant_lock(res, pending); ldlm_lock_addref(pending, pending->l_req_mode); @@ -461,14 +608,14 @@ struct ldlm_resource *ldlm_local_lock_cancel(struct ldlm_lock *lock) } /* Must be called with lock and lock->l_resource unlocked */ -struct ldlm_resource *ldlm_local_lock_convert(struct ldlm_handle *lockh, +struct ldlm_resource *ldlm_local_lock_convert(struct lustre_handle *lockh, int new_mode, int *flags) { struct ldlm_lock *lock; struct ldlm_resource *res; ENTRY; - lock = ldlm_handle2object(lockh); + lock = lustre_handle2object(lockh); res = lock->l_resource; spin_lock(&res->lr_lock); diff --git a/lustre/ldlm/ldlm_lockd.c 
b/lustre/ldlm/ldlm_lockd.c index 58d3a8f72fb60fa536199dada9f470e012113c85..b99492534e3711ef2f675ab07b42a27e880678ba 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -18,6 +18,8 @@ extern kmem_cache_t *ldlm_resource_slab; extern kmem_cache_t *ldlm_lock_slab; +extern int (*mds_reint_p)(int offset, struct ptlrpc_request *req); +extern int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req); #define LOOPBACK(x) (((x) & cpu_to_be32(0xff000000)) == cpu_to_be32(0x7f000000)) @@ -36,13 +38,14 @@ static int common_callback(struct ldlm_lock *lock, struct ldlm_lock *new, ldlm_mode_t mode, void *data, __u32 data_len) { ENTRY; - ldlm_lock_dump(lock); if (!lock) LBUG(); if (!lock->l_resource) LBUG(); + ldlm_lock_dump(lock); + spin_lock(&lock->l_resource->lr_lock); spin_lock(&lock->l_lock); if (!new) { @@ -59,9 +62,11 @@ static int common_callback(struct ldlm_lock *lock, struct ldlm_lock *new, spin_unlock(&lock->l_lock); spin_unlock(&lock->l_resource->lr_lock); if (!lock->l_readers && !lock->l_writers) { - CDEBUG(D_INFO, "Lock already unused, canceling.\n"); - if (ldlm_cli_cancel(lock->l_client, lock)) - LBUG(); + CDEBUG(D_INFO, "Lock already unused, calling " + "callback (%p).\n", lock->l_blocking_ast); + if (lock->l_blocking_ast != NULL) + lock->l_blocking_ast(lock, new, lock->l_data, + lock->l_data_len); } else { CDEBUG(D_INFO, "Lock still has references; lock will be" " cancelled later.\n"); @@ -70,12 +75,13 @@ static int common_callback(struct ldlm_lock *lock, struct ldlm_lock *new, RETURN(0); } +/* FIXME: I think that this is no longer necessary. 
*/ static int local_callback(struct ldlm_lock *l, struct ldlm_lock *new, void *data, __u32 data_len) { struct ldlm_lock *lock; /* the 'remote handle' is the lock in the FS's namespace */ - lock = ldlm_handle2object(&l->l_remote_handle); + lock = lustre_handle2object(&l->l_remote_handle); return common_callback(lock, new, l->l_granted_mode, data, data_len); } @@ -85,10 +91,13 @@ static int _ldlm_enqueue(struct obd_device *obddev, struct ptlrpc_service *svc, { struct ldlm_reply *dlm_rep; struct ldlm_request *dlm_req; - int rc, size = sizeof(*dlm_rep); + int rc, size = sizeof(*dlm_rep), cookielen; + __u32 flags; ldlm_error_t err; struct ldlm_lock *lock = NULL; ldlm_lock_callback callback; + struct lustre_handle lockh; + void *cookie; ENTRY; /* Is this lock managed locally? */ @@ -97,37 +106,54 @@ static int _ldlm_enqueue(struct obd_device *obddev, struct ptlrpc_service *svc, else callback = ldlm_cli_callback; - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("out of memory\n"); - RETURN(-ENOMEM); - } - dlm_rep = lustre_msg_buf(req->rq_repmsg, 0); dlm_req = lustre_msg_buf(req->rq_reqmsg, 0); - - memcpy(&dlm_rep->lock_extent, &dlm_req->lock_desc.l_extent, - sizeof(dlm_rep->lock_extent)); - dlm_rep->flags = dlm_req->flags; + if (dlm_req->lock_desc.l_resource.lr_type == LDLM_MDSINTENT) { + /* In this case, the reply buffer is allocated deep in + * local_lock_enqueue by the policy function. 
*/ + cookie = req; + cookielen = sizeof(*req); + } else { + rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, + &req->rq_repmsg); + if (rc) { + CERROR("out of memory\n"); + RETURN(-ENOMEM); + } + if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) { + cookie = &dlm_req->lock_desc.l_extent; + cookielen = sizeof(struct ldlm_extent); + } + } err = ldlm_local_lock_create(obddev->obd_namespace, &dlm_req->lock_handle2, dlm_req->lock_desc.l_resource.lr_name, dlm_req->lock_desc.l_resource.lr_type, dlm_req->lock_desc.l_req_mode, - lustre_msg_buf(req->rq_reqmsg, 1), - req->rq_reqmsg->buflens[1], - &dlm_rep->lock_handle); + NULL, 0, &lockh); if (err != ELDLM_OK) GOTO(out, err); - err = ldlm_local_lock_enqueue(&dlm_rep->lock_handle, - &dlm_rep->lock_extent, - &dlm_rep->flags, + flags = dlm_req->lock_flags; + err = ldlm_local_lock_enqueue(&lockh, + cookie, cookielen, + &flags, callback, callback); if (err != ELDLM_OK) GOTO(out, err); - lock = ldlm_handle2object(&dlm_rep->lock_handle); + dlm_rep = lustre_msg_buf(req->rq_repmsg, 0); + dlm_rep->lock_flags = flags; + + memcpy(&dlm_rep->lock_handle, &lockh, sizeof(lockh)); + lock = lustre_handle2object(&lockh); + if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) + memcpy(&dlm_rep->lock_extent, &lock->l_extent, + sizeof(lock->l_extent)); + if (dlm_rep->lock_flags & LDLM_FL_LOCK_CHANGED) + memcpy(dlm_rep->lock_resource_name, lock->l_resource->lr_name, + sizeof(dlm_rep->lock_resource_name)); + memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1, sizeof(lock->l_remote_handle)); lock->l_connection = ptlrpc_connection_addref(req->rq_connection); @@ -160,11 +186,11 @@ static int _ldlm_convert(struct ptlrpc_service *svc, struct ptlrpc_request *req) } dlm_req = lustre_msg_buf(req->rq_reqmsg, 0); dlm_rep = lustre_msg_buf(req->rq_repmsg, 0); - dlm_rep->flags = dlm_req->flags; + dlm_rep->lock_flags = dlm_req->lock_flags; res = ldlm_local_lock_convert(&dlm_req->lock_handle1, dlm_req->lock_desc.l_req_mode, - &dlm_rep->flags); 
+ &dlm_rep->lock_flags); req->rq_status = 0; if (ptlrpc_reply(svc, req) != 0) LBUG(); @@ -189,7 +215,7 @@ static int _ldlm_cancel(struct ptlrpc_service *svc, struct ptlrpc_request *req) } dlm_req = lustre_msg_buf(req->rq_reqmsg, 0); - lock = ldlm_handle2object(&dlm_req->lock_handle1); + lock = lustre_handle2object(&dlm_req->lock_handle1); res = ldlm_local_lock_cancel(lock); req->rq_status = 0; if (ptlrpc_reply(svc, req) != 0) @@ -222,18 +248,19 @@ static int _ldlm_callback(struct ptlrpc_service *svc, if (rc != 0) RETURN(rc); - lock1 = ldlm_handle2object(&dlm_req->lock_handle1); - lock2 = ldlm_handle2object(&dlm_req->lock_handle2); + lock1 = lustre_handle2object(&dlm_req->lock_handle1); + lock2 = lustre_handle2object(&dlm_req->lock_handle2); common_callback(lock1, lock2, dlm_req->lock_desc.l_granted_mode, NULL, 0); RETURN(0); } -static int ldlm_handle(struct obd_device *dev, struct ptlrpc_service *svc, +static int lustre_handle(struct obd_device *dev, struct ptlrpc_service *svc, struct ptlrpc_request *req) { - int rc; + struct obd_device *req_dev; + int id, rc; ENTRY; rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen); @@ -248,11 +275,16 @@ static int ldlm_handle(struct obd_device *dev, struct ptlrpc_service *svc, GOTO(out, rc = -EINVAL); } + id = req->rq_reqmsg->target_id; + if (id < 0 || id > MAX_OBD_DEVICES) + GOTO(out, rc = -ENODEV); + req_dev = req->rq_obd = &obd_dev[id]; + switch (req->rq_reqmsg->opc) { case LDLM_ENQUEUE: CDEBUG(D_INODE, "enqueue\n"); OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); - rc = _ldlm_enqueue(dev, svc, req); + rc = _ldlm_enqueue(req_dev, svc, req); break; case LDLM_CONVERT: @@ -329,13 +361,9 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf) int err; ENTRY; - obddev->obd_namespace = ldlm_namespace_new(obddev, 0); - if (obddev->obd_namespace == NULL) - LBUG(); - ldlm->ldlm_service = ptlrpc_init_svc(64 * 1024, LDLM_REQUEST_PORTAL, - LDLM_REPLY_PORTAL, "self", ldlm_handle); + LDLM_REPLY_PORTAL, "self", 
lustre_handle); if (!ldlm->ldlm_service) LBUG(); @@ -349,13 +377,16 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf) CERROR("cannot start thread\n"); LBUG(); } - - OBD_ALLOC(ldlm->ldlm_client, sizeof(*ldlm->ldlm_client)); - if (ldlm->ldlm_client == NULL) + err = ptlrpc_start_thread(obddev, ldlm->ldlm_service, "lustre_dlm"); + if (err) { + CERROR("cannot start thread\n"); LBUG(); - ptlrpc_init_client(NULL, NULL, - LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, - ldlm->ldlm_client); + } + err = ptlrpc_start_thread(obddev, ldlm->ldlm_service, "lustre_dlm"); + if (err) { + CERROR("cannot start thread\n"); + LBUG(); + } MOD_INC_USE_COUNT; RETURN(0); @@ -366,8 +397,6 @@ static int ldlm_cleanup(struct obd_device *obddev) struct ldlm_obd *ldlm = &obddev->u.ldlm; ENTRY; - ldlm_namespace_free(obddev->obd_namespace); - ptlrpc_stop_all_threads(ldlm->ldlm_service); rpc_unregister_service(ldlm->ldlm_service); @@ -379,6 +408,11 @@ static int ldlm_cleanup(struct obd_device *obddev) OBD_FREE(ldlm->ldlm_client, sizeof(*ldlm->ldlm_client)); OBD_FREE(ldlm->ldlm_service, sizeof(*ldlm->ldlm_service)); + if (mds_reint_p != NULL) + inter_module_put("mds_reint"); + if (mds_getattr_name_p != NULL) + inter_module_put("mds_getattr_name"); + MOD_DEC_USE_COUNT; RETURN(0); } @@ -427,7 +461,8 @@ EXPORT_SYMBOL(ldlm_lock_addref); EXPORT_SYMBOL(ldlm_lock_decref); EXPORT_SYMBOL(ldlm_cli_convert); EXPORT_SYMBOL(ldlm_cli_enqueue); -EXPORT_SYMBOL(ldlm_handle2object); +EXPORT_SYMBOL(ldlm_cli_cancel); +EXPORT_SYMBOL(lustre_handle2object); EXPORT_SYMBOL(ldlm_test); EXPORT_SYMBOL(ldlm_lock_dump); EXPORT_SYMBOL(ldlm_namespace_new); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 9275214eb1534052decd67e1bf0a0ec0a4255198..074d60e3a4d6a86037a78b2a1fa8f0a4a65212b4 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -14,36 +14,40 @@ #include <linux/lustre_dlm.h> int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, + 
struct ptlrpc_request *req, struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock_handle, + struct lustre_handle *parent_lock_handle, __u64 *res_id, __u32 type, - struct ldlm_extent *req_ex, + void *cookie, int cookielen, ldlm_mode_t mode, int *flags, + ldlm_lock_callback callback, void *data, __u32 data_len, - struct ldlm_handle *lockh) + struct lustre_handle *lockh) { struct ldlm_lock *lock; struct ldlm_request *body; struct ldlm_reply *reply; - struct ptlrpc_request *req; - char *bufs[2] = {NULL, data}; - int rc, size[2] = {sizeof(*body), data_len}; + int rc, size = sizeof(*body), req_passed_in = 1; ENTRY; *flags = 0; rc = ldlm_local_lock_create(ns, parent_lock_handle, res_id, type, mode, - NULL, 0, lockh); + data, data_len, lockh); if (rc != ELDLM_OK) GOTO(out, rc); - lock = ldlm_handle2object(lockh); + lock = lustre_handle2object(lockh); - req = ptlrpc_prep_req(cl, conn, LDLM_ENQUEUE, 2, size, bufs); - if (!req) - GOTO(out, rc = -ENOMEM); + if (req == NULL) { + req = ptlrpc_prep_req(cl, conn, LDLM_ENQUEUE, 1, &size, NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + req_passed_in = 0; + } else if (req->rq_reqmsg->buflens[0] != sizeof(*body)) + LBUG(); /* Dump all of this data into the request buffer */ body = lustre_msg_buf(req->rq_reqmsg, 0); @@ -52,10 +56,10 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, sizeof(body->lock_desc.l_resource.lr_name)); body->lock_desc.l_req_mode = mode; - if (req_ex) - memcpy(&body->lock_desc.l_extent, req_ex, + if (type == LDLM_EXTENT) + memcpy(&body->lock_desc.l_extent, cookie, sizeof(body->lock_desc.l_extent)); - body->flags = *flags; + body->lock_flags = *flags; memcpy(&body->lock_handle1, lockh, sizeof(body->lock_handle1)); @@ -64,8 +68,10 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, sizeof(body->lock_handle2)); /* Continue as normal. 
*/ - size[0] = sizeof(*reply); - req->rq_replen = lustre_msg_size(1, size); + if (!req_passed_in) { + size = sizeof(*reply); + req->rq_replen = lustre_msg_size(1, &size); + } rc = ptlrpc_queue_wait(req); rc = ptlrpc_check_status(req, rc); @@ -82,8 +88,9 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, reply = lustre_msg_buf(req->rq_repmsg, 0); memcpy(&lock->l_remote_handle, &reply->lock_handle, sizeof(lock->l_remote_handle)); - memcpy(req_ex, &reply->lock_extent, sizeof(*req_ex)); - *flags = reply->flags; + if (type == LDLM_EXTENT) + memcpy(cookie, &reply->lock_extent, sizeof(reply->lock_extent)); + *flags = reply->lock_flags; CDEBUG(D_INFO, "remote handle: %p, flags: %d\n", (void *)(unsigned long)reply->lock_handle.addr, *flags); @@ -91,9 +98,26 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, (unsigned long long)reply->lock_extent.start, (unsigned long long)reply->lock_extent.end); - ptlrpc_free_req(req); + if (*flags & LDLM_FL_LOCK_CHANGED) { + CDEBUG(D_INFO, "remote intent success, locking %ld instead of" + "%ld\n", (long)reply->lock_resource_name[0], + (long)lock->l_resource->lr_name[0]); + ldlm_resource_put(lock->l_resource); - rc = ldlm_local_lock_enqueue(lockh, req_ex, flags, NULL, NULL); + lock->l_resource = + ldlm_resource_get(ns, NULL, reply->lock_resource_name, + type, 1); + if (lock->l_resource == NULL) { + LBUG(); + RETURN(-ENOMEM); + } + } + + if (!req_passed_in) + ptlrpc_free_req(req); + + rc = ldlm_local_lock_enqueue(lockh, cookie, cookielen, flags, callback, + callback); if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV)) { @@ -116,14 +140,12 @@ int ldlm_cli_callback(struct ldlm_lock *lock, struct ldlm_lock *new, { struct ldlm_request *body; struct ptlrpc_request *req; - struct obd_device *obddev = lock->l_resource->lr_namespace->ns_obddev; - struct ptlrpc_client *cl = obddev->u.ldlm.ldlm_client; - int rc, size[2] = {sizeof(*body), data_len}; - char 
*bufs[2] = {NULL, data}; + struct ptlrpc_client *cl = &lock->l_resource->lr_namespace->ns_client; + int rc, size = sizeof(*body); ENTRY; - req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CALLBACK, 2, size, - bufs); + req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CALLBACK, 1, &size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -151,24 +173,22 @@ int ldlm_cli_callback(struct ldlm_lock *lock, struct ldlm_lock *new, return rc; } -int ldlm_cli_convert(struct ptlrpc_client *cl, struct ldlm_handle *lockh, +int ldlm_cli_convert(struct ptlrpc_client *cl, struct lustre_handle *lockh, int new_mode, int *flags) { struct ldlm_request *body; + struct ldlm_reply *reply; struct ldlm_lock *lock; struct ldlm_resource *res; struct ptlrpc_request *req; - int rc, size[2] = {sizeof(*body), 0}; - char *bufs[2] = {NULL, NULL}; + int rc, size = sizeof(*body); ENTRY; - lock = ldlm_handle2object(lockh); + lock = lustre_handle2object(lockh); *flags = 0; - size[1] = lock->l_data_len; - bufs[1] = lock->l_data; - req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CONVERT, 2, size, - bufs); + req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CONVERT, 1, &size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -177,17 +197,18 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct ldlm_handle *lockh, sizeof(body->lock_handle1)); body->lock_desc.l_req_mode = new_mode; - body->flags = *flags; + body->lock_flags = *flags; - req->rq_replen = lustre_msg_size(1, size); + size = sizeof(*reply); + req->rq_replen = lustre_msg_size(1, &size); rc = ptlrpc_queue_wait(req); rc = ptlrpc_check_status(req, rc); if (rc != ELDLM_OK) GOTO(out, rc); - body = lustre_msg_buf(req->rq_repmsg, 0); - res = ldlm_local_lock_convert(lockh, new_mode, &body->flags); + reply = lustre_msg_buf(req->rq_repmsg, 0); + res = ldlm_local_lock_convert(lockh, new_mode, &reply->lock_flags); if (res != NULL) ldlm_reprocess_all(res); if (lock->l_req_mode != lock->l_granted_mode) { @@ -210,24 +231,11 @@ int ldlm_cli_cancel(struct 
ptlrpc_client *cl, struct ldlm_lock *lock) struct ldlm_request *body; struct ptlrpc_request *req; struct ldlm_resource *res; - int rc, size[2] = {sizeof(*body), 0}; - char *bufs[2] = {NULL, NULL}; + int rc, size = sizeof(*body); ENTRY; - if (lock->l_data_len == sizeof(struct inode)) { - /* FIXME: do something better than throwing away everything */ - struct inode *inode = lock->l_data; - if (inode == NULL) - LBUG(); - down(&inode->i_sem); - invalidate_inode_pages(inode); - up(&inode->i_sem); - } - - size[1] = lock->l_data_len; - bufs[1] = lock->l_data; - req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CANCEL, 2, size, - bufs); + req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CANCEL, 1, &size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -246,6 +254,8 @@ int ldlm_cli_cancel(struct ptlrpc_client *cl, struct ldlm_lock *lock) res = ldlm_local_lock_cancel(lock); if (res != NULL) ldlm_reprocess_all(res); + else + rc = ELDLM_RESOURCE_FREED; EXIT; out: return rc; diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 4a8dcdec6eaffbae0a51e77cea6473ea687e1a20..da263a7549f8b27daf064c8b12c9a72edfb1b048 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -15,25 +15,27 @@ kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab; -struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obddev, - __u32 local) +struct ldlm_namespace *ldlm_namespace_new(__u32 local) { - struct ldlm_namespace *ns; + struct ldlm_namespace *ns = NULL; struct list_head *bucket; OBD_ALLOC(ns, sizeof(*ns)); if (!ns) { LBUG(); - RETURN(NULL); + GOTO(out, NULL); } + ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE); if (!ns->ns_hash) { - OBD_FREE(ns, sizeof(*ns)); LBUG(); - RETURN(NULL); + GOTO(out, ns); } - ns->ns_obddev = obddev; + ptlrpc_init_client(NULL, NULL, + LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, + &ns->ns_client); + INIT_LIST_HEAD(&ns->ns_root_list); ns->ns_lock = SPIN_LOCK_UNLOCKED; ns->ns_refcount = 0; @@ -42,32 +44,46 @@ struct 
ldlm_namespace *ldlm_namespace_new(struct obd_device *obddev, for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash; bucket--) INIT_LIST_HEAD(bucket); + RETURN(ns); - return ns; + out: + if (ns && ns->ns_hash) + vfree(ns->ns_hash); + if (ns) + OBD_FREE(ns, sizeof(*ns)); + return NULL; } static int cleanup_resource(struct ldlm_resource *res, struct list_head *q) { struct list_head *tmp, *pos; - int rc = 0; + int rc = 0, client = res->lr_namespace->ns_local; + ENTRY; list_for_each_safe(tmp, pos, q) { struct ldlm_lock *lock; + lock = list_entry(tmp, struct ldlm_lock, l_res_link); - if (rc) { - /* Res was already cleaned up. */ - LBUG(); - } + if (client) { + rc = ldlm_cli_cancel(lock->l_client, lock); + if (rc < 0) { + CERROR("ldlm_cli_cancel: %d\n", rc); + LBUG(); + } + if (rc == ELDLM_RESOURCE_FREED) + rc = 1; + } else { + CERROR("Freeing a lock still held by a client node.\n"); - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - spin_lock(&lock->l_lock); - ldlm_resource_del_lock(lock); - ldlm_lock_free(lock); + spin_lock(&lock->l_lock); + ldlm_resource_del_lock(lock); + ldlm_lock_free(lock); - rc = ldlm_resource_put(res); + rc = ldlm_resource_put(res); + } } - return rc; + RETURN(rc); } int ldlm_namespace_free(struct ldlm_namespace *ns) @@ -75,6 +91,8 @@ int ldlm_namespace_free(struct ldlm_namespace *ns) struct list_head *tmp, *pos; int i, rc; + if (!ns) + RETURN(ELDLM_OK); /* We should probably take the ns_lock, but then ldlm_resource_put * couldn't take it. Hmm. 
*/ for (i = 0; i < RES_HASH_SIZE; i++) { @@ -99,6 +117,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns) } vfree(ns->ns_hash /* , sizeof(struct list_head) * RES_HASH_SIZE */); + ptlrpc_cleanup_client(&ns->ns_client); OBD_FREE(ns, sizeof(*ns)); return ELDLM_OK; @@ -192,11 +211,12 @@ struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, struct list_head *bucket; struct list_head *tmp = bucket; struct ldlm_resource *res = NULL; - ENTRY; - if (ns->ns_hash == NULL) + if (ns == NULL || ns->ns_hash == NULL) { + LBUG(); RETURN(NULL); + } spin_lock(&ns->ns_lock); bucket = ns->ns_hash + ldlm_hash_fn(parent, name); @@ -278,6 +298,11 @@ int ldlm_resource_put(struct ldlm_resource *res) void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, struct ldlm_lock *lock) { + ldlm_resource_dump(res); + ldlm_lock_dump(lock); + if (!list_empty(&lock->l_res_link)) + LBUG(); + list_add(&lock->l_res_link, head); res->lr_refcount++; } @@ -289,7 +314,7 @@ void ldlm_resource_del_lock(struct ldlm_lock *lock) lock->l_resource->lr_refcount--; } -int ldlm_get_resource_handle(struct ldlm_resource *res, struct ldlm_handle *h) +int ldlm_get_resource_handle(struct ldlm_resource *res, struct lustre_handle *h) { LBUG(); return 0; diff --git a/lustre/ldlm/ldlm_test.c b/lustre/ldlm/ldlm_test.c index fc88099a2828ef9148094fe23fd87ffeb7036700..c1105538c9bf7168067521475ae194233fd3cde4 100644 --- a/lustre/ldlm/ldlm_test.c +++ b/lustre/ldlm/ldlm_test.c @@ -26,23 +26,23 @@ int ldlm_test_basics(struct obd_device *obddev) struct ldlm_resource *res; __u64 res_id[RES_NAME_SIZE] = {1, 2, 3}; ldlm_error_t err; - struct ldlm_handle lockh_1, lockh_2; + struct lustre_handle lockh_1, lockh_2; int flags; - ns = ldlm_namespace_new(obddev, 0); + ns = ldlm_namespace_new(LDLM_NAMESPACE_SERVER); if (ns == NULL) LBUG(); err = ldlm_local_lock_create(ns, NULL, res_id, LDLM_PLAIN, LCK_CR, NULL, 0, &lockh_1); - err = ldlm_local_lock_enqueue(&lockh_1, NULL, &flags, + err = 
ldlm_local_lock_enqueue(&lockh_1, NULL, 0, &flags, ldlm_test_callback, ldlm_test_callback); if (err != ELDLM_OK) LBUG(); err = ldlm_local_lock_create(ns, NULL, res_id, LDLM_PLAIN, LCK_EX, NULL, 0, &lockh_2); - err = ldlm_local_lock_enqueue(&lockh_2, NULL, &flags, + err = ldlm_local_lock_enqueue(&lockh_2, NULL, 0, &flags, ldlm_test_callback, ldlm_test_callback); if (err != ELDLM_OK) LBUG(); @@ -71,18 +71,19 @@ int ldlm_test_extents(struct obd_device *obddev) struct ldlm_lock *lock; __u64 res_id[RES_NAME_SIZE] = {0, 0, 0}; struct ldlm_extent ext1 = {4, 6}, ext2 = {6, 9}, ext3 = {10, 11}; - struct ldlm_handle ext1_h, ext2_h, ext3_h; + struct lustre_handle ext1_h, ext2_h, ext3_h; ldlm_error_t err; int flags; - ns = ldlm_namespace_new(obddev, 0); + ns = ldlm_namespace_new(LDLM_NAMESPACE_SERVER); if (ns == NULL) LBUG(); flags = 0; err = ldlm_local_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL, 0, &ext1_h); - err = ldlm_local_lock_enqueue(&ext1_h, &ext1, &flags, NULL, NULL); + err = ldlm_local_lock_enqueue(&ext1_h, &ext1, sizeof(ext1), &flags, + NULL, NULL); if (err != ELDLM_OK) LBUG(); if (!(flags & LDLM_FL_LOCK_CHANGED)) @@ -91,7 +92,8 @@ int ldlm_test_extents(struct obd_device *obddev) flags = 0; err = ldlm_local_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL, 0, &ext2_h); - err = ldlm_local_lock_enqueue(&ext2_h, &ext2, &flags, NULL, NULL); + err = ldlm_local_lock_enqueue(&ext2_h, &ext2, sizeof(ext2), &flags, + NULL, NULL); if (err != ELDLM_OK) LBUG(); if (!(flags & LDLM_FL_LOCK_CHANGED)) @@ -100,7 +102,8 @@ int ldlm_test_extents(struct obd_device *obddev) flags = 0; err = ldlm_local_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_EX, NULL, 0, &ext3_h); - err = ldlm_local_lock_enqueue(&ext3_h, &ext3, &flags, NULL, NULL); + err = ldlm_local_lock_enqueue(&ext3_h, &ext3, sizeof(ext3), &flags, + NULL, NULL); if (err != ELDLM_OK) LBUG(); if (!(flags & LDLM_FL_BLOCK_GRANTED)) @@ -115,7 +118,7 @@ int ldlm_test_extents(struct obd_device *obddev) 
ldlm_reprocess_all(res); flags = 0; - lock = ldlm_handle2object(&ext2_h); + lock = lustre_handle2object(&ext2_h); res = ldlm_local_lock_cancel(lock); if (res != NULL) ldlm_reprocess_all(res); @@ -137,13 +140,14 @@ static int ldlm_test_network(struct obd_device *obddev, __u64 res_id[RES_NAME_SIZE] = {1, 2, 3}; struct ldlm_extent ext = {4, 6}; - struct ldlm_handle lockh1; + struct lustre_handle lockh1; int flags = 0; ldlm_error_t err; - err = ldlm_cli_enqueue(ldlm->ldlm_client, conn, obddev->obd_namespace, - NULL, res_id, LDLM_EXTENT, &ext, LCK_PR, &flags, - NULL, 0, &lockh1); + err = ldlm_cli_enqueue(ldlm->ldlm_client, conn, NULL, + obddev->obd_namespace, NULL, res_id, LDLM_EXTENT, + &ext, sizeof(ext), LCK_PR, &flags, NULL, NULL, 0, + &lockh1); CERROR("ldlm_cli_enqueue: %d\n", err); RETURN(err); diff --git a/lustre/lib/mds_updates.c b/lustre/lib/mds_updates.c index 44416614bccc9f27c63c23c196779e7b0acfe899..70e6a40d50a965e96d1eccaa0e569cc03ca95e68 100644 --- a/lustre/lib/mds_updates.c +++ b/lustre/lib/mds_updates.c @@ -36,6 +36,33 @@ #include <linux/lustre_mds.h> #include <linux/lustre_lite.h> +void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode) +{ + fid->id = HTON__u64(inode->i_ino); + fid->generation = HTON__u32(inode->i_generation); + fid->f_type = HTON__u32(S_IFMT & inode->i_mode); +} + + +void mds_pack_inode2body(struct mds_body *b, struct inode *inode) +{ + b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLNLINK | OBD_MD_FLGENER | OBD_MD_FLSIZE | OBD_MD_FLOBJID; + b->size = HTON__u64(inode->i_size); + b->mode = HTON__u32(inode->i_mode); + b->uid = HTON__u32(inode->i_uid); + b->gid = HTON__u32(inode->i_gid); + b->mtime = HTON__u32(inode->i_mtime); + b->ctime = HTON__u32(inode->i_ctime); + b->atime = HTON__u32(inode->i_atime); + b->flags = HTON__u32(inode->i_flags); + //b->major = HTON__u32(inode->i_major); + //b->minor = HTON__u32(inode->i_minor); + b->ino = 
HTON__u32(inode->i_ino); + b->nlink = HTON__u32(inode->i_nlink); + b->generation = HTON__u32(inode->i_generation); +} + + void mds_pack_fid(struct ll_fid *fid) { fid->id = HTON__u64(fid->id); @@ -68,6 +95,22 @@ static void mds_pack_body(struct mds_body *b) b->last_xid = HTON__u32(b->last_xid); } +void mds_getattr_pack(struct ptlrpc_request *req, int offset, + struct inode *inode, + const char *name, int namelen) +{ + struct mds_body *rec; + rec = lustre_msg_buf(req->rq_reqmsg, offset); + + ll_inode2fid(&rec->fid1, inode); + if (name) { + char *tmp; + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1); + LOGL0(name, namelen, tmp); + } +} + + void mds_pack_req_body(struct ptlrpc_request *req) { struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0); @@ -82,9 +125,15 @@ void mds_pack_rep_body(struct ptlrpc_request *req) /* packing of MDS records */ -void mds_create_pack(struct mds_rec_create *rec, struct inode *inode, - __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time) +void mds_create_pack(struct ptlrpc_request *req, int offset, + struct inode *inode, __u32 mode, __u64 rdev, __u32 uid, + __u32 gid, __u64 time, const char *name, int namelen, + const char *tgt, int tgtlen) { + struct mds_rec_create *rec; + char *tmp; + rec = lustre_msg_buf(req->rq_reqmsg, offset); + /* XXX do something about time, uid, gid */ rec->cr_opcode = HTON__u32(REINT_CREATE); ll_inode2fid(&rec->cr_fid, inode); @@ -93,11 +142,23 @@ void mds_create_pack(struct mds_rec_create *rec, struct inode *inode, rec->cr_uid = HTON__u32(uid); rec->cr_gid = HTON__u32(gid); rec->cr_time = HTON__u64(time); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1); + LOGL0(name, namelen, tmp); + + if (tgt) { + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2); + LOGL0(tgt, tgtlen, tmp); + } } -void mds_setattr_pack(struct mds_rec_setattr *rec, struct inode *inode, - struct iattr *iattr) +void mds_setattr_pack(struct ptlrpc_request *req, int offset, + struct inode *inode, struct iattr *iattr, + const char *name, 
int namelen) { + struct mds_rec_setattr *rec; + rec = lustre_msg_buf(req->rq_reqmsg, offset); + rec->sa_opcode = HTON__u32(REINT_SETATTR); ll_inode2fid(&rec->sa_fid, inode); rec->sa_valid = HTON__u32(iattr->ia_valid); @@ -109,31 +170,69 @@ void mds_setattr_pack(struct mds_rec_setattr *rec, struct inode *inode, rec->sa_mtime = HTON__u64(iattr->ia_mtime); rec->sa_ctime = HTON__u64(iattr->ia_ctime); rec->sa_attr_flags = HTON__u32(iattr->ia_attr_flags); + + if (namelen) { + char *tmp; + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1); + LOGL0(name, namelen, tmp); + } } -void mds_unlink_pack(struct mds_rec_unlink *rec, struct inode *inode, - struct inode *child) +void mds_unlink_pack(struct ptlrpc_request *req, int offset, + struct inode *inode, struct inode *child, + const char *name, int namelen) { + struct mds_rec_unlink *rec; + char *tmp; + + rec = lustre_msg_buf(req->rq_reqmsg, offset); + rec->ul_opcode = HTON__u32(REINT_UNLINK); ll_inode2fid(&rec->ul_fid1, inode); ll_inode2fid(&rec->ul_fid2, child); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1); + LOGL0(name, namelen, tmp); } -void mds_link_pack(struct mds_rec_link *rec, - struct inode *inode, struct inode *dir) +void mds_link_pack(struct ptlrpc_request *req, int offset, + struct inode *inode, struct inode *dir, + const char *name, int namelen) { + struct mds_rec_link *rec; + char *tmp; + + rec = lustre_msg_buf(req->rq_reqmsg, offset); + rec->lk_opcode = HTON__u32(REINT_LINK); ll_inode2fid(&rec->lk_fid1, inode); ll_inode2fid(&rec->lk_fid2, dir); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1); + LOGL0(name, namelen, tmp); } -void mds_rename_pack(struct mds_rec_rename *rec, struct inode *srcdir, - struct inode *tgtdir) +void mds_rename_pack(struct ptlrpc_request *req, int offset, + struct inode *srcdir, struct inode *tgtdir, + const char *old, int oldlen, const char *new, int newlen) { + struct mds_rec_rename *rec; + char *tmp; + + rec = lustre_msg_buf(req->rq_reqmsg, offset); + /* XXX do something 
about time, uid, gid */ rec->rn_opcode = HTON__u32(REINT_RENAME); ll_inode2fid(&rec->rn_fid1, srcdir); ll_inode2fid(&rec->rn_fid2, tgtdir); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1); + LOGL0(old, oldlen, tmp); + + if (new) { + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2); + LOGL0(new, newlen, tmp); + } } /* unpacking */ @@ -144,7 +243,7 @@ void mds_unpack_fid(struct ll_fid *fid) fid->f_type = NTOH__u32(fid->f_type); } -static void mds_unpack_body(struct mds_body *b) +void mds_unpack_body(struct mds_body *b) { if (b == NULL) LBUG(); @@ -169,28 +268,15 @@ static void mds_unpack_body(struct mds_body *b) b->last_xid = NTOH__u32(b->last_xid); } - -void mds_unpack_req_body(struct ptlrpc_request *req) -{ - struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0); - mds_unpack_body(b); -} - -void mds_unpack_rep_body(struct ptlrpc_request *req) -{ - struct mds_body *b = lustre_msg_buf(req->rq_repmsg, 0); - mds_unpack_body(b); -} - -static int mds_setattr_unpack(struct ptlrpc_request *req, +static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, struct mds_update_record *r) { struct iattr *attr = &r->ur_iattr; - struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0); + struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset); ENTRY; - if (req->rq_reqmsg->bufcount != 1 || - req->rq_reqmsg->buflens[0] != sizeof(*rec)) + if (req->rq_reqmsg->bufcount < offset + 1 || + req->rq_reqmsg->buflens[offset] != sizeof(*rec)) RETURN(-EFAULT); r->ur_fid1 = &rec->sa_fid; @@ -203,17 +289,24 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, attr->ia_mtime = NTOH__u64(rec->sa_mtime); attr->ia_ctime = NTOH__u64(rec->sa_ctime); attr->ia_attr_flags = NTOH__u32(rec->sa_attr_flags); + + if (req->rq_reqmsg->bufcount == offset + 2) { + r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; + r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1); + } else + r->ur_namelen = 0; + RETURN(0); } -static int mds_create_unpack(struct ptlrpc_request 
*req, +static int mds_create_unpack(struct ptlrpc_request *req, int offset, struct mds_update_record *r) { - struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, 0); + struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, offset); ENTRY; - if (req->rq_reqmsg->bufcount < 2 || - req->rq_reqmsg->buflens[0] != sizeof(*rec)) + if (req->rq_reqmsg->bufcount != offset + 3 || + req->rq_reqmsg->buflens[offset] != sizeof(*rec)) RETURN(-EFAULT); r->ur_fid1 = &rec->cr_fid; @@ -223,74 +316,72 @@ static int mds_create_unpack(struct ptlrpc_request *req, r->ur_gid = NTOH__u32(rec->cr_gid); r->ur_time = NTOH__u64(rec->cr_time); - r->ur_name = lustre_msg_buf(req->rq_reqmsg, 1); - r->ur_namelen = req->rq_reqmsg->buflens[1]; + r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1); + r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; - if (S_ISLNK(r->ur_mode)) { - r->ur_tgt = lustre_msg_buf(req->rq_reqmsg, 2); - r->ur_tgtlen = req->rq_reqmsg->buflens[2]; - } + r->ur_tgt = lustre_msg_buf(req->rq_reqmsg, offset + 2); + r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2]; RETURN(0); } -static int mds_link_unpack(struct ptlrpc_request *req, +static int mds_link_unpack(struct ptlrpc_request *req, int offset, struct mds_update_record *r) { - struct mds_rec_link *rec = lustre_msg_buf(req->rq_reqmsg, 0); + struct mds_rec_link *rec = lustre_msg_buf(req->rq_reqmsg, offset); ENTRY; - if (req->rq_reqmsg->bufcount != 2 || - req->rq_reqmsg->buflens[0] != sizeof(*rec)) + if (req->rq_reqmsg->bufcount != offset + 2 || + req->rq_reqmsg->buflens[offset] != sizeof(*rec)) RETURN(-EFAULT); r->ur_fid1 = &rec->lk_fid1; r->ur_fid2 = &rec->lk_fid2; - r->ur_name = lustre_msg_buf(req->rq_reqmsg, 1); - r->ur_namelen = req->rq_reqmsg->buflens[1]; + r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1); + r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; RETURN(0); } -static int mds_unlink_unpack(struct ptlrpc_request *req, +static int mds_unlink_unpack(struct ptlrpc_request *req, int offset, 
struct mds_update_record *r) { - struct mds_rec_unlink *rec = lustre_msg_buf(req->rq_reqmsg, 0); + struct mds_rec_unlink *rec = lustre_msg_buf(req->rq_reqmsg, offset); ENTRY; - if (req->rq_reqmsg->bufcount != 2 || - req->rq_reqmsg->buflens[0] != sizeof(*rec)) + if (req->rq_reqmsg->bufcount != offset + 2 || + req->rq_reqmsg->buflens[offset] != sizeof(*rec)) RETURN(-EFAULT); r->ur_fid1 = &rec->ul_fid1; r->ur_fid2 = &rec->ul_fid2; - r->ur_name = lustre_msg_buf(req->rq_reqmsg, 1); - r->ur_namelen = req->rq_reqmsg->buflens[1]; + r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1); + r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; RETURN(0); } -static int mds_rename_unpack(struct ptlrpc_request *req, +static int mds_rename_unpack(struct ptlrpc_request *req, int offset, struct mds_update_record *r) { - struct mds_rec_rename *rec = lustre_msg_buf(req->rq_reqmsg, 0); + struct mds_rec_rename *rec = lustre_msg_buf(req->rq_reqmsg, offset); ENTRY; - if (req->rq_reqmsg->bufcount != 3 || - req->rq_reqmsg->buflens[0] != sizeof(*rec)) + if (req->rq_reqmsg->bufcount != offset + 3 || + req->rq_reqmsg->buflens[offset] != sizeof(*rec)) RETURN(-EFAULT); r->ur_fid1 = &rec->rn_fid1; r->ur_fid2 = &rec->rn_fid2; - r->ur_name = lustre_msg_buf(req->rq_reqmsg, 1); - r->ur_namelen = req->rq_reqmsg->buflens[1]; + r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1); + r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; - r->ur_tgt = lustre_msg_buf(req->rq_reqmsg, 2); - r->ur_tgtlen = req->rq_reqmsg->buflens[2]; + r->ur_tgt = lustre_msg_buf(req->rq_reqmsg, offset + 2); + r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2]; RETURN(0); } -typedef int (*update_unpacker)(struct ptlrpc_request *req, +typedef int (*update_unpacker)(struct ptlrpc_request *req, int offset, struct mds_update_record *r); static update_unpacker mds_unpackers[REINT_MAX + 1] = { @@ -302,13 +393,15 @@ static update_unpacker mds_unpackers[REINT_MAX + 1] = { [REINT_RECREATE] mds_create_unpack, }; -int 
mds_update_unpack(struct ptlrpc_request *req, struct mds_update_record *rec) +int mds_update_unpack(struct ptlrpc_request *req, int offset, + struct mds_update_record *rec) { - struct mds_update_record_hdr *hdr = lustre_msg_buf(req->rq_reqmsg, 0); + struct mds_update_record_hdr *hdr = + lustre_msg_buf(req->rq_reqmsg, offset); int rc; ENTRY; - if (!hdr || req->rq_reqmsg->buflens[0] < sizeof(__u32)) + if (!hdr || req->rq_reqmsg->buflens[offset] < sizeof(*hdr)) RETURN(-EFAULT); rec->ur_opcode = NTOH__u32(hdr->ur_opcode); @@ -316,6 +409,6 @@ int mds_update_unpack(struct ptlrpc_request *req, struct mds_update_record *rec) if (rec->ur_opcode < 0 || rec->ur_opcode > REINT_MAX) RETURN(-EFAULT); - rc = mds_unpackers[rec->ur_opcode](req, rec); + rc = mds_unpackers[rec->ur_opcode](req, offset, rec); RETURN(rc); } diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am index defc5327be34aee2afdf8a1a48137b836e9bf10c..88aa73c83d9ebfca051c118902e381d5af296ddf 100644 --- a/lustre/llite/Makefile.am +++ b/lustre/llite/Makefile.am @@ -14,7 +14,7 @@ page.c: LINX=page.c -llite_SOURCES = recover.c commit_callback.c page.c super.c rw.c file.c dir.c sysctl.c namei.c symlink.c +llite_SOURCES = dcache.c recover.c commit_callback.c page.c super.c rw.c file.c dir.c sysctl.c namei.c symlink.c dist-hook: list='$(LINX)'; for f in $$list; do rm -f $(distdir)/$$f; done diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 433056d4488abf2e8fbdbcf8ca2544b4034a3bdf..923f87b9f24c8cbfa851ee38b2e5fbad273821aa 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -1,4 +1,5 @@ -/* +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: * * This code is issued under the GNU General Public License. 
* See the file COPYING in this distribution @@ -18,15 +19,34 @@ extern struct address_space_operations ll_aops; +void ll_intent_release(struct dentry *de) +{ + struct ldlm_lock *lock; + struct lustre_handle *handle; + ENTRY; + + if (de->d_it == NULL) { + EXIT; + return; + } + + handle = (struct lustre_handle *)de->d_it->it_lock_handle; + lock = lustre_handle2object(handle); + CDEBUG(D_INFO, "calling ldlm_lock_decref(%p, %d)\n", lock, + de->d_it->it_lock_mode); + ldlm_lock_decref(lock, de->d_it->it_lock_mode); + EXIT; +} + int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) { ENTRY; - - RETURN(1); + RETURN(0); } struct dentry_operations ll_d_ops = { - d_revalidate2: ll_revalidate2 + d_revalidate2: ll_revalidate2, + d_intent_release: ll_intent_release }; diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 3d91ae641cfb0e9cf48a1f505b7f9a4e52d82486..ffb3696d0f7e6c9b72104ea7a69f2d08bdc0094b 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -41,6 +41,7 @@ #include <linux/lustre_idl.h> #include <linux/lustre_mds.h> #include <linux/lustre_lite.h> +#include <linux/lustre_dlm.h> typedef struct ext2_dir_entry_2 ext2_dirent; @@ -62,6 +63,8 @@ static int ll_dir_readpage(struct file *file, struct page *page) __u64 offset; int rc = 0; struct ptlrpc_request *request = NULL; + struct lustre_handle lockh; + struct lookup_intent it = {IT_READDIR }; ENTRY; @@ -72,6 +75,11 @@ static int ll_dir_readpage(struct file *file, struct page *page) goto readpage_out; } + rc = ll_lock(inode, NULL, &it, &lockh); + if (rc != ELDLM_OK) + CERROR("lock enqueue: err: %d\n", rc); + ldlm_lock_dump((void *)(unsigned long)lockh.addr); + if (Page_Uptodate(page)) { CERROR("Explain this please?\n"); EXIT; @@ -80,7 +88,7 @@ static int ll_dir_readpage(struct file *file, struct page *page) offset = page->index << PAGE_SHIFT; buf = kmap(page); - rc = mdc_readpage(&sbi->ll_mds_client, sbi->ll_mds_conn, inode->i_ino, + rc = mdc_readpage(&sbi->ll_mdc_conn, inode->i_ino, 
S_IFDIR, offset, buf, &request); kunmap(page); ptlrpc_free_req(request); @@ -91,6 +99,9 @@ static int ll_dir_readpage(struct file *file, struct page *page) SetPageUptodate(page); UnlockPage(page); + rc = ll_unlock(LCK_PR, &lockh); + if (rc != ELDLM_OK) + CERROR("ll_unlock: err: %d\n", rc); return rc; } /* ll_dir_readpage */ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index f43bfa06b5a97ff1880610e0b7009f1bb0fbfdf0..57e4cca7c408b091e95ac9e54d58981b48da488c 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -38,19 +38,41 @@ static int ll_file_open(struct inode *inode, struct file *file) struct ll_file_data *fd; struct obdo *oa; struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ll_inode_info *lli = ll_i2info(inode); + __u64 id = 0; ENTRY; if (file->private_data) LBUG(); + /* delayed create of object (intent created inode) */ + /* XXX object needs to be cleaned up if mdc_open fails */ + /* XXX error handling appropriate here? */ + if (lli->lli_obdo == NULL) { + struct inode * inode = file->f_dentry->d_inode; + + oa = lli->lli_obdo = obdo_alloc(); + oa->o_valid = OBD_MD_FLMODE; + oa->o_mode = S_IFREG | 0600; + rc = obd_create(ll_i2obdconn(inode), oa); + if (rc) + RETURN(rc); + lli->lli_flags &= ~OBD_FL_CREATEONOPEN; + } + + oa = lli->lli_obdo; + if (oa == NULL) { + LBUG(); + GOTO(out_mdc, rc = -EINVAL); + } + fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL); if (!fd) GOTO(out, rc = -ENOMEM); memset(fd, 0, sizeof(*fd)); - rc = mdc_open(&sbi->ll_mds_client, sbi->ll_mds_conn, inode->i_ino, - S_IFREG, file->f_flags, (__u64)(unsigned long)file, - &fd->fd_mdshandle, &req); + rc = mdc_open(&sbi->ll_mdc_conn, inode->i_ino, S_IFREG, file->f_flags, + id, (__u64)(unsigned long)file, &fd->fd_mdshandle, &req); fd->fd_req = req; ptlrpc_req_finished(req); if (rc) @@ -62,15 +84,9 @@ static int ll_file_open(struct inode *inode, struct file *file) if (!fd->fd_mdshandle) CERROR("mdc_open didn't assign fd_mdshandle\n"); - oa = ll_i2info(inode)->lli_obdo; - if (oa 
== NULL) { - LBUG(); - GOTO(out_mdc, rc = -ENOMEM); - } rc = obd_open(ll_i2obdconn(inode), oa); - if (rc) { + if (rc) GOTO(out_mdc, rc = -abs(rc)); - } file->private_data = fd; @@ -78,7 +94,7 @@ static int ll_file_open(struct inode *inode, struct file *file) return 0; out_mdc: - mdc_close(&sbi->ll_mds_client, sbi->ll_mds_conn, inode->i_ino, + mdc_close(&sbi->ll_mdc_conn, inode->i_ino, S_IFREG, fd->fd_mdshandle, &req); out_req: ptlrpc_free_req(req); @@ -130,11 +146,11 @@ static int ll_file_release(struct inode *inode, struct file *file) } } - rc = mdc_close(&sbi->ll_mds_client, sbi->ll_mds_conn, inode->i_ino, + rc = mdc_close(&sbi->ll_mdc_conn, inode->i_ino, S_IFREG, fd->fd_mdshandle, &req); ptlrpc_req_finished(req); - if (rc) { - if (rc > 0) + if (rc) { + if (rc > 0) rc = -rc; GOTO(out, rc); } @@ -180,6 +196,32 @@ static void ll_update_atime(struct inode *inode) ll_inode_setattr(inode, &attr, 0); } +static int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock *new, + void *data, __u32 data_len) +{ + struct inode *inode = lock->l_data; + ENTRY; + + if (new == NULL) { + /* Completion AST. Do nothing. 
*/ + RETURN(0); + } + + if (data_len != sizeof(struct inode)) + LBUG(); + + /* FIXME: do something better than throwing away everything */ + if (inode == NULL) + LBUG(); + down(&inode->i_sem); + invalidate_inode_pages(inode); + up(&inode->i_sem); + + if (ldlm_cli_cancel(lock->l_client, lock) < 0) + LBUG(); + RETURN(0); +} + static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { @@ -187,7 +229,7 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, struct inode *inode = filp->f_dentry->d_inode; struct ll_sb_info *sbi = ll_i2sbi(inode); struct ldlm_extent extent; - struct ldlm_handle lockh; + struct lustre_handle lockh; __u64 res_id[RES_NAME_SIZE] = {inode->i_ino}; int flags = 0; ldlm_error_t err; @@ -200,9 +242,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, CDEBUG(D_INFO, "Locking inode %ld, start %Lu end %Lu\n", inode->i_ino, extent.start, extent.end); - err = obd_enqueue(&sbi->ll_conn, sbi->ll_namespace, NULL, - res_id, LDLM_EXTENT, &extent, LCK_PR, &flags, - inode, sizeof(*inode), &lockh); + err = obd_enqueue(&sbi->ll_osc_conn, NULL, res_id, LDLM_EXTENT, + &extent, sizeof(extent), LCK_PR, &flags, + ll_lock_callback, inode, sizeof(*inode), + &lockh); if (err != ELDLM_OK) CERROR("lock enqueue: err: %d\n", err); ldlm_lock_dump((void *)(unsigned long)lockh.addr); @@ -211,11 +254,12 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, CDEBUG(D_INFO, "Reading inode %ld, %d bytes, offset %Ld\n", inode->i_ino, count, *ppos); retval = generic_file_read(filp, buf, count, ppos); + if (retval > 0) ll_update_atime(inode); if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK)) { - err = obd_cancel(&sbi->ll_conn, LCK_PR, &lockh); + err = obd_cancel(&sbi->ll_osc_conn, LCK_PR, &lockh); if (err != ELDLM_OK) CERROR("lock cancel: err: %d\n", err); } @@ -233,7 +277,7 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) struct inode *inode = 
file->f_dentry->d_inode; struct ll_sb_info *sbi = ll_i2sbi(inode); struct ldlm_extent extent; - struct ldlm_handle lockh; + struct lustre_handle lockh; __u64 res_id[RES_NAME_SIZE] = {inode->i_ino}; int flags = 0; ldlm_error_t err; @@ -246,9 +290,10 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) CDEBUG(D_INFO, "Locking inode %ld, start %Lu end %Lu\n", inode->i_ino, extent.start, extent.end); - err = obd_enqueue(&sbi->ll_conn, sbi->ll_namespace, NULL, - res_id, LDLM_EXTENT, &extent, LCK_PW, &flags, - inode, sizeof(*inode), &lockh); + err = obd_enqueue(&sbi->ll_osc_conn, NULL, res_id, LDLM_EXTENT, + &extent, sizeof(extent), LCK_PW, &flags, + ll_lock_callback, inode, sizeof(*inode), + &lockh); if (err != ELDLM_OK) CERROR("lock enqueue: err: %d\n", err); ldlm_lock_dump((void *)(unsigned long)lockh.addr); @@ -260,7 +305,7 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) retval = generic_file_write(file, buf, count, ppos); if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK)) { - err = obd_cancel(&sbi->ll_conn, LCK_PW, &lockh); + err = obd_cancel(&sbi->ll_osc_conn, LCK_PW, &lockh); if (err != ELDLM_OK) CERROR("lock cancel: err: %d\n", err); } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 70990cb45d83931cdb6b2f1b1ff9658dc1ec6ea4..e91e03da714e51967751430b4449d11398ef622a 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1,4 +1,5 @@ -/* +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: * * This code is issued under the GNU General Public License. * See the file COPYING in this distribution @@ -18,12 +19,12 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 * Directory entry file type support and forward compatibility hooks * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 - * + * * Changes for use in OBDFS * Copyright (c) 1999, Seagate Technology Inc. * Copyright (C) 2001, Cluster File Systems, Inc. 
* Rewritten based on recent ext2 page cache use. - * + * */ #include <linux/fs.h> @@ -34,6 +35,7 @@ #include <linux/obd_support.h> #include <linux/lustre_lite.h> +#include <linux/lustre_dlm.h> extern struct address_space_operations ll_aops; /* from super.c */ @@ -60,7 +62,7 @@ static inline void ext2_inc_count(struct inode *inode) inode->i_nlink++; } -/* postpone the disk update until the inode really goes away */ +/* postpone the disk update until the inode really goes away */ static inline void ext2_dec_count(struct inode *inode) { inode->i_nlink--; @@ -90,76 +92,146 @@ static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque) return 1; } -static struct dentry *ll_lookup(struct inode * dir, struct dentry *dentry) +extern struct dentry_operations ll_d_ops; + +int ll_lock(struct inode *dir, struct dentry *dentry, + struct lookup_intent *it, struct lustre_handle *lockh) +{ + struct ll_sb_info *sbi = ll_i2sbi(dir); + int err; + + if ((it->it_op & (IT_CREAT | IT_MKDIR | IT_SYMLINK | IT_SETATTR | + IT_MKNOD)) ) + err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_MDSINTENT, + it, LCK_PW, dir, dentry, lockh, 0, NULL, 0, + dir, sizeof(*dir)); + else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN)) + err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_MDSINTENT, + it, LCK_PR, dir, dentry, lockh, 0, NULL, 0, + dir, sizeof(*dir)); + else + LBUG(); + + RETURN(err); +} + +int ll_unlock(__u32 mode, struct lustre_handle *lockh) +{ + struct ldlm_lock *lock; + ENTRY; + + lock = lustre_handle2object(lockh); + ldlm_lock_decref(lock, mode); + + RETURN(0); +} + +static struct dentry *ll_lookup2(struct inode * dir, struct dentry *dentry, + struct lookup_intent *it) { struct ptlrpc_request *request = NULL; struct inode * inode = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); struct ll_inode_md md; - int err, type; + struct lustre_handle lockh; + int err, type, offset; ino_t ino; ENTRY; + + CDEBUG(D_INFO, "name: %*s, intent op: %d\n", dentry->d_name.len, + dentry->d_name.name, 
it->it_op); + if (dentry->d_name.len > EXT2_NAME_LEN) RETURN(ERR_PTR(-ENAMETOOLONG)); - ino = ll_inode_by_name(dir, dentry, &type); - if (!ino) + err = ll_lock(dir, dentry, it, &lockh); + memcpy(it->it_lock_handle, &lockh, sizeof(lockh)); + + if ( (it->it_op & (IT_CREAT | IT_MKDIR | IT_SYMLINK)) && + it->it_disposition && !it->it_status) GOTO(negative, NULL); - err = mdc_getattr(&sbi->ll_mds_client, sbi->ll_mds_conn, ino, type, - OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); - if (err) { - CERROR("failure %d inode %ld\n", err, (long)ino); - ptlrpc_free_req(request); - RETURN(ERR_PTR(-abs(err))); + if ( (it->it_op & (IT_GETATTR)) && + it->it_disposition && it->it_status) + GOTO(negative, NULL); + + if (!it->it_disposition) { + struct ll_inode_info *lli = ll_i2info(dir); + memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh)); + + ino = ll_inode_by_name(dir, dentry, &type); + + err = mdc_getattr(&sbi->ll_mdc_conn, ino, type, + OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); + if (err) { + CERROR("failure %d inode %ld\n", err, (long)ino); + ptlrpc_free_req(request); + RETURN(ERR_PTR(-abs(err))); + } + offset = 0; + } else { + offset = 1; + request = (struct ptlrpc_request *)it->it_data; } if (S_ISREG(type)) { - if (request->rq_repmsg->bufcount < 2 || - request->rq_repmsg->buflens[1] != sizeof(struct obdo)) + if (request->rq_repmsg->bufcount < offset + 2 || + request->rq_repmsg->buflens[offset + 1] != + sizeof(struct obdo)) LBUG(); - md.obdo = lustre_msg_buf(request->rq_repmsg, 1); + md.obdo = lustre_msg_buf(request->rq_repmsg, offset + 1); } else md.obdo = NULL; - md.body = lustre_msg_buf(request->rq_repmsg, 0); + md.body = lustre_msg_buf(request->rq_repmsg, offset); inode = iget4(dir->i_sb, ino, ll_find_inode, &md); + if (it->it_op & IT_RENAME) + it->it_data = dentry; + ptlrpc_free_req(request); - if (!inode) + if (!inode) RETURN(ERR_PTR(-ENOMEM)); EXIT; negative: + dentry->d_op = &ll_d_ops; d_add(dentry, inode); return NULL; } -static struct inode 
*ll_create_node(struct inode *dir, const char *name, - int namelen, const char *tgt, int tgtlen, - int mode, __u64 extra, struct obdo *obdo) +static struct inode *ll_create_node(struct inode *dir, const char *name, + int namelen, const char *tgt, int tgtlen, + int mode, __u64 extra, + struct lookup_intent *it, struct obdo *obdo) { struct inode *inode; struct ptlrpc_request *request = NULL; struct mds_body *body; - int err; + int rc; time_t time = CURRENT_TIME; struct ll_sb_info *sbi = ll_i2sbi(dir); struct ll_inode_md md; ENTRY; - err = mdc_create(&sbi->ll_mds_client, sbi->ll_mds_conn, dir, name, - namelen, tgt, tgtlen, mode, current->fsuid, - current->fsgid, time, extra, obdo, &request); - if (err) { - inode = ERR_PTR(err); - GOTO(out, err); + if (!it->it_disposition) { + rc = mdc_create(&sbi->ll_mdc_conn, dir, name, namelen, tgt, + tgtlen, mode, current->fsuid, + current->fsgid, time, extra, obdo, &request); + if (rc) { + inode = ERR_PTR(rc); + GOTO(out, rc); + } + body = lustre_msg_buf(request->rq_repmsg, 0); + } else { + request = it->it_data; + body = lustre_msg_buf(request->rq_repmsg, 1); } - body = lustre_msg_buf(request->rq_repmsg, 0); + body->valid = (__u32)OBD_MD_FLNOTOBD; body->nlink = 1; @@ -180,8 +252,8 @@ static struct inode *ll_create_node(struct inode *dir, const char *name, } if (!list_empty(&inode->i_dentry)) { - CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n", - body->ino, atomic_read(&inode->i_count), + CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n", + body->ino, atomic_read(&inode->i_count), inode->i_nlink); iput(inode); LBUG(); @@ -204,15 +276,14 @@ int ll_mdc_unlink(struct inode *dir, struct inode *child, ENTRY; - err = mdc_unlink(&sbi->ll_mds_client, sbi->ll_mds_conn, dir, child, + err = mdc_unlink(&sbi->ll_mdc_conn, dir, child, name, len, &request); ptlrpc_free_req(request); - EXIT; - return err; + RETURN(err); } -int ll_mdc_link(struct dentry *src, struct inode *dir, +int ll_mdc_link(struct dentry *src, struct inode *dir, const 
char *name, int len) { struct ptlrpc_request *request = NULL; @@ -221,30 +292,28 @@ int ll_mdc_link(struct dentry *src, struct inode *dir, ENTRY; - err = mdc_link(&sbi->ll_mds_client, sbi->ll_mds_conn, src, dir, name, + err = mdc_link(&sbi->ll_mdc_conn, src, dir, name, len, &request); ptlrpc_free_req(request); - EXIT; - return err; + RETURN(err); } -int ll_mdc_rename(struct inode *src, struct inode *tgt, +int ll_mdc_rename(struct inode *src, struct inode *tgt, struct dentry *old, struct dentry *new) { struct ptlrpc_request *request = NULL; - int err; struct ll_sb_info *sbi = ll_i2sbi(src); + int err; ENTRY; - err = mdc_rename(&sbi->ll_mds_client, sbi->ll_mds_conn, src, tgt, - old->d_name.name, old->d_name.len, + err = mdc_rename(&sbi->ll_mdc_conn, src, tgt, + old->d_name.name, old->d_name.len, new->d_name.name, new->d_name.len, &request); ptlrpc_free_req(request); - EXIT; - return err; + RETURN(err); } /* @@ -253,60 +322,71 @@ int ll_mdc_rename(struct inode *src, struct inode *tgt, * is so far negative - it has no inode. * * If the create succeeds, we fill in the inode information - * with d_instantiate(). + * with d_instantiate(). 
*/ -static int ll_create (struct inode * dir, struct dentry * dentry, int mode) +static int ll_create(struct inode * dir, struct dentry * dentry, int mode) { - int err, rc; + int rc = 0; struct obdo oa; struct inode *inode; - memset(&oa, 0, sizeof(oa)); - oa.o_valid = OBD_MD_FLMODE; - oa.o_mode = S_IFREG | 0600; - rc = obd_create(ll_i2obdconn(dir), &oa); - if (rc) { - CERROR("error creating OST object: rc = %d\n", rc); - RETURN(rc); - } + if (dentry->d_it->it_disposition == 0) { + memset(&oa, 0, sizeof(oa)); + oa.o_valid = OBD_MD_FLMODE; + oa.o_mode = S_IFREG | 0600; + rc = obd_create(ll_i2obdconn(dir), &oa); + if (rc) + RETURN(rc); + } mode = mode | S_IFREG; CDEBUG(D_DENTRY, "name %s mode %o o_id %lld\n", dentry->d_name.name, mode, (unsigned long long)oa.o_id); - inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, - NULL, 0, mode, 0, &oa); - - if (IS_ERR(inode)) { - rc = PTR_ERR(inode); - CERROR("error creating MDS object for id %Ld: rc = %d\n", - (unsigned long long)oa.o_id, rc); - GOTO(out_destroy, rc); - } - - inode->i_op = &ll_file_inode_operations; - inode->i_fop = &ll_file_operations; - inode->i_mapping->a_ops = &ll_aops; - rc = ext2_add_nondir(dentry, inode); - /* XXX Handle err, but this will probably get more complex anyways */ + inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, + NULL, 0, mode, 0, dentry->d_it, &oa); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + CERROR("error creating MDS object for id %Ld: rc = %d\n", + (unsigned long long)oa.o_id, rc); + GOTO(out_destroy, rc); + } + + // XXX clean up the object + inode->i_op = &ll_file_inode_operations; + inode->i_fop = &ll_file_operations; + inode->i_mapping->a_ops = &ll_aops; + + if (dentry->d_it->it_disposition) { + struct ll_inode_info *ii = ll_i2info(inode); + ii->lli_flags |= OBD_FL_CREATEONOPEN; + memcpy(&ii->lli_intent_lock_handle, + dentry->d_it->it_lock_handle, + sizeof(struct lustre_handle)); + } + + /* no directory data updates when intents rule */ + if 
(dentry->d_it->it_disposition == 0) + rc = ext2_add_nondir(dentry, inode); + else + d_instantiate(dentry, inode); RETURN(rc); out_destroy: - err = obd_destroy(ll_i2obdconn(dir), &oa); - if (err) - CERROR("error destroying object %Ld in error path: err = %d\n", - (unsigned long long)oa.o_id, err); - return err; -} /* ll_create */ - + rc = obd_destroy(ll_i2obdconn(dir), &oa); + if (rc) + CERROR("error destroying object %Ld in error path: err = %d\n", + (unsigned long long)oa.o_id, rc); + return rc; +} -static int ll_mknod (struct inode * dir, struct dentry *dentry, int mode, - int rdev) +static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode, + int rdev) { - struct inode * inode = ll_create_node(dir, dentry->d_name.name, + struct inode * inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, NULL, 0, - mode, rdev, NULL); + mode, rdev, NULL, NULL); int err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, mode, rdev); @@ -315,8 +395,8 @@ static int ll_mknod (struct inode * dir, struct dentry *dentry, int mode, return err; } -static int ll_symlink (struct inode * dir, struct dentry * dentry, - const char * symname) +static int ll_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) { int err = -ENAMETOOLONG; unsigned l = strlen(symname); @@ -326,30 +406,30 @@ static int ll_symlink (struct inode * dir, struct dentry * dentry, if (l > LL_INLINESZ) return err; - inode = ll_create_node(dir, dentry->d_name.name, + inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, symname, l, - S_IFLNK | S_IRWXUGO, 0, NULL); + S_IFLNK | S_IRWXUGO, 0, dentry->d_it, NULL); err = PTR_ERR(inode); if (IS_ERR(inode)) return err; oinfo = ll_i2info(inode); - + inode->i_op = &ll_fast_symlink_inode_operations; memcpy(oinfo->lli_inline, symname, l); inode->i_size = l-1; err = ext2_add_nondir(dentry, inode); - if (err) { + if (err) { ext2_dec_count(inode); iput (inode); } return err; } -static int ll_link (struct dentry 
* old_dentry, struct inode * dir, - struct dentry *dentry) +static int ll_link(struct dentry * old_dentry, struct inode * dir, + struct dentry *dentry) { int err; struct inode *inode = old_dentry->d_inode; @@ -360,9 +440,9 @@ static int ll_link (struct dentry * old_dentry, struct inode * dir, if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; - err = ll_mdc_link(old_dentry, dir, + err = ll_mdc_link(old_dentry, dir, dentry->d_name.name, dentry->d_name.len); - if (err) { + if (err) { EXIT; return err; } @@ -374,7 +454,6 @@ static int ll_link (struct dentry * old_dentry, struct inode * dir, return ext2_add_nondir(dentry, inode); } - static int ll_mkdir(struct inode * dir, struct dentry * dentry, int mode) { struct inode * inode; @@ -386,9 +465,9 @@ static int ll_mkdir(struct inode * dir, struct dentry * dentry, int mode) ext2_inc_count(dir); - inode = ll_create_node (dir, dentry->d_name.name, - dentry->d_name.len, NULL, 0, - S_IFDIR | mode, 0, NULL); + inode = ll_create_node (dir, dentry->d_name.name, + dentry->d_name.len, NULL, 0, + S_IFDIR | mode, 0, dentry->d_it, NULL); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_dir; @@ -403,9 +482,12 @@ static int ll_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (err) goto out_fail; - err = ll_add_link(dentry, inode); - if (err) - goto out_fail; + /* no directory data updates when intents rule */ + if (dentry->d_it->it_disposition == 0) { + err = ll_add_link(dentry, inode); + if (err) + goto out_fail; + } d_instantiate(dentry, inode); out: @@ -433,10 +515,10 @@ static int ll_unlink(struct inode * dir, struct dentry *dentry) de = ext2_find_entry (dir, dentry, &page); if (!de) goto out; - + err = ll_mdc_unlink(dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - if (err) + if (err) goto out; err = ext2_delete_entry (de, page); @@ -467,7 +549,7 @@ static int ll_rmdir(struct inode * dir, struct dentry *dentry) } static int ll_rename (struct inode * old_dir, struct dentry * old_dentry, - struct 
inode * new_dir, struct dentry * new_dentry ) + struct inode * new_dir, struct dentry * new_dentry ) { struct inode * old_inode = old_dentry->d_inode; struct inode * new_inode = new_dentry->d_inode; @@ -477,8 +559,14 @@ static int ll_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; - err = ll_mdc_rename(old_dir, new_dir, old_dentry, new_dentry); - if (err) + if (new_dentry->d_it) { + struct ptlrpc_request *req = new_dentry->d_it->it_data; + err = req->rq_status; + goto out; + } + + err = ll_mdc_rename(old_dir, new_dir, old_dentry, new_dentry); + if (err) goto out; old_de = ext2_find_entry (old_dir, old_dentry, &old_page); @@ -535,7 +623,6 @@ static int ll_rename (struct inode * old_dir, struct dentry * old_dentry, } return 0; - out_dir: if (dir_de) { kunmap(dir_page); @@ -550,7 +637,7 @@ out: struct inode_operations ll_dir_inode_operations = { create: ll_create, - lookup: ll_lookup, + lookup2: ll_lookup2, link: ll_link, unlink: ll_unlink, symlink: ll_symlink, diff --git a/lustre/llite/recover.c b/lustre/llite/recover.c index bad0c4a824b51606dfc6ce61af85e292f904fc1e..d642e3d6b49981c2016c399f84a231052583f31b 100644 --- a/lustre/llite/recover.c +++ b/lustre/llite/recover.c @@ -22,6 +22,7 @@ #include <linux/lustre_lite.h> #include <linux/lustre_ha.h> + static int ll_reconnect(struct ll_sb_info *sbi) { struct ll_fid rootfid; @@ -30,18 +31,18 @@ static int ll_reconnect(struct ll_sb_info *sbi) int err; struct ptlrpc_request *request; - ptlrpc_readdress_connection(sbi->ll_mds_conn, "mds"); + ptlrpc_readdress_connection(sbi2mdc(sbi)->mdc_conn, "mds"); - err = connmgr_connect(ptlrpc_connmgr, sbi->ll_mds_conn); + err = connmgr_connect(ptlrpc_connmgr, sbi2mdc(sbi)->mdc_conn); if (err) { CERROR("cannot connect to MDS: rc = %d\n", err); - ptlrpc_put_connection(sbi->ll_mds_conn); + ptlrpc_put_connection(sbi2mdc(sbi)->mdc_conn); GOTO(out_disc, err = -ENOTCONN); } - sbi->ll_mds_conn->c_level = LUSTRE_CONN_CON; + 
sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_CON; /* XXX: need to store the last_* values somewhere */ - err = mdc_connect(&sbi->ll_mds_client, sbi->ll_mds_conn, + err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed, &last_rcvd, &last_xid, @@ -50,8 +51,8 @@ static int ll_reconnect(struct ll_sb_info *sbi) CERROR("cannot mds_connect: rc = %d\n", err); GOTO(out_disc, err = -ENOTCONN); } - sbi->ll_mds_client.cli_last_rcvd = last_xid; - sbi->ll_mds_conn->c_level = LUSTRE_CONN_RECOVD; + sbi2mdc(sbi)->mdc_client->cli_last_rcvd = last_xid; + sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_RECOVD; out_disc: return err; @@ -125,7 +126,7 @@ int ll_recover(struct ptlrpc_client *cli) } - sbi->ll_mds_conn->c_level = LUSTRE_CONN_FULL; + sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_FULL; recovd_cli_fixed(cli); /* Finally, continue what we delayed since recovery started */ diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 2fe1f3ea5ab1de379fd92b5fb80ffe897b856ba5..eb5a602698b4d177f7d4df5320f293200290b9fd 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -173,7 +173,8 @@ static int ll_commit_write(struct file *file, struct page *page, &bufs_per_obdo, &page, &count, &offset, &flags, NULL); kunmap(page); - if ((iattr.ia_size = offset + to) > inode->i_size) { + iattr.ia_size = offset + to; + if (iattr.ia_size > inode->i_size) { /* do NOT truncate when writing in the middle of a file */ inode->i_size = iattr.ia_size; iattr.ia_valid = ATTR_SIZE; @@ -261,6 +262,65 @@ int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, } +int ll_flush_inode_pages(struct inode * inode) +{ + //int i; + // obd_count num_obdo = 1; + obd_count bufs_per_obdo = 0; + struct obdo *oa = NULL; + obd_size *count = NULL; + obd_off *offset = NULL; + obd_flag *flags = NULL; + int err = 0; + + ENTRY; + + spin_lock(&pagecache_lock); + + spin_unlock(&pagecache_lock); + + + OBD_ALLOC(count, sizeof(obd_size) * bufs_per_obdo); + if (!count) + GOTO(out, err=-ENOMEM); + + 
OBD_ALLOC(offset, sizeof(obd_off) * bufs_per_obdo); + if (!offset) + GOTO(out, err=-ENOMEM); + + OBD_ALLOC(flags, sizeof(obd_flag) * bufs_per_obdo); + if (!flags) + GOTO(out, err=-ENOMEM); + +#if 0 + for (i = 0 ; i < bufs_per_obdo ; i++) { + count[i] = PAGE_SIZE; + offset[i] = ((obd_off)(iobuf->maplist[i])->index) << PAGE_SHIFT; + flags[i] = OBD_BRW_CREATE; + } + + oa = ll_oa_from_inode(inode, OBD_MD_FLNOTOBD); + if (!oa) + RETURN(-ENOMEM); + + err = obd_brw(rw, ll_i2obdconn(inode), num_obdo, &oa, &bufs_per_obdo, + iobuf->maplist, count, offset, flags); + if (err == 0) + err = bufs_per_obdo * 4096; +#endif + out: + if (oa) + obdo_free(oa); + if (flags) + OBD_FREE(flags, sizeof(obd_flag) * bufs_per_obdo); + if (count) + OBD_FREE(count, sizeof(obd_count) * bufs_per_obdo); + if (offset) + OBD_FREE(offset, sizeof(obd_off) * bufs_per_obdo); + RETURN(err); +} + + struct address_space_operations ll_aops = { readpage: ll_readpage, diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 9a8c805a95f28cd8aeb65c93b8590c23cec94384..484f9deeff0108be7e96da0903d5a481aee3ca60 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -54,7 +54,7 @@ static char *ll_read_opt(const char *opt, char *data) RETURN(retval); } -static void ll_options(char *options, char **dev, char **vers) +static void ll_options(char *options, char **ost, char **mds) { char *this_char; ENTRY; @@ -68,8 +68,8 @@ static void ll_options(char *options, char **dev, char **vers) this_char != NULL; this_char = strtok (NULL, ",")) { CDEBUG(D_INFO, "this_char %s\n", this_char); - if ( (!*dev && (*dev = ll_read_opt("device", this_char)))|| - (!*vers && (*vers = ll_read_opt("version", this_char))) ) + if ( (!*ost && (*ost = ll_read_opt("ost", this_char)))|| + (!*mds && (*mds = ll_read_opt("mds", this_char))) ) continue; } EXIT; @@ -80,8 +80,8 @@ static struct super_block * ll_read_super(struct super_block *sb, { struct inode *root = 0; struct ll_sb_info *sbi; - char *device = NULL; - char *version = NULL; 
+ char *ost = NULL; + char *mds = NULL; int devno; int err; struct ll_fid rootfid; @@ -101,56 +101,59 @@ static struct super_block * ll_read_super(struct super_block *sb, sb->u.generic_sbp = sbi; - ll_options(data, &device, &version); + ll_options(data, &ost, &mds); - if (!device) { - CERROR("no device\n"); + if (!ost) { + CERROR("no ost\n"); GOTO(out_free, sb = NULL); } - devno = simple_strtoul(device, NULL, 0); + if (!mds) { + CERROR("no mds\n"); + GOTO(out_free, sb = NULL); + } + + devno = simple_strtoul(ost, NULL, 0); if (devno >= MAX_OBD_DEVICES) { - CERROR("device of %s too high\n", device); + CERROR("devno of %s too high\n", ost); GOTO(out_free, sb = NULL); } - sbi->ll_conn.oc_dev = &obd_dev[devno]; - err = obd_connect(&sbi->ll_conn); + sbi->ll_osc_conn.oc_dev = &obd_dev[devno]; + err = obd_connect(&sbi->ll_osc_conn); if (err) { - CERROR("cannot connect to %s: rc = %d\n", device, err); + CERROR("cannot connect to %s: rc = %d\n", ost, err); GOTO(out_free, sb = NULL); } - sbi->ll_namespace = ldlm_namespace_new(NULL, 1); - if (sbi->ll_namespace == NULL) { - CERROR("failed to create local lock namespace\n"); - GOTO(out_obd, sb = NULL); + devno = simple_strtoul(mds, NULL, 0); + if (devno >= MAX_OBD_DEVICES) { + CERROR("devno of %s too high\n", mds); + GOTO(out_free, sb = NULL); } - ptlrpc_init_client(ptlrpc_connmgr, ll_recover, - MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, - &sbi->ll_mds_client); - - sbi->ll_mds_client.cli_data = sbi; - sbi->ll_mds_client.cli_name = "mdc"; - sbi->ll_mds_conn = ptlrpc_uuid_to_connection("mds"); - if (!sbi->ll_mds_conn) { - CERROR("cannot find MDS\n"); - GOTO(out_ldlm, sb = NULL); + sbi->ll_mdc_conn.oc_dev = &obd_dev[devno]; + err = obd_connect(&sbi->ll_mdc_conn); + if (err) { + CERROR("cannot connect to %s: rc = %d\n", mds, err); + GOTO(out_free, sb = NULL); } +#if 0 err = connmgr_connect(ptlrpc_connmgr, sbi->ll_mds_conn); if (err) { CERROR("cannot connect to MDS: rc = %d\n", err); GOTO(out_rpc, sb = NULL); } +#endif - 
sbi->ll_mds_conn->c_level = LUSTRE_CONN_FULL; + sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_FULL; /* XXX: need to store the last_* values somewhere */ - err = mdc_connect(&sbi->ll_mds_client, sbi->ll_mds_conn, - &rootfid, &last_committed, &last_rcvd, &last_xid, - &request); + err = mdc_getstatus(&sbi->ll_mdc_conn, + &rootfid, &last_committed, + &last_rcvd, &last_xid, + &request); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); GOTO(out_disc, sb = NULL); @@ -167,7 +170,7 @@ static struct super_block * ll_read_super(struct super_block *sb, sb->s_op = &ll_super_operations; /* make root inode */ - err = mdc_getattr(&sbi->ll_mds_client, sbi->ll_mds_conn, + err = mdc_getattr(&sbi->ll_mdc_conn, sbi->ll_rootino, S_IFDIR, OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); if (err) { @@ -200,25 +203,21 @@ static struct super_block * ll_read_super(struct super_block *sb, ptlrpc_free_req(request); out_dev: - if (device) - OBD_FREE(device, strlen(device) + 1); - if (version) - OBD_FREE(version, strlen(version) + 1); + if (mds) + OBD_FREE(mds, strlen(mds) + 1); + if (ost) + OBD_FREE(ost, strlen(ost) + 1); RETURN(sb); out_cdb: ll_commitcbd_cleanup(sbi); out_mdc: - ptlrpc_cleanup_client(&sbi->ll_mds_client); + obd_disconnect(&sbi->ll_mdc_conn); + obd_disconnect(&sbi->ll_osc_conn); out_disc: ptlrpc_free_req(request); -out_rpc: - ptlrpc_put_connection(sbi->ll_mds_conn); -out_ldlm: - ldlm_namespace_free(sbi->ll_namespace); -out_obd: - obd_disconnect(&sbi->ll_conn); + obd_disconnect(&sbi->ll_osc_conn); out_free: OBD_FREE(sbi, sizeof(*sbi)); @@ -231,11 +230,9 @@ static void ll_put_super(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; ll_commitcbd_cleanup(sbi); - ptlrpc_cleanup_client(&sbi->ll_mds_client); - ptlrpc_put_connection(sbi->ll_mds_conn); - ldlm_namespace_free(sbi->ll_namespace); - obd_disconnect(&sbi->ll_conn); - OBD_FREE(sbi, sizeof(*sbi)); + obd_disconnect(&sbi->ll_osc_conn); + obd_disconnect(&sbi->ll_mdc_conn); + MOD_DEC_USE_COUNT; EXIT; } /* 
ll_put_super */ @@ -321,7 +318,7 @@ int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc) /* change incore inode */ ll_attr2inode(inode, attr, do_trunc); - err = mdc_setattr(&sbi->ll_mds_client, sbi->ll_mds_conn, inode, attr, + err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request); if (err) CERROR("mdc_setattr fails (%d)\n", err); @@ -342,7 +339,7 @@ static int ll_statfs(struct super_block *sb, struct statfs *buf) int err; ENTRY; - err = obd_statfs(&ll_s2sbi(sb)->ll_conn, &tmp); + err = obd_statfs(&ll_s2sbi(sb)->ll_osc_conn, &tmp); if (err) { CERROR("obd_statfs fails (%d)\n", err); RETURN(err); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index ab87a92c67b9fa413245ff93b593b82e14cef62b..1b307d67b0f26845c547aa7aec01ba21920967a7 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -35,7 +35,7 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) int rc, size; ENTRY; - rc = mdc_getattr(&sbi->ll_mds_client, sbi->ll_mds_conn, + rc = mdc_getattr(&sbi->ll_mdc_conn, dentry->d_inode->i_ino, S_IFLNK, OBD_MD_LINKNAME, dentry->d_inode->i_size, &request); if (rc) { diff --git a/lustre/llite/sysctl.c b/lustre/llite/sysctl.c index 3c4ce1f8170201abb214968ed44f32b4bfdf0789..ee4ac75a86854f37c720906c07d9cdf355b65bfc 100644 --- a/lustre/llite/sysctl.c +++ b/lustre/llite/sysctl.c @@ -1,4 +1,6 @@ -/* +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * * Copyright (C) 2001 Cluster File Systems, Inc. * * This code is issued under the GNU General Public License. 
diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 049c409a85a503a26b6687e0d9f251c594aad983..c7447b0ca08b8cee5de35813514d84576418ce03 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -31,7 +31,7 @@ #include <linux/obd_class.h> #include <linux/lustre_mds.h> -static int mdc_reint(struct ptlrpc_client *cl, struct ptlrpc_request *request, int level) +static int mdc_reint(struct ptlrpc_request *request, int level) { int rc; request->rq_level = level; @@ -45,26 +45,27 @@ static int mdc_reint(struct ptlrpc_client *cl, struct ptlrpc_request *request, i return rc; } -int mdc_setattr(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int mdc_setattr(struct obd_conn *conn, struct inode *inode, struct iattr *iattr, struct ptlrpc_request **request) { + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct mds_rec_setattr *rec; struct ptlrpc_request *req; int rc, size = sizeof(*rec); ENTRY; - req = ptlrpc_prep_req(cl, conn, MDS_REINT, 1, &size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_REINT, 1, &size, NULL); if (!req) RETURN(-ENOMEM); - rec = lustre_msg_buf(req->rq_reqmsg, 0); - mds_setattr_pack(rec, inode, iattr); + mds_setattr_pack(req, 0, inode, iattr, NULL, 0); size = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, &size); - rc = mdc_reint(cl, req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS ) rc = 0; @@ -72,15 +73,16 @@ int mdc_setattr(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, RETURN(rc); } -int mdc_create(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int mdc_create(struct obd_conn *conn, struct inode *dir, const char *name, int namelen, const char *tgt, int tgtlen, int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev, struct obdo *obdo, struct ptlrpc_request **request) { struct mds_rec_create *rec; + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req; - int rc, size[3] = {sizeof(*rec), namelen 
+ 1, 0}; + int rc, size[3] = {sizeof(struct mds_rec_create), namelen + 1, 0}; char *tmp, *bufs[3] = {NULL, NULL, NULL}; int level, bufcount = 2; ENTRY; @@ -94,15 +96,14 @@ int mdc_create(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, bufcount = 3; } - req = ptlrpc_prep_req(cl, conn, MDS_REINT, bufcount, size, bufs); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, MDS_REINT, + bufcount, size, bufs); if (!req) RETURN(-ENOMEM); rec = lustre_msg_buf(req->rq_reqmsg, 0); - mds_create_pack(rec, dir, mode, rdev, uid, gid, time); - - tmp = lustre_msg_buf(req->rq_reqmsg, 1); - LOGL0(name, namelen, tmp); + mds_create_pack(req, 0, dir, mode, rdev, uid, gid, time, + name, namelen, NULL, 0); if (S_ISREG(mode)) { tmp = lustre_msg_buf(req->rq_reqmsg, 2); @@ -117,7 +118,7 @@ int mdc_create(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, level = LUSTRE_CONN_FULL; resend: - rc = mdc_reint(cl, req, level); + rc = mdc_reint(req, level); if (rc == -ERESTARTSYS) { struct mds_update_record_hdr *hdr = lustre_msg_buf(req->rq_reqmsg, 0); @@ -132,30 +133,26 @@ int mdc_create(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, RETURN(rc); } -int mdc_unlink(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int mdc_unlink(struct obd_conn *conn, struct inode *dir, struct inode *child, const char *name, int namelen, struct ptlrpc_request **request) { - struct mds_rec_unlink *rec; + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req; - int rc, size[2] = {sizeof(*rec), namelen + 1}; - char *tmp; + int rc, size[2] = {sizeof(struct mds_rec_unlink), namelen + 1}; ENTRY; - req = ptlrpc_prep_req(cl, conn, MDS_REINT, 2, size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, MDS_REINT, 2, + size, NULL); if (!req) RETURN(-ENOMEM); - rec = lustre_msg_buf(req->rq_reqmsg, 0); - mds_unlink_pack(rec, dir, child); - - tmp = lustre_msg_buf(req->rq_reqmsg, 1); - LOGL0(name, namelen, tmp); + mds_unlink_pack(req, 0, dir, child, name, 
namelen); size[0] = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, size); - rc = mdc_reint(cl, req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS ) rc = 0; @@ -163,30 +160,26 @@ int mdc_unlink(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, RETURN(rc); } -int mdc_link(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int mdc_link(struct obd_conn *conn, struct dentry *src, struct inode *dir, const char *name, int namelen, struct ptlrpc_request **request) { - struct mds_rec_link *rec; + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req; - int rc, size[2] = {sizeof(*rec), namelen + 1}; - char *tmp; + int rc, size[2] = {sizeof(struct mds_rec_link), namelen + 1}; ENTRY; - req = ptlrpc_prep_req(cl, conn, MDS_REINT, 2, size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, MDS_REINT, 2, + size, NULL); if (!req) RETURN(-ENOMEM); - rec = lustre_msg_buf(req->rq_reqmsg, 0); - mds_link_pack(rec, src->d_inode, dir); - - tmp = lustre_msg_buf(req->rq_reqmsg, 1); - LOGL0(name, namelen, tmp); + mds_link_pack(req, 0, src->d_inode, dir, name, namelen); size[0] = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, size); - rc = mdc_reint(cl, req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS ) rc = 0; @@ -194,36 +187,28 @@ int mdc_link(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, RETURN(rc); } -int mdc_rename(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int mdc_rename(struct obd_conn *conn, struct inode *src, struct inode *tgt, const char *old, int oldlen, const char *new, int newlen, struct ptlrpc_request **request) { - struct mds_rec_rename *rec; + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req; - int rc, size[3] = {sizeof(*rec), oldlen + 1, newlen + 1}; - char *tmp; + int rc, size[3] = {sizeof(struct mds_rec_rename), oldlen + 1, + newlen + 1}; ENTRY; - req 
= ptlrpc_prep_req(cl, conn, MDS_REINT, 3, size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_REINT, 3, size, NULL); if (!req) RETURN(-ENOMEM); - rec = lustre_msg_buf(req->rq_reqmsg, 0); - mds_rename_pack(rec, src, tgt); - - tmp = lustre_msg_buf(req->rq_reqmsg, 1); - LOGL0(old, oldlen, tmp); - - if (tgt) { - tmp = lustre_msg_buf(req->rq_reqmsg, 2); - LOGL0(new, newlen, tmp); - } + mds_rename_pack(req, 0, src, tgt, old, oldlen, new, newlen); size[0] = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, size); - rc = mdc_reint(cl, req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS ) rc = 0; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index a928fc35d7624873ed59717017a845c5605fa5ce..94b79aa86544db2e2ff7ffdc54ddaa5dba5e2248 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -21,29 +21,31 @@ */ #define EXPORT_SYMTAB +#define DEBUG_SUBSYSTEM S_MDC #include <linux/module.h> #include <linux/miscdevice.h> - -#define DEBUG_SUBSYSTEM S_MDC - #include <linux/lustre_mds.h> #include <linux/lustre_lite.h> +#include <linux/lustre_dlm.h> #define REQUEST_MINOR 244 extern int mds_queue_req(struct ptlrpc_request *); -int mdc_connect(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, - struct ll_fid *rootfid, __u64 *last_committed, __u64 *last_rcvd, - __u32 *last_xid, struct ptlrpc_request **request) + +int mdc_getstatus(struct obd_conn *conn, struct ll_fid *rootfid, + __u64 *last_committed, __u64 *last_rcvd, + __u32 *last_xid, struct ptlrpc_request **request) { struct ptlrpc_request *req; struct mds_body *body; + struct mdc_obd *mdc = mdc_conn2mdc(conn); int rc, size = sizeof(*body); ENTRY; - req = ptlrpc_prep_req(cl, conn, MDS_CONNECT, 1, &size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_GETSTATUS, 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -56,8 +58,8 @@ int mdc_connect(struct ptlrpc_client *cl, struct 
ptlrpc_connection *conn, rc = ptlrpc_check_status(req, rc); if (!rc) { - mds_unpack_rep_body(req); body = lustre_msg_buf(req->rq_repmsg, 0); + mds_unpack_body(body); memcpy(rootfid, &body->fid1, sizeof(*rootfid)); *last_committed = req->rq_repmsg->last_committed; *last_rcvd = req->rq_repmsg->last_rcvd; @@ -78,16 +80,18 @@ int mdc_connect(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, } -int mdc_getattr(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int mdc_getattr(struct obd_conn *conn, ino_t ino, int type, unsigned long valid, size_t ea_size, struct ptlrpc_request **request) { + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req; struct mds_body *body; int rc, size[2] = {sizeof(*body), 0}, bufcount = 1; ENTRY; - req = ptlrpc_prep_req(cl, conn, MDS_GETATTR, 1, size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_GETATTR, 1, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -109,8 +113,8 @@ int mdc_getattr(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, rc = ptlrpc_check_status(req, rc); if (!rc) { - mds_unpack_rep_body(req); body = lustre_msg_buf(req->rq_repmsg, 0); + mds_unpack_body(body); CDEBUG(D_NET, "mode: %o\n", body->mode); } @@ -120,21 +124,204 @@ int mdc_getattr(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, return rc; } -int mdc_open(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, - ino_t ino, int type, int flags, __u64 cookie, __u64 *fh, - struct ptlrpc_request **request) +static int mdc_lock_callback(struct ldlm_lock *lock, struct ldlm_lock *new, + void *data, int data_len) +{ + int rc; + struct inode *inode = data; + ENTRY; + + if (new == NULL) { + /* Completion AST. Do nothing. 
*/ + RETURN(0); + } + + if (data_len != sizeof(*inode)) { + CERROR("data_len should be %d, but is %d\n", sizeof(*inode), + data_len); + LBUG(); + } + + /* FIXME: do something better than throwing away everything */ + if (inode == NULL) + LBUG(); + invalidate_inode_pages(inode); + + rc = ldlm_cli_cancel(lock->l_client, lock); + if (rc < 0) { + CERROR("ldlm_cli_cancel: %d\n", rc); + LBUG(); + } + RETURN(0); +} + +int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, + int lock_mode, struct inode *dir, struct dentry *de, + struct lustre_handle *lockh, __u64 id, char *tgt, int tgtlen, + void *data, int datalen) { + struct ptlrpc_request *req; + struct obd_device *obddev = conn->oc_dev; + struct mdc_obd *mdc = mdc_conn2mdc(conn); + __u64 res_id[RES_NAME_SIZE] = {dir->i_ino}; + int size[5] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)}; + int rc, flags; + struct ldlm_reply *dlm_rep; + struct ldlm_intent *lit; + ENTRY; + + switch (it->it_op) { + case IT_MKDIR: + it->it_mode = (it->it_mode | S_IFDIR) & ~current->fs->umask; + break; + case IT_SETATTR: + it->it_op = IT_GETATTR; + break; + case (IT_CREAT|IT_OPEN): + case IT_CREAT: + it->it_mode = (it->it_mode | S_IFREG) & ~current->fs->umask; + break; + case IT_SYMLINK: + it->it_mode = (it->it_mode | S_IFLNK) & ~current->fs->umask; + break; + } + + if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) { + size[2] = sizeof(struct mds_rec_create); + size[3] = de->d_name.len + 1; + size[4] = tgtlen + 1; + req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, + LDLM_ENQUEUE, 5, size, NULL); + if (!req) + RETURN(-ENOMEM); + + /* pack the intent */ + lit = lustre_msg_buf(req->rq_reqmsg, 1); + lit->opc = NTOH__u64((__u64)it->it_op); + + /* pack the intended request */ + mds_create_pack(req, 2, dir, it->it_mode, id, current->fsuid, + current->fsgid, CURRENT_TIME, de->d_name.name, + de->d_name.len, tgt, tgtlen); + + size[0] = sizeof(struct ldlm_reply); + size[1] = sizeof(struct 
mds_body); + size[2] = sizeof(struct obdo); + req->rq_replen = lustre_msg_size(3, size); + } else if ( it->it_op == IT_RENAME2 ) { + struct dentry *old_de = it->it_data; + + size[2] = sizeof(struct mds_rec_rename); + size[3] = old_de->d_name.len + 1; + size[4] = de->d_name.len + 1; + req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, + LDLM_ENQUEUE, 5, size, NULL); + if (!req) + RETURN(-ENOMEM); + + /* pack the intent */ + lit = lustre_msg_buf(req->rq_reqmsg, 1); + lit->opc = NTOH__u64((__u64)it->it_op); + + /* pack the intended request */ + mds_rename_pack(req, 2, old_de->d_inode, dir, + old_de->d_parent->d_name.name, + old_de->d_parent->d_name.len, + de->d_name.name, de->d_name.len); + + size[0] = sizeof(struct ldlm_reply); + size[1] = sizeof(struct mds_body); + req->rq_replen = lustre_msg_size(2, size); + } else if ( it->it_op == IT_GETATTR || it->it_op == IT_RENAME || + it->it_op == IT_OPEN ) { + size[2] = sizeof(struct mds_body); + size[3] = de->d_name.len + 1; + + req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, + LDLM_ENQUEUE, 4, size, NULL); + if (!req) + RETURN(-ENOMEM); + + /* pack the intent */ + lit = lustre_msg_buf(req->rq_reqmsg, 1); + lit->opc = NTOH__u64((__u64)it->it_op); + + /* pack the intended request */ + mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len); + + /* get ready for the reply */ + size[0] = sizeof(struct ldlm_reply); + size[1] = sizeof(struct mds_body); + size[2] = sizeof(struct obdo); + req->rq_replen = lustre_msg_size(3, size); + } else if ( it->it_op == IT_SETATTR) { + size[2] = sizeof(struct mds_rec_setattr); + size[3] = de->d_name.len + 1; + req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, + LDLM_ENQUEUE, 5, size, NULL); + if (!req) + RETURN(-ENOMEM); + + lit = lustre_msg_buf(req->rq_reqmsg, 1); + lit->opc = NTOH__u64((__u64)it->it_op); + + if (!it->it_iattr) + LBUG(); + + mds_setattr_pack(req, 2, dir, it->it_iattr, + de->d_name.name, de->d_name.len); + size[0] = sizeof(struct ldlm_reply); 
+ size[1] = sizeof(struct mds_body); + req->rq_replen = lustre_msg_size(2, size); + } else if ( it->it_op == IT_READDIR ) { + req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, + LDLM_ENQUEUE, 1, size, NULL); + if (!req) + RETURN(-ENOMEM); + + /* get ready for the reply */ + size[0] = sizeof(struct ldlm_reply); + req->rq_replen = lustre_msg_size(1, size); + } else { + LBUG(); + } + + rc = ldlm_cli_enqueue(mdc->mdc_ldlm_client, mdc->mdc_conn, req, + obddev->obd_namespace, NULL, res_id, lock_type, + NULL, 0, lock_mode, &flags, + (void *)mdc_lock_callback, data, datalen, lockh); + + if (rc != 0) { + CERROR("ldlm_cli_enqueue: %d\n", rc); + RETURN(rc); + } + + dlm_rep = lustre_msg_buf(req->rq_repmsg, 0); + it->it_disposition = (int) dlm_rep->lock_policy_res1; + it->it_status = (int) dlm_rep->lock_policy_res2; + it->it_lock_mode = lock_mode; + it->it_data = req; + + RETURN(0); +} + +int mdc_open(struct obd_conn *conn, ino_t ino, int type, int flags, __u64 objid, + __u64 cookie, __u64 *fh, struct ptlrpc_request **request) +{ + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct mds_body *body; int rc, size = sizeof(*body); struct ptlrpc_request *req; - req = ptlrpc_prep_req(cl, conn, MDS_OPEN, 1, &size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_OPEN, 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); req->rq_flags |= PTL_RPC_FL_REPLAY; req->rq_level = LUSTRE_CONN_FULL; body = lustre_msg_buf(req->rq_reqmsg, 0); + ll_ino2fid(&body->fid1, ino, 0, type); body->flags = HTON__u32(flags); body->extra = cookie; @@ -145,8 +332,8 @@ int mdc_open(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, rc = ptlrpc_check_status(req, rc); if (!rc) { - mds_unpack_rep_body(req); body = lustre_msg_buf(req->rq_repmsg, 0); + mds_unpack_body(body); *fh = body->extra; } @@ -156,14 +343,16 @@ int mdc_open(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, return rc; } -int mdc_close(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, +int 
mdc_close(struct obd_conn *conn, ino_t ino, int type, __u64 fh, struct ptlrpc_request **request) { + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct mds_body *body; int rc, size = sizeof(*body); struct ptlrpc_request *req; - req = ptlrpc_prep_req(cl, conn, MDS_CLOSE, 1, &size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_CLOSE, 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -183,10 +372,10 @@ int mdc_close(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, return rc; } -int mdc_readpage(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, - ino_t ino, int type, __u64 offset, char *addr, - struct ptlrpc_request **request) +int mdc_readpage(struct obd_conn *conn, ino_t ino, int type, __u64 offset, + char *addr, struct ptlrpc_request **request) { + struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req = NULL; struct ptlrpc_bulk_desc *desc = NULL; struct ptlrpc_bulk_page *bulk = NULL; @@ -196,18 +385,19 @@ int mdc_readpage(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, CDEBUG(D_INODE, "inode: %ld\n", (long)ino); - desc = ptlrpc_prep_bulk(conn); + desc = ptlrpc_prep_bulk(mdc->mdc_conn); if (desc == NULL) GOTO(out, rc = -ENOMEM); - req = ptlrpc_prep_req(cl, conn, MDS_READPAGE, 1, &size, NULL); + req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_READPAGE, 1, &size, NULL); if (!req) GOTO(out2, rc = -ENOMEM); bulk = ptlrpc_prep_bulk_page(desc); bulk->b_buflen = PAGE_SIZE; bulk->b_buf = addr; - bulk->b_xid = req->rq_reqmsg->xid; + bulk->b_xid = req->rq_xid; desc->b_portal = MDS_BULK_PORTAL; rc = ptlrpc_register_bulk(desc); @@ -228,8 +418,10 @@ int mdc_readpage(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, if (rc) { ptlrpc_abort_bulk(desc); GOTO(out2, rc); - } else - mds_unpack_rep_body(req); + } else { + body = lustre_msg_buf(req->rq_repmsg, 0); + mds_unpack_body(body); + } EXIT; out2: @@ -268,9 +460,11 @@ int mdc_statfs(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, } 
#endif -static int request_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int mdc_ioctl(long cmd, struct obd_conn *conn, int len, void *karg, + void *uarg) { +#if 0 + /* FIXME XXX : This should use the new ioc_data to pass args in */ int err = 0; struct ptlrpc_client cl; struct ptlrpc_connection *conn; @@ -278,9 +472,6 @@ static int request_ioctl(struct inode *inode, struct file *file, ENTRY; - if (MINOR(inode->i_rdev) != REQUEST_MINOR) - RETURN(-EINVAL); - if (_IOC_TYPE(cmd) != IOC_REQUEST_TYPE || _IOC_NR(cmd) < IOC_REQUEST_MIN_NR || _IOC_NR(cmd) > IOC_REQUEST_MAX_NR ) { @@ -291,8 +482,8 @@ static int request_ioctl(struct inode *inode, struct file *file, ptlrpc_init_client(NULL, NULL, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl); - conn = ptlrpc_uuid_to_connection("mds"); - if (!conn) { + connection = ptlrpc_uuid_to_connection("mds"); + if (!connection) { CERROR("cannot create client\n"); RETURN(-EINVAL); } @@ -300,7 +491,8 @@ static int request_ioctl(struct inode *inode, struct file *file, switch (cmd) { case IOC_REQUEST_GETATTR: { CERROR("-- getting attr for ino %lu\n", arg); - err = mdc_getattr(&cl, conn, arg, S_IFDIR, ~0, 0, &request); + err = mdc_getattr(&cl, connection, arg, S_IFDIR, ~0, 0, + &request); CERROR("-- done err %d\n", err); GOTO(out, err); @@ -314,7 +506,8 @@ static int request_ioctl(struct inode *inode, struct file *file, GOTO(out, err); } CERROR("-- readpage 0 for ino %lu\n", arg); - err = mdc_readpage(&cl, conn, arg, S_IFDIR, 0, buf, &request); + err = mdc_readpage(&cl, connection, arg, S_IFDIR, 0, buf, + &request); CERROR("-- done err %d\n", err); OBD_FREE(buf, PAGE_SIZE); @@ -331,7 +524,7 @@ static int request_ioctl(struct inode *inode, struct file *file, iattr.ia_atime = 0; iattr.ia_valid = ATTR_MODE | ATTR_ATIME; - err = mdc_setattr(&cl, conn, &inode, &iattr, &request); + err = mdc_setattr(&cl, connection, &inode, &iattr, &request); CERROR("-- done err %d\n", err); GOTO(out, err); @@ -347,7 
+540,7 @@ static int request_ioctl(struct inode *inode, struct file *file, iattr.ia_atime = 0; iattr.ia_valid = ATTR_MODE | ATTR_ATIME; - err = mdc_create(&cl, conn, &inode, + err = mdc_create(&cl, connection, &inode, "foofile", strlen("foofile"), NULL, 0, 0100707, 47114711, 11, 47, 0, NULL, &request); @@ -360,8 +553,8 @@ static int request_ioctl(struct inode *inode, struct file *file, __u64 fh, ino; copy_from_user(&ino, (__u64 *)arg, sizeof(ino)); CERROR("-- opening ino %llu\n", (unsigned long long)ino); - err = mdc_open(&cl, conn, ino, S_IFDIR, O_RDONLY, 4711, &fh, - &request); + err = mdc_open(&cl, connection, ino, S_IFDIR, O_RDONLY, 4711, + &fh, &request); copy_to_user((__u64 *)arg, &fh, sizeof(fh)); CERROR("-- done err %d (fh=%Lu)\n", err, (unsigned long long)fh); @@ -371,7 +564,7 @@ static int request_ioctl(struct inode *inode, struct file *file, case IOC_REQUEST_CLOSE: { CERROR("-- closing ino 2, filehandle %lu\n", arg); - err = mdc_close(&cl, conn, 2, S_IFDIR, arg, &request); + err = mdc_close(&cl, connection, 2, S_IFDIR, arg, &request); CERROR("-- done err %d\n", err); GOTO(out, err); @@ -383,39 +576,179 @@ static int request_ioctl(struct inode *inode, struct file *file, out: ptlrpc_free_req(request); - ptlrpc_put_connection(conn); + ptlrpc_put_connection(connection); ptlrpc_cleanup_client(&cl); RETURN(err); +#endif + return 0; } +static int mdc_setup(struct obd_device *obddev, obd_count len, void *buf) +{ + struct obd_ioctl_data* data = buf; + struct mdc_obd *mdc = &obddev->u.mdc; + char server_uuid[37]; + int rc; + ENTRY; -static struct file_operations requestdev_fops = { - ioctl: request_ioctl, -}; + if (data->ioc_inllen1 < 1) { + CERROR("osc setup requires a TARGET UUID\n"); + RETURN(-EINVAL); + } -static struct miscdevice request_dev = { - REQUEST_MINOR, - "request", - &requestdev_fops + if (data->ioc_inllen1 > 37) { + CERROR("mdc UUID must be less than 38 characters\n"); + RETURN(-EINVAL); + } + + if (data->ioc_inllen2 < 1) { + CERROR("mdc setup 
requires a SERVER UUID\n"); + RETURN(-EINVAL); + } + + if (data->ioc_inllen2 > 37) { + CERROR("mdc UUID must be less than 38 characters\n"); + RETURN(-EINVAL); + } + + memcpy(mdc->mdc_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1); + memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2, + sizeof(server_uuid))); + + mdc->mdc_conn = ptlrpc_uuid_to_connection(server_uuid); + if (!mdc->mdc_conn) + RETURN(-ENOENT); + + OBD_ALLOC(mdc->mdc_client, sizeof(*mdc->mdc_client)); + if (mdc->mdc_client == NULL) + GOTO(out_conn, rc = -ENOMEM); + + OBD_ALLOC(mdc->mdc_ldlm_client, sizeof(*mdc->mdc_ldlm_client)); + if (mdc->mdc_ldlm_client == NULL) + GOTO(out_client, rc = -ENOMEM); + + ptlrpc_init_client(NULL, NULL, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, + mdc->mdc_client); + ptlrpc_init_client(NULL, NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, + mdc->mdc_ldlm_client); + mdc->mdc_client->cli_name = "mdc"; + mdc->mdc_ldlm_client->cli_name = "ldlm"; + /* XXX get recovery hooked in here again */ + //ptlrpc_init_client(ptlrpc_connmgr, ll_recover,... 
+ + ptlrpc_init_client(ptlrpc_connmgr, NULL, + MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, + mdc->mdc_client); + + MOD_INC_USE_COUNT; + RETURN(0); + + out_client: + OBD_FREE(mdc->mdc_client, sizeof(*mdc->mdc_client)); + out_conn: + ptlrpc_put_connection(mdc->mdc_conn); + return rc; +} + +static int mdc_cleanup(struct obd_device * obddev) +{ + struct mdc_obd *mdc = &obddev->u.mdc; + + ptlrpc_cleanup_client(mdc->mdc_client); + OBD_FREE(mdc->mdc_client, sizeof(*mdc->mdc_client)); + ptlrpc_cleanup_client(mdc->mdc_ldlm_client); + OBD_FREE(mdc->mdc_ldlm_client, sizeof(*mdc->mdc_ldlm_client)); + ptlrpc_put_connection(mdc->mdc_conn); + + MOD_DEC_USE_COUNT; + return 0; +} + +static int mdc_connect(struct obd_conn *conn) +{ + struct mdc_obd *mdc = mdc_conn2mdc(conn); + struct ptlrpc_request *request; + int rc, size = sizeof(mdc->mdc_target_uuid); + char *tmp = mdc->mdc_target_uuid; + + ENTRY; + + conn->oc_dev->obd_namespace = ldlm_namespace_new(LDLM_NAMESPACE_CLIENT); + if (conn->oc_dev->obd_namespace == NULL) + RETURN(-ENOMEM); + + request = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_CONNECT, 1, &size, &tmp); + if (!request) + RETURN(-ENOMEM); + + request->rq_replen = lustre_msg_size(0, NULL); + + rc = ptlrpc_queue_wait(request); + if (rc) + GOTO(out, rc); + + mdc->mdc_client->cli_target_devno = request->rq_repmsg->target_id; + mdc->mdc_ldlm_client->cli_target_devno = + mdc->mdc_client->cli_target_devno; + EXIT; + out: + ptlrpc_free_req(request); + return rc; +} + +static int mdc_disconnect(struct obd_conn *conn) +{ + struct mdc_obd *mdc = mdc_conn2mdc(conn); + struct ptlrpc_request *request; + struct mds_body *body; + int rc, size = sizeof(*body); + ENTRY; + + ldlm_namespace_free(conn->oc_dev->obd_namespace); + request = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, + MDS_DISCONNECT, 1, &size, + NULL); + if (!request) + RETURN(-ENOMEM); + + body = lustre_msg_buf(request->rq_reqmsg, 0); + body->valid = conn->oc_id; + + request->rq_replen = lustre_msg_size(0, NULL); + 
+ rc = ptlrpc_queue_wait(request); + GOTO(out, rc); + out: + ptlrpc_free_req(request); + return rc; +} + +struct obd_ops mdc_obd_ops = { + o_setup: mdc_setup, + o_cleanup: mdc_cleanup, + o_connect: mdc_connect, + o_disconnect: mdc_disconnect, + o_iocontrol: mdc_ioctl }; static int __init ptlrpc_request_init(void) { - misc_register(&request_dev); - return 0; + return obd_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME); } static void __exit ptlrpc_request_exit(void) { - misc_deregister(&request_dev); + obd_unregister_type(LUSTRE_MDC_NAME); } -MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre MDS Request Tester v1.0"); +MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>"); +MODULE_DESCRIPTION("Lustre Metadata Client v1.0"); MODULE_LICENSE("GPL"); -EXPORT_SYMBOL(mdc_connect); +EXPORT_SYMBOL(mdc_getstatus); +EXPORT_SYMBOL(mdc_enqueue); EXPORT_SYMBOL(mdc_getattr); EXPORT_SYMBOL(mdc_create); EXPORT_SYMBOL(mdc_unlink); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index ab4f48ad316cb09b53e274007c71db07e1bf9d8f..e1fa112d5b0d4b3721df369e1b10c026f6b0759d 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -10,7 +10,8 @@ * This code is issued under the GNU General Public License. 
* See the file COPYING in this distribution * - * by Peter Braam <braam@clusterfs.com> + * by Peter Braam <braam@clusterfs.com> & + * Andreas Dilger <braam@clusterfs.com> * * This server is single threaded at present (but can easily be multi threaded) * @@ -21,6 +22,10 @@ #include <linux/module.h> #include <linux/lustre_mds.h> +#include <linux/lustre_dlm.h> +extern int mds_update_last_rcvd(struct mds_obd *mds, void *handle, + struct ptlrpc_request *req); +static int mds_cleanup(struct obd_device * obddev); static int mds_sendpage(struct ptlrpc_request *req, struct file *file, __u64 offset) @@ -52,7 +57,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, if (rc != PAGE_SIZE) GOTO(cleanup_buf, rc = -EIO); - bulk->b_xid = req->rq_reqmsg->xid; + bulk->b_xid = req->rq_xid; bulk->b_buf = buf; bulk->b_buflen = PAGE_SIZE; desc->b_portal = MDS_BULK_PORTAL; @@ -143,26 +148,63 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, return result; } -static -int mds_connect(struct ptlrpc_request *req) +static int mds_connect(struct ptlrpc_request *req) +{ + struct mds_obd *mds; + char *uuid; + int rc, i; + ENTRY; + + uuid = lustre_msg_buf(req->rq_reqmsg, 0); + if (req->rq_reqmsg->buflens[0] > 37) { + /* Invalid UUID */ + req->rq_status = -EINVAL; + RETURN(-EINVAL); + } + + i = obd_class_name2dev(uuid); + if (i == -1) { + req->rq_status = -ENODEV; + RETURN(-ENODEV); + } + + mds = &(obd_dev[i].u.mds); + if (mds != &(req->rq_obd->u.mds)) { + CERROR("device %d (%s) is not an mds\n", i, uuid); + req->rq_status = -ENODEV; + RETURN(-ENODEV); + } + + CDEBUG(D_INFO, "MDS connect from UUID '%s'\n", + ptlrpc_req_to_uuid(req)); + rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); + if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_CONNECT_PACK)) { + req->rq_status = -ENOMEM; + RETURN(-ENOMEM); + } + req->rq_repmsg->target_id = i; + + RETURN(0); +} + +/* FIXME: the error cases need fixing to avoid leaks */ +static int mds_getstatus(struct 
mds_obd *mds, struct ptlrpc_request *req) { struct mds_body *body; - struct mds_obd *mds = &req->rq_obd->u.mds; struct mds_client_info *mci; struct mds_client_data *mcd; int rc, size = sizeof(*body); ENTRY; - CDEBUG(D_INFO, "MDS connect from UUID '%s'\n", ptlrpc_req_to_uuid(req)); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_CONNECT_PACK)) { + if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) { CERROR("mds: out of memory for message: size=%d\n", size); req->rq_status = -ENOMEM; RETURN(0); } body = lustre_msg_buf(req->rq_reqmsg, 0); - mds_unpack_req_body(req); + mds_unpack_body(body); /* Anything we need to do here with the client's trans no or so? */ body = lustre_msg_buf(req->rq_repmsg, 0); @@ -182,6 +224,8 @@ int mds_connect(struct ptlrpc_request *req) req->rq_status = -ENOMEM; RETURN(0); } + memcpy(mcd->mcd_uuid, ptlrpc_req_to_uuid(req), + sizeof(mcd->mcd_uuid)); rc = mds_client_add(mds, mcd, -1); if (rc) { req->rq_status = rc; @@ -193,27 +237,155 @@ int mds_connect(struct ptlrpc_request *req) CDEBUG(D_INFO, "found existing data for UUID '%s' at #%d\n", mcd->mcd_uuid, mci->mci_off); } - /* mcd_last_xid is is stored in little endian on the disk and + /* mcd_last_xid is is stored in little endian on the disk and mds_pack_rep_body converts it to network order */ body->last_xid = le32_to_cpu(mcd->mcd_last_xid); mds_pack_rep_body(req); RETURN(0); } -static int mds_getattr(struct ptlrpc_request *req) +static int mds_disconnect(struct mds_obd *mds, struct ptlrpc_request *req) +{ + struct mds_body *body; + int rc; + ENTRY; + + body = lustre_msg_buf(req->rq_reqmsg, 0); + + rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); + if (rc) + RETURN(rc); + + RETURN(0); +} + +int mds_lock_callback(struct ldlm_lock *lock, struct ldlm_lock *new, + void *data, int data_len) { + ENTRY; + + if (new == NULL) { + /* Completion AST. 
Do nothing */ + RETURN(0); + } + + if (ldlm_cli_cancel(lock->l_client, lock) < 0) + LBUG(); + RETURN(0); +} + +static int mds_getattr_name(int offset, struct ptlrpc_request *req) +{ + struct mds_obd *mds = &req->rq_obd->u.mds; + struct mds_body *body; + struct dentry *de = NULL, *dchild = NULL; + struct inode *dir; + struct ldlm_lock *lock; + struct lustre_handle lockh; + char *name; + int namelen, flags, lock_mode, rc = 0; + __u64 res_id[3] = {0, 0, 0}; + ENTRY; + + if (strcmp(req->rq_obd->obd_type->typ_name, "mds") != 0) + LBUG(); + + if (req->rq_reqmsg->bufcount <= offset + 1) { + LBUG(); + GOTO(out_pre_de, rc = -EINVAL); + } + + body = lustre_msg_buf(req->rq_reqmsg, offset); + name = lustre_msg_buf(req->rq_reqmsg, offset + 1); + namelen = req->rq_reqmsg->buflens[offset + 1]; + /* requests were at offset 2, replies go back at 1 */ + if (offset) + offset = 1; + + de = mds_fid2dentry(mds, &body->fid1, NULL); + if (IS_ERR(de)) { + LBUG(); + GOTO(out_pre_de, rc = -ESTALE); + } + + dir = de->d_inode; + CDEBUG(D_INODE, "parent ino %ld\n", dir->i_ino); + + lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? 
LCK_CW : LCK_PW; + res_id[0] = dir->i_ino; + + rc = ldlm_local_lock_match(mds->mds_local_namespace, res_id, LDLM_PLAIN, + NULL, 0, lock_mode, &lockh); + if (rc == 0) { + rc = ldlm_cli_enqueue(mds->mds_ldlm_client, mds->mds_ldlm_conn, + NULL, mds->mds_local_namespace, NULL, + res_id, LDLM_PLAIN, NULL, 0, lock_mode, + &flags, (void *)mds_lock_callback, + NULL, 0, &lockh); + if (rc != ELDLM_OK) { + CERROR("lock enqueue: err: %d\n", rc); + GOTO(out_create_de, rc = -EIO); + } + } + ldlm_lock_dump((void *)(unsigned long)lockh.addr); + + down(&dir->i_sem); + dchild = lookup_one_len(name, de, namelen - 1); + if (IS_ERR(dchild)) { + CERROR("child lookup error %ld\n", PTR_ERR(dchild)); + up(&dir->i_sem); + LBUG(); + GOTO(out_create_dchild, rc = -ESTALE); + } + + if (dchild->d_inode) { + struct mds_body *body; + struct obdo *obdo; + struct inode *inode = dchild->d_inode; + CERROR("child exists (dir %ld, name %s, ino %ld)\n", + dir->i_ino, name, dchild->d_inode->i_ino); + + body = lustre_msg_buf(req->rq_repmsg, offset); + mds_pack_inode2fid(&body->fid1, inode); + mds_pack_inode2body(body, inode); + if (S_ISREG(inode->i_mode)) { + obdo = lustre_msg_buf(req->rq_repmsg, offset + 1); + mds_fs_get_obdo(mds, inode, obdo); + } + /* now a normal case for intent locking */ + rc = 0; + } else { + rc = -EEXIST; + } + + EXIT; +out_create_dchild: + l_dput(dchild); + up(&dir->i_sem); + lock = lustre_handle2object(&lockh); + ldlm_lock_decref(lock, lock_mode); +out_create_de: + l_dput(de); + out_pre_de: + req->rq_status = rc; + return 0; +} + + +static int mds_getattr(int offset, struct ptlrpc_request *req) +{ + struct mds_obd *mds = &req->rq_obd->u.mds; struct dentry *de; struct inode *inode; struct mds_body *body; - struct mds_obd *mds = &req->rq_obd->u.mds; int rc, size[2] = {sizeof(*body)}, bufcount = 1; ENTRY; - body = lustre_msg_buf(req->rq_reqmsg, 0); + body = lustre_msg_buf(req->rq_reqmsg, offset); de = mds_fid2dentry(mds, &body->fid1, NULL); if (IS_ERR(de)) { req->rq_status = 
-ENOENT; - RETURN(0); + RETURN(-ENOENT); } inode = de->d_inode; @@ -230,7 +402,7 @@ static int mds_getattr(struct ptlrpc_request *req) if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK)) { CERROR("mds: out of memory\n"); req->rq_status = -ENOMEM; - GOTO(out, 0); + GOTO(out, rc = -ENOMEM); } if (body->valid & OBD_MD_LINKNAME) { @@ -245,7 +417,7 @@ static int mds_getattr(struct ptlrpc_request *req) if (rc < 0) { req->rq_status = rc; CERROR("readlink failed: %d\n", rc); - GOTO(out, 0); + GOTO(out, rc); } } @@ -267,23 +439,22 @@ static int mds_getattr(struct ptlrpc_request *req) lustre_msg_buf(req->rq_repmsg, 1)); if (rc < 0) { req->rq_status = rc; - CERROR("mds_fs_get_objid failed: %d\n", rc); - GOTO(out, 0); + CERROR("mds_fs_get_obdo failed: %d\n", rc); + GOTO(out, rc); } } out: l_dput(de); - RETURN(0); + RETURN(rc); } -static -int mds_open(struct ptlrpc_request *req) +static int mds_open(struct ptlrpc_request *req) { - struct mds_obd *mds = &req->rq_obd->u.mds; struct dentry *de; struct mds_body *body; struct file *file; struct vfsmount *mnt; + struct mds_obd *mds = &req->rq_obd->u.mds; struct mds_client_info *mci; __u32 flags; struct list_head *tmp; @@ -301,7 +472,7 @@ int mds_open(struct ptlrpc_request *req) mci = mds_uuid_to_mci(mds, ptlrpc_req_to_uuid(req)); - if (!mci) { + if (!mci) { CERROR("mds: no mci!\n"); req->rq_status = -ENOTCONN; RETURN(0); @@ -310,18 +481,18 @@ int mds_open(struct ptlrpc_request *req) body = lustre_msg_buf(req->rq_reqmsg, 0); /* was this animal open already? 
*/ - list_for_each(tmp, &mci->mci_open_head) { + list_for_each(tmp, &mci->mci_open_head) { struct mds_file_data *fd; fd = list_entry(tmp, struct mds_file_data, mfd_list); - if (body->extra == fd->mfd_clientfd && - body->fid1.id == fd->mfd_file->f_dentry->d_inode->i_ino) { + if (body->extra == fd->mfd_clientfd && + body->fid1.id == fd->mfd_file->f_dentry->d_inode->i_ino) { CERROR("Re opening %Ld\n", body->fid1.id); RETURN(0); } } OBD_ALLOC(mfd, sizeof(*mfd)); - if (!mfd) { + if (!mfd) { CERROR("mds: out of memory\n"); req->rq_status = -ENOMEM; RETURN(0); @@ -333,6 +504,43 @@ int mds_open(struct ptlrpc_request *req) RETURN(0); } + /* check if this inode has seen a delayed object creation */ + if (req->rq_reqmsg->bufcount > 1) { + void *handle; + struct inode *inode = de->d_inode; + struct iattr iattr; + struct obdo *obdo; + int rc; + + obdo = lustre_msg_buf(req->rq_reqmsg, 1); + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = inode->i_mode; + + handle = mds_fs_start(mds, de->d_inode, MDS_FSOP_SETATTR); + if (!handle) { + req->rq_status = -ENOMEM; + RETURN(0); + } + + /* XXX error handling */ + rc = mds_fs_set_obdo(mds, inode, handle, obdo); + rc = mds_fs_setattr(mds, de, handle, &iattr); + if (!rc) + rc = mds_update_last_rcvd(mds, handle, req); + else { + req->rq_status = rc; + RETURN(0); + } + /* FIXME: need to return last_rcvd, last_committed */ + + /* FIXME: keep rc intact */ + rc = mds_fs_commit(mds, de->d_inode, handle); + if (rc) { + req->rq_status = rc; + RETURN(0); + } + } + flags = body->flags; file = dentry_open(de, mnt, flags & ~O_DIRECT); if (!file || IS_ERR(file)) { @@ -344,7 +552,7 @@ int mds_open(struct ptlrpc_request *req) file->private_data = mfd; mfd->mfd_file = file; mfd->mfd_clientfd = body->extra; - list_add(&mfd->mfd_list, &mci->mci_open_head); + list_add(&mfd->mfd_list, &mci->mci_open_head); body = lustre_msg_buf(req->rq_repmsg, 0); body->extra = (__u64) (unsigned long)file; @@ -357,6 +565,7 @@ int mds_close(struct ptlrpc_request *req) struct 
dentry *de; struct mds_body *body; struct file *file; + struct mds_obd *mds = &req->rq_obd->u.mds; struct vfsmount *mnt; struct mds_file_data *mfd; int rc; @@ -370,14 +579,14 @@ int mds_close(struct ptlrpc_request *req) } body = lustre_msg_buf(req->rq_reqmsg, 0); - de = mds_fid2dentry(&req->rq_obd->u.mds, &body->fid1, &mnt); + de = mds_fid2dentry(mds, &body->fid1, &mnt); if (IS_ERR(de)) { req->rq_status = -ENOENT; RETURN(0); } file = (struct file *)(unsigned long)body->extra; - if (!file->f_dentry) + if (!file->f_dentry) LBUG(); mfd = (struct mds_file_data *)file->private_data; list_del(&mfd->mfd_list); @@ -390,7 +599,7 @@ int mds_close(struct ptlrpc_request *req) RETURN(0); } -int mds_readpage(struct ptlrpc_request *req) +static int mds_readpage(struct mds_obd *mds, struct ptlrpc_request *req) { struct vfsmount *mnt; struct dentry *de; @@ -407,7 +616,7 @@ int mds_readpage(struct ptlrpc_request *req) } body = lustre_msg_buf(req->rq_reqmsg, 0); - de = mds_fid2dentry(&req->rq_obd->u.mds, &body->fid1, &mnt); + de = mds_fid2dentry(mds, &body->fid1, &mnt); if (IS_ERR(de)) { req->rq_status = PTR_ERR(de); RETURN(0); @@ -432,26 +641,26 @@ int mds_readpage(struct ptlrpc_request *req) RETURN(0); } -int mds_reint(struct ptlrpc_request *req) +int mds_reint(int offset, struct ptlrpc_request *req) { int rc; struct mds_update_record rec; - rc = mds_update_unpack(req, &rec); + rc = mds_update_unpack(req, offset, &rec); if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNPACK)) { CERROR("invalid record\n"); req->rq_status = -EINVAL; RETURN(0); } /* rc will be used to interrupt a for loop over multiple records */ - rc = mds_reint_rec(&rec, req); - return 0; + rc = mds_reint_rec(&rec, offset, req); + return rc; } int mds_handle(struct obd_device *dev, struct ptlrpc_service *svc, struct ptlrpc_request *req) { - struct mds_obd *mds = &req->rq_obd->u.mds; + struct mds_obd *mds; int rc; ENTRY; @@ -467,34 +676,67 @@ int mds_handle(struct obd_device *dev, struct ptlrpc_service *svc, GOTO(out, rc 
= -EINVAL); } + if (req->rq_reqmsg->opc != MDS_CONNECT) { + int id = req->rq_reqmsg->target_id; + struct obd_device *obddev; + if (id < 0 || id > MAX_OBD_DEVICES) + GOTO(out, rc = -ENODEV); + obddev = &obd_dev[id]; + if (strcmp(obddev->obd_type->typ_name, "mds") != 0) + GOTO(out, rc = -EINVAL); + mds = &obddev->u.mds; + req->rq_obd = obddev; + } + switch (req->rq_reqmsg->opc) { case MDS_CONNECT: - CDEBUG(D_INODE, "getattr\n"); + CDEBUG(D_INODE, "connect\n"); OBD_FAIL_RETURN(OBD_FAIL_MDS_CONNECT_NET, 0); rc = mds_connect(req); break; + case MDS_DISCONNECT: + CDEBUG(D_INODE, "disconnect\n"); + OBD_FAIL_RETURN(OBD_FAIL_MDS_DISCONNECT_NET, 0); + rc = mds_disconnect(mds, req); + break; + + case MDS_GETSTATUS: + CDEBUG(D_INODE, "getstatus\n"); + OBD_FAIL_RETURN(OBD_FAIL_MDS_GETSTATUS_NET, 0); + rc = mds_getstatus(mds, req); + break; + case MDS_GETATTR: CDEBUG(D_INODE, "getattr\n"); OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NET, 0); - rc = mds_getattr(req); + rc = mds_getattr(0, req); break; case MDS_READPAGE: CDEBUG(D_INODE, "readpage\n"); OBD_FAIL_RETURN(OBD_FAIL_MDS_READPAGE_NET, 0); - rc = mds_readpage(req); + rc = mds_readpage(mds, req); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) return 0; break; - case MDS_REINT: + case MDS_REINT: { + int size = sizeof(struct mds_body); CDEBUG(D_INODE, "reint\n"); OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0); - rc = mds_reint(req); + + rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, + &req->rq_repmsg); + if (rc) { + rc = req->rq_status = -ENOMEM; + break; + } + rc = mds_reint(0, req); OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET_REP, 0); break; + } case MDS_OPEN: CDEBUG(D_INODE, "open\n"); @@ -520,15 +762,16 @@ out: * a difference on a per-client basis, because last_rcvd is global * and we are not supposed to allow transactions while in recovery. 
*/ - req->rq_repmsg->last_rcvd = HTON__u64(mds->mds_last_rcvd); - req->rq_repmsg->last_committed = HTON__u64(mds->mds_last_committed); - CDEBUG(D_INFO, "last_rcvd %Lu, last_committed %Lu, xid %d\n", - (unsigned long long)mds->mds_last_rcvd, - (unsigned long long)mds->mds_last_committed, - cpu_to_le32(req->rq_reqmsg->xid)); if (rc) { ptlrpc_error(svc, req); } else { + req->rq_repmsg->last_rcvd = HTON__u64(mds->mds_last_rcvd); + req->rq_repmsg->last_committed = + HTON__u64(mds->mds_last_committed); + CDEBUG(D_INFO, "last_rcvd %Lu, last_committed %Lu, xid %d\n", + (unsigned long long)mds->mds_last_rcvd, + (unsigned long long)mds->mds_last_committed, + cpu_to_le32(req->rq_xid)); CDEBUG(D_NET, "sending reply\n"); ptlrpc_reply(svc, req); } @@ -585,6 +828,7 @@ static int mds_recover(struct obd_device *obddev) return rc; } + /* mount the file system (secretly) */ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) { @@ -633,6 +877,38 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) GOTO(err_svc, rc); } + rc = -ENOENT; + mds->mds_ldlm_conn = ptlrpc_uuid_to_connection("self"); + if (!mds->mds_ldlm_conn) { + mds_cleanup(obddev); + GOTO(err_thread, rc); + } + + obddev->obd_namespace = ldlm_namespace_new(LDLM_NAMESPACE_SERVER); + if (obddev->obd_namespace == NULL) { + LBUG(); + mds_cleanup(obddev); + GOTO(err_thread, rc); + } + + mds->mds_local_namespace = ldlm_namespace_new(LDLM_NAMESPACE_CLIENT); + if (mds->mds_local_namespace == NULL) { + LBUG(); + mds_cleanup(obddev); + GOTO(err_thread, rc); + } + + OBD_ALLOC(mds->mds_ldlm_client, sizeof(*mds->mds_ldlm_client)); + if (mds->mds_ldlm_client == NULL) { + LBUG(); + mds_cleanup(obddev); + GOTO(err_thread, rc); + } + ptlrpc_init_client(NULL, NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, + mds->mds_ldlm_client); + mds->mds_ldlm_client->cli_target_devno = obddev->obd_minor; + mds->mds_ldlm_client->cli_name = "mds ldlm"; + rc = mds_recover(obddev); if (rc) GOTO(err_thread, rc); @@ 
-696,6 +972,12 @@ static int mds_cleanup(struct obd_device * obddev) mntput(mds->mds_vfsmnt); mds->mds_sb = 0; kfree(mds->mds_fstype); + + ldlm_namespace_free(mds->mds_local_namespace); + ldlm_namespace_free(obddev->obd_namespace); + + OBD_FREE(mds->mds_ldlm_client, sizeof(*mds->mds_ldlm_client)); + lock_kernel(); #ifdef CONFIG_DEV_RDONLY dev_clear_rdonly(2); @@ -714,16 +996,21 @@ static struct obd_ops mds_obd_ops = { static int __init mds_init(void) { + inter_module_register("mds_reint", THIS_MODULE, &mds_reint); + inter_module_register("mds_getattr_name", THIS_MODULE, + &mds_getattr_name); obd_register_type(&mds_obd_ops, LUSTRE_MDS_NAME); return 0; } static void __exit mds_exit(void) { + inter_module_unregister("mds_reint"); + inter_module_unregister("mds_getattr_name"); obd_unregister_type(LUSTRE_MDS_NAME); } -MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>"); +MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>"); MODULE_DESCRIPTION("Lustre Metadata Server (MDS) v0.01"); MODULE_LICENSE("GPL"); diff --git a/lustre/mds/mds_ext3.c b/lustre/mds/mds_ext3.c index fa1223b8a71bb37b0714779ce1bca040de1416c3..8c5d9bf7858fa89a69015392e9812228484a0c71 100644 --- a/lustre/mds/mds_ext3.c +++ b/lustre/mds/mds_ext3.c @@ -183,6 +183,7 @@ static void mds_ext3_delete_inode(struct inode *inode) mds_ext3_fs_ops.cl_delete_inode(inode); } + static void mds_ext3_callback_status(void *jcb, int error) { struct mds_cb_data *mcb = (struct mds_cb_data *)jcb; diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 927b04108cfd83a8f6da22b622020e639d5e1493..a52c02f91dcfbead589dabf9a43efc5722a10ed7 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -24,6 +24,7 @@ #include <linux/lustre_lib.h> #include <linux/lustre_idl.h> #include <linux/lustre_mds.h> +#include <linux/lustre_dlm.h> #include <linux/obd_class.h> struct mds_client_info *mds_uuid_to_mci(struct mds_obd *mds, __u8 *uuid) @@ -71,7 +72,7 @@ int mds_update_last_rcvd(struct mds_obd *mds, void *handle, 
req->rq_repmsg->transno = HTON__u64(mds->mds_last_rcvd); mci->mci_mcd->mcd_last_rcvd = cpu_to_le64(mds->mds_last_rcvd); mci->mci_mcd->mcd_mount_count = cpu_to_le64(mds->mds_mount_count); - mci->mci_mcd->mcd_last_xid = cpu_to_le32(req->rq_reqmsg->xid); + mci->mci_mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); mds_fs_set_last_rcvd(mds, handle); push_ctxt(&saved, &mds->mds_ctxt); @@ -93,7 +94,7 @@ int mds_update_last_rcvd(struct mds_obd *mds, void *handle, return rc; } -static int mds_reint_setattr(struct mds_update_record *rec, +static int mds_reint_setattr(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { struct mds_obd *mds = &req->rq_obd->u.mds; @@ -134,7 +135,7 @@ out_setattr: return(0); } -static int mds_reint_recreate(struct mds_update_record *rec, +static int mds_reint_recreate(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { struct dentry *de = NULL; @@ -167,7 +168,7 @@ static int mds_reint_recreate(struct mds_update_record *rec, body = lustre_msg_buf(req->rq_repmsg, 0); body->ino = dchild->d_inode->i_ino; body->generation = dchild->d_inode->i_generation; - } else { + } else { CERROR("child doesn't exist (dir %ld, name %s)\n", dir->i_ino, rec->ur_name); rc = -ENOENT; @@ -183,7 +184,7 @@ out_create_de: return 0; } -static int mds_reint_create(struct mds_update_record *rec, +static int mds_reint_create(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { struct dentry *de = NULL; @@ -191,19 +192,46 @@ static int mds_reint_create(struct mds_update_record *rec, struct dentry *dchild = NULL; struct inode *dir; void *handle; - int rc = 0, type = rec->ur_mode & S_IFMT; - int err; + struct ldlm_lock *lock; + struct lustre_handle lockh; + int rc = 0, err, flags, lock_mode, type = rec->ur_mode & S_IFMT; + __u64 res_id[3] = {0,0,0}; ENTRY; + /* requests were at offset 2, replies go back at 1 */ + if (offset) + offset = 1; + + if (strcmp(req->rq_obd->obd_type->typ_name, "mds") != 0) + LBUG(); + de = 
mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) { LBUG(); GOTO(out_create_de, rc = -ESTALE); } dir = de->d_inode; - CDEBUG(D_INODE, "parent ino %ld name %s mode %o\n", + CDEBUG(D_INODE, "parent ino %ld name %s mode %o\n", dir->i_ino, rec->ur_name, rec->ur_mode); + lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_CW : LCK_PW; + res_id[0] = dir->i_ino; + + rc = ldlm_local_lock_match(mds->mds_local_namespace, res_id, LDLM_PLAIN, + NULL, 0, lock_mode, &lockh); + if (rc == 0) { + rc = ldlm_cli_enqueue(mds->mds_ldlm_client, mds->mds_ldlm_conn, + NULL, mds->mds_local_namespace, NULL, + res_id, LDLM_PLAIN, NULL, 0, lock_mode, + &flags, (void *)mds_lock_callback, NULL, + 0, &lockh); + if (rc != ELDLM_OK) { + CERROR("lock enqueue: err: %d\n", rc); + GOTO(out_create_de, rc = -EIO); + } + } + ldlm_lock_dump((void *)(unsigned long)lockh.addr); + down(&dir->i_sem); dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); if (IS_ERR(dchild)) { @@ -214,9 +242,20 @@ static int mds_reint_create(struct mds_update_record *rec, } if (dchild->d_inode) { + struct mds_body *body; + struct obdo *obdo; + struct inode *inode = dchild->d_inode; CERROR("child exists (dir %ld, name %s, ino %ld)\n", dir->i_ino, rec->ur_name, dchild->d_inode->i_ino); - LBUG(); + + body = lustre_msg_buf(req->rq_repmsg, offset); + mds_pack_inode2fid(&body->fid1, inode); + mds_pack_inode2body(body, inode); + if (S_ISREG(inode->i_mode)) { + obdo = lustre_msg_buf(req->rq_repmsg, offset + 1); + mds_fs_get_obdo(mds, inode, obdo); + } + /* now a normal case for intent locking */ GOTO(out_create_dchild, rc = -EEXIST); } @@ -303,11 +342,11 @@ static int mds_reint_create(struct mds_update_record *rec, /* XXX should we abort here in case of error? 
*/ } - body = lustre_msg_buf(req->rq_repmsg, 0); + body = lustre_msg_buf(req->rq_repmsg, offset); body->ino = inode->i_ino; body->generation = inode->i_generation; } - + EXIT; out_create_commit: err = mds_fs_commit(mds, dir, handle); if (err) { @@ -318,13 +357,15 @@ out_create_commit: out_create_dchild: l_dput(dchild); up(&dir->i_sem); + lock = lustre_handle2object(&lockh); + ldlm_lock_decref(lock, lock_mode); out_create_de: l_dput(de); req->rq_status = rc; return 0; } -static int mds_reint_unlink(struct mds_update_record *rec, +static int mds_reint_unlink(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { struct dentry *de = NULL; @@ -410,7 +451,7 @@ out_unlink: return 0; } -static int mds_reint_link(struct mds_update_record *rec, +static int mds_reint_link(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { struct dentry *de_src = NULL; @@ -477,7 +518,7 @@ out_link: return 0; } -static int mds_reint_rename(struct mds_update_record *rec, +static int mds_reint_rename(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { struct dentry *de_srcdir = NULL; @@ -500,6 +541,8 @@ static int mds_reint_rename(struct mds_update_record *rec, GOTO(out_rename_srcdir, rc = -ESTALE); } +#warning FIXME: This needs locking attention + de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1); if (IS_ERR(de_old)) { CERROR("old child lookup error %ld\n", PTR_ERR(de_old)); @@ -545,7 +588,8 @@ out_rename: return 0; } -typedef int (*mds_reinter)(struct mds_update_record *, struct ptlrpc_request*); +typedef int (*mds_reinter)(struct mds_update_record *, int offset, + struct ptlrpc_request *); static mds_reinter reinters[REINT_MAX+1] = { [REINT_SETATTR] mds_reint_setattr, @@ -556,9 +600,10 @@ static mds_reinter reinters[REINT_MAX+1] = { [REINT_RECREATE] mds_reint_recreate, }; -int mds_reint_rec(struct mds_update_record *rec, struct ptlrpc_request *req) +int mds_reint_rec(struct mds_update_record *rec, int offset, + 
struct ptlrpc_request *req) { - int rc, size = sizeof(struct mds_body); + int rc; if (rec->ur_opcode < 1 || rec->ur_opcode > REINT_MAX) { CERROR("opcode %d not valid\n", rec->ur_opcode); @@ -566,14 +611,7 @@ int mds_reint_rec(struct mds_update_record *rec, struct ptlrpc_request *req) RETURN(rc); } - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("mds: out of memory\n"); - rc = req->rq_status = -ENOMEM; - RETURN(rc); - } - - rc = reinters[rec->ur_opcode](rec, req); + rc = reinters[rec->ur_opcode](rec, offset, req); return rc; } diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index a747e854ec5b7b4c00544f6d1f1eed3c80720642..82300d15ff8e2c0b0537ac7a95d4ef40593352d7 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -87,14 +87,14 @@ static int obd_class_release(struct inode * inode, struct file * file) RETURN(0); } -static int obd_class_name2dev(char *name) +int obd_class_name2dev(char *name) { int res = -1; int i; for (i=0; i < MAX_OBD_DEVICES; i++) { struct obd_device *obd = &obd_dev[i]; - if (obd->obd_name && strcmp(name, obd->obd_name) == 0) { + if (obd->obd_name && strncmp(name, obd->obd_name, 37) == 0) { res = i; return res; } @@ -611,7 +611,7 @@ EXPORT_SYMBOL(obd_register_type); EXPORT_SYMBOL(obd_unregister_type); EXPORT_SYMBOL(obd_dev); - +EXPORT_SYMBOL(obd_class_name2dev); EXPORT_SYMBOL(gen_connect); EXPORT_SYMBOL(gen_client); EXPORT_SYMBOL(gen_cleanup); @@ -630,7 +630,7 @@ static int __init init_obdclass(void) int err; int i; - printk(KERN_INFO "OBD class driver v0.01, braam@stelias.com\n"); + printk(KERN_INFO "OBD class driver v0.9, info@clusterfs.com\n"); INIT_LIST_HEAD(&obd_types); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index b61750f2bb5123b640d1677cd5e9e57931af91df..55526f00b3ab21ffb7d29041dc8b3556ce482919 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -43,18 +43,21 @@ static void osc_con2dlmcl(struct obd_conn 
*conn, struct ptlrpc_client **cl, static int osc_connect(struct obd_conn *conn) { + struct osc_obd *osc = &conn->oc_dev->u.osc; struct ptlrpc_request *request; struct ptlrpc_client *cl; struct ptlrpc_connection *connection; struct ost_body *body; - int rc, size = sizeof(*body); + char *tmp = osc->osc_target_uuid; + int rc, size = sizeof(osc->osc_target_uuid); ENTRY; osc_con2cl(conn, &cl, &connection); - request = ptlrpc_prep_req(cl, connection, OST_CONNECT, 0, NULL, NULL); + request = ptlrpc_prep_req(cl, connection, OST_CONNECT, 1, &size, &tmp); if (!request) RETURN(-ENOMEM); + size = sizeof(*body); request->rq_replen = lustre_msg_size(1, &size); rc = ptlrpc_queue_wait(request); @@ -67,6 +70,10 @@ static int osc_connect(struct obd_conn *conn) body = lustre_msg_buf(request->rq_repmsg, 0); CDEBUG(D_INODE, "received connid %d\n", body->connid); + /* This might be redundant. */ + cl->cli_target_devno = request->rq_repmsg->target_id; + osc->osc_ldlm_client->cli_target_devno = cl->cli_target_devno; + /* XXX: Make this a handle */ conn->oc_id = body->connid; EXIT; out: @@ -92,7 +99,7 @@ static int osc_disconnect(struct obd_conn *conn) body = lustre_msg_buf(request->rq_reqmsg, 0); body->connid = conn->oc_id; - request->rq_replen = lustre_msg_size(1, &size); + request->rq_replen = lustre_msg_size(0, NULL); rc = ptlrpc_queue_wait(request); GOTO(out, rc); @@ -264,7 +271,6 @@ static int osc_create(struct obd_conn *conn, struct obdo *oa) body = lustre_msg_buf(request->rq_reqmsg, 0); memcpy(&body->oa, oa, sizeof(*oa)); - body->oa.o_valid = ~0; body->connid = conn->oc_id; request->rq_replen = lustre_msg_size(1, &size); @@ -638,14 +644,16 @@ static int osc_brw(int rw, struct obd_conn *conn, obd_count num_oa, offset, flags, (bulk_callback_t)callback); } -static int osc_enqueue(struct obd_conn *oconn, struct ldlm_namespace *ns, - struct ldlm_handle *parent_lock, __u64 *res_id, - __u32 type, struct ldlm_extent *extent, __u32 mode, - int *flags, void *data, int datalen, - struct 
ldlm_handle *lockh) +static int osc_enqueue(struct obd_conn *oconn, + struct lustre_handle *parent_lock, __u64 *res_id, + __u32 type, void *extentp, int extent_len, __u32 mode, + int *flags, void *callback, void *data, int datalen, + struct lustre_handle *lockh) { + struct obd_device *obddev = oconn->oc_dev; struct ptlrpc_connection *conn; struct ptlrpc_client *cl; + struct ldlm_extent *extent = extentp; int rc; __u32 mode2; @@ -656,7 +664,8 @@ static int osc_enqueue(struct obd_conn *oconn, struct ldlm_namespace *ns, /* Next, search for already existing extent locks that will cover us */ osc_con2dlmcl(oconn, &cl, &conn); - rc = ldlm_local_lock_match(ns, res_id, type, extent, mode, lockh); + rc = ldlm_local_lock_match(obddev->obd_namespace, res_id, type, extent, + sizeof(extent), mode, lockh); if (rc == 1) { /* We already have a lock, and it's referenced */ return 0; @@ -671,10 +680,11 @@ static int osc_enqueue(struct obd_conn *oconn, struct ldlm_namespace *ns, else mode2 = LCK_PW; - rc = ldlm_local_lock_match(ns, res_id, type, extent, mode2, lockh); + rc = ldlm_local_lock_match(obddev->obd_namespace, res_id, type, extent, + sizeof(extent), mode2, lockh); if (rc == 1) { int flags; - struct ldlm_lock *lock = ldlm_handle2object(lockh); + struct ldlm_lock *lock = lustre_handle2object(lockh); /* FIXME: This is not incredibly elegant, but it might * be more elegant than adding another parameter to * lock_match. I want a second opinion. 
*/ @@ -691,18 +701,19 @@ static int osc_enqueue(struct obd_conn *oconn, struct ldlm_namespace *ns, return rc; } - rc = ldlm_cli_enqueue(cl, conn, ns, parent_lock, res_id, type, - extent, mode, flags, data, datalen, lockh); + rc = ldlm_cli_enqueue(cl, conn, NULL, obddev->obd_namespace, + parent_lock, res_id, type, extent, sizeof(extent), + mode, flags, callback, data, datalen, lockh); return rc; } static int osc_cancel(struct obd_conn *oconn, __u32 mode, - struct ldlm_handle *lockh) + struct lustre_handle *lockh) { struct ldlm_lock *lock; ENTRY; - lock = ldlm_handle2object(lockh); + lock = lustre_handle2object(lockh); ldlm_lock_decref(lock, mode); RETURN(0); @@ -710,17 +721,47 @@ static int osc_cancel(struct obd_conn *oconn, __u32 mode, static int osc_setup(struct obd_device *obddev, obd_count len, void *buf) { + struct obd_ioctl_data* data = buf; struct osc_obd *osc = &obddev->u.osc; + char server_uuid[37]; int rc; ENTRY; - osc->osc_conn = ptlrpc_uuid_to_connection("ost"); - if (!osc->osc_conn) + if (data->ioc_inllen1 < 1) { + CERROR("osc setup requires a TARGET UUID\n"); + RETURN(-EINVAL); + } + + if (data->ioc_inllen1 > 37) { + CERROR("osc TARGET UUID must be less than 38 characters\n"); + RETURN(-EINVAL); + } + + if (data->ioc_inllen2 < 1) { + CERROR("osc setup requires a SERVER UUID\n"); RETURN(-EINVAL); + } + + if (data->ioc_inllen2 > 37) { + CERROR("osc SERVER UUID must be less than 38 characters\n"); + RETURN(-EINVAL); + } + + memcpy(osc->osc_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1); + memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2, + sizeof(server_uuid))); + + osc->osc_conn = ptlrpc_uuid_to_connection(server_uuid); + if (!osc->osc_conn) + RETURN(-ENOENT); + + obddev->obd_namespace = ldlm_namespace_new(LDLM_NAMESPACE_CLIENT); + if (obddev->obd_namespace == NULL) + GOTO(out_conn, rc = -ENOMEM); OBD_ALLOC(osc->osc_client, sizeof(*osc->osc_client)); if (osc->osc_client == NULL) - GOTO(out_conn, rc = -ENOMEM); + GOTO(out_ns, rc = 
-ENOMEM); OBD_ALLOC(osc->osc_ldlm_client, sizeof(*osc->osc_ldlm_client)); if (osc->osc_ldlm_client == NULL) @@ -738,6 +779,8 @@ static int osc_setup(struct obd_device *obddev, obd_count len, void *buf) out_client: OBD_FREE(osc->osc_client, sizeof(*osc->osc_client)); + out_ns: + ldlm_namespace_free(obddev->obd_namespace); out_conn: ptlrpc_put_connection(osc->osc_conn); return rc; @@ -747,6 +790,8 @@ static int osc_cleanup(struct obd_device * obddev) { struct osc_obd *osc = &obddev->u.osc; + ldlm_namespace_free(obddev->obd_namespace); + ptlrpc_cleanup_client(osc->osc_client); OBD_FREE(osc->osc_client, sizeof(*osc->osc_client)); ptlrpc_cleanup_client(osc->osc_ldlm_client); @@ -813,8 +858,7 @@ struct obd_ops osc_obd_ops = { static int __init osc_init(void) { - obd_register_type(&osc_obd_ops, LUSTRE_OSC_NAME); - return 0; + return obd_register_type(&osc_obd_ops, LUSTRE_OSC_NAME); } static void __exit osc_exit(void) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 6040e5c82045dfe2bc874728ce085ae7dad96170..98e087494da14ae605b78960f9f82ea2e9e4f2db 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -36,6 +36,7 @@ #include <linux/module.h> #include <linux/obd_ost.h> #include <linux/lustre_net.h> +#include <linux/lustre_dlm.h> static int ost_destroy(struct ost_obd *ost, struct ptlrpc_request *req) { @@ -183,19 +184,36 @@ static int ost_setattr(struct ost_obd *ost, struct ptlrpc_request *req) RETURN(0); } -static int ost_connect(struct ost_obd *ost, struct ptlrpc_request *req) +static int ost_connect(struct ptlrpc_request *req) { struct obd_conn conn; struct ost_body *body; - int rc, size = sizeof(*body); + struct ost_obd *ost; + char *uuid; + int rc, size = sizeof(*body), i; ENTRY; + uuid = lustre_msg_buf(req->rq_reqmsg, 0); + if (req->rq_reqmsg->buflens[0] > 37) { + /* Invalid UUID */ + req->rq_status = -EINVAL; + RETURN(0); + } + + i = obd_class_name2dev(uuid); + if (i == -1) { + req->rq_status = -ENODEV; + RETURN(0); + } + + ost = 
&(obd_dev[i].u.ost); conn.oc_dev = ost->ost_tgt; rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); + req->rq_repmsg->target_id = i; req->rq_status = obd_connect(&conn); CDEBUG(D_IOCTL, "rep buffer %p, id %d\n", req->rq_repmsg, conn.oc_id); @@ -208,14 +226,14 @@ static int ost_disconnect(struct ost_obd *ost, struct ptlrpc_request *req) { struct obd_conn conn; struct ost_body *body; - int rc, size = sizeof(*body); + int rc; ENTRY; body = lustre_msg_buf(req->rq_reqmsg, 0); conn.oc_id = body->connid; conn.oc_dev = ost->ost_tgt; - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); + rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); @@ -482,7 +500,7 @@ static int ost_handle(struct obd_device *obddev, struct ptlrpc_service *svc, struct ptlrpc_request *req) { int rc; - struct ost_obd *ost = &obddev->u.ost; + struct ost_obd *ost; ENTRY; rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen); @@ -497,11 +515,23 @@ static int ost_handle(struct obd_device *obddev, struct ptlrpc_service *svc, GOTO(out, rc = -EINVAL); } + if (req->rq_reqmsg->opc != OST_CONNECT) { + int id = req->rq_reqmsg->target_id; + struct obd_device *obddev; + if (id < 0 || id > MAX_OBD_DEVICES) + GOTO(out, rc = -ENODEV); + obddev = &obd_dev[id]; + if (strcmp(obddev->obd_type->typ_name, "ost") != 0) + GOTO(out, rc = -EINVAL); + ost = &obddev->u.ost; + req->rq_obd = obddev; + } + switch (req->rq_reqmsg->opc) { case OST_CONNECT: CDEBUG(D_INODE, "connect\n"); OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0); - rc = ost_connect(ost, req); + rc = ost_connect(req); break; case OST_DISCONNECT: CDEBUG(D_INODE, "disconnect\n"); @@ -609,6 +639,10 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) GOTO(error_dec, err = -EINVAL); } + obddev->obd_namespace = ldlm_namespace_new(LDLM_NAMESPACE_SERVER); + if (obddev->obd_namespace == NULL) + LBUG(); + ost->ost_service = ptlrpc_init_svc(128 * 1024, 
OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, "self", ost_handle); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 450ade19f772b4c32c5be2d4a917d65feea58a26..07b3f6d546a2f4ec5e8f0d5cd5f92dbf708a80f5 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -62,8 +62,10 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid) } c = ptlrpc_get_connection(&peer); - if (c) + if (c) { + memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid)); c->c_epoch++; + } return c; } @@ -108,6 +110,7 @@ struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc) ptl_set_inv_handle(&bulk->b_me_h); list_add_tail(&bulk->b_link, &desc->b_page_list); desc->b_page_count++; + atomic_inc(&desc->b_pages_remaining); } return bulk; } @@ -135,7 +138,8 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk) { - if (!bulk) { + ENTRY; + if (bulk == NULL) { EXIT; return; } @@ -143,6 +147,7 @@ void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk) list_del(&bulk->b_link); bulk->b_desc->b_page_count--; OBD_FREE(bulk, sizeof(*bulk)); + EXIT; } struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, @@ -163,8 +168,8 @@ struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, rc = lustre_pack_msg(count, lengths, bufs, &request->rq_reqlen, &request->rq_reqmsg); if (rc) { - OBD_FREE(request, sizeof(*request)); CERROR("cannot pack request %d\n", rc); + OBD_FREE(request, sizeof(*request)); RETURN(NULL); } @@ -175,13 +180,15 @@ struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, request->rq_reqmsg->token = conn->c_remote_token; request->rq_reqmsg->opc = HTON__u32(opcode); request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST); + request->rq_reqmsg->target_id = HTON__u32(cl->cli_target_devno); + INIT_LIST_HEAD(&request->rq_list); /* this will be dec()d once in req_finished, once in free_committed */ atomic_set(&request->rq_refcount, 2); spin_lock(&conn->c_lock); - 
request->rq_reqmsg->xid = HTON__u32(++conn->c_xid_out); + request->rq_xid = HTON__u32(++conn->c_xid_out); request->rq_xid = conn->c_xid_out; spin_unlock(&conn->c_lock); @@ -458,7 +465,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) } resend: req->rq_time = CURRENT_TIME; - req->rq_timeout = 30; + req->rq_timeout = 3; rc = ptl_send_rpc(req); if (rc) { CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc); @@ -502,7 +509,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) CERROR("unpack_rep failed: %d\n", rc); GOTO(out, rc); } - CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid); + CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid); if (req->rq_repmsg->status == 0) CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg, req->rq_replen, req->rq_repmsg->status); @@ -559,7 +566,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) GOTO(out, rc); } - CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid); + CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid); if (req->rq_repmsg->status == 0) CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg, req->rq_replen, req->rq_repmsg->status); diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 2723c153c16e3290945c40863476f1549926cb7e..71dc33721c23800ab47694690647672ce05ab4cd 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -89,6 +89,8 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c) spin_unlock(&conn_lock); rc = 1; } + if (atomic_read(&c->c_refcount) < 0) + CERROR("refcount < 0!\n"); RETURN(rc); } diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index e1a17263be69698d46587aa92a8419e42f28251f..629840c06e2f2e68009e0d9d5c8dcf30dc764046 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -69,13 +69,18 @@ static int reply_out_callback(ptl_event_t *ev) */ static int reply_in_callback(ptl_event_t *ev) { - struct ptlrpc_request *rpc = ev->mem_desc.user_ptr; + struct ptlrpc_request *req = ev->mem_desc.user_ptr; ENTRY; + if (req->rq_xid != 
ev->match_bits) { + CERROR("Reply packet for wrong request\n"); + LBUG(); + } + if (ev->type == PTL_EVENT_PUT) { - rpc->rq_repmsg = ev->mem_desc.start + ev->offset; + req->rq_repmsg = ev->mem_desc.start + ev->offset; barrier(); - wake_up_interruptible(&rpc->rq_wait_for_rep); + wake_up_interruptible(&req->rq_wait_for_rep); } else { // XXX make sure we understand all events, including ACK's CERROR("Unknown event %d\n", ev->type); @@ -88,40 +93,11 @@ static int reply_in_callback(ptl_event_t *ev) int request_in_callback(ptl_event_t *ev) { struct ptlrpc_service *service = ev->mem_desc.user_ptr; - int index; if (ev->rlength != ev->mlength) CERROR("Warning: Possibly truncated rpc (%d/%d)\n", ev->mlength, ev->rlength); - spin_lock(&service->srv_lock); - for (index = 0; index < service->srv_ring_length; index++) - if ( service->srv_buf[index] == ev->mem_desc.start) - break; - - if (index == service->srv_ring_length) - LBUG(); - - service->srv_ref_count[index]++; - - if (ptl_is_valid_handle(&ev->unlinked_me)) { - int idx; - - for (idx = 0; idx < service->srv_ring_length; idx++) - if (service->srv_me_h[idx].handle_idx == - ev->unlinked_me.handle_idx) - break; - if (idx == service->srv_ring_length) - LBUG(); - - CDEBUG(D_NET, "unlinked %d\n", idx); - ptl_set_inv_handle(&(service->srv_me_h[idx])); - - if (service->srv_ref_count[idx] == 0) - ptlrpc_link_svc_me(service, idx); - } - - spin_unlock(&service->srv_lock); if (ev->type == PTL_EVENT_PUT) wake_up(&service->srv_waitq); else @@ -142,7 +118,7 @@ static int bulk_source_callback(ptl_event_t *ev) CDEBUG(D_NET, "got ACK event\n"); if (bulk->b_cb != NULL) bulk->b_cb(bulk); - if (atomic_dec_and_test(&desc->b_finished_count)) { + if (atomic_dec_and_test(&desc->b_pages_remaining)) { desc->b_flags |= PTL_BULK_FL_SENT; wake_up_interruptible(&desc->b_waitq); if (desc->b_cb != NULL) @@ -167,7 +143,7 @@ static int bulk_sink_callback(ptl_event_t *ev) CERROR("bulkbuf != mem_desc -- why?\n"); if (bulk->b_cb != NULL) bulk->b_cb(bulk); - if 
(atomic_dec_and_test(&desc->b_finished_count)) { + if (atomic_dec_and_test(&desc->b_pages_remaining)) { desc->b_flags |= PTL_BULK_FL_RCVD; wake_up_interruptible(&desc->b_waitq); if (desc->b_cb != NULL) diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 3330dfba4d58880d6d1dabb58d85f5f7f4062585..bfbd55a04130a830c2926dd5cf96b44819eea974 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -91,7 +91,7 @@ static int ptl_send_buf(struct ptlrpc_request *request, CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %Ld\n", request->rq_req_md.length, portal, request->rq_xid); - rc = PtlPut(md_h, ack, remote_id, portal, 0, request->rq_reqmsg->xid, + rc = PtlPut(md_h, ack, remote_id, portal, 0, request->rq_xid, 0, 0); if (rc != PTL_OK) { CERROR("PtlPut(%Lu, %d, %Ld) failed: %d\n", remote_id.nid, @@ -109,18 +109,14 @@ int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *desc) ptl_process_id_t remote_id; ENTRY; - atomic_set(&desc->b_finished_count, desc->b_page_count); - list_for_each_safe(tmp, next, &desc->b_page_list) { - /* only request an ACK for the last page */ - int ack = (next == &desc->b_page_list); struct ptlrpc_bulk_page *bulk; bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link); bulk->b_md.start = bulk->b_buf; bulk->b_md.length = bulk->b_buflen; bulk->b_md.eventq = bulk_source_eq; - bulk->b_md.threshold = 1 + ack; /* SENT and (if last) ACK */ + bulk->b_md.threshold = 2; /* SENT and ACK */ bulk->b_md.options = PTL_MD_OP_PUT; bulk->b_md.user_ptr = bulk; @@ -138,8 +134,8 @@ int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *desc) CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %d\n", bulk->b_md.length, desc->b_portal, bulk->b_xid); - rc = PtlPut(bulk->b_md_h, (ack ? 
PTL_ACK_REQ : PTL_NOACK_REQ), - remote_id, desc->b_portal, 0, bulk->b_xid, 0, 0); + rc = PtlPut(bulk->b_md_h, PTL_ACK_REQ, remote_id, + desc->b_portal, 0, bulk->b_xid, 0, 0); if (rc != PTL_OK) { CERROR("PtlPut(%Lu, %d, %d) failed: %d\n", remote_id.nid, desc->b_portal, bulk->b_xid, rc); @@ -158,8 +154,6 @@ int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *desc) int rc; ENTRY; - atomic_set(&desc->b_finished_count, desc->b_page_count); - list_for_each_safe(tmp, next, &desc->b_page_list) { struct ptlrpc_bulk_page *bulk; bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link); @@ -224,7 +218,6 @@ int ptlrpc_reply(struct ptlrpc_service *svc, struct ptlrpc_request *req) req->rq_type = PTL_RPC_TYPE_REPLY; req->rq_repmsg->conn = req->rq_connection->c_remote_conn; req->rq_repmsg->token = req->rq_connection->c_remote_token; - req->rq_repmsg->xid = HTON__u32(req->rq_reqmsg->xid); req->rq_repmsg->status = HTON__u32(req->rq_status); req->rq_reqmsg->type = HTON__u32(req->rq_type); return ptl_send_buf(req, req->rq_connection, svc->srv_rep_portal); @@ -284,7 +277,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni, request->rq_client->cli_reply_portal, - local_id, request->rq_reqmsg->xid, 0, PTL_UNLINK, + local_id, request->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &request->rq_reply_me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc); @@ -369,7 +362,6 @@ int ptl_handled_rpc(struct ptlrpc_service *service, void *start) int index; spin_lock(&service->srv_lock); - for (index = 0; index < service->srv_ring_length; index++) if (service->srv_buf[index] == start) break; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index e932433eb590846c136e6674f78f4f7d31cea933..87f5e042687557f222dc15fdce08885fe6647250 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -81,13 +81,12 @@ int lustre_unpack_msg(struct lustre_msg *m, int len) RETURN(-EINVAL); m->opc = NTOH__u32(m->opc); 
- m->xid = NTOH__u32(m->xid); m->status = NTOH__u32(m->status); m->type = NTOH__u32(m->type); - m->connid = NTOH__u32(m->connid); m->bufcount = NTOH__u32(m->bufcount); m->last_rcvd = NTOH__u64(m->last_rcvd); m->last_committed = NTOH__u64(m->last_committed); + m->target_id = NTOH__u32(m->target_id); required_len = size_round(sizeof(*m) + m->bufcount * sizeof(__u32)); if (len < required_len) diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 97e683a497f23b6e2f56a22ddf47df001232293c..34147d35d543e3b8222d1e5f605be4b9c9e1d878 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -147,13 +147,11 @@ static int handle_incoming_request(struct obd_device *obddev, start = event->mem_desc.start; memset(&request, 0, sizeof(request)); request.rq_obd = obddev; + request.rq_xid = event->match_bits; request.rq_reqmsg = event->mem_desc.start + event->offset; request.rq_reqlen = event->mem_desc.length; - if (request.rq_reqmsg->xid != event->match_bits) - LBUG(); - - CDEBUG(D_NET, "got req %d\n", request.rq_reqmsg->xid); + CDEBUG(D_NET, "got req %Ld\n", request.rq_xid); peer.peer_nid = event->initiator.nid; /* FIXME: this NI should be the incoming NI. 
@@ -193,6 +191,38 @@ static int handle_incoming_request(struct obd_device *obddev, return rc; } +void ptlrpc_rotate_reqbufs(struct ptlrpc_service *service, + ptl_event_t *ev) +{ + int index; + + for (index = 0; index < service->srv_ring_length; index++) + if (service->srv_buf[index] == ev->mem_desc.start) + break; + + if (index == service->srv_ring_length) + LBUG(); + + service->srv_ref_count[index]++; + + if (ptl_is_valid_handle(&ev->unlinked_me)) { + int idx; + + for (idx = 0; idx < service->srv_ring_length; idx++) + if (service->srv_me_h[idx].handle_idx == + ev->unlinked_me.handle_idx) + break; + if (idx == service->srv_ring_length) + LBUG(); + + CDEBUG(D_NET, "unlinked %d\n", idx); + ptl_set_inv_handle(&(service->srv_me_h[idx])); + + if (service->srv_ref_count[idx] == 0) + ptlrpc_link_svc_me(service, idx); + } +} + static int ptlrpc_main(void *arg) { int rc; @@ -242,6 +272,8 @@ static int ptlrpc_main(void *arg) if (thread->t_flags & SVC_EVENT) { thread->t_flags &= ~SVC_EVENT; + ptlrpc_rotate_reqbufs(svc, &event); + rc = handle_incoming_request(obddev, svc, &event); thread->t_flags &= ~SVC_EVENT; continue; diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index a171dcdfde87868b97045c8096bf09f06aa02b32..d101b52652195f9c74b71f8a73c6dea28e303e61 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -12,4 +12,6 @@ truncate directio openme writeme +mcreate +tchmod fsx diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index be14c89f7bab85d52dae3b23f41926372ff829a8..58d5d838e02d8855ed0b601a34f180d92995c43c 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -3,10 +3,13 @@ CPPFLAGS := -I. 
-I$(PORTALS)/include -I$(top_srcdir)/include -D_LARGEFILE64_SOUR CFLAGS := -g -Wall # LDADD = -lldap # LDADD := -lreadline -ltermcap # -lefence -bin_PROGRAMS = openunlink testreq truncate directio openme writeme fsx #ldaptest +bin_PROGRAMS = openunlink testreq truncate directio openme writeme mcreate tchmod toexcl fsx #ldaptest # ldaptest_SOURCES = ldaptest.c +tchmod_SOURCES = tchmod.c +toexcl_SOURCES = toexcl.c testreq_SOURCES = testreq.c +mcreate_SOURCES = mcreate.c truncate_SOURCES = truncate.c directio_SOURCES = directio.c openunlink_SOURCES = openunlink.c diff --git a/lustre/tests/client-mount.cfg b/lustre/tests/client-mount.cfg index 644ae143d487600918f80fb5f45703263e6ee6d1..6f2addb1fcc72c0c0c6df724798078d075d8d99e 100644 --- a/lustre/tests/client-mount.cfg +++ b/lustre/tests/client-mount.cfg @@ -1,5 +1,6 @@ #!/bin/sh # Config file for mounting a client Lustre filesystem +SETUP_MDC=y SETUP_OSC=y OSCMT=/mnt/lustre SETUP_MOUNT=y diff --git a/lustre/tests/client-mount2.cfg b/lustre/tests/client-mount2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cb210c8019fbd40edc33b10281f028a69f08db01 --- /dev/null +++ b/lustre/tests/client-mount2.cfg @@ -0,0 +1,10 @@ +#!/bin/sh +# Config file for mounting a client Lustre filesystem +MDC_NAMES="MDCDEV1 MDCDEV2" +OSC_NAMES="OSCDEV1 OSCDEV2" +SETUP_MDC=y +SETUP_OSC=y +MT1="/mnt/lustre1 OSCDEV1 MDCDEV1" +MT2="/mnt/lustre2 OSCDEV2 MDCDEV2" +MOUNT_LIST="MT1 MT2" +SETUP_MOUNT=y diff --git a/lustre/tests/common.sh b/lustre/tests/common.sh index dd2b36fa8a0c932a7cf2e8769866019b60d24342..3c65364ed75891a9b7851e7c9e7feca388056e05 100644 --- a/lustre/tests/common.sh +++ b/lustre/tests/common.sh @@ -152,6 +152,7 @@ setup_portals() { fi [ -z "$OSTNODE" ] && OSTNODE=$SERVER + [ -z "$MDSNODE" ] && MDSNODE=$SERVER if [ -z "$DLM" ]; then if [ "$LOCALHOST" == "$SERVER" ]; then @@ -179,13 +180,13 @@ setup_portals() { $PTLCTL <<- EOF setup $NETWORK mynid $LOCALHOST - connect $SERVER $PORT - add_uuid self - add_uuid mds + 
connect $MDSNODE $PORT + add_uuid $MDSNODE connect $OSTNODE $PORT - add_uuid ost + add_uuid $OSTNODE connect $DLM $PORT - add_uuid ldlm + add_uuid $DLM + add_uuid self quit EOF } @@ -211,6 +212,7 @@ setup_lustre() { do_insmod $LUSTRE/mdc/mdc.o || exit -1 do_insmod $LUSTRE/llite/llite.o || exit -1 + echo "$R/tmp/lustre-log" > /proc/sys/portals/debug_path list_mods if $OBDCTL name2dev RPCDEV > /dev/null 2>&1; then @@ -345,38 +347,73 @@ setup_server() { } setup_osc() { + set -vx [ "$SETUP_OSC" != "y" ] && return 0 + [ "$OSC_NAMES" ] || OSC_NAMES=OSCDEV - if $OBDCTL name2dev OSCDEV > /dev/null 2>&1; then + for THEOSC in $OSC_NAMES ; do + OSCDEVNO=`find_devno $THEOSC` + if $OBDCTL name2dev $THEOSC > /dev/null 2>&1; then echo "$0: OSCDEV is already configured" return 0 + fi + + $OBDCTL <<- EOF || return $rc + newdev + attach osc $THEOSC + setup OSTDEV $OSTNODE + quit + EOF + done +} + +setup_mdc() { + set -vx + [ "$SETUP_MDC" != "y" ] && return 0 + [ "$MDC_NAMES" ] || MDC_NAMES=MDCDEV + + for THEMDC in $MDC_NAMES ; do + MDCDEVNO=`find_devno $THEMDC` + if $OBDCTL name2dev $THEMDC > /dev/null 2>&1; then + echo "$0: MDCDEV is already configured" + return 0 fi $OBDCTL <<- EOF || return $? newdev - attach osc OSCDEV - setup -1 + attach mdc $THEMDC + setup MDSDEV $MDSNODE quit EOF + done } + setup_mount() { + set -vx [ "$SETUP_MOUNT" != "y" ] && return 0 + [ "$MDC_NAMES" ] || MDC_NAMES=MDCDEV + [ "$OSC_NAMES" ] || OSC_NAMES=OSCDEV + [ -z "$MOUNT_LIST" -a "$OSCMT" ] && MOUNT_LIST="MT" && MT="$OSCMT OSCDEV MDCDEV" - [ "$OSCMT" ] || fail "error: $0: OSCMT unset" + [ "$MOUNT_LIST" ] || fail "error: $0: MOUNT_LIST unset" - if mount | grep -q $OSCMT; then - echo "$0: $OSCMT is already mounted" - return 0 - fi + for THEMOUNT in $MOUNT_LIST; do + eval "echo \$$THEMOUNT" | while read MTPT THEOSC THEMDC; do + if mount | grep -q $MTPT; then + echo "$0: $MTPT is already mounted" + return 0 + fi - [ ! 
-d $OSCMT ] && mkdir $OSCMT - echo "$0: mounting \$OSCDEV on $OSCMT" - mount -t lustre_lite -o device=`find_devno OSCDEV` none $OSCMT + [ ! -d $MTPT ] && mkdir $MTPT + echo mount -t lustre_lite -o ost=`find_devno $THEOSC`,mds=`find_devno $THEMDC` none $MTPT + mount -t lustre_lite -o ost=`find_devno $THEOSC`,mds=`find_devno $THEMDC` none $MTPT + done + done } setup_client() { - setup_osc && setup_mount + setup_osc && setup_mdc && setup_mount } DEBUG_ON="echo 0xffffffff > /proc/sys/portals/debug" @@ -406,9 +443,9 @@ cleanup_portals() { setup $NETWORK disconnect del_uuid self - del_uuid mds - del_uuid ost - del_uuid ldlm + del_uuid $MDSNODE + del_uuid $OSTNODE + del_uuid $DLM quit EOF @@ -503,23 +540,45 @@ cleanup_mount() { [ "$SETUP" -a -z "$SETUP_MOUNT" ] && return 0 [ "$OSCMT" ] || OSCMT=/mnt/lustre - if [ "`mount | grep $OSCMT`" ]; then - umount $OSCMT || fail "unable to unmount $OSCMT" - fi + for THEMOUNT in $OSCMT; do + if [ "`mount | grep $THEMOUNT`" ]; then + umount $THEMOUNT || fail "unable to unmount $THEMOUNT" + fi + done } cleanup_osc() { [ "$SETUP" -a -z "$SETUP_OSC" ] && return 0 + [ "$OSC_NAMES" ] || OSC_NAMES=OSCDEV - OSCDEVNO=`find_devno OSCDEV` - if [ "$OSCDEVNO" ]; then + for THEOSC in $OSC_NAMES ; do + OSCDEVNO=`find_devno $THEOSC` + if [ "$OSCDEVNO" ]; then $OBDCTL <<- EOF device $OSCDEVNO cleanup detach quit EOF - fi + fi + done +} + +cleanup_mdc() { + [ "$SETUP" -a -z "$SETUP_MDC" ] && return 0 + [ "$MDC_NAMES" ] || MDC_NAMES=MDCDEV + + for THEMDC in $MDC_NAMES ; do + MDCDEVNO=`find_devno $THEMDC` + if [ "$MDCDEVNO" ]; then + $OBDCTL <<- EOF + device $MDCDEVNO + cleanup + detach + quit + EOF + fi + done } cleanup_rpc() { @@ -535,7 +594,7 @@ cleanup_rpc() { } cleanup_client() { - cleanup_mount && cleanup_osc && cleanup_rpc + cleanup_mount && cleanup_osc && cleanup_mdc && cleanup_rpc } fail() { diff --git a/lustre/tests/llcleanup.sh b/lustre/tests/llcleanup.sh index 5b1f80cae9d0b632c199adc423d11ee85e62e825..ff6236ce1ec49b1774a3b875334fb4172f770b9b 
100755 --- a/lustre/tests/llcleanup.sh +++ b/lustre/tests/llcleanup.sh @@ -5,14 +5,16 @@ SRCDIR="`dirname $0`/" setup_opts "$@" -$DBGCTL debug_kernel /tmp/debug.1 +TIME=`date +'%s'` + +$DBGCTL debug_kernel /tmp/debug.1.$TIME cleanup_client -$DBGCTL debug_kernel /tmp/debug.2 +$DBGCTL debug_kernel /tmp/debug.2.$TIME cleanup_server -$DBGCTL debug_kernel /tmp/debug.3 -cleanup_ldlm -$DBGCTL debug_kernel /tmp/debug.4 +$DBGCTL debug_kernel /tmp/debug.3.$TIME cleanup_lustre -$DBGCTL debug_kernel /tmp/debug.5 +$DBGCTL debug_kernel /tmp/debug.4.$TIME +cleanup_ldlm +$DBGCTL debug_kernel /tmp/debug.5.$TIME cleanup_portals diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh index 09ef43724ba761afb3798fa6a83901aff121fa28..d29f5748b478561cb0aedabe02820b6889d80170 100755 --- a/lustre/tests/llmount.sh +++ b/lustre/tests/llmount.sh @@ -4,7 +4,7 @@ SRCDIR="`dirname $0`/" . $SRCDIR/common.sh export DEBUG_WAIT=yes -. $SRCDIR/llsetup.sh $SRCDIR/net-local.cfg $SRCDIR/mds.cfg $SRCDIR/obdext2.cfg $SRCDIR/client-mount.cfg $SRCDIR/ldlm.cfg || exit 2 +. $SRCDIR/llsetup.sh $SRCDIR/net-local.cfg $SRCDIR/mds.cfg $SRCDIR/obdfilter.cfg $SRCDIR/client-mount.cfg $SRCDIR/ldlm.cfg || exit 2 debug_client_on #debug_client_off diff --git a/lustre/tests/llmountcleanup.sh b/lustre/tests/llmountcleanup.sh index 42bbcaeae051fcd20d312917b120baac9ca8340b..674be92e8de6b6610d57c3ed8a1ed7792623d437 100755 --- a/lustre/tests/llmountcleanup.sh +++ b/lustre/tests/llmountcleanup.sh @@ -3,7 +3,9 @@ SRCDIR="`dirname $0`/" . 
$SRCDIR/common.sh -$DBGCTL debug_kernel /tmp/debug.1 +TIME=`date +'%s'` + +$DBGCTL debug_kernel /tmp/debug.1.$TIME if mount | grep '/mnt/lustre'; then umount /mnt/lustre || fail "cannot unmount" @@ -11,16 +13,12 @@ fi killall acceptor rmmod llite -rmmod mdc $OBDCTL <<EOF -name2dev OSCDEV -cleanup -detach -name2dev LDLMDEV +name2dev MDCDEV cleanup detach -name2dev RPCDEV +name2dev OSCDEV cleanup detach name2dev OSTDEV @@ -32,6 +30,12 @@ detach name2dev MDSDEV cleanup detach +name2dev LDLMDEV +cleanup +detach +name2dev RPCDEV +cleanup +detach quit EOF @@ -40,6 +44,7 @@ rmmod mds_extN rmmod mds_ext3 rmmod mds_ext2 rmmod mds +rmmod mdc rmmod osc rmmod ost rmmod obdfilter @@ -49,15 +54,15 @@ rmmod ptlrpc rmmod obdclass rmmod extN -$DBGCTL debug_kernel /tmp/debug.2 +$DBGCTL debug_kernel /tmp/debug.2.$TIME $PTLCTL <<EOF setup tcp disconnect del_uuid self -del_uuid mds -del_uuid ost -del_uuid ldlm +del_uuid localhost +del_uuid localhost +del_uuid localhost quit EOF diff --git a/lustre/tests/llrmount.sh b/lustre/tests/llrmount.sh index 4376a58cd81bc32d98591a1eff1d194080718c57..4b8abc9b198687cb9e2f4d3418086c4a0cee8bed 100755 --- a/lustre/tests/llrmount.sh +++ b/lustre/tests/llrmount.sh @@ -4,7 +4,7 @@ SRCDIR="`dirname $0`/" . $SRCDIR/common.sh export DEBUG_WAIT=yes -. $SRCDIR/llrsetup.sh $SRCDIR/net-local.cfg $SRCDIR/client-mount.cfg $SRCDIR/mds.cfg $SRCDIR/obdext2.cfg $SRCDIR/ldlm.cfg || exit 2 +. 
$SRCDIR/llrsetup.sh $SRCDIR/net-local.cfg $SRCDIR/client-mount.cfg $SRCDIR/mds.cfg $SRCDIR/obdfilter.cfg $SRCDIR/ldlm.cfg || exit 2 debug_client_on #debug_client_off diff --git a/lustre/tests/mcreate.c b/lustre/tests/mcreate.c new file mode 100644 index 0000000000000000000000000000000000000000..0d8cd362521dadfbf1b8125bb69cb936967de41f --- /dev/null +++ b/lustre/tests/mcreate.c @@ -0,0 +1,23 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> + +int main(int argc, char ** argv) +{ + int rc; + + if (argc < 2) { + printf("Usage %s filename\n", argv[0]); + return 1; + } + + rc = mknod(argv[1], S_IFREG| 0444, 0); + if (rc) { + printf("error: %s\n", strerror(errno)); + } + return rc; +} diff --git a/lustre/tests/obdfilter.cfg b/lustre/tests/obdfilter.cfg index 24930d55a691d27a595a536e7f0b87f88f86120a..e9021c20aba14eb4debcb6e99e922ca49c1f9807 100644 --- a/lustre/tests/obdfilter.cfg +++ b/lustre/tests/obdfilter.cfg @@ -2,6 +2,6 @@ # Config file for setting up an object storage target with obdfilter OSTDEV=/tmp/ost OSTSIZE=10000 -OSTFS=ext2 +OSTFS=extN OSTTYPE=obdfilter SETUP_OST=y diff --git a/lustre/tests/ostreq.sh b/lustre/tests/ostreq.sh index 13e3fbc137af456f260afd154f94fa369ec0d713..24867c42c052de256e0b8acbe2b1adddfa5c34d5 100644 --- a/lustre/tests/ostreq.sh +++ b/lustre/tests/ostreq.sh @@ -12,8 +12,8 @@ $PTLCTL <<EOF mynid localhost setup tcp connect $SERVER $PORT +add_uuid $SERVER add_uuid self -add_uuid ost quit EOF diff --git a/lustre/tests/runfailure-client-mds-recover.sh b/lustre/tests/runfailure-client-mds-recover.sh index cc36136c760b4af1630dde899039853d64e47563..8ea79df6952a56a973250229a29c0e5067b30e95 100755 --- a/lustre/tests/runfailure-client-mds-recover.sh +++ b/lustre/tests/runfailure-client-mds-recover.sh @@ -13,25 +13,7 @@ EOF } -echo -echo "Test 5 reopen a file:" `date` "creating and writing/mnt/lustre/foo" -echo -rm -rf /mnt/lustre/* -./openme 
/mnt/lustre/foo3 & -./writeme /mnt/lustre/iogoeson & -sleep 1 -ls -l /mnt/lustre -echo 0x80000107 > /proc/sys/lustre/fail_loc -mknod /mnt/lustre/dev c 10 240 & -echo "MDS dropped create request -- sleep 4 secs - watch for timeout" -sleep 4 -reconnect -sleep 1 -echo "did things recover? check for file foo, bar, check log for reopen." -ls -l /mnt/lustre -echo "Test 5 done" -exit echo echo "Test 1 drop request:" `date` "creating /mnt/lustre/foo" @@ -41,11 +23,12 @@ echo 0x80000107 > /proc/sys/lustre/fail_loc touch /mnt/lustre/foo & ps axww | grep touch echo "MDS dropped create request -- sleep 4 secs - watch for timeout" -sleep 4 -reconnect +sleep 7 +# reconnect sleep 1 echo "did things recover? check for file foo." ls -l /mnt/lustre +echo "Test 1 done" echo @@ -59,14 +42,15 @@ ps axww | grep touch echo "MDS dropped create request -- sleep 4 secs - watch for timeout" sleep 4 touch /mnt/lustre/a/f & -reconnect -sleep 1 +#reconnect +sleep 5 echo "did things recover? check for file foo and a/f" ls -l /mnt/lustre ls -l /mnt/lustre/a +echo "Test 2 done" echo -echo "Test 4 dropped reply:" `date` "creating /mnt/lustre/foo2" +echo "Test 3 dropped reply:" `date` "creating /mnt/lustre/foo2" echo rm -rf /mnt/lustre/* echo 0x80000119 > /proc/sys/lustre/fail_loc @@ -74,18 +58,16 @@ touch /mnt/lustre/foo2 & ps axww | grep touch echo "MDS dropped create request -- sleep 4 secs - watch for timeout" sleep 4 -reconnect +# reconnect echo failure cleared -sleep 1 +sleep 4 echo "did things recover? check for file foo2" ls -l /mnt/lustre +echo "Test 3 done" - -exit - echo -echo "Test 3: Multiple failures" +echo "Test 4: Multiple failures" echo echo 0x0000107 > /proc/sys/lustre/fail_loc touch /mnt/lustre/bar & @@ -93,10 +75,29 @@ ps axww | grep touch echo "touch program will have repeated failures sleeping 10" sleep 10 echo 0 > /proc/sys/lustre/fail_loc -reconnect -sleep 1 +# reconnect +sleep 6 echo "failure cleared" echo "did things recover? 
Check for file bar" ls -l /mnt/lustre/bar +echo "Test 4 done" + +echo +echo "Test 5: Continue writing during recovery:" `date` "creating and writing/mnt/lustre/foo" +echo +rm -rf /mnt/lustre/* +./openme /mnt/lustre/foo3 & +./writeme /mnt/lustre/iogoeson & +sleep 1 +ls -l /mnt/lustre +echo 0x80000107 > /proc/sys/lustre/fail_loc +mknod /mnt/lustre/dev c 10 240 & +echo "MDS dropped create request -- sleep 4 secs - watch for timeout" +sleep 6 +# reconnect +sleep 1 +echo "did things recover? check for file foo, bar, check log for reopen." +ls -l /mnt/lustre +echo "Test 5 done" diff --git a/lustre/tests/tchmod.c b/lustre/tests/tchmod.c new file mode 100644 index 0000000000000000000000000000000000000000..9fcb1ac01eeff692f05ca364c8c82aa1b915fba8 --- /dev/null +++ b/lustre/tests/tchmod.c @@ -0,0 +1,17 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char **argv) +{ + mode_t mode; + + if (argc != 3) { + printf("usage: %s mode name\n", argv[0]); + return 1; + } + + mode = strtoul(argv[1], NULL, 8); + return chmod(argv[2], mode); +} diff --git a/lustre/tests/toexcl.c b/lustre/tests/toexcl.c new file mode 100644 index 0000000000000000000000000000000000000000..da13217115708b4c2f957152160de7d69937e3fb --- /dev/null +++ b/lustre/tests/toexcl.c @@ -0,0 +1,24 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> + +int main(int argc, char **argv) +{ + int rc; + + if (argc != 2) { + printf("usage: %s name\n", argv[0]); + return 1; + } + + rc = open(argv[1], O_CREAT|O_EXCL, 0644); + if (rc == -1) + printf("open failed: %s\n", strerror(errno)); + else + printf("open success.\n"); + return 0; +} diff --git a/lustre/utils/ha_assist2.sh b/lustre/utils/ha_assist2.sh index 4a674346c4afc5fbaa70c45f3f13fdfdd301f9ad..fcc4b4731ead47c81bffa1755bb49c116ee8ddae 100755 --- a/lustre/utils/ha_assist2.sh +++ b/lustre/utils/ha_assist2.sh @@ 
-2,7 +2,7 @@ set -vx date echo "ha assist checking for problems" -sleep 3 +sleep 1 if [ ! -e /tmp/halog ]; then echo "no problems, exiting" exit @@ -19,7 +19,7 @@ echo "- please supply a new mds" setup tcp close_uuid mds del_uuid mds -connect dev5 1234 +connect localhost 1234 add_uuid mds quit EOF3