diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 0c4c4a0159135058cd3a1cc476b2ac4b682b7d4f..09db989f92451e2bf8d91e840d58cd02f7409213 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -115,7 +115,7 @@ do { \ if (portal_cerror == 0) \ break; \ CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ + if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 0bc93f326e87a8934650238c4eee5a73f4a4c067..7ad93277d73d317530925cc58b97da4a23bbc954 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -633,9 +633,9 @@ int portals_debug_mark_buffer(char *text) if (debug_buf == NULL) return -EINVAL; - CDEBUG(0, "********************************************************\n"); + CDEBUG(D_TRACE,"***************************************************\n"); CWARN("DEBUG MARKER: %s\n", text); - CDEBUG(0, "********************************************************\n"); + CDEBUG(D_TRACE,"***************************************************\n"); return 0; } diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 40aa745e2cd091178c6abef064dff17f57ebac2a..cbdcb10d905ae82abab328be52a1714fce9b98c8 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,6 +1,8 @@ tbd Cluster File Systems, Inc. <info@clusterfs.com> * version 1.2.0 * bug fixes + - account for cache space usage on clients to avoid data loss (974) + - lfsck support in lustre kernel code (2349) - reduce journal credits needed for BRW writes (2370) - orphan handling to avoid losing space on client/server crashes - ptlrpcd can be blocked, stopping ALL progress (2477) diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h index 3f4d52fefc21f28f20e63aefa895b7a8a57721b7..3a7ac428d81529a030e45c7f109d744caf463d4d 100644 --- a/lustre/include/linux/lprocfs_status.h +++ b/lustre/include/linux/lprocfs_status.h @@ -249,6 +249,8 @@ extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count, int *eof, void *data); +extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, + int count, int *eof, void *data); extern int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, @@ -340,6 +342,9 @@ static inline int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } static inline +int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } static inline diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index f8ae03ca7f6adfa0edb7d7349440ab34e8649d19..b949fe1eaa7e81395ac81fba3a35d23b58ddaa2f 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -57,6 +57,9 @@ struct filter_export_data { struct filter_client_data *fed_fcd; loff_t fed_lr_off; int fed_lr_idx; + unsigned long fed_dirty; /* in bytes */ + unsigned long fed_grant; /* in bytes */ + unsigned long fed_pending; /* bytes just being written */ }; struct obd_export { diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 5f9ac77ad5f370c03f9551b164f09acc16d33d73..3f3421a4c429ff071b83164249c17644ca7a076c 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -28,7 +28,6 @@ #ifdef __KERNEL__ #include <linux/obd.h> -#include <linux/fs.h> typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, void *data, int error); @@ -222,10 +221,22 @@ static inline int fsfilt_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, cb_func, cb_data); } +/* very similar to obd_statfs(), but caller already holds obd_osfs_lock */ static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *sb, - struct obd_statfs *osfs) + unsigned long max_age) { - return obd->obd_fsops->fs_statfs(sb, osfs); + int rc = 0; + + CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age); + if (time_before(obd->obd_osfs_age, max_age)) { + rc = obd->obd_fsops->fs_statfs(sb, &obd->obd_osfs); + if (rc == 0) /* N.B. statfs can't really fail */ + obd->obd_osfs_age = jiffies; + } else { + CDEBUG(D_SUPER, "using cached obd_statfs data\n"); + } + + return rc; } static inline int fsfilt_sync(struct obd_device *obd, struct super_block *sb) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index da9bd52ca9abd6d102d20ababf6eb5fa323200a3..9428296dedf48adaf621377879b31f077ba1689b 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -265,7 +265,6 @@ typedef uint32_t obd_blksize; typedef uint32_t obd_mode; typedef uint32_t obd_uid; typedef uint32_t obd_gid; -typedef uint64_t obd_rdev; typedef uint32_t obd_flag; typedef uint32_t obd_count; @@ -274,6 +273,7 @@ typedef uint32_t obd_count; #define OBD_FL_DELORPHAN (0x00000004) /* if set in o_flags delete orphans */ #define OBD_FL_NORPC (0x00000008) // if set in o_flags set in OSC not OST #define OBD_FL_IDONLY (0x00000010) // if set in o_flags only adjust obj id +#define OBD_FL_RECREATE_OBJS (0x00000020) // recreate missing obj #define OBD_INLINESZ 64 @@ -285,21 +285,26 @@ struct obdo { obd_time o_mtime; obd_time o_ctime; obd_size o_size; - obd_blocks o_blocks; /* brw: clients sent cached bytes */ - obd_rdev o_rdev; /* brw: clients/servers sent grant */ + obd_blocks o_blocks; /* brw: cli sent cached bytes */ + obd_size o_grant; obd_blksize o_blksize; /* optimal IO blocksize */ - obd_mode o_mode; + obd_mode o_mode; /* brw: cli sent cache remain */ obd_uid o_uid; obd_gid o_gid; obd_flag o_flags; - obd_count o_nlink; /* brw: checksum */ + obd_count o_nlink; /* brw: checksum */ obd_count o_generation; obd_flag o_valid; /* hot fields in this obdo */ - obd_flag o_obdflags; + obd_count o_misc; __u32 o_easize; /* epoch in ost writes */ char o_inline[OBD_INLINESZ]; /* fid in ost writes */ }; +#define o_dirty o_blocks +#define o_undirty o_mode +#define o_dropped o_misc +#define o_cksum o_nlink + extern void lustre_swab_obdo (struct obdo *o); #define LOV_MAGIC_V1 0x0BD10BD0 @@ -357,7 +362,6 @@ struct lov_mds_md_v0 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLUID (0x00000200) /* user ID */ #define OBD_MD_FLGID (0x00000400) /* group ID */ #define OBD_MD_FLFLAGS (0x00000800) /* flags word */ -#define OBD_MD_FLOBDFLG (0x00001000) #define OBD_MD_FLNLINK (0x00002000) /* link count */ #define OBD_MD_FLGENER (0x00004000) /* generation number */ #define OBD_MD_FLINLINE (0x00008000) /* inline data */ @@ -372,7 +376,8 @@ struct lov_mds_md_v0 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLGROUP (0x01000000) /* group */ #define OBD_MD_FLIFID (0x02000000) /* ->ost write inline fid */ #define OBD_MD_FLEPOCH (0x04000000) /* ->ost write easize is epoch */ -#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\ +#define OBD_MD_FLGRANT (0x08000000) /* ost preallocation space grant */ +#define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\ OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\ OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE)) @@ -411,10 +416,10 @@ extern void lustre_swab_obd_statfs (struct obd_statfs *os); #define OBD_BRW_READ 0x01 #define OBD_BRW_WRITE 0x02 #define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) -#define OBD_BRW_CREATE 0x04 #define OBD_BRW_SYNC 0x08 #define OBD_BRW_CHECK 0x10 #define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ +#define OBD_BRW_GRANTED 0x40 /* the ost manages this */ #define OBD_OBJECT_EOF 0xffffffffffffffffULL @@ -497,6 +502,11 @@ struct ll_fid { __u32 f_type; }; +struct ll_recreate_obj { + __u64 lrc_id; + __u32 lrc_ost_idx; +}; + extern void lustre_swab_ll_fid (struct ll_fid *fid); #define MDS_STATUS_CONN 1 @@ -588,6 +598,7 @@ extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); #define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ #define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ +#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ struct mds_rec_create { __u32 cr_opcode; diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 0ad0ec78a069906226cb96222c3ca49a87c4f752..9013e8a1b563609e9faae64c5e1d48a442cae520 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -431,6 +431,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) #define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define OBD_IOC_LOV_SETEA _IOW ('f', 156, long) #define OBD_IOC_MOUNTOPT _IOWR('f', 170, long) diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h index 33a62512140eae62b57ae23a5a5adb0bbf0936ba..2eba4859f942de4f1ce4c52380b48a6c7fcd93be 100644 --- a/lustre/include/linux/lustre_user.h +++ b/lustre/include/linux/lustre_user.h @@ -30,6 +30,8 @@ #define LL_IOC_CLRFLAGS _IOW ('f', 153, long) #define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) #define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define LL_IOC_LOV_SETEA _IOW ('f', 156, long) +#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long) #define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ @@ -42,6 +44,7 @@ #define LOV_PATTERN_RAID1 0x002 #define LOV_PATTERN_FIRST 0x100 +#define lov_user_ost_data lov_user_ost_data_v1 struct lov_user_ost_data_v1 { /* per-stripe data structure */ __u64 l_object_id; /* OST object ID */ __u64 l_object_gr; /* OST object group (creating MDS number) */ diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 44b180967e126eb5442575aad0e709b5a6ab11fa..619010b90bbc7c2dd1abce90373d8df3e82387ef 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -164,9 +164,9 @@ struct filter_obd { struct list_head fo_export_list; int fo_subdir_count; - spinlock_t fo_grant_lock; /* protects tot_granted */ - obd_size fo_tot_granted; - obd_size fo_tot_cached; + obd_size fo_tot_dirty; /* protected by obd_osfs_lock */ + obd_size fo_tot_granted; /* all values in bytes */ + obd_size fo_tot_pending; obd_size fo_readcache_max_filesize; @@ -177,7 +177,7 @@ struct filter_obd { struct ptlrpc_client fo_mdc_client; #endif struct file **fo_last_objid_files; - __u64 *fo_last_objids; //last created object ID for groups + __u64 *fo_last_objids; /* last created objid for groups */ struct semaphore fo_alloc_lock; @@ -210,18 +210,14 @@ struct client_obd { //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */ void *cl_llcd_offset; - struct semaphore cl_dirty_sem; - obd_size cl_dirty; /* all _dirty_ in bytes */ - obd_size cl_dirty_granted; /* from ost */ - obd_size cl_dirty_max; /* allowed w/o rpc */ - struct list_head cl_cache_waiters; - struct obd_device *cl_mgmtcli_obd; - /* this is just to keep existing infinitely caching behaviour between - * clients and OSTs that don't have the grant code in yet.. it can - * be yanked once everything speaks grants */ - char cl_ost_can_grant; + /* the grant values are protected by loi_list_lock below */ + long cl_dirty; /* all _dirty_ in bytes */ + long cl_dirty_max; /* allowed w/o rpc */ + long cl_avail_grant; /* bytes of credit for ost */ + long cl_lost_grant; /* lost credits (trunc) */ + struct list_head cl_cache_waiters; /* waiting for cache/grant */ /* keep track of objects that have lois that contain pages which * have been queued for async brw. this lock also protects the @@ -372,10 +368,10 @@ struct niobuf_local { __u64 offset; __u32 len; __u32 flags; - int rc; struct page *page; struct dentry *dentry; - unsigned long start; + int lnb_grant_used; + int rc; }; @@ -465,6 +461,7 @@ struct obd_device { spinlock_t obd_dev_lock; __u64 obd_last_committed; struct fsfilt_operations *obd_fsops; + spinlock_t obd_osfs_lock; struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; struct obd_statfs obd_osfs; unsigned long obd_osfs_age; diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 660f588e2bddc688443a4ce6dc3a22cd0d330056..3e1a5125d3b4b22a33854ced8fa50f20948fb377 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -621,17 +621,17 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, OBD_COUNTER_INCREMENT(obd, statfs); CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age); - if (obd->obd_osfs_age == 0 || time_before(obd->obd_osfs_age, max_age)) { + if (time_before(obd->obd_osfs_age, max_age)) { rc = OBP(obd, statfs)(obd, osfs, max_age); - spin_lock(&obd->obd_dev_lock); + spin_lock(&obd->obd_osfs_lock); memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); obd->obd_osfs_age = jiffies; - spin_unlock(&obd->obd_dev_lock); + spin_unlock(&obd->obd_osfs_lock); } else { CDEBUG(D_SUPER, "using cached obd_statfs data\n"); - spin_lock(&obd->obd_dev_lock); + spin_lock(&obd->obd_osfs_lock); memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); - spin_unlock(&obd->obd_dev_lock); + spin_unlock(&obd->obd_osfs_lock); } RETURN(rc); } @@ -724,10 +724,10 @@ static inline int obd_prep_async_page(struct obd_export *exp, RETURN(ret); } -static inline int obd_queue_async_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, +static inline int obd_queue_async_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + int cmd, obd_off off, int count, obd_flag brw_flags, obd_flag async_flags) { int rc; diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index 246ed17cd33763dc5514e2cb786b75445e19238c..9c29dbcde6862aa5a7e99f2c7f6bf3ea4f574e88 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -78,7 +78,7 @@ ldlm_extent_internal_policy(struct list_head *queue, struct ldlm_lock *req, EXIT; return; } - new_ex->start = MIN(lock->l_policy_data.l_extent.end+1, + new_ex->start = min(lock->l_policy_data.l_extent.end+1, req_start); } diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 9d4934ed84403ea0e1ef5719d7696d3c243b18ce..4b7eb3bc0ee332954abfe42df122b33922a123d1 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -91,17 +91,14 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) RETURN(-EINVAL); } - sema_init(&cli->cl_sem, 1); cli->cl_conn_count = 0; - memcpy(server_uuid.uuid, lcfg->lcfg_inlbuf2, MIN(lcfg->lcfg_inllen2, + memcpy(server_uuid.uuid, lcfg->lcfg_inlbuf2, min(lcfg->lcfg_inllen2, sizeof(server_uuid))); - init_MUTEX(&cli->cl_dirty_sem); cli->cl_dirty = 0; - cli->cl_dirty_granted = 0; + cli->cl_avail_grant = 0; cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; - cli->cl_ost_can_grant = 1; INIT_LIST_HEAD(&cli->cl_cache_waiters); INIT_LIST_HEAD(&cli->cl_loi_ready_list); INIT_LIST_HEAD(&cli->cl_loi_write_list); @@ -472,6 +469,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) } } + /* If all else goes well, this is our RPC return code. */ req->rq_status = 0; diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 64dfb52a008dfbefa25d2235f85ab585082898be..e1fe6583c3525d4906d276cb6a0a3e070b227ea3 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -688,7 +688,9 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) lock = ldlm_handle2lock(&dlm_req->lock_handle1); if (!lock) { CERROR("received cancel for unknown lock cookie "LPX64 - " from nid "LPX64" (%s)\n", dlm_req->lock_handle1.cookie, + " from client %s nid "LPX64" (%s)\n", + dlm_req->lock_handle1.cookie, + req->rq_export->exp_client_uuid.uuid, req->rq_peer.peer_nid, portals_nid2str(req->rq_peer.peer_ni->pni_number, req->rq_peer.peer_nid, str)); diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index af3d3aa0e856f2f2e793fe64c2aa383a1c8f9f59..2bd824897471f4f64160ee1f062781c20359e377 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -166,12 +166,8 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) lli->lli_st_gid = src->o_gid; if (valid & OBD_MD_FLFLAGS) lli->lli_st_flags = src->o_flags; - if (valid & OBD_MD_FLNLINK) - lli->lli_st_nlink = src->o_nlink; if (valid & OBD_MD_FLGENER) lli->lli_st_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - lli->lli_st_rdev = to_kdev_t(src->o_rdev); } #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) @@ -231,18 +227,10 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) dst->o_flags = lli->lli_st_flags; newvalid |= OBD_MD_FLFLAGS; } - if (valid & OBD_MD_FLNLINK) { - dst->o_nlink = lli->lli_st_nlink; - newvalid |= OBD_MD_FLNLINK; - } if (valid & OBD_MD_FLGENER) { dst->o_generation = lli->lli_st_generation; newvalid |= OBD_MD_FLGENER; } - if (valid & OBD_MD_FLRDEV) { - dst->o_rdev = (__u32)kdev_t_to_nr(lli->lli_st_rdev); - newvalid |= OBD_MD_FLRDEV; - } dst->o_valid |= newvalid; } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 1f6c88ab60bd6fa898b79352f5a4a80576ba2d3f..6c76e871561cc5710aa771c6a1ace695fb9e85a5 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -690,27 +690,79 @@ out: RETURN(retval); } -static int ll_lov_setstripe(struct inode *inode, struct file *file, - unsigned long arg) +static int ll_lov_recreate_obj(struct inode *inode, struct file *file, + unsigned long arg) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_export *exp = ll_i2obdexp(inode); + struct ll_recreate_obj ucreatp; + struct obd_trans_info oti = { 0 }; + struct obdo *oa = NULL; + int lsm_size; + int rc = 0; + struct lov_stripe_md *lsm, *lsm2; + ENTRY; + + if (!capable (CAP_SYS_ADMIN)) + RETURN(-EPERM); + + rc = copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg, + sizeof(struct ll_recreate_obj)); + if (rc) { + RETURN(-EFAULT); + } + oa = obdo_alloc(); + if (oa == NULL) { + RETURN(-ENOMEM); + } + + down(&lli->lli_open_sem); + lsm = lli->lli_smd; + if (lsm == NULL) { + up(&lli->lli_open_sem); + obdo_free(oa); + RETURN (-ENOENT); + } + lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) * + (lsm->lsm_stripe_count)); + + OBD_ALLOC(lsm2, lsm_size); + if (lsm2 == NULL) { + up(&lli->lli_open_sem); + obdo_free(oa); + RETURN(-ENOMEM); + } + + oa->o_id = ucreatp.lrc_id; + oa->o_nlink = ucreatp.lrc_ost_idx; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS; + oa->o_flags |= OBD_FL_RECREATE_OBJS; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + oti.oti_objid = NULL; + memcpy(lsm2, lsm, lsm_size); + rc = obd_create(exp, oa, &lsm2, &oti); + + up(&lli->lli_open_sem); + OBD_FREE(lsm2, lsm_size); + obdo_free(oa); + RETURN (rc); +} + +static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, + int flags, struct lov_user_md *lum, int lum_size) { struct ll_inode_info *lli = ll_i2info(inode); struct file *f; struct obd_export *exp = ll_i2obdexp(inode); struct lov_stripe_md *lsm; - struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = FMODE_WRITE}; - struct lov_user_md lum, *lump = (struct lov_user_md *)arg; + struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; struct ptlrpc_request *req = NULL; + int rc = 0; struct lustre_md md; - int rc; ENTRY; - /* Bug 1152: copy properly when this is no longer true */ - LASSERT(sizeof(lum) == sizeof(*lump)); - LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0])); - rc = copy_from_user(&lum, lump, sizeof(lum)); - if (rc) - RETURN(-EFAULT); - down(&lli->lli_open_sem); lsm = lli->lli_smd; if (lsm) { @@ -727,7 +779,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, f->f_dentry = file->f_dentry; f->f_vfsmnt = file->f_vfsmnt; - rc = ll_intent_file_open(f, &lum, sizeof(lum), &oit); + rc = ll_intent_file_open(f, lum, lum_size, &oit); if (rc) GOTO(out, rc); if (it_disposition(&oit, DISP_LOOKUP_NEG)) @@ -759,6 +811,55 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, RETURN(rc); } +static int ll_lov_setea(struct inode *inode, struct file *file, + unsigned long arg) +{ + int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; + struct lov_user_md *lump; + int lum_size = sizeof(struct lov_user_md) + + sizeof(struct lov_user_ost_data); + int rc; + ENTRY; + + if (!capable (CAP_SYS_ADMIN)) + RETURN(-EPERM); + + OBD_ALLOC(lump, lum_size); + if (lump == NULL) { + RETURN(-ENOMEM); + } + rc = copy_from_user(lump, (struct lov_user_md *)arg, + lum_size); + if (rc) { + OBD_FREE(lump, lum_size); + RETURN(-EFAULT); + } + + rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size); + + OBD_FREE(lump, lum_size); + RETURN(rc); +} + +static int ll_lov_setstripe(struct inode *inode, struct file *file, + unsigned long arg) +{ + struct lov_user_md lum, *lump = (struct lov_user_md *)arg; + int rc; + int flags = FMODE_WRITE; + ENTRY; + + /* Bug 1152: copy properly when this is no longer true */ + LASSERT(sizeof(lum) == sizeof(*lump)); + LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0])); + rc = copy_from_user(&lum, lump, sizeof(lum)); + if (rc) + RETURN(-EFAULT); + + rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum)); + RETURN(rc); +} + static int ll_lov_getstripe(struct inode *inode, unsigned long arg) { struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; @@ -804,8 +905,12 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, RETURN(0); case LL_IOC_LOV_SETSTRIPE: RETURN(ll_lov_setstripe(inode, file, arg)); + case LL_IOC_LOV_SETEA: + RETURN( ll_lov_setea(inode, file, arg) ); case LL_IOC_LOV_GETSTRIPE: RETURN(ll_lov_getstripe(inode, arg)); + case LL_IOC_RECREATE_OBJ: + RETURN(ll_lov_recreate_obj(inode, file, arg)); case EXT3_IOC_GETFLAGS: case EXT3_IOC_SETFLAGS: RETURN( ll_iocontrol(inode, file, cmd, arg) ); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index a109b2de21e8e83bcda3b16eca3cf64064da6d8b..21555c215db5673e284f510b7529253c49f58079 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -43,7 +43,7 @@ extern struct super_operations ll_super_operations; #define log2(n) ffz(~(n)) #endif -struct ll_sb_info *lustre_init_sbi(struct super_block *sb) +struct ll_sb_info *lustre_init_sbi(struct super_block *sb) { struct ll_sb_info *sbi = NULL; class_uuid_t uuid; @@ -65,7 +65,7 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb) RETURN(sbi); } -void lustre_free_sbi(struct super_block *sb) +void lustre_free_sbi(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; @@ -126,8 +126,8 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) sb->s_blocksize_bits = log2(osfs.os_bsize); sb->s_magic = LL_SUPER_MAGIC; sb->s_maxbytes = PAGE_CACHE_MAXBYTES; - - devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, + + devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid)); sb->s_dev = devno; @@ -159,7 +159,7 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) sb->s_op = &lustre_super_operations; - /* make root inode + /* make root inode * XXX: move this to after cbd setup? */ err = mdc_getattr(sbi->ll_mdc_exp, &rootfid, OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); @@ -436,14 +436,14 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, err = class_process_config(&lcfg); if (err < 0) GOTO(out_detach, err); - + obd = class_name2obd(name); if (obd == NULL) GOTO(out_cleanup, err = -EINVAL); /* Disable initial recovery on this import */ - err = obd_set_info(obd->obd_self_export, - strlen("initial_recov"), "initial_recov", + err = obd_set_info(obd->obd_self_export, + strlen("initial_recov"), "initial_recov", sizeof(allow_recov), &allow_recov); if (err) GOTO(out_cleanup, err); @@ -453,9 +453,9 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, err); GOTO(out_cleanup, err); } - + exp = class_conn2export(&mdc_conn); - + ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); rc = class_config_parse_llog(ctxt, profile, cfg); if (rc) { @@ -496,7 +496,7 @@ out_del_conn: out: if (rc == 0) rc = err; - + RETURN(rc); } @@ -527,15 +527,15 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) } OBD_ALLOC(sbi->ll_lmd, sizeof(*sbi->ll_lmd)); - if (sbi->ll_lmd == NULL) + if (sbi->ll_lmd == NULL) GOTO(out_free, err = -ENOMEM); memcpy(sbi->ll_lmd, lmd, sizeof(*lmd)); /* generate a string unique to this super, let's try the address of the super itself.*/ - len = (sizeof(sb) * 2) + 1; + len = (sizeof(sb) * 2) + 1; OBD_ALLOC(sbi->ll_instance, len); - if (sbi->ll_instance == NULL) + if (sbi->ll_instance == NULL) GOTO(out_free, err = -ENOMEM); sprintf(sbi->ll_instance, "%p", sb); @@ -556,13 +556,13 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) } if (osc) OBD_FREE(osc, strlen(osc) + 1); - OBD_ALLOC(osc, strlen(lprof->lp_osc) + + OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(sbi->ll_instance) + 2); sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance); if (mdc) OBD_FREE(mdc, strlen(mdc) + 1); - OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + + OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(sbi->ll_instance) + 2); sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance); } @@ -576,9 +576,9 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) CERROR("no mdc\n"); GOTO(out_free, err = -EINVAL); } - + err = lustre_common_fill_super(sb, mdc, osc); - + if (err) GOTO(out_free, err); @@ -605,9 +605,9 @@ out_free: OBD_ALLOC(cln_prof, len); sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile); - err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, + err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, 0); - if (err < 0) + if (err < 0) CERROR("Unable to process log: %s\n", cln_prof); OBD_FREE(cln_prof, len); OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance)+ 1); @@ -619,11 +619,11 @@ out_free: goto out_dev; } /* lustre_fill_super */ -static void lustre_manual_cleanup(struct ll_sb_info *sbi) +static void lustre_manual_cleanup(struct ll_sb_info *sbi) { struct lustre_cfg lcfg; struct obd_device *obd; - int next = 0; + int next = 0; while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) { @@ -644,7 +644,7 @@ static void lustre_manual_cleanup(struct ll_sb_info *sbi) } } - if (sbi->ll_lmd != NULL) + if (sbi->ll_lmd != NULL) class_del_profile(sbi->ll_lmd->lmd_profile); } @@ -660,7 +660,7 @@ void lustre_put_super(struct super_block *sb) if (obd) force_umount = obd->obd_no_recov; obd = NULL; - + lustre_common_put_super(sb); if (sbi->ll_lmd != NULL) { @@ -860,7 +860,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) /* from sys_utime() */ if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) { if (current->fsuid != inode->i_uid && - (rc = ll_permission(inode, MAY_WRITE, NULL)) != 0) + (rc=ll_permission(inode,MAY_WRITE,NULL))!=0) RETURN(rc); } else { /* from inode_change_ok() */ @@ -878,7 +878,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) * If we don't we can race with other i_size updaters on our node, like * ll_file_read. We can also race with i_size propogation to other * nodes through dirtying and writeback of final cached pages. This - * last one is especially bad for racing o_append users on other + * last one is especially bad for racing o_append users on other * nodes. */ if (ia_valid & ATTR_SIZE) { struct ldlm_extent extent = { .start = attr->ia_size, diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 06a7a7cdbff4b076491eca580b8211f535f641e8..58c9ed9a48a67b94409b5bc44b34a65ec1a1332a 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -105,6 +105,28 @@ static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count, return rc; } +static int ll_rd_kbytesavail(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct super_block *sb = (struct super_block *)data; + struct obd_statfs osfs; + int rc; + + LASSERT(sb != NULL); + rc = ll_statfs_internal(sb, &osfs, jiffies - HZ); + if (!rc) { + __u32 blk_size = osfs.os_bsize >> 10; + __u64 result = osfs.os_bavail; + + while (blk_size >>= 1) + result <<= 1; + + *eof = 1; + rc = snprintf(page, count, LPU64"\n", result); + } + return rc; +} + static int ll_rd_filestotal(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -206,6 +228,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "blocksize", ll_rd_blksize, 0, 0 }, { "kbytestotal", ll_rd_kbytestotal, 0, 0 }, { "kbytesfree", ll_rd_kbytesfree, 0, 0 }, + { "kbytesavail", ll_rd_kbytesavail, 0, 0 }, { "filestotal", ll_rd_filestotal, 0, 0 }, { "filesfree", ll_rd_filesfree, 0, 0 }, //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 7c4c7913df8daffe8071bec7667cbd673c0b6d74..52e1437c381e8e74f92cccea57a31af1e2ab8e48 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -146,7 +146,7 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; - struct brw_page pg; + struct brw_page pga; struct obdo oa; int rc = 0; ENTRY; @@ -154,19 +154,24 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, if (!PageLocked(page)) LBUG(); - if (PageUptodate(page)) - RETURN(0); - /* Check to see if we should return -EIO right away */ - pg.pg = page; - pg.off = offset; - pg.count = PAGE_SIZE; - pg.flag = 0; - rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), NULL, lsm, 1, &pg, + pga.pg = page; + pga.off = offset; + pga.count = PAGE_SIZE; + pga.flag = 0; + + oa.o_id = lsm->lsm_object_id; + oa.o_mode = inode->i_mode; + oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE; + + rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oa, lsm, 1, &pga, NULL); if (rc) RETURN(rc); + if (PageUptodate(page)) + RETURN(0); + /* We're completely overwriting an existing page, so _don't_ set it up * to date until commit_write */ if (from == 0 && to == PAGE_SIZE) { @@ -183,10 +188,6 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, GOTO(prepare_done, rc = 0); } - oa.o_id = lsm->lsm_object_id; - oa.o_mode = inode->i_mode; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE; - /* XXX could be an async ocp read.. read-ahead? */ rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0); if (rc == 0) { @@ -231,15 +232,15 @@ static int ll_ap_make_ready(void *data, int cmd) struct ll_async_page *llap; struct page *page; ENTRY; - + llap = llap_from_cookie(data); - if (IS_ERR(llap)) + if (IS_ERR(llap)) RETURN(-EINVAL); page = llap->llap_page; if (cmd == OBD_BRW_READ) { - /* _sync_page beat us to it and is about to call + /* _sync_page beat us to it and is about to call * _set_async_flags which will fire off rpcs again */ if (!test_and_clear_bit(LL_PRIVBITS_READ, &page->private)) RETURN(-EAGAIN); @@ -413,15 +414,15 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, /* _make_ready only sees llap once we've unlocked the page */ llap->llap_write_queued = 1; - rc = obd_queue_async_io(exp, lsm, NULL, llap->llap_cookie, + rc = obd_queue_async_io(exp, lsm, NULL, llap->llap_cookie, OBD_BRW_WRITE, 0, 0, 0, 0); if (rc != 0) { /* async failed, try sync.. */ struct obd_sync_io_container *osic; osic_init(&osic); llap->llap_write_queued = 0; - rc = obd_queue_sync_io(exp, lsm, NULL, osic, - llap->llap_cookie, + rc = obd_queue_sync_io(exp, lsm, NULL, osic, + llap->llap_cookie, OBD_BRW_WRITE, 0, to, 0); if (rc) GOTO(free_osic, rc); @@ -544,14 +545,13 @@ static int ll_page_matches(struct page *page) } RETURN(matches); } - -static int ll_issue_page_read(struct obd_export *exp, - struct ll_async_page *llap, - int defer_uptodate) -{ + +static int ll_issue_page_read(struct obd_export *exp, + struct ll_async_page *llap, int defer_uptodate) +{ struct page *page = llap->llap_page; int rc; - + /* we don't issue this page as URGENT so that it can be batched * with other pages by the kernel's read-ahead. We have a strong * requirement that readpage() callers must call wait_on_page() @@ -559,8 +559,8 @@ static int ll_issue_page_read(struct obd_export *exp, llap->llap_defer_uptodate = defer_uptodate; page_cache_get(page); set_bit(LL_PRIVBITS_READ, &page->private); /* see ll_sync_page() */ - rc = obd_queue_async_io(exp, ll_i2info(page->mapping->host)->lli_smd, - NULL, llap->llap_cookie, OBD_BRW_READ, 0, + rc = obd_queue_async_io(exp, ll_i2info(page->mapping->host)->lli_smd, + NULL, llap->llap_cookie, OBD_BRW_READ, 0, PAGE_SIZE, 0, ASYNC_COUNT_STABLE); if (rc) { LL_CDEBUG_PAGE(page, "read queueing failed\n"); diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index 81467da5a778bb42b3f7e40354373bdca9209f5d..cda014eb87063026fc85598e934206c58b2c66b0 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -109,18 +109,17 @@ static int ll_writepage_24(struct page *page) page_cache_get(page); if (llap->llap_write_queued) { LL_CDEBUG_PAGE(page, "marking urgent\n"); - rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, - llap->llap_cookie, ASYNC_READY | - ASYNC_URGENT); + rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, + ASYNC_READY | ASYNC_URGENT); } else { llap->llap_write_queued = 1; - rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL, - llap->llap_cookie, OBD_BRW_WRITE, 0, 0, - OBD_BRW_CREATE, ASYNC_READY | - ASYNC_URGENT); + rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, OBD_BRW_WRITE, 0, 0, + 0, ASYNC_READY | ASYNC_URGENT); if (rc == 0) LL_CDEBUG_PAGE(page, "mmap write queued\n"); - else + else llap->llap_write_queued = 0; } if (rc) @@ -170,7 +169,7 @@ static int ll_direct_IO_24(int rw, RETURN(-ENOMEM); } - flags = (rw == WRITE ? OBD_BRW_CREATE : 0) /* | OBD_BRW_DIRECTIO */; + flags = 0 /* | OBD_BRW_DIRECTIO */; offset = ((obd_off)blocknr << inode->i_blkbits); length = iobuf->length; diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 7edee0d0718519772a903b692774b7f2354d865a..21e884f1a29b9ff15b8bd13341b412a6f97adf3a 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -107,22 +107,21 @@ static int ll_writepage_26(struct page *page, struct writeback_control *wbc) llap = llap_from_page(page); if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); - page_cache_get(page); + page_cache_get(page); if (llap->llap_write_queued) { LL_CDEBUG_PAGE(page, "marking urgent\n"); - rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, - llap->llap_cookie, ASYNC_READY | - ASYNC_URGENT); + rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, + ASYNC_READY | ASYNC_URGENT); } else { llap->llap_write_queued = 1; - rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL, - llap->llap_cookie, OBD_BRW_WRITE, 0, 0, - OBD_BRW_CREATE, ASYNC_READY | - ASYNC_URGENT); + rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, OBD_BRW_WRITE, 0, 0, + 0, ASYNC_READY | ASYNC_URGENT); if (rc == 0) LL_CDEBUG_PAGE(page, "mmap write queued\n"); - else + else llap->llap_write_queued = 0; } if (rc) diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 6c26a166744eec2dac6ca7cb0b78f3ed67042e00..a565f515370b1b72e6bf32bb4e5aafdb948c33a5 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -41,6 +41,8 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_mds_md *lmm, int lmm_bytes); int lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_user_md *lump); +int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_user_md *lump); int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_user_md *lump); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 64b6a28f81fdcb965a5fd47f8a31d470eeb75f1f..03506e6304ef42fa7239652b641f32bdb3611414 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -558,6 +558,32 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, if (!lov->desc.ld_active_tgt_count) RETURN(-EIO); + /* Recreate a specific object id at the given OST index */ + if (src_oa->o_valid & OBD_MD_FLFLAGS && src_oa->o_flags & + OBD_FL_RECREATE_OBJS) { + struct lov_stripe_md obj_md; + struct lov_stripe_md *obj_mdp = &obj_md; + + ost_idx = src_oa->o_nlink; + lsm = *ea; + if (lsm == NULL) + RETURN(-EINVAL); + if (ost_idx >= lov->desc.ld_tgt_count) + RETURN(-EINVAL); + for (i = 0; i < lsm->lsm_stripe_count; i++) { + if (lsm->lsm_oinfo[i].loi_ost_idx == ost_idx) { + if (lsm->lsm_oinfo[i].loi_id != src_oa->o_id) + RETURN(-EINVAL); + break; + } + } + if (i == lsm->lsm_stripe_count) + RETURN(-EINVAL); + + rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, &obj_mdp, oti); + RETURN(rc); + } + ret_oa = obdo_alloc(); if (!ret_oa) RETURN(-ENOMEM); @@ -1320,10 +1346,11 @@ static int lov_sync(struct obd_export *exp, struct obdo *oa, RETURN(rc); } -static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm, +static int lov_brw_check(struct lov_obd *lov, struct obdo *oa, + struct lov_stripe_md *lsm, obd_count oa_bufs, struct brw_page *pga) { - int i; + int i, rc = 0; /* The caller just wants to know if there's a chance that this * I/O can succeed */ @@ -1342,8 +1369,12 @@ static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm, CDEBUG(D_HA, "lov idx %d inactive\n", ost); return -EIO; } + rc = obd_brw(OBD_BRW_CHECK, lov->tgts[stripe].ltd_exp, oa, + NULL, 1, &pga[i], NULL); + if (rc) + break; } - return 0; + return rc; } static int lov_brw(int cmd, struct obd_export *exp, struct obdo *src_oa, @@ -1370,7 +1401,7 @@ static int lov_brw(int cmd, struct obd_export *exp, struct obdo *src_oa, lov = &exp->exp_obd->u.lov; if (cmd == OBD_BRW_CHECK) { - rc = lov_brw_check(lov, lsm, oa_bufs, pga); + rc = lov_brw_check(lov, src_oa, lsm, oa_bufs, pga); RETURN(rc); } @@ -1526,7 +1557,7 @@ static int lov_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, lov = &exp->exp_obd->u.lov; if (cmd == OBD_BRW_CHECK) { - rc = lov_brw_check(lov, lsm, oa_bufs, pga); + rc = lov_brw_check(lov, oa, lsm, oa_bufs, pga); RETURN(rc); } @@ -1707,24 +1738,24 @@ int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, /* so the callback doesn't need the lsm */ lap->lap_loi_id = loi->loi_id; - rc = obd_prep_async_page(lov->tgts[loi->loi_ost_idx].ltd_exp, + rc = obd_prep_async_page(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, page, lap->lap_sub_offset, - &lov_async_page_ops, lap, + &lov_async_page_ops, lap, &lap->lap_sub_cookie); if (rc) { OBD_FREE(lap, sizeof(*lap)); RETURN(rc); } - CDEBUG(D_CACHE, "lap %p page %p cookie %p off "LPU64"\n", lap, page, + CDEBUG(D_CACHE, "lap %p page %p cookie %p off "LPU64"\n", lap, page, lap->lap_sub_cookie, offset); *res = lap; RETURN(0); } -static int lov_queue_async_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, +static int lov_queue_async_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + int cmd, obd_off off, int count, obd_flag brw_flags, obd_flag async_flags) { struct lov_obd *lov = &exp->exp_obd->u.lov; @@ -1766,16 +1797,16 @@ static int lov_set_async_flags(struct obd_export *exp, RETURN(PTR_ERR(lap)); loi = &lsm->lsm_oinfo[lap->lap_stripe]; - rc = obd_set_async_flags(lov->tgts[loi->loi_ost_idx].ltd_exp, + rc = obd_set_async_flags(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, lap->lap_sub_cookie, async_flags); RETURN(rc); } -static int lov_queue_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, +static int lov_queue_sync_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, struct obd_sync_io_container *osic, void *cookie, - int cmd, obd_off off, int count, + int cmd, obd_off off, int count, obd_flag brw_flags) { struct lov_obd *lov = &exp->exp_obd->u.lov; @@ -1792,17 +1823,17 @@ static int lov_queue_sync_io(struct obd_export *exp, RETURN(PTR_ERR(lap)); loi = &lsm->lsm_oinfo[lap->lap_stripe]; - rc = obd_queue_sync_io(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, - osic, lap->lap_sub_cookie, cmd, off, count, + rc = obd_queue_sync_io(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, + osic, lap->lap_sub_cookie, cmd, off, count, brw_flags); RETURN(rc); } /* this isn't exactly optimal. we may have queued sync io in oscs on - * all stripes, but we don't record that fact at queue time. so we + * all stripes, but we don't record that fact at queue time. so we * trigger sync io on all stripes. */ -static int lov_trigger_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, +static int lov_trigger_sync_io(struct obd_export *exp, + struct lov_stripe_md *lsm, struct lov_oinfo *loi, struct obd_sync_io_container *osic) { @@ -1814,7 +1845,7 @@ static int lov_trigger_sync_io(struct obd_export *exp, if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++) { err = obd_trigger_sync_io(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, osic); @@ -2163,6 +2194,9 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case LL_IOC_LOV_GETSTRIPE: rc = lov_getstripe(exp, karg, uarg); break; + case LL_IOC_LOV_SETEA: + rc = lov_setea(exp, karg, uarg); + break; default: { int set = 0; if (count == 0) diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index 0053a58a6c40b025ccec5705990a3590e78c8818..ef04e681c0441a58bd1869b12996cb64e7f1d470 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -500,6 +500,44 @@ int lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, RETURN(0); } +int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_user_md *lump) +{ + int i; + int rc; + struct obd_export *oexp; + struct lov_obd *lov = &exp->exp_obd->u.lov; + obd_id last_id = 0; + + for (i = 0; i < lump->lmm_stripe_count; i++) { + __u32 len = sizeof(last_id); + oexp = lov->tgts[lump->lmm_objects[i].l_ost_idx].ltd_exp; + rc = obd_get_info(oexp, strlen("last_id"), "last_id", + &len, &last_id); + if (rc) + RETURN(rc); + if (last_id < lump->lmm_objects[i].l_object_id) { + CERROR("Setting EA for object > than last id on " + "ost idx %d "LPD64" > "LPD64" \n", + lump->lmm_objects[i].l_ost_idx, + lump->lmm_objects[i].l_object_id, last_id); + RETURN(-EINVAL); + } + } + + rc = lov_setstripe(exp, lsmp, lump); + if (rc) + RETURN(rc); + for (i = 0; i < lump->lmm_stripe_count; i++) { + (*lsmp)->lsm_oinfo[i].loi_ost_idx = + lump->lmm_objects[i].l_ost_idx; + (*lsmp)->lsm_oinfo[i].loi_id = lump->lmm_objects[i].l_object_id; + (*lsmp)->lsm_oinfo[i].loi_gr = lump->lmm_objects[i].l_object_gr; + } + RETURN(0); +} + + /* Retrieve object striping information. * * @lump is a pointer to an in-core struct with lmm_ost_count indicating diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 2a322e695100ecb6ca90841772dee22210eb6db9..c29644c14c5f86c5d1bf147e83b596fe0724f427 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -187,6 +187,7 @@ struct lprocfs_vars lprocfs_obd_vars[] = { { "blocksize", lprocfs_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "desc_uuid", lov_rd_desc_uuid, 0, 0 }, { 0 } }; diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index a45560ae11b54154f6871dc0007530c38c30b1f5..91513f8749e379fef0907372b6c0e13d8d49ce24 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -664,7 +664,11 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs) { struct kstatfs sfs; - int rc = vfs_statfs(sb, &sfs); + int rc; + + memset(&sfs, 0, sizeof(sfs)); + + rc = sb->s_op->statfs(sb, &sfs); if (!rc && sfs.f_bfree < sfs.f_ffree) { sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; diff --git a/lustre/lvfs/fsfilt_extN.c b/lustre/lvfs/fsfilt_extN.c index b4f3fc7068fbefd5d05b8cbb4f748fffb487089a..8756f9a2cfa43efdde541c14371e497d02994604 100644 --- a/lustre/lvfs/fsfilt_extN.c +++ b/lustre/lvfs/fsfilt_extN.c @@ -636,7 +636,11 @@ static int fsfilt_extN_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs) { struct kstatfs sfs; - int rc = vfs_statfs(sb, &sfs); + int rc; + + memset(&sfs, 0, sizeof(sfs)); + + rc = sb->s_op->statfs(sb, &sfs); if (!rc && sfs.f_bfree < sfs.f_ffree) { sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; diff --git a/lustre/lvfs/fsfilt_reiserfs.c b/lustre/lvfs/fsfilt_reiserfs.c index 2e16c18fc3b9f12c25a8f882b5c0ae3150e452b6..9864eda7a78385ce9e08f4b10f3c308d04071629 100644 --- a/lustre/lvfs/fsfilt_reiserfs.c +++ b/lustre/lvfs/fsfilt_reiserfs.c @@ -158,10 +158,15 @@ static int fsfilt_reiserfs_add_journal_cb(struct obd_device *obd, return 0; } -static int fsfilt_reiserfs_statfs(struct super_block *sb, struct obd_statfs *osfs) +static int fsfilt_reiserfs_statfs(struct super_block *sb, + struct obd_statfs *osfs) { - struct statfs sfs; - int rc = vfs_statfs(sb, &sfs); + struct kstatfs sfs; + int rc; + + memset(&sfs, 0, sizeof(sfs)); + + rc = sb->s_op->statfs(sb, &sfs); statfs_pack(osfs, &sfs); return rc; diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 6dca228fdba84dc42e0f1decefda4dfa03497d1c..7223b814ec218aae6e612247e9bdf4fbe1b2d957 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -35,6 +35,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "blocksize", lprocfs_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 0de8ad73a0c21a7ccfed08f4a67c074ce0017ab5..794bcf95143c7aaf873f0588aefb29a6fe6f82d7 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -99,7 +99,8 @@ static __u32 mds_pack_open_flags(__u32 flags) { return (flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC | - MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA)) | + MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA | + MDS_OPEN_HAS_OBJS)) | ((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) | ((flags & O_EXCL) ? MDS_OPEN_EXCL : 0) | ((flags & O_TRUNC) ? MDS_OPEN_TRUNC : 0) | diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 06dd213f9af24b8a1221caaf925e7d1dad298f59..c512293b6da044bac54b31a4e53294e9a1f8257d 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -574,7 +574,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, if (inode->i_size + 1 != body->eadatasize) CERROR("symlink size: %Lu, reply space: %d\n", inode->i_size + 1, body->eadatasize); - size[bufcount] = MIN(inode->i_size + 1, body->eadatasize); + size[bufcount] = min_t(int, inode->i_size+1, body->eadatasize); bufcount++; CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n", inode->i_size + 1, body->eadatasize); @@ -784,7 +784,15 @@ out_pop: static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, unsigned long max_age) { - return fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs); + int rc; + + spin_lock(&obd->obd_osfs_lock); + rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, max_age); + if (rc == 0) + memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); + spin_unlock(&obd->obd_osfs_lock); + + return rc; } static int mds_statfs(struct ptlrpc_request *req) @@ -800,7 +808,8 @@ static int mds_statfs(struct ptlrpc_request *req) } /* We call this so that we can cache a bit - 1 jiffie worth */ - rc = obd_statfs(obd, lustre_msg_buf(req->rq_repmsg,0,size),jiffies-HZ); + rc = mds_obd_statfs(obd, lustre_msg_buf(req->rq_repmsg, 0, size), + jiffies - HZ); if (rc) { CERROR("mds_obd_statfs failed: rc %d\n", rc); GOTO(out, rc); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 59b3401a7d9f856c24a61b8e12ce48c03462e9bf..10365a67a9c67d4dfd87bccf3a166617c0e09e20 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -158,6 +158,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "blocksize", lprocfs_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "fstype", lprocfs_rd_fstype, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 7a123621b9c9ae61e23a70e7a5a0d08933d3400e..80728da84062a22cad629ba4de1f0b734103d2cb 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -373,28 +373,38 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME|OBD_MD_FLMTIME| OBD_MD_FLCTIME); - /* check if things like lstripe/lfs stripe are sending us the ea */ - if (rec->ur_flags & MDS_OPEN_HAS_EA) { - rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, mds->mds_osc_exp, + if (!(rec->ur_flags & MDS_OPEN_HAS_OBJS)) { + /* check if things like lstripe/lfs stripe are sending us the ea */ + if (rec->ur_flags & MDS_OPEN_HAS_EA) { + rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, + mds->mds_osc_exp, + 0, &lsm, rec->ur_eadata); + if (rc) + GOTO(out_oa, rc); + } + rc = obd_create(mds->mds_osc_exp, oa, &lsm, &oti); + if (rc) { + int level = D_ERROR; + if (rc == -ENOSPC) + level = D_INODE; + CDEBUG(level, "error creating objects for " + "inode %lu: rc = %d\n", + inode->i_ino, rc); + if (rc > 0) { + CERROR("obd_create returned invalid " + "rc %d\n", rc); + rc = -EIO; + } + GOTO(out_oa, rc); + } + } else { + rc = obd_iocontrol(OBD_IOC_LOV_SETEA, mds->mds_osc_exp, 0, &lsm, rec->ur_eadata); - if (rc) + if (rc) { GOTO(out_oa, rc); - } - - rc = obd_create(mds->mds_osc_exp, oa, &lsm, &oti); - if (rc) { - int level = D_ERROR; - if (rc == -ENOSPC) - level = D_INODE; - CDEBUG(level, "error creating objects for inode %lu: rc = %d\n", - inode->i_ino, rc); - if (rc > 0) { - CERROR("obd_create returned invalid rc %d\n", rc); - rc = -EIO; } - GOTO(out_oa, rc); + lsm->lsm_object_id = oa->o_id; } - if (inode->i_size) { oa->o_size = inode->i_size; obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME| diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index dbd805e480f54ce46439666d188967123208908d..615c10200541d1c511bae7bbe34e40370fd01376 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -30,7 +30,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/pagemap.h> -#else +#else #include <liblustre.h> #include <linux/obd_class.h> #include <linux/obd.h> @@ -241,7 +241,7 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid) /* Search for a client OBD connected to tgt_uuid. If grp_uuid is specified, then only the client with that uuid is returned, otherwise any client connected to the tgt is returned. */ -struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, +struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, char * typ_name, struct obd_uuid *grp_uuid) { @@ -251,13 +251,13 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, struct obd_device *obd = &obd_dev[i]; if (obd->obd_type == NULL) continue; - if ((strncmp(obd->obd_type->typ_name, typ_name, + if ((strncmp(obd->obd_type->typ_name, typ_name, strlen(typ_name)) == 0)) { struct client_obd *cli = &obd->u.cli; struct obd_import *imp = cli->cl_import; if (obd_uuid_equals(tgt_uuid, &imp->imp_target_uuid) && - ((grp_uuid)? obd_uuid_equals(grp_uuid, - &obd->obd_uuid) : 1)) { + ((grp_uuid)? obd_uuid_equals(grp_uuid, + &obd->obd_uuid) : 1)) { return obd; } } @@ -597,7 +597,7 @@ int class_disconnect(struct obd_export *export, int flags) if (list_empty(&export->exp_handle.h_link)) RETURN(0); - CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", + CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", export->exp_handle.h_cookie); class_unlink_export(export); @@ -624,10 +624,10 @@ void class_disconnect_exports(struct obd_device *obd, int flags) list_for_each_safe(tmp, n, &work_list) { exp = list_entry(tmp, struct obd_export, exp_obd_chain); class_export_get(exp); - - if (obd_uuid_equals(&exp->exp_client_uuid, + + if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) { - CDEBUG(D_HA, + CDEBUG(D_HA, "exp %p export uuid == obd uuid, don't discon\n", exp); class_export_put(exp); @@ -688,11 +688,11 @@ void osic_add_one(struct obd_sync_io_container *osic, osic_grab(osic); } -void osic_complete_one(struct obd_sync_io_container *osic, +void osic_complete_one(struct obd_sync_io_container *osic, struct osic_callback_context *occ, int rc) { unsigned long flags; - wait_queue_head_t *wake = NULL; + wait_queue_head_t *wake = NULL; int old_rc; spin_lock_irqsave(&osic->osic_lock, flags); @@ -710,7 +710,7 @@ void osic_complete_one(struct obd_sync_io_container *osic, spin_unlock_irqrestore(&osic->osic_lock, flags); CDEBUG(D_CACHE, "osic %p completed, rc %d -> %d via %d, %d now " - "pending (racey)\n", osic, old_rc, osic->osic_rc, rc, + "pending (racey)\n", osic, old_rc, osic->osic_rc, rc, osic->osic_pending); if (wake) wake_up(wake); @@ -737,7 +737,7 @@ static void interrupted_osic(void *data) spin_lock_irqsave(&osic->osic_lock, flags); list_for_each(pos, &osic->osic_occ_list) { - occ = list_entry(pos, struct osic_callback_context, + occ = list_entry(pos, struct osic_callback_context, occ_osic_item); occ->occ_interrupted(occ); } @@ -760,10 +760,10 @@ int osic_wait(struct obd_sync_io_container *osic) lwi = (struct l_wait_info){ 0, }; } while (rc == -EINTR); - LASSERTF(osic->osic_pending == 0, + LASSERTF(osic->osic_pending == 0, "exiting osic_wait(osic = %p) with %d pending\n", osic, osic->osic_pending); - CDEBUG(D_CACHE, "done waiting on osic %p\n", osic); + CDEBUG(D_CACHE, "done waiting on osic %p rc %d\n", osic, osic->osic_rc); return osic->osic_rc; } diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 82ceab472e7b9fcfe677d3c704bee326827891ad..e4146dc965c215c00390fd935a4fd610578630fe 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -215,7 +215,6 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, else last_index = LLOG_BITMAP_BYTES * 8 - 1; - while (rc == 0) { struct llog_rec_hdr *rec; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 3547ea7b5a06504cd6eb2893d3b109f7bba93e49..fadf05b43b437d78764fbccb15723fb41036dbd8 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -265,6 +265,24 @@ int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count, return rc; } +int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_statfs osfs; + int rc = obd_statfs(data, &osfs, jiffies - HZ); + if (!rc) { + __u32 blk_size = osfs.os_bsize >> 10; + __u64 result = osfs.os_bavail; + + while (blk_size >>= 1) + result <<= 1; + + *eof = 1; + rc = snprintf(page, count, LPU64"\n", result); + } + return rc; +} + int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -783,6 +801,7 @@ EXPORT_SYMBOL(lprocfs_rd_numrefs); EXPORT_SYMBOL(lprocfs_rd_blksize); EXPORT_SYMBOL(lprocfs_rd_kbytestotal); EXPORT_SYMBOL(lprocfs_rd_kbytesfree); +EXPORT_SYMBOL(lprocfs_rd_kbytesavail); EXPORT_SYMBOL(lprocfs_rd_filestotal); EXPORT_SYMBOL(lprocfs_rd_filesfree); EXPORT_SYMBOL(lprocfs_rd_filegroups); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index fcaa7a78fc29a3b209648874b1d333e47c71b952..46710aae28abd532d54c9412e093d6f80617886b 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -122,6 +122,8 @@ int class_attach(struct lustre_cfg *lcfg) INIT_LIST_HEAD(&obd->obd_exports); obd->obd_num_exports = 0; spin_lock_init(&obd->obd_dev_lock); + spin_lock_init(&obd->obd_osfs_lock); + obd->obd_osfs_age = jiffies - 1000 * HZ; init_waitqueue_head(&obd->obd_refcount_waitq); /* XXX belongs in setup not attach */ diff --git a/lustre/obdclass/obdo.c b/lustre/obdclass/obdo.c index 996ef58d4ccf3b2940e9fb5f35106cab19d0c0b9..4e8e2449ebc04868b745ef6471feaa3e73c82208 100644 --- a/lustre/obdclass/obdo.c +++ b/lustre/obdclass/obdo.c @@ -183,22 +183,10 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) dst->o_flags = src->i_flags; newvalid |= OBD_MD_FLFLAGS; } - if (valid & OBD_MD_FLNLINK) { - dst->o_nlink = src->i_nlink; - newvalid |= OBD_MD_FLNLINK; - } if (valid & OBD_MD_FLGENER) { dst->o_generation = src->i_generation; newvalid |= OBD_MD_FLGENER; } - if (valid & OBD_MD_FLRDEV) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev); -#else - dst->o_rdev = (__u32)old_decode_dev(src->i_rdev); -#endif - newvalid |= OBD_MD_FLRDEV; - } dst->o_valid |= newvalid; } @@ -265,16 +253,8 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) dst->i_gid = src->o_gid; if (valid & OBD_MD_FLFLAGS) dst->i_flags = src->o_flags; - if (valid & OBD_MD_FLNLINK) - dst->i_nlink = src->o_nlink; if (valid & OBD_MD_FLGENER) dst->i_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - dst->i_rdev = to_kdev_t(src->o_rdev); -#else - dst->i_rdev = old_decode_dev(src->o_rdev); -#endif } EXPORT_SYMBOL(obdo_to_inode); #endif @@ -307,21 +287,10 @@ void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid) dst->o_gid = src->o_gid; if (valid & OBD_MD_FLFLAGS) dst->o_flags = src->o_flags; - /* - if (valid & OBD_MD_FLOBDFLG) - dst->o_obdflags = src->o_obdflags; - */ - if (valid & OBD_MD_FLNLINK) - dst->o_nlink = src->o_nlink; if (valid & OBD_MD_FLGENER) dst->o_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - dst->o_rdev = src->o_rdev; - if (valid & OBD_MD_FLINLINE && - src->o_obdflags & OBD_FL_INLINEDATA) { + if (valid & OBD_MD_FLINLINE) memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline)); - dst->o_obdflags |= OBD_FL_INLINEDATA; - } dst->o_valid |= valid; } diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 36beb06ed812e1c8df78e377478ed85a4cb225ba..c627f828afb3f7baac283eb375985509ff45a941 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -221,7 +221,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, RETURN(-EINVAL); /* Temp fix to stop falling foul of osc_announce_cached() */ - oa->o_valid &= ~(OBD_MD_FLBLOCKS | OBD_MD_FLRDEV); + oa->o_valid &= ~(OBD_MD_FLBLOCKS | OBD_MD_FLGRANT); memset(res, 0, sizeof(*res) * niocount); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index fa591b22a3e105af11c2b368023d7eb3670f721c..d3d79ad17f50505eb0609f1c3625962239b7fa67 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -784,8 +784,8 @@ static int echo_client_async_page(struct obd_export *exp, int rw, eas.eas_next_offset += PAGE_SIZE; eap->eap_off = eas.eas_next_offset; - rc = obd_prep_async_page(exp, lsm, NULL, eap->eap_page, - eap->eap_off, &ec_async_page_ops, + rc = obd_prep_async_page(exp, lsm, NULL, eap->eap_page, + eap->eap_off, &ec_async_page_ops, eap, &eap->eap_cookie); if (rc) { spin_lock_irqsave(&eas.eas_lock, flags); @@ -794,8 +794,8 @@ static int echo_client_async_page(struct obd_export *exp, int rw, } /* always asserts urgent, which isn't quite right */ - rc = obd_queue_async_io(exp, lsm, NULL, eap->eap_cookie, - rw, 0, PAGE_SIZE, 0, + rc = obd_queue_async_io(exp, lsm, NULL, eap->eap_cookie, + rw, 0, PAGE_SIZE, 0, ASYNC_READY | ASYNC_URGENT | ASYNC_COUNT_STABLE); spin_lock_irqsave(&eas.eas_lock, flags); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index f8b2ed179e0d377f4442242a060222d0b199a9e7..d576705598a82a84e083d7633226841ab4496fc3 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -93,7 +93,7 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd); spin_unlock(&filter->fo_translock); oti->oti_transno = last_rcvd; - } else { + } else { spin_lock(&filter->fo_translock); last_rcvd = oti->oti_transno; if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno)) @@ -1017,7 +1017,7 @@ static int filter_prepare_destroy(struct obd_device *obd, obd_id objid) * file then this enqueue will communicate the DISCARD to all the * clients. This assumes that we always destroy all the objects for * a file at a time, as is currently the case. If we're not the - * OST at stripe 0 then we'll harmlessly get a very lonely lock in + * OST at stripe 0 then we'll harmlessly get a very lonely lock in * the local DLM and immediately drop it. */ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, res_id, LDLM_EXTENT, &extent, @@ -1045,7 +1045,7 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid, if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { CERROR("destroying objid %*s nlink = %lu, count = %d\n", dchild->d_name.len, dchild->d_name.name, - (unsigned long)inode->i_nlink, + (unsigned long)inode->i_nlink, atomic_read(&inode->i_count)); } @@ -1320,6 +1320,11 @@ static int filter_destroy_export(struct obd_export *exp) { ENTRY; + if (exp->exp_filter_data.fed_pending) + CERROR("%s: cli %s/%p has %lu pending on destroyed export\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, + exp, exp->exp_filter_data.fed_pending); + target_destroy_export(exp); if (exp->exp_obd->obd_replayable) @@ -1330,20 +1335,50 @@ static int filter_destroy_export(struct obd_export *exp) /* also incredibly similar to mds_disconnect */ static int filter_disconnect(struct obd_export *exp, int flags) { + struct filter_obd *filter = &exp->exp_obd->u.filter; + struct filter_export_data *fed = &exp->exp_filter_data; unsigned long irqflags; struct llog_ctxt *ctxt; int rc; ENTRY; LASSERT(exp); + + /* This would imply RPCs still in flight or preprw/commitrw imbalance */ + if (fed->fed_pending) + CWARN("%s: cli %s has %lu pending at disconnect time\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, + fed->fed_pending); + + /* Forget what this client had cached. This is also done on the + * client when it invalidates its import. Do this before unlinking + * from the export list so filter_grant_sanity_check totals are OK. */ + spin_lock(&exp->exp_obd->obd_osfs_lock); + LASSERTF(exp->exp_obd->u.filter.fo_tot_dirty >= fed->fed_dirty, + "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %lu\n", + exp->exp_obd->obd_name, exp->exp_obd->u.filter.fo_tot_dirty, + exp->exp_client_uuid.uuid, exp, fed->fed_dirty); + exp->exp_obd->u.filter.fo_tot_dirty -= fed->fed_dirty; + LASSERTF(exp->exp_obd->u.filter.fo_tot_granted >= fed->fed_grant, + "%s: tot_granted "LPU64" cli %s/%p fed_grant %lu\n", + exp->exp_obd->obd_name, exp->exp_obd->u.filter.fo_tot_granted, + exp->exp_client_uuid.uuid, exp, fed->fed_grant); + exp->exp_obd->u.filter.fo_tot_granted -= fed->fed_grant; + LASSERTF(exp->exp_obd->u.filter.fo_tot_pending >= fed->fed_pending, + "%s: tot_pending "LPU64" cli %s/%p fed_pending %lu\n", + exp->exp_obd->obd_name, exp->exp_obd->u.filter.fo_tot_pending, + exp->exp_client_uuid.uuid, exp, fed->fed_pending); + fed->fed_dirty = 0; + fed->fed_grant = 0; + spin_unlock(&exp->exp_obd->obd_osfs_lock); + ldlm_cancel_locks_for_export(exp); spin_lock_irqsave(&exp->exp_lock, irqflags); exp->exp_flags = flags; spin_unlock_irqrestore(&exp->exp_lock, irqflags); - fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb); - /* XXX cleanup preallocated inodes */ + fsfilt_sync(exp->exp_obd, filter->fo_sb); /* flush any remaining cancel messages out to the target */ ctxt = llog_get_context(exp->exp_obd, LLOG_UNLINK_REPL_CTXT); @@ -1554,21 +1589,21 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, diff = oa->o_id - filter_last_id(filter, oa); CDEBUG(D_INFO, "filter_last_id() = "LPU64" -> diff = %d\n", filter_last_id(filter, oa), diff); - + /* delete orphans request */ - if ((oa->o_valid & OBD_MD_FLFLAGS) && + if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_DELORPHAN)) { if (diff >= 0) RETURN(diff); filter_destroy_precreated(exp, oa, filter); rc = filter_update_last_objid(obd, group, 0); if (rc) - CERROR("unable to write lastobjid, but orphans" + CERROR("unable to write lastobjid, but orphans" "were deleted\n"); RETURN(0); } else { /* only precreate if group == 0 and o_id is specfied */ - if (!(oa->o_valid & OBD_FL_DELORPHAN) && + if (!(oa->o_valid & OBD_FL_DELORPHAN) && (group != 0 || oa->o_id == 0)) RETURN(1); @@ -1596,15 +1631,33 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, struct dentry *dparent; int err = 0, rc = 0, i; __u64 next_id; + int recreate_obj = 0; void *handle = NULL; ENTRY; filter = &obd->u.filter; + if ((oa->o_valid & OBD_MD_FLFLAGS) && + (oa->o_flags & OBD_FL_RECREATE_OBJS)) { + recreate_obj = 1; + } + for (i = 0; i < *num && err == 0; i++) { int cleanup_phase = 0; - next_id = filter_last_id(filter, oa) + 1; + if (recreate_obj) { + __u64 last_id; + next_id = oa->o_id; + last_id = filter_last_id(filter, NULL); + if (next_id > last_id) { + CERROR("Error: Trying to recreate obj greater" + "than last id "LPD64" > "LPD64"\n", + next_id, last_id); + RETURN(-EINVAL); + } + } else + next_id = filter_last_id(filter, NULL) + 1; + CDEBUG(D_INFO, "precreate objid "LPU64"\n", next_id); dparent = filter_parent_lock(obd, group, next_id, LCK_PW, @@ -1620,9 +1673,18 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, if (dchild->d_inode != NULL) { /* This would only happen if lastobjid was bad on disk*/ - CERROR("Serious error: objid %*s already exists; is " - "this filesystem corrupt?\n", - dchild->d_name.len, dchild->d_name.name); + /* Could also happen if recreating missing obj but + * already exists + */ + if (recreate_obj) { + CERROR("Serious error: recreating obj %*s but " + "obj already exists \n", + dchild->d_name.len, dchild->d_name.name); + } else { + CERROR("Serious error: objid %*s already " + "exists; is this filesystem corrupt?\n", + dchild->d_name.len, dchild->d_name.name); + } GOTO(cleanup, rc = -EEXIST); } @@ -1636,12 +1698,15 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, if (rc) { CERROR("create failed rc = %d\n", rc); GOTO(cleanup, rc); - } + } - filter_set_last_id(filter, oa, next_id); - err = filter_update_last_objid(obd, group, 0); - if (err) - CERROR("unable to write lastobjid but file created\n"); + if (!recreate_obj) { + filter_set_last_id(filter, NULL, next_id); + err = filter_update_last_objid(obd, group, 0); + if (err) + CERROR("unable to write lastobjid " + "but file created\n"); + } cleanup: switch(cleanup_phase) { @@ -1659,7 +1724,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, case 0: break; } - + if (rc) break; } @@ -1696,12 +1761,18 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, obd = exp->exp_obd; push_ctxt(&saved, &obd->obd_ctxt, NULL); - diff = filter_should_precreate(exp, oa, group); - if (diff > 0) { - oa->o_id = filter_last_id(&obd->u.filter, oa); + if ((oa->o_valid & OBD_MD_FLFLAGS) && + (oa->o_flags & OBD_FL_RECREATE_OBJS)) { + diff = 1; rc = filter_precreate(obd, oa, group, &diff); - oa->o_id += diff; - oa->o_valid = OBD_MD_FLID; + } else { + diff = filter_should_precreate(exp, oa, group); + if (diff > 0) { + oa->o_id = filter_last_id(&obd->u.filter, oa); + rc = filter_precreate(obd, oa, group, &diff); + oa->o_id += diff; + oa->o_valid = OBD_MD_FLID; + } } pop_ctxt(&saved, &obd->obd_ctxt, NULL); @@ -1754,7 +1825,7 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, cleanup_phase = 2; if (dchild->d_inode == NULL) { - CDEBUG(D_INODE, "destroying non-existent object "LPU64"\n", + CDEBUG(D_INODE, "destroying non-existent object "LPU64"\n", oa->o_id); GOTO(cleanup, rc = -ENOENT); } @@ -1905,11 +1976,90 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, RETURN(rc); } +/* debugging to make sure that nothing bad happens, can be turned off soon. + * caller must hold osfs lock */ +static void filter_grant_total_exports(struct obd_device *obd, + obd_size *tot_dirty, + obd_size *tot_pending, + obd_size *tot_granted, + obd_size maxsize) +{ + struct filter_export_data *fed; + struct obd_export *exp_pos; + + spin_lock(&obd->obd_dev_lock); + list_for_each_entry(exp_pos, &obd->obd_exports, exp_obd_chain) { + fed = &exp_pos->exp_filter_data; + LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64, + exp_pos->exp_client_uuid.uuid, exp_pos, + fed->fed_dirty, maxsize); + LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, + "cli %s/%p %lu+%lu > "LPU64, + exp_pos->exp_client_uuid.uuid, exp_pos, + fed->fed_grant, fed->fed_pending, maxsize); + *tot_dirty += fed->fed_dirty; + *tot_pending += fed->fed_pending; + *tot_granted += fed->fed_grant + fed->fed_pending; + } + spin_unlock(&obd->obd_dev_lock); +} + +static void filter_grant_sanity_check(obd_size tot_dirty, obd_size tot_pending, + obd_size tot_granted, + obd_size fo_tot_dirty, + obd_size fo_tot_pending, + obd_size fo_tot_granted, obd_size maxsize) +{ + LASSERTF(tot_dirty == fo_tot_dirty, LPU64" != "LPU64, + tot_dirty, fo_tot_dirty); + LASSERTF(tot_pending == fo_tot_pending, LPU64" != "LPU64, + tot_pending, fo_tot_pending); + LASSERTF(tot_granted == fo_tot_granted, LPU64" != "LPU64, + tot_granted, fo_tot_granted); + LASSERTF(tot_dirty <= maxsize, LPU64" > "LPU64, tot_dirty, maxsize); + LASSERTF(tot_pending <= tot_granted, LPU64" > "LPU64, tot_pending, + tot_granted); + LASSERTF(tot_granted <= maxsize, LPU64" > "LPU64, tot_granted, maxsize); +} + static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, unsigned long max_age) { + struct filter_obd *filter = &obd->u.filter; + obd_size tot_cached = 0, tot_pending = 0, tot_granted = 0; + obd_size fo_tot_cached, fo_tot_pending, fo_tot_granted; + int blockbits = filter->fo_sb->s_blocksize_bits; + int rc; ENTRY; - RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs)); + + /* at least try to account for cached pages. its still racey and + * might be under-reporting if clients haven't announced their + * caches with brw recently */ + spin_lock(&obd->obd_osfs_lock); + rc = fsfilt_statfs(obd, filter->fo_sb, max_age); + memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); + filter_grant_total_exports(obd, &tot_cached, &tot_pending, &tot_granted, + osfs->os_blocks << blockbits); + fo_tot_cached = filter->fo_tot_dirty; + fo_tot_pending = filter->fo_tot_pending; + fo_tot_granted = filter->fo_tot_granted; + spin_unlock(&obd->obd_osfs_lock); + + /* Do check outside spinlock, to avoid wedging system on failure */ + filter_grant_sanity_check(tot_cached, tot_pending, tot_granted, + fo_tot_cached, fo_tot_pending, + fo_tot_granted, osfs->os_blocks << blockbits); + + CDEBUG(D_SUPER | D_CACHE, "blocks cached "LPU64" granted "LPU64 + "pending "LPU64" free "LPU64" avail "LPU64"\n", + tot_cached >> blockbits, tot_granted >> blockbits, + tot_pending >> blockbits, osfs->os_bfree, osfs->os_bavail); + + osfs->os_bavail -= min(osfs->os_bavail, + (tot_cached +tot_pending +osfs->os_bsize -1) >> + blockbits); + + RETURN(rc); } static int filter_get_info(struct obd_export *exp, __u32 keylen, @@ -2000,7 +2150,7 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, BDEVNAME_DECLARE_STORAGE(tmp); CERROR("setting device %s read-only\n", ll_bdevname(sb, tmp)); - + handle = fsfilt_start(obd, inode, FSFILT_OP_MKNOD, NULL); LASSERT(handle); (void)fsfilt_commit(obd, inode, handle, 1); @@ -2015,18 +2165,18 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, } case OBD_IOC_LLOG_CANCEL: - case OBD_IOC_LLOG_REMOVE: + case OBD_IOC_LLOG_REMOVE: case OBD_IOC_LLOG_INFO: case OBD_IOC_LLOG_PRINT: { /* FIXME to be finished */ RETURN(-EOPNOTSUPP); /* struct llog_ctxt *ctxt = NULL; - + push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); - + RETURN(rc); */ } @@ -2046,12 +2196,12 @@ static struct llog_operations filter_size_orig_logops = { }; static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_logid *logid) + int count, struct llog_logid *logid) { struct llog_ctxt *ctxt; int rc; ENTRY; - + filter_unlink_repl_logops = llog_client_ops; filter_unlink_repl_logops.lop_cancel = llog_obd_repl_cancel; filter_unlink_repl_logops.lop_connect = llog_repl_connect; @@ -2074,7 +2224,7 @@ static int filter_llog_finish(struct obd_device *obd, int count) { int rc; ENTRY; - + rc = llog_cleanup(llog_get_context(obd, LLOG_UNLINK_REPL_CTXT)); if (rc) RETURN(rc); diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index ce7b4a372ba9143cd36f73755e4dbe3d861df656..06d852c4735b390d759e9826f4b5d24d29471866 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -48,6 +48,8 @@ #define FILTER_INCOMPAT_GROUPS 0x00000001 #define FILTER_INCOMPAT_SUPP (FILTER_INCOMPAT_GROUPS) +#define FILTER_GRANT_CHUNK (2ULL*1024*1024) + /* Data stored per server at the head of the last_rcvd file. In le32 order. * Try to keep this the same as mds_server_data so we might one day merge. */ struct filter_server_data { @@ -128,15 +130,20 @@ void flip_into_page_cache(struct inode *inode, struct page *new_page); int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti); +obd_size filter_grant_space_left(struct obd_export *exp); +long filter_grant(struct obd_export *exp, obd_size current_grant, + obd_size want, obd_size fs_space_left); +void filter_grant_commit(struct obd_export *exp, int niocount, + struct niobuf_local *res); /* filter_log.c */ struct ost_filterdata { __u32 ofd_epoch; }; -int filter_log_sz_change(struct llog_handle *cathandle, +int filter_log_sz_change(struct llog_handle *cathandle, struct ll_fid *mds_fid, __u32 io_epoch, - struct llog_cookie *logcookie, + struct llog_cookie *logcookie, struct inode *inode); //int filter_get_catalog(struct obd_device *); void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index f4581bb001b598e9df18b770bc768482fefbc7d9..c2867b5bc6b3ca61c78712adbbac9fd1013a654a 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -99,12 +99,171 @@ err_page: return lnb->rc; } +/* Grab the dirty and seen grant announcements from the incoming obdo. + * We will later calculate the clients new grant and return it. + * Caller must hold osfs lock */ +static void filter_grant_incoming(struct obd_export *exp, struct obdo *oa) +{ + struct filter_export_data *fed; + struct obd_device *obd = exp->exp_obd; + ENTRY; + + if ((oa->o_valid & (OBD_MD_FLBLOCKS|OBD_MD_FLGRANT)) != + (OBD_MD_FLBLOCKS|OBD_MD_FLGRANT)) { + oa->o_valid &= ~OBD_MD_FLGRANT; + EXIT; + return; + } + + fed = &exp->exp_filter_data; + + /* Add some margin, since there is a small race if other RPCs arrive + * out-or-order and have already consumed some grant. We want to + * leave this here in case there is a large error in accounting. */ + CDEBUG(oa->o_grant > fed->fed_grant + FILTER_GRANT_CHUNK ? + D_ERROR : D_CACHE, + "%s: cli %s reports granted: "LPU64" dropped: %u, local: %lu\n", + obd->obd_name, exp->exp_client_uuid.uuid, oa->o_grant, + oa->o_dropped, fed->fed_grant); + + /* Update our accounting now so that statfs takes it into account. + * Note that fed_dirty is only approximate and can become incorrect + * if RPCs arrive out-of-order. No important calculations depend + * on fed_dirty however. */ + obd->u.filter.fo_tot_dirty += oa->o_dirty - fed->fed_dirty; + if (fed->fed_grant < oa->o_dropped) { + CERROR("%s: cli %s reports %u dropped > fed_grant %lu\n", + obd->obd_name, exp->exp_client_uuid.uuid, + oa->o_dropped, fed->fed_grant); + oa->o_dropped = 0; + } + if (obd->u.filter.fo_tot_granted < oa->o_dropped) { + CERROR("%s: cli %s reports %u dropped > tot_granted "LPU64"\n", + obd->obd_name, exp->exp_client_uuid.uuid, + oa->o_dropped, obd->u.filter.fo_tot_granted); + oa->o_dropped = 0; + } + obd->u.filter.fo_tot_granted -= oa->o_dropped; + fed->fed_grant -= oa->o_dropped; + fed->fed_dirty = oa->o_dirty; + EXIT; +} + +#define GRANT_FOR_LLOG 16 + +/* Figure out how much space is available between what we've granted + * and what remains in the filesystem. Compensate for ext3 indirect + * block overhead when computing how much free space is left ungranted. + * + * Caller must hold obd_osfs_lock. */ +obd_size filter_grant_space_left(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + int blockbits = obd->u.filter.fo_sb->s_blocksize_bits; + obd_size tot_granted = obd->u.filter.fo_tot_granted, avail, left = 0; + int rc, statfs_done = 0; + + if (time_before(obd->obd_osfs_age, jiffies - HZ)) { +restat: + rc = fsfilt_statfs(obd, obd->u.filter.fo_sb, jiffies + 1); + if (rc) /* N.B. statfs can't really fail */ + RETURN(0); + statfs_done = 1; + } + + avail = obd->obd_osfs.os_bavail; + left = avail - (avail >> (blockbits - 3)); /* (d)indirect */ + if (left > GRANT_FOR_LLOG) { + left = (left - GRANT_FOR_LLOG) << blockbits; + } else { + left = 0 /* << blockbits */; + } + + if (!statfs_done && left < 32 * FILTER_GRANT_CHUNK + tot_granted) { + CDEBUG(D_CACHE, "fs has no space left and statfs too old\n"); + goto restat; + } + + if (left >= tot_granted) { + left -= tot_granted; + } else { + static unsigned long next; + if (left < tot_granted - obd->u.filter.fo_tot_pending && + time_after(jiffies, next)) { + spin_unlock(&obd->obd_osfs_lock); + CERROR("%s: cli %s granted "LPU64" more than available " + LPU64" and pending "LPU64"\n", obd->obd_name, + exp->exp_client_uuid.uuid, tot_granted, left, + obd->u.filter.fo_tot_pending); + if (next == 0) + portals_debug_dumplog(); + next = jiffies + 20 * HZ; + spin_lock(&obd->obd_osfs_lock); + } + left = 0; + } + + CDEBUG(D_CACHE, "%s: cli %s free: "LPU64" avail: "LPU64" grant "LPU64 + " left: "LPU64" pending: "LPU64"\n", obd->obd_name, + exp->exp_client_uuid.uuid, obd->obd_osfs.os_bfree << blockbits, + avail << blockbits, tot_granted, left, + obd->u.filter.fo_tot_pending); + + return left; +} + +/* Calculate how much grant space to allocate to this client, based on how + * much space is currently free and how much of that is already granted. + * + * Caller must hold obd_osfs_lock. */ +long filter_grant(struct obd_export *exp, obd_size current_grant, + obd_size want, obd_size fs_space_left) +{ + struct obd_device *obd = exp->exp_obd; + struct filter_export_data *fed = &exp->exp_filter_data; + int blockbits = obd->u.filter.fo_sb->s_blocksize_bits; + __u64 grant = 0; + + /* Grant some fraction of the client's requested grant space so that + * they are not always waiting for write credits (not all of it to + * avoid overgranting in face of multiple RPCs in flight). This + * essentially will be able to control the OSC_MAX_RIF for a client. + * + * If we do have a large disparity and multiple RPCs in flight we + * might grant "too much" but that's OK because it means we are + * dirtying a lot on the client and will likely use it up quickly. */ + if (current_grant < want) { + grant = min((want >> blockbits) / 2, + (fs_space_left >> blockbits) / 8); + grant <<= blockbits; + + if (grant) { + if (grant > FILTER_GRANT_CHUNK) + grant = FILTER_GRANT_CHUNK; + + obd->u.filter.fo_tot_granted += grant; + fed->fed_grant += grant; + } + } + + CDEBUG(D_CACHE,"%s: cli %s wants: "LPU64" granting: "LPU64"\n", + obd->obd_name, exp->exp_client_uuid.uuid, want, grant); + CDEBUG(D_CACHE, + "%s: cli %s tot cached:"LPU64" granted:"LPU64 + " num_exports: %d\n", obd->obd_name, exp->exp_client_uuid.uuid, + obd->u.filter.fo_tot_dirty, + obd->u.filter.fo_tot_granted, obd->obd_num_exports); + + return grant; +} + static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, struct obd_trans_info *oti) { + struct obd_device *obd = exp->exp_obd; struct obd_run_ctxt saved; struct obd_ioobj *o; struct niobuf_remote *rnb; @@ -119,6 +278,21 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, /* We are currently not supporting multi-obj BRW_READ RPCS at all. * When we do this function's dentry cleanup will need to be fixed */ LASSERT(objcount == 1); + LASSERT(obj->ioo_bufcnt > 0); + + if (oa && oa->o_valid & OBD_MD_FLGRANT) { + spin_lock(&obd->obd_osfs_lock); + filter_grant_incoming(exp, oa); + +#if 0 + /* Reads do not increase grants */ + oa->o_grant = filter_grant(exp, oa->o_grant, oa->o_undirty, + filter_grant_space_left(exp)); +#else + oa->o_grant = 0; +#endif + spin_unlock(&obd->obd_osfs_lock); + } OBD_ALLOC(fso, objcount * sizeof(*fso)); if (fso == NULL) @@ -130,7 +304,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, for (i = 0, o = obj; i < objcount; i++, o++) { LASSERT(o->ioo_bufcnt); - dentry = filter_oa2dentry(exp->exp_obd, oa); + dentry = filter_oa2dentry(obd, oa); if (IS_ERR(dentry)) GOTO(cleanup, rc = PTR_ERR(dentry)); @@ -160,7 +334,6 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, lnb->offset = rnb->offset; lnb->len = rnb->len; lnb->flags = rnb->flags; - lnb->start = jiffies; if (inode->i_size <= rnb->offset) { /* If there's no more data, abort early. @@ -195,8 +368,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, CDEBUG(D_INFO, "start_page_read: %lu jiffies\n", (jiffies - now)); - lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_READ_BYTES, - tot_bytes); + lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes); while (lnb-- > res) { rc = filter_finish_page_read(lnb); if (rc) { @@ -235,6 +407,111 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, return rc; } +/* When clients have dirtied as much space as they've been granted they + * fall through to sync writes. These sync writes haven't been expressed + * in grants and need to error with ENOSPC when there isn't room in the + * filesystem for them after grants are taken into account. However, + * writeback of the dirty data that was already granted space can write + * right on through. + * + * Caller must hold obd_osfs_lock. */ +static int filter_grant_check(struct obd_export *exp, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_remote *rnb, + struct niobuf_local *lnb, obd_size *left, + struct inode *inode) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + int blocksize = exp->exp_obd->u.filter.fo_sb->s_blocksize; + unsigned long used = 0, ungranted = 0, using; + int i, rc = -ENOSPC, obj, n = 0, mask = D_CACHE; + + for (obj = 0; obj < objcount; obj++) { + for (i = 0; i < fso[obj].fso_bufcnt; i++, n++) { + int tmp, bytes; + + /* FIXME: this is calculated with PAGE_SIZE on client */ + bytes = rnb[n].len; + bytes += rnb[n].offset & (blocksize - 1); + tmp = (rnb[n].offset + rnb[n].len) & (blocksize - 1); + if (tmp) + bytes += blocksize - tmp; + + if (rnb[n].flags & OBD_BRW_FROM_GRANT) { + if (fed->fed_grant < used + bytes) { + CDEBUG(D_CACHE, + "%s: cli %s claims %ld+%d GRANT," + " no such grant %lu, idx %d\n", + exp->exp_obd->obd_name, + exp->exp_client_uuid.uuid, + used, bytes, fed->fed_grant, n); + mask = D_ERROR; + } else { + used += bytes; + rnb[n].flags |= OBD_BRW_GRANTED; + lnb[n].lnb_grant_used = bytes; + CDEBUG(0, "idx %d used=%lu\n", n, used); + rc = 0; + continue; + } + } + if (*left > ungranted) { + /* if enough space, pretend it was granted */ + ungranted += bytes; + rnb[n].flags |= OBD_BRW_GRANTED; + CDEBUG(0, "idx %d ungranted=%lu\n",n,ungranted); + rc = 0; + continue; + } + + /* We can't check for already-mapped blocks here, as + * it requires dropping the osfs lock to do the bmap. + * Instead, we return ENOSPC and in that case we need + * to go through and verify if all of the blocks not + * marked BRW_GRANTED are already mapped and we can + * ignore this error. */ + lnb[n].rc = -ENOSPC; + rnb[n].flags &= OBD_BRW_GRANTED; + CDEBUG(D_CACHE, "%s: cli %s idx %d no space for %d\n", + exp->exp_obd->obd_name, + exp->exp_client_uuid.uuid, n, bytes); + } + } + + /* Now substract what client have used already. We don't subtract + * this from the tot_granted yet, so that other client's can't grab + * that space before we have actually allocated our blocks. That + * happens in filter_grant_commit() after the writes are done. */ + *left -= ungranted; + fed->fed_grant -= used; + fed->fed_pending += used; + exp->exp_obd->u.filter.fo_tot_pending += used; + + CDEBUG(mask, + "%s: cli %s used: %lu ungranted: %lu grant: %lu dirty: %lu\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, used, + ungranted, fed->fed_grant, fed->fed_dirty); + + /* Rough calc in case we don't refresh cached statfs data */ + using = (used + ungranted + 1 ) >> + exp->exp_obd->u.filter.fo_sb->s_blocksize_bits; + if (exp->exp_obd->obd_osfs.os_bavail > using) + exp->exp_obd->obd_osfs.os_bavail -= using; + else + exp->exp_obd->obd_osfs.os_bavail = 0; + + if (fed->fed_dirty < used) { + CERROR("%s: cli %s claims used %lu > fed_dirty %lu\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, + used, fed->fed_dirty); + used = fed->fed_dirty; + } + exp->exp_obd->u.filter.fo_tot_dirty -= used; + fed->fed_dirty -= used; + + return rc; +} + static int filter_start_page_write(struct inode *inode, struct niobuf_local *lnb) { @@ -272,11 +549,12 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, { struct obd_run_ctxt saved; struct niobuf_remote *rnb; - struct niobuf_local *lnb = NULL; + struct niobuf_local *lnb; struct fsfilt_objinfo fso; struct dentry *dentry; - int rc = 0, i, tot_bytes = 0; + obd_size left; unsigned long now = jiffies; + int rc = 0, i, tot_bytes = 0, cleanup_phase = 1; ENTRY; LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); @@ -305,25 +583,47 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, CDEBUG(D_INFO, "preprw_write setup: %lu jiffies\n", (jiffies - now)); + spin_lock(&exp->exp_obd->obd_osfs_lock); + if (oa) + filter_grant_incoming(exp, oa); + cleanup_phase = 0; + + left = filter_grant_space_left(exp); + + rc = filter_grant_check(exp, objcount, &fso, niocount, nb, res, + &left, dentry->d_inode); + if (oa && oa->o_valid & OBD_MD_FLGRANT) + oa->o_grant = filter_grant(exp,oa->o_grant,oa->o_undirty,left); + + spin_unlock(&exp->exp_obd->obd_osfs_lock); + + if (rc) { + f_dput(dentry); + GOTO(cleanup, rc); + } + for (i = 0, rnb = nb, lnb = res; i < obj->ioo_bufcnt; i++, lnb++, rnb++) { + /* We still set up for ungranted pages so that granted pages + * can be written to disk as they were promised, and portals + * needs to keep the pages all aligned properly. */ lnb->dentry = dentry; lnb->offset = rnb->offset; lnb->len = rnb->len; lnb->flags = rnb->flags; - lnb->start = jiffies; rc = filter_start_page_write(dentry->d_inode, lnb); if (rc) { - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, "page err %u@" - LPU64" %u/%u %p: rc %d\n", lnb->len, lnb->offset, + CDEBUG(D_ERROR, "page err %u@"LPU64" %u/%u %p: rc %d\n", + lnb->len, lnb->offset, i, obj->ioo_bufcnt, dentry, rc); while (lnb-- > res) __free_pages(lnb->page, 0); f_dput(dentry); GOTO(cleanup, rc); } - tot_bytes += lnb->len; + if (lnb->rc == 0) + tot_bytes += lnb->len; } if (time_after(jiffies, now + 15 * HZ)) @@ -336,6 +636,14 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, tot_bytes); EXIT; cleanup: + switch(cleanup_phase) { + case 1: + spin_lock(&exp->exp_obd->obd_osfs_lock); + if (oa) + filter_grant_incoming(exp, oa); + spin_unlock(&exp->exp_obd->obd_osfs_lock); + default: ; + } pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); return rc; } @@ -432,7 +740,37 @@ void flip_into_page_cache(struct inode *inode, struct page *new_page) } while (rc != 0); } -/* XXX needs to trickle its oa down */ +void filter_grant_commit(struct obd_export *exp, int niocount, + struct niobuf_local *res) +{ + struct filter_obd *filter = &exp->exp_obd->u.filter; + struct niobuf_local *lnb = res; + unsigned long pending = 0; + int i; + + spin_lock(&exp->exp_obd->obd_osfs_lock); + for (i = 0, lnb = res; i < niocount; i++, lnb++) + pending += lnb->lnb_grant_used; + + LASSERTF(exp->exp_filter_data.fed_pending >= pending, + "%s: cli %s/%p fed_pending: %lu grant_used: %lu\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, + exp->exp_filter_data.fed_pending, pending); + exp->exp_filter_data.fed_pending -= pending; + LASSERTF(filter->fo_tot_granted >= pending, + "%s: cli %s/%p tot_granted: "LPU64" grant_used: %lu\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, + exp->exp_obd->u.filter.fo_tot_granted, pending); + filter->fo_tot_granted -= pending; + LASSERTF(filter->fo_tot_pending >= pending, + "%s: cli %s/%p tot_pending: "LPU64" grant_used: %lu\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, + filter->fo_tot_pending, pending); + filter->fo_tot_pending -= pending; + + spin_unlock(&exp->exp_obd->obd_osfs_lock); +} + int filter_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti) diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 32adb9f651ef7397f7d1c8c9f63ebadaf73c7414..eced5090a8a530285e0ddd3055f5976b44abaa7e 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -202,6 +202,27 @@ cleanup: return rc; } +/* See if there are unallocated parts in given file region */ +static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) +{ + int (*fs_bmap)(struct address_space *, long) = + inode->i_mapping->a_ops->bmap; + int j; + + /* We can't know if the range is mapped already or not */ + if (fs_bmap == NULL) + return 0; + + offset >>= inode->i_blkbits; + len >>= inode->i_blkbits; + + for (j = 0; j <= len; j++) + if (fs_bmap(inode->i_mapping, offset + j) == 0) + return 0; + + return 1; +} + int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti) @@ -213,7 +234,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct iattr iattr = { 0 }; struct kiobuf *iobuf; struct inode *inode = NULL; - int rc = 0, i, cleanup_phase = 0, err; + int rc = 0, i, n, cleanup_phase = 0, err; unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ void *wait_handle; ENTRY; @@ -234,18 +255,29 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, GOTO(cleanup, rc); iobuf->offset = 0; - iobuf->length = PAGE_SIZE * obj->ioo_bufcnt; - iobuf->nr_pages = obj->ioo_bufcnt; + iobuf->length = 0; + iobuf->nr_pages = 0; cleanup_phase = 1; fso.fso_dentry = res->dentry; fso.fso_bufcnt = obj->ioo_bufcnt; inode = res->dentry->d_inode; - iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); - for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) { + for (i = 0, lnb = res, n = 0; i < obj->ioo_bufcnt; i++, lnb++) { loff_t this_size; - iobuf->maplist[i] = lnb->page; + + /* If overwriting an existing block, we don't need a grant */ + if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && + filter_range_is_mapped(inode, lnb->offset, lnb->len)) + lnb->rc = 0; + + if (lnb->rc) /* ENOSPC, network RPC error */ + continue; + + iobuf->maplist[n++] = lnb->page; + iobuf->length += PAGE_SIZE; + iobuf->nr_pages++; + /* We expect these pages to be in offset order, but we'll * be forgiving */ this_size = lnb->offset + lnb->len; @@ -270,6 +302,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (time_after(jiffies, now + 15 * HZ)) CERROR("slow brw_start %lus\n", (jiffies - now) / HZ); + iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, iobuf, exp, &iattr, oti, &wait_handle); if (rc == 0) @@ -278,6 +311,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (time_after(jiffies, now + 15 * HZ)) CERROR("slow direct_io %lus\n", (jiffies - now) / HZ); + filter_grant_commit(exp, niocount, res); err = fsfilt_commit_wait(obd, inode, wait_handle); if (err) rc = err; diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index b312f8b19331e4513d0faeda41da21af2493c4b7..fb4370230c1273e7fc5f9a8d377fa13d62fe1c1a 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -37,9 +37,6 @@ #warning "implement writeback mode -bzzz" -int ext3_map_inode_page(struct inode *inode, struct page *page, - unsigned long *blocks, int *created, int create); - /* 512byte block min */ #define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512) struct dio_request { @@ -77,6 +74,27 @@ static int can_be_merged(struct bio *bio, sector_t sector) return bio->bi_sector + size == sector ? 1 : 0; } +/* See if there are unallocated parts in given file region */ +static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) +{ + sector_t (*fs_bmap)(struct address_space *, sector_t) = + inode->i_mapping->a_ops->bmap; + int j; + + /* We can't know if we are overwriting or not */ + if (fs_bmap == NULL) + return 0; + + offset >>= inode->i_blkbits; + len >>= inode->i_blkbits; + + for (j = 0; j <= len; j++) + if (fs_bmap(inode->i_mapping, offset + j) == 0) + return 0; + + return 1; +} + int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti) @@ -128,14 +146,23 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (time_after(jiffies, now + 15 * HZ)) CERROR("slow brw_start %lus\n", (jiffies - now) / HZ); + iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) { loff_t this_size; sector_t sector; int offs; + /* If overwriting an existing block, we don't need a grant */ + if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && + filter_range_is_mapped(inode, lnb->offset, lnb->len)) + lnb->rc = 0; + + if (lnb->rc) /* ENOSPC, network RPC error */ + continue; + /* get block number for next page */ - rc = ext3_map_inode_page(inode, lnb->page, dreq->blocks, - dreq->created, 1); + rc = fsfilt_map_inode_page(obd, inode, lnb->page, dreq->blocks, + dreq->created, 1); if (rc) GOTO(cleanup, rc); @@ -175,6 +202,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, submit_bio(WRITE, bio); } + filter_grant_commit(exp, niocount, res); + /* time to wait for I/O completion */ wait_event(dreq->wait, atomic_read(&dreq->numreqs) == 0); @@ -187,7 +216,6 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (rc == 0) { down(&inode->i_sem); - inode_update_time(inode, 1); if (iattr.ia_size > inode->i_size) { CDEBUG(D_INFO, "setting i_size to "LPU64"\n", iattr.ia_size); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 51458c0a8b138e524247945f2a7da858b73f04fc..6fae59d257e3cf4d96d5f842008951a038f62923 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -89,6 +89,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "blocksize", lprocfs_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index ee22f1c0b3e3015a68fe7c754bce3cfabe9116f0..9216ec0b8d6b5ef12ca29dbf01f7ed654956d9e5 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -134,6 +134,7 @@ int osc_wr_max_dirty_mb(struct file *file, const char *buffer, spin_lock(&cli->cl_loi_list_lock); cli->cl_dirty_max = (obd_count)val * 1024 * 1024; + osc_wake_cache_waiters(cli); spin_unlock(&cli->cl_loi_list_lock); return count; @@ -147,7 +148,20 @@ int osc_rd_cur_dirty_bytes(char *page, char **start, off_t off, int count, int rc; spin_lock(&cli->cl_loi_list_lock); - rc = snprintf(page, count, LPU64"\n", cli->cl_dirty); + rc = snprintf(page, count, "%lu\n", cli->cl_dirty); + spin_unlock(&cli->cl_loi_list_lock); + return rc; +} + +int osc_rd_cur_grant_bytes(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *dev = data; + struct client_obd *cli = &dev->u.cli; + int rc; + + spin_lock(&cli->cl_loi_list_lock); + rc = snprintf(page, count, "%lu\n", cli->cl_avail_grant); spin_unlock(&cli->cl_loi_list_lock); return rc; } @@ -281,6 +295,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "blocksize", lprocfs_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, @@ -292,6 +307,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { osc_wr_max_rpcs_in_flight, 0 }, { "max_dirty_mb", osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 }, { "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 }, + { "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 }, {"create_low_watermark", osc_rd_create_low_wm, osc_wr_create_low_wm, 0}, { "create_count", osc_rd_create_count, osc_wr_create_count, 0 }, { "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 }, diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 149ff44bc7a21fb00b634fa6f20065dcc1d6bbc3..845b3063ff973d14fa0f7ce753072cd263ececb7 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -216,6 +216,11 @@ int osc_create(struct obd_export *exp, struct obdo *oa, if ((oa->o_valid & OBD_MD_FLGROUP) && (oa->o_gr != 0)) RETURN(osc_real_create(exp, oa, ea, oti)); + if ((oa->o_valid & OBD_MD_FLFLAGS) && + oa->o_flags == OBD_FL_RECREATE_OBJS) { + RETURN(osc_real_create(exp, oa, ea, oti)); + } + lsm = *ea; if (lsm == NULL) { rc = obd_alloc_memmd(exp, &lsm); diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index d78c8bf2a111306f2f532a001f906d687d09057a..b5f639268e44bf881d35cef567a2d7563508e78c 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -44,6 +44,13 @@ struct osc_async_page { void *oap_caller_data; }; +struct osc_cache_waiter { + struct list_head ocw_entry; + wait_queue_head_t ocw_waitq; + struct osc_async_page *ocw_oap; + int ocw_rc; +}; + #define OSCC_FLAG_RECOVERING 1 #define OSCC_FLAG_CREATING 2 #define OSCC_FLAG_NOSPC 4 /* can't create more objects on this OST */ @@ -53,6 +60,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa, int osc_real_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); void oscc_init(struct obd_export *exp); +void osc_wake_cache_waiters(struct client_obd *cli); #ifdef __KERNEL__ int lproc_osc_attach_seqstat(struct obd_device *dev); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index b817a1411464680e167dabf0fad1854a0f467a1e..e8dd04324129d4146ea915073197751c2e228310 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -532,38 +532,80 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, return rc; } -static void osc_announce_cached(struct client_obd *cli, struct ost_body *body) +static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, + long writing_bytes) { - obd_flag bits = OBD_MD_FLBLOCKS|OBD_MD_FLRDEV; + obd_flag bits = OBD_MD_FLBLOCKS|OBD_MD_FLGRANT; - LASSERT(!(body->oa.o_valid & bits)); + LASSERT(!(oa->o_valid & bits)); - body->oa.o_valid |= bits; - down(&cli->cl_dirty_sem); - body->oa.o_blocks = cli->cl_dirty; - body->oa.o_rdev = cli->cl_dirty_granted; - up(&cli->cl_dirty_sem); - CDEBUG(D_INODE, "announcing "LPU64" dirty "LPU64" granted\n", - cli->cl_dirty, cli->cl_dirty_granted); + oa->o_valid |= bits; + spin_lock(&cli->cl_loi_list_lock); + oa->o_dirty = cli->cl_dirty; + oa->o_undirty = cli->cl_dirty_max - oa->o_dirty; + oa->o_grant = cli->cl_avail_grant; + oa->o_dropped = cli->cl_lost_grant; + cli->cl_lost_grant = 0; + spin_unlock(&cli->cl_loi_list_lock); + CDEBUG(D_CACHE,"dirty: "LPU64" undirty: %u dropped %u grant: "LPU64"\n", + oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant); } -static void osc_update_grant(struct client_obd *cli, struct ost_body *body) +/* caller must hold loi_list_lock */ +static void osc_consume_write_grant(struct client_obd *cli, + struct osc_async_page *oap) +{ + cli->cl_dirty += PAGE_SIZE; + cli->cl_avail_grant -= PAGE_SIZE; + oap->oap_brw_flags |= OBD_BRW_FROM_GRANT; + CDEBUG(D_CACHE, "using %lu grant credits for oap %p\n", PAGE_SIZE, oap); + LASSERT(cli->cl_avail_grant >= 0); +} + +/* caller must hold loi_list_lock */ +void osc_wake_cache_waiters(struct client_obd *cli) { - if(!(body->oa.o_valid & OBD_MD_FLRDEV)) { - if (cli->cl_ost_can_grant) { - CDEBUG(D_INODE, "%s can't grant\n", - cli->cl_import->imp_target_uuid.uuid); + struct list_head *l, *tmp; + struct osc_cache_waiter *ocw; + + list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { + /* if we can't dirty more, we must wait until some is written */ + if (cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) { + CDEBUG(D_CACHE, "no dirty room: dirty: %ld max %ld\n", + cli->cl_dirty, cli->cl_dirty_max); + return; } - cli->cl_ost_can_grant = 0; - return; + + /* if still dirty cache but no grant wait for pending RPCs that + * may yet return us some grant before doing sync writes */ + if (cli->cl_brw_in_flight && cli->cl_avail_grant < PAGE_SIZE) { + CDEBUG(D_CACHE, "%d BRWs in flight, no grant\n", + cli->cl_brw_in_flight); + return; + } + + ocw = list_entry(l, struct osc_cache_waiter, ocw_entry); + list_del_init(&ocw->ocw_entry); + if (cli->cl_avail_grant < PAGE_SIZE) { + /* no more RPCs in flight to return grant, do sync IO */ + ocw->ocw_rc = -EDQUOT; + CDEBUG(D_INODE, "wake oap %p for sync\n", ocw->ocw_oap); + } else { + osc_consume_write_grant(cli, ocw->ocw_oap); + } + wake_up(&ocw->ocw_waitq); } - CDEBUG(D_ERROR, "got "LPU64" grant\n", body->oa.o_rdev); - down(&cli->cl_dirty_sem); - cli->cl_dirty_granted = body->oa.o_rdev; - /* XXX check for over-run and wake up the io thread that - * doesn't exist yet */ - up(&cli->cl_dirty_sem); + EXIT; +} + +static void osc_update_grant(struct client_obd *cli, struct ost_body *body) +{ + spin_lock(&cli->cl_loi_list_lock); + CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant); + cli->cl_avail_grant += body->oa.o_grant; + /* waiters are woken in brw_interpret_oap */ + spin_unlock(&cli->cl_loi_list_lock); } /* We assume that the reason this OSC got a short read is because it read @@ -637,7 +679,7 @@ static int check_write_rcs(struct ptlrpc_request *request, int niocount, static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) { if (p1->flag != p2->flag) { - unsigned mask = ~(OBD_BRW_CREATE|OBD_BRW_FROM_GRANT); + unsigned mask = ~OBD_BRW_FROM_GRANT; /* warn if we try to combine flags that we don't know to be * safe to combine */ @@ -696,7 +738,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ; for (niocount = i = 1; i < page_count; i++) - if (!can_merge_pages (&pga[i - 1], &pga[i])) + if (!can_merge_pages(&pga[i - 1], &pga[i])) niocount++; size[0] = sizeof(*body); @@ -760,7 +802,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, LASSERT((void *)(niobuf - niocount) == lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf))); - osc_announce_cached(cli, body); + osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0); spin_lock_irqsave(&req->rq_lock, flags); req->rq_no_resend = 1; spin_unlock_irqrestore(&req->rq_lock, flags); @@ -769,7 +811,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, if (opc == OST_WRITE) { #if CHECKSUM_BULK body->oa.o_valid |= OBD_MD_FLCKSUM; - body->oa.o_nlink = cksum_pages(requested_nob, page_count, pga); + body->oa.o_cksum = cksum_pages(requested_nob, page_count, pga); #endif /* 1 RC per niobuf */ size[1] = sizeof(__u32) * niocount; @@ -796,14 +838,15 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, { struct client_obd *cli = &req->rq_import->imp_obd->u.cli; struct ost_body *body; + ENTRY; if (rc < 0) - return (rc); + RETURN(rc); body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { CERROR ("Can't unpack body\n"); - return (-EPROTO); + RETURN(-EPROTO); } osc_update_grant(cli, body); @@ -811,15 +854,15 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, if (req->rq_reqmsg->opc == OST_WRITE) { if (rc > 0) { CERROR ("Unexpected +ve rc %d\n", rc); - return (-EPROTO); + RETURN(-EPROTO); } - return(check_write_rcs(req, niocount, page_count, pga)); + RETURN(check_write_rcs(req, niocount, page_count, pga)); } if (rc > requested_nob) { CERROR("Unexpected rc %d (%d requested)\n", rc, requested_nob); - return (-EPROTO); + RETURN(-EPROTO); } if (rc < requested_nob) @@ -832,7 +875,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, const struct ptlrpc_peer *peer = &req->rq_import->imp_connection->c_peer; static int cksum_counter; - obd_count server_cksum = oa->o_nlink; + obd_count server_cksum = oa->o_cksum; obd_count cksum = cksum_pages(rc, page_count, pga); char str[PTL_NALFMT_SIZE]; @@ -844,7 +887,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, LPX64" (%s)\n", server_cksum, cksum, peer->peer_nid, str); cksum_counter = 0; - oa->o_nlink = cksum; + oa->o_cksum = cksum; } else if ((cksum_counter & (-cksum_counter)) == cksum_counter){ CWARN("Checksum %u from "LPX64" (%s) OK: %x\n", cksum_counter, peer->peer_nid, str, cksum); @@ -859,7 +902,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, req->rq_import->imp_connection->c_peer.peer_nid); } #endif - return (0); + RETURN(0); } static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa, @@ -1088,7 +1131,8 @@ static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, } static void osc_check_rpcs(struct client_obd *cli); -static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap); +static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, + int sent); static void loi_list_maint(struct client_obd *cli, struct lov_oinfo *loi); static void lop_update_pending(struct client_obd *cli, struct loi_oap_pages *lop, int cmd, int delta); @@ -1127,27 +1171,25 @@ static void osc_occ_interrupted(struct osic_callback_context *occ) list_del_init(&oap->oap_urgent_item); loi = oap->oap_loi; - lop = (oap->oap_cmd == OBD_BRW_WRITE) ? + lop = (oap->oap_cmd == OBD_BRW_WRITE) ? &loi->loi_write_lop : &loi->loi_read_lop; lop_update_pending(oap->oap_cli, lop, oap->oap_cmd, -1); loi_list_maint(oap->oap_cli, oap->oap_loi); osic_complete_one(oap->oap_osic, &oap->oap_occ, 0); oap->oap_osic = NULL; - } unlock: spin_unlock(&oap->oap_cli->cl_loi_list_lock); } -/* this must be called holding the list lock to give coverage to exit_cache, +/* this must be called holding the loi list lock to give coverage to exit_cache, * async_flag maintenance, and oap_request */ static void osc_complete_oap(struct client_obd *cli, - struct osc_async_page *oap, int rc) + struct osc_async_page *oap, int sent, int rc) { - ENTRY; - osc_exit_cache(cli, oap); + osc_exit_cache(cli, oap, sent); oap->oap_async_flags = 0; oap->oap_interrupted = 0; @@ -1165,7 +1207,6 @@ static void osc_complete_oap(struct client_obd *cli, oap->oap_caller_ops->ap_completion(oap->oap_caller_data, oap->oap_cmd, rc); - EXIT; } static int brw_interpret_oap(struct ptlrpc_request *request, @@ -1190,6 +1231,11 @@ static int brw_interpret_oap(struct ptlrpc_request *request, spin_lock(&cli->cl_loi_list_lock); + /* We need to decrement before osc_complete_oap->osc_wake_cache_waiters + * is called so we know whether to go to sync BRWs or wait for more + * RPCs to complete */ + cli->cl_brw_in_flight--; + /* the caller may re-use the oap after the completion call so * we need to clean it up a little */ list_for_each_safe(pos, n, &aa->aa_oaps) { @@ -1199,10 +1245,10 @@ static int brw_interpret_oap(struct ptlrpc_request *request, //oap->oap_page, oap->oap_page->index, oap); list_del_init(&oap->oap_rpc_item); - osc_complete_oap(cli, oap, rc); + osc_complete_oap(cli, oap, 1, rc); } - cli->cl_brw_in_flight--; + osc_wake_cache_waiters(cli); osc_check_rpcs(cli); spin_unlock(&cli->cl_loi_list_lock); @@ -1250,8 +1296,8 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, pga[i].pg = oap->oap_page; pga[i].count = oap->oap_count; pga[i].flag = oap->oap_brw_flags; - //CDEBUG(D_INODE, "putting page %p index %lu oap %p into pga\n", - //pga[i].pg, oap->oap_page->index, oap); + CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", + pga[i].pg, oap->oap_page->index, oap, pga[i].flag); i++; } @@ -1328,15 +1374,15 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, int rc = ops->ap_make_ready(oap->oap_caller_data, cmd); if (rc < 0) CDEBUG(D_INODE, "oap %p page %p returned %d " - "instead of ready\n", oap, + "instead of ready\n", oap, oap->oap_page, rc); switch (rc) { case -EAGAIN: /* llite is telling us that the page is still * in commit_write and that we should try - * and put it in an rpc again later. we + * and put it in an rpc again later. we * break out of the loop so we don't create - * a hole in the sequence of pages in the rpc + * a hole in the sequence of pages in the rpc * stream.*/ pos = NULL; break; @@ -1351,7 +1397,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, break; default: LASSERTF(0, "oap %p page %p returned %d " - "from make_ready\n", oap, + "from make_ready\n", oap, oap->oap_page, rc); break; } @@ -1367,13 +1413,12 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, /* ask the caller for the size of the io as the rpc leaves. */ if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) - oap->oap_count = ops->ap_refresh_count( - oap->oap_caller_data, - cmd); + oap->oap_count = + ops->ap_refresh_count(oap->oap_caller_data,cmd); if (oap->oap_count <= 0) { - CDEBUG(D_INODE, "oap %p count %d, completing\n", oap, + CDEBUG(D_CACHE, "oap %p count %d, completing\n", oap, oap->oap_count); - osc_complete_oap(cli, oap, oap->oap_count); + osc_complete_oap(cli, oap, 0, oap->oap_count); continue; } @@ -1383,6 +1428,8 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, break; } + osc_wake_cache_waiters(cli); + if (page_count == 0) RETURN(0); @@ -1403,7 +1450,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, * were between the pending list and the rpc */ if (oap->oap_interrupted) { CDEBUG(D_INODE, "oap %p interrupted\n", oap); - osc_complete_oap(cli, oap, oap->oap_count); + osc_complete_oap(cli, oap, 0, oap->oap_count); continue; } @@ -1430,7 +1477,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_brw_in_flight); } else { lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count); - lprocfs_oh_tally(&cli->cl_write_rpc_hist, + lprocfs_oh_tally(&cli->cl_write_rpc_hist, cli->cl_brw_in_flight); } @@ -1442,7 +1489,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, list_for_each(pos, &aa->aa_oaps) { oap = list_entry(pos, struct osc_async_page, oap_rpc_item); if (oap->oap_interrupted) { - CDEBUG(D_INODE, "oap %p in req %p interrupted\n", + CDEBUG(D_INODE, "oap %p in req %p interrupted\n", oap, request); ptlrpc_mark_interrupted(request); break; @@ -1487,7 +1534,7 @@ static int lop_makes_rpc(struct client_obd *cli, struct loi_oap_pages *lop, * that are being queued but which can't be made ready until * the queuer finishes with the page. this is a wart for * llite::commit_write() */ - optimal *= 2; + optimal += 16; } if (lop->lop_num_pending >= optimal) RETURN(1); @@ -1495,7 +1542,7 @@ static int lop_makes_rpc(struct client_obd *cli, struct loi_oap_pages *lop, RETURN(0); } -static void on_list(struct list_head *item, struct list_head *list, +static void on_list(struct list_head *item, struct list_head *list, int should_be_on) { if (list_empty(item) && should_be_on) @@ -1508,39 +1555,39 @@ static void on_list(struct list_head *item, struct list_head *list, * can find pages to build into rpcs quickly */ static void loi_list_maint(struct client_obd *cli, struct lov_oinfo *loi) { - on_list(&loi->loi_cli_item, &cli->cl_loi_ready_list, + on_list(&loi->loi_cli_item, &cli->cl_loi_ready_list, lop_makes_rpc(cli, &loi->loi_write_lop, OBD_BRW_WRITE) || lop_makes_rpc(cli, &loi->loi_read_lop, OBD_BRW_READ)); - on_list(&loi->loi_write_item, &cli->cl_loi_write_list, + on_list(&loi->loi_write_item, &cli->cl_loi_write_list, loi->loi_write_lop.lop_num_pending); } -#define LOI_DEBUG(LOI, STR, args...) \ - CDEBUG(D_INODE, "loi ready %d wr %d:%d rd %d:%d " STR, \ - !list_empty(&(LOI)->loi_cli_item), \ +#define LOI_DEBUG(LOI, STR, args...) \ + CDEBUG(D_INODE, "loi ready %d wr %d:%d rd %d:%d " STR, \ + !list_empty(&(LOI)->loi_cli_item), \ (LOI)->loi_write_lop.lop_num_pending, \ - !list_empty(&(LOI)->loi_write_lop.lop_urgent), \ + !list_empty(&(LOI)->loi_write_lop.lop_urgent), \ (LOI)->loi_read_lop.lop_num_pending, \ - !list_empty(&(LOI)->loi_read_lop.lop_urgent), \ - args) \ + !list_empty(&(LOI)->loi_read_lop.lop_urgent), \ + args) \ struct lov_oinfo *osc_next_loi(struct client_obd *cli) { ENTRY; /* first return all objects which we already know to have - * pages ready to be stuffed into rpcs */ + * pages ready to be stuffed into rpcs */ if (!list_empty(&cli->cl_loi_ready_list)) - RETURN(list_entry(cli->cl_loi_ready_list.next, + RETURN(list_entry(cli->cl_loi_ready_list.next, struct lov_oinfo, loi_cli_item)); - - /* then if we have cache waiters, return all objects with queued + + /* then if we have cache waiters, return all objects with queued * writes. This is especially important when many small files * have filled up the cache and not been fired into rpcs because * they don't pass the nr_pending/object threshhold */ if (!list_empty(&cli->cl_cache_waiters) && !list_empty(&cli->cl_loi_write_list)) - RETURN(list_entry(cli->cl_loi_write_list.next, + RETURN(list_entry(cli->cl_loi_write_list.next, struct lov_oinfo, loi_write_item)); RETURN(NULL); } @@ -1608,73 +1655,78 @@ static void osc_check_rpcs(struct client_obd *cli) /* we're trying to queue a page in the osc so we're subject to the * 'cl_dirty_max' limit on the number of pages that can be queued in the osc. * If the osc's queued pages are already at that limit, then we want to sleep - * until there is space in the osc's queue for us. we need this goofy - * little struct to really tell that our allocation was fulfilled in - * the presence of pending signals */ -struct osc_cache_waiter { - struct list_head ocw_entry; - wait_queue_head_t ocw_waitq; -}; + * until there is space in the osc's queue for us. We also may be waiting for + * write credits from the OST if there are RPCs in flight that may return some + * before we fall back to sync writes. + * + * We need this know our allocation was granted in the presence of signals */ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw) { int rc; ENTRY; spin_lock(&cli->cl_loi_list_lock); - rc = list_empty(&ocw->ocw_entry); + rc = list_empty(&ocw->ocw_entry) || cli->cl_brw_in_flight == 0; spin_unlock(&cli->cl_loi_list_lock); RETURN(rc); }; + +/* Caller must hold loi_list_lock - we drop/regain it if we need to wait for + * grant or cache space. */ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi, struct osc_async_page *oap) { struct osc_cache_waiter ocw; - struct l_wait_info lwi = {0}; - int rc = 0; - ENTRY; + struct l_wait_info lwi = { 0 }; + + CDEBUG(D_CACHE, "dirty: %ld dirty_max: %ld dropped: %lu grant: %lu\n", + cli->cl_dirty, cli->cl_dirty_max, cli->cl_lost_grant, + cli->cl_avail_grant); - /* XXX check for ost grants here as well.. for now we ignore them. */ if (cli->cl_dirty_max < PAGE_SIZE) - RETURN(-EDQUOT); + return(-EDQUOT); - /* if we fail this test then cl_dirty contains at least one page - * that will have to be completed after we release the lock */ - if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max) { + + /* Hopefully normal case - cache space and write credits available */ + if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max && + cli->cl_avail_grant >= PAGE_SIZE) { /* account for ourselves */ - cli->cl_dirty += PAGE_SIZE; - GOTO(out, rc = 0); + osc_consume_write_grant(cli, oap); + return(0); } - init_waitqueue_head(&ocw.ocw_waitq); - list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); + /* Make sure that there are write rpcs in flight to wait for. This + * is a little silly as this object may not have any pending but + * other objects sure might. */ + if (cli->cl_brw_in_flight) { + list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); + init_waitqueue_head(&ocw.ocw_waitq); + ocw.ocw_oap = oap; + ocw.ocw_rc = 0; - /* make sure that there are write rpcs in flight to wait for. this - * is a little silly as this object may not have any pending - * but other objects sure might. this should probably be cleaned. */ - loi_list_maint(cli, loi); - osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + loi_list_maint(cli, loi); + osc_check_rpcs(cli); + spin_unlock(&cli->cl_loi_list_lock); - CDEBUG(D_INODE, "sleeping for cache space\n"); - l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); + CDEBUG(0, "sleeping for cache space\n"); + l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); - spin_lock(&cli->cl_loi_list_lock); - if (!list_empty(&ocw.ocw_entry)) { - rc = -EINTR; - list_del(&ocw.ocw_entry); + spin_lock(&cli->cl_loi_list_lock); + if (!list_empty(&ocw.ocw_entry)) { + list_del(&ocw.ocw_entry); + RETURN(-EINTR); + } + RETURN(ocw.ocw_rc); } - GOTO(out, rc); -out: - if (rc == 0) - oap->oap_brw_flags |= OBD_BRW_FROM_GRANT; - return rc; + + RETURN(-EDQUOT); } -/* the companion to enter_cache, called when an oap is now longer part of the +/* the companion to enter_cache, called when an oap is no longer part of the * dirty accounting.. so writeback completes or truncate happens before writing * starts. must be called with the loi lock held. */ -static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap) +static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, + int sent) { - struct osc_cache_waiter *ocw; ENTRY; if (!(oap->oap_brw_flags & OBD_BRW_FROM_GRANT)) { @@ -1682,16 +1734,14 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap) return; } - if (list_empty(&cli->cl_cache_waiters)) { - cli->cl_dirty -= PAGE_SIZE; - } else { - ocw = list_entry(cli->cl_cache_waiters.next, - struct osc_cache_waiter, ocw_entry); - list_del_init(&ocw->ocw_entry); - wake_up(&ocw->ocw_waitq); + oap->oap_brw_flags &= ~OBD_BRW_FROM_GRANT; + cli->cl_dirty -= PAGE_SIZE; + if (!sent) { + cli->cl_lost_grant += PAGE_SIZE; + CDEBUG(D_CACHE, "lost grant: %lu avail grant: %lu dirty: %lu\n", + cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty); } - oap->oap_brw_flags &= ~OBD_BRW_FROM_GRANT; EXIT; } @@ -1973,7 +2023,8 @@ static int osc_teardown_async_page(struct obd_export *exp, if (!list_empty(&oap->oap_rpc_item)) GOTO(out, rc = -EBUSY); - osc_exit_cache(cli, oap); + osc_exit_cache(cli, oap, 0); + osc_wake_cache_waiters(cli); if (!list_empty(&oap->oap_urgent_item)) { list_del_init(&oap->oap_urgent_item); @@ -2771,7 +2822,7 @@ static int osc_disconnect(struct obd_export *exp, int flags) if (obd->u.cli.cl_conn_count == 1) { /* flush any remaining cancel messages out to the target */ llog_sync(ctxt, exp); - + /* balance the conn2export for oscc in osc_connect */ class_export_put(exp); } @@ -2796,18 +2847,27 @@ static int osc_lock_contains(struct obd_export *exp, struct lov_stripe_md *lsm, static int osc_invalidate_import(struct obd_device *obd, struct obd_import *imp) { + struct client_obd *cli; LASSERT(imp->imp_obd == obd); /* this used to try and tear down queued pages, but it was * not correctly implemented. We'll have to do it again once * we call obd_invalidate_import() agian */ - LBUG(); + /* XXX And we still need to do this */ + + /* Reset grants, too */ + cli = &obd->u.cli; + spin_lock(&cli->cl_loi_list_lock); + cli->cl_avail_grant = 0; + cli->cl_lost_grant = 0; + spin_unlock(&cli->cl_loi_list_lock); + RETURN(0); } int osc_setup(struct obd_device *obd, obd_count len, void *buf) { int rc; - + rc = ptlrpcd_addref(); if (rc) return rc; diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 6edebc81424b3e2a2752d376d90d6f2488ab0a48..dfdcf1cb538886999d439cba6d3bd421b9ee4632 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -305,12 +305,13 @@ static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo, LASSERT(page < npages); pp_rnb[page].len = pnob; pp_rnb[page].offset = off; - pp_rnb[page].flags = rnb->flags; + pp_rnb[page].flags = rnb[rnbidx].flags; - CDEBUG(D_PAGE, " obj %d id "LPX64 - "page %d(%d) "LPX64" for %d\n", + CDEBUG(0, " obj %d id "LPX64 + "page %d(%d) "LPX64" for %d, flg %x\n", i, ioo[i].ioo_id, obj_pages, page, - pp_rnb[page].offset, pp_rnb[page].len); + pp_rnb[page].offset, pp_rnb[page].len, + pp_rnb[page].flags); page++; obj_pages++; @@ -384,9 +385,6 @@ static int ost_brw_read(struct ptlrpc_request *req) GOTO(out, rc = -EFAULT); } - /* BUG 974: when we send back cache grants, don't clear this flag */ - body->oa.o_valid &= ~OBD_MD_FLRDEV; - ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj); if (ioo == NULL) { CERROR("Missing/short ioobj\n"); @@ -478,15 +476,15 @@ static int ost_brw_read(struct ptlrpc_request *req) rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1, ioo, npages, local_nb, &oti); - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa)); + if (rc == 0) { + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); + memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa)); #if CHECKSUM_BULK - if (rc == 0) { - repbody->oa.o_nlink = ost_checksum_bulk(desc); + repbody->oa.o_cksum = ost_checksum_bulk(desc); repbody->oa.o_valid |= OBD_MD_FLCKSUM; - } #endif + } out_bulk: ptlrpc_free_bulk(desc); @@ -564,9 +562,6 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) GOTO(out, rc = -EFAULT); } - /* BUG 974: when we send back cache grants, don't clear this flag */ - body->oa.o_valid &= ~OBD_MD_FLRDEV; - LASSERT_REQSWAB(req, 1); objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo); if (objcount == 0) { @@ -655,7 +650,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) #if CHECKSUM_BULK if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) { static int cksum_counter; - obd_count client_cksum = body->oa.o_nlink; + obd_count client_cksum = body->oa.o_cksum; obd_count cksum = ost_checksum_bulk(desc); portals_nid2str(req->rq_connection->c_peer.peer_ni->pni_number, @@ -665,7 +660,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) LPX64" (%s)\n", client_cksum, cksum, req->rq_connection->c_peer.peer_nid, str); cksum_counter = 1; - repbody->oa.o_nlink = cksum; + repbody->oa.o_cksum = cksum; } else { cksum_counter++; if ((cksum_counter & (-cksum_counter)) == cksum_counter) @@ -946,11 +941,12 @@ static int ost_handle(struct ptlrpc_request *req) oti_init(oti, req); switch (req->rq_reqmsg->opc) { - case OST_CONNECT: + case OST_CONNECT: { CDEBUG(D_INODE, "connect\n"); OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0); rc = target_handle_connect(req, ost_handle); break; + } case OST_DISCONNECT: CDEBUG(D_INODE, "disconnect\n"); OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0); diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 0c4c4a0159135058cd3a1cc476b2ac4b682b7d4f..09db989f92451e2bf8d91e840d58cd02f7409213 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -115,7 +115,7 @@ do { \ if (portal_cerror == 0) \ break; \ CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ + if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 0bc93f326e87a8934650238c4eee5a73f4a4c067..7ad93277d73d317530925cc58b97da4a23bbc954 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -633,9 +633,9 @@ int portals_debug_mark_buffer(char *text) if (debug_buf == NULL) return -EINVAL; - CDEBUG(0, "********************************************************\n"); + CDEBUG(D_TRACE,"***************************************************\n"); CWARN("DEBUG MARKER: %s\n", text); - CDEBUG(0, "********************************************************\n"); + CDEBUG(D_TRACE,"***************************************************\n"); return 0; } diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index e0e725a206ea94b25b5867041ad319ef6569c5ef..d29fe39dac68a62ed3e73c1d3f7557d52625c384 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -306,7 +306,7 @@ void lustre_swab_obdo (struct obdo *o) __swab64s (&o->o_ctime); __swab64s (&o->o_size); __swab64s (&o->o_blocks); - __swab64s (&o->o_rdev); + __swab64s (&o->o_grant); __swab32s (&o->o_blksize); __swab32s (&o->o_mode); __swab32s (&o->o_uid); @@ -315,7 +315,7 @@ void lustre_swab_obdo (struct obdo *o) __swab32s (&o->o_nlink); __swab32s (&o->o_generation); __swab32s (&o->o_valid); - __swab32s (&o->o_obdflags); + __swab32s (&o->o_misc); __swab32s (&o->o_easize); /* o_inline is opaque */ } @@ -615,7 +615,7 @@ void lustre_swab_llogd_conn_body (struct llogd_conn_body *d) void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' - * running on Linux schnapps.adilger.int 2.4.22-l32 #4 Thu Jan 8 14:32:57 MST 2004 i686 i686 + * running on Linux schnapps.adilger.int 2.4.22-l32 #4 Thu Jan 8 14:32:57 MST 2004 i686 i686 * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */ @@ -756,8 +756,8 @@ void lustre_assert_wire_constants(void) LASSERT((int)sizeof(((struct obdo *)0)->o_size) == 8); LASSERT(offsetof(struct obdo, o_blocks) == 48); LASSERT((int)sizeof(((struct obdo *)0)->o_blocks) == 8); - LASSERT(offsetof(struct obdo, o_rdev) == 56); - LASSERT((int)sizeof(((struct obdo *)0)->o_rdev) == 8); + LASSERT(offsetof(struct obdo, o_grant) == 56); + LASSERT((int)sizeof(((struct obdo *)0)->o_grant) == 8); LASSERT(offsetof(struct obdo, o_blksize) == 64); LASSERT((int)sizeof(((struct obdo *)0)->o_blksize) == 4); LASSERT(offsetof(struct obdo, o_mode) == 68); @@ -774,8 +774,8 @@ void lustre_assert_wire_constants(void) LASSERT((int)sizeof(((struct obdo *)0)->o_generation) == 4); LASSERT(offsetof(struct obdo, o_valid) == 92); LASSERT((int)sizeof(((struct obdo *)0)->o_valid) == 4); - LASSERT(offsetof(struct obdo, o_obdflags) == 96); - LASSERT((int)sizeof(((struct obdo *)0)->o_obdflags) == 4); + LASSERT(offsetof(struct obdo, o_misc) == 96); + LASSERT((int)sizeof(((struct obdo *)0)->o_misc) == 4); LASSERT(offsetof(struct obdo, o_easize) == 100); LASSERT((int)sizeof(((struct obdo *)0)->o_easize) == 4); LASSERT(offsetof(struct obdo, o_inline) == 104); @@ -792,7 +792,6 @@ void lustre_assert_wire_constants(void) LASSERT(OBD_MD_FLUID == 512); LASSERT(OBD_MD_FLGID == 1024); LASSERT(OBD_MD_FLFLAGS == 2048); - LASSERT(OBD_MD_FLOBDFLG == 4096); LASSERT(OBD_MD_FLNLINK == 8192); LASSERT(OBD_MD_FLGENER == 16384); LASSERT(OBD_MD_FLINLINE == 32768); @@ -810,6 +809,7 @@ void lustre_assert_wire_constants(void) LASSERT(OBD_FL_DELORPHAN == 4); LASSERT(OBD_FL_NORPC == 8); LASSERT(OBD_FL_IDONLY == 16); + LASSERT(OBD_FL_RECREATE_OBJS == 32); /* Checks for struct lov_mds_md_v1 */ LASSERT((int)sizeof(struct lov_mds_md_v1) == 32); @@ -885,7 +885,6 @@ void lustre_assert_wire_constants(void) LASSERT((int)sizeof(((struct niobuf_remote *)0)->flags) == 4); LASSERT(OBD_BRW_READ == 1); LASSERT(OBD_BRW_WRITE == 2); - LASSERT(OBD_BRW_CREATE == 4); LASSERT(OBD_BRW_SYNC == 8); LASSERT(OBD_BRW_FROM_GRANT == 32); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 6c3a68b4b34a078535e04d7bf0f5bd4bca3f22ac..6b069a50441dba1a48b77e18d7b4d1a7824662b7 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -233,9 +233,7 @@ inline void ptlrpc_invalidate_import_state(struct obd_import *imp) ptlrpc_abort_inflight(imp); -#if 0 obd_invalidate_import(obd, imp); -#endif ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); } diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index 08a43454db6aea6216e41c8b253047d7a1efc098..82a7d26249582e3093a1971fcc9ab316d561f5f1 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -1,5 +1,5 @@ # lustre.spec -%define version HEAD +%define version b_bug974 %define kversion @LINUXRELEASE@ %define linuxdir @LINUX@ %define enable_doc @ENABLE_DOC@ diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index b6a2ee15112ef4a4efd4a1c23b5665581eb716c8..536e99e41b9a4ba6056ee292324860350a46a07e 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -3,7 +3,8 @@ # the CVS HEAD are allowed. set -vxe -[ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no +PATH=`dirname $0`/../utils:$PATH + [ "$CONFIGS" ] || CONFIGS="local lov" [ "$MAX_THREADS" ] || MAX_THREADS=10 if [ -z "$THREADS" ]; then @@ -15,6 +16,7 @@ fi [ "$RSIZE" ] || RSIZE=64 [ "$UID" ] || UID=1000 [ "$MOUNT" ] || MOUNT=/mnt/lustre +[ "$MOUNT2" ] || MOUNT2=${MOUNT}2 [ "$TMP" ] || TMP=/tmp [ "$COUNT" ] || COUNT=1000 #[ "$DEBUG_LVL" ] || DEBUG_LVL=0x370200 @@ -110,23 +112,46 @@ for NAME in $CONFIGS; do if [ "$FSX" != "no" ]; then mount | grep $MOUNT || sh llmount.sh $DEBUG_OFF - ./fsx -W -c 50 -p 1000 -P $TMP -l 1024000 -N $(($COUNT * 100)) $MOUNT/fsxfile + ./fsx -W -c 50 -p 1000 -P $TMP -l $SIZE \ + -N $(($COUNT * 100)) $MOUNT/fsxfile $DEBUG_ON sh llmountcleanup.sh - #sh llrmount.sh + sh llrmount.sh fi + if [ "$SANITYN" != "no" ]; then + mount | grep $MOUNT || sh llmount.sh + $DEBUG_OFF + + mkdir -p $MOUNT2 + case $NAME in + local|lov) + MDSNODE=`hostname` + MDSNAME=mds1 + CLIENT=client + ;; + *) # we could extract this from $NAME.xml somehow + ;; + esac + if [ "$MDSNODE" -a "$MDSNAME" -a "$CLIENT" ]; then + llmount $MDSNODE:/$MDSNAME/$CLIENT $MOUNT2 + SANITYLOG=$TMP/sanity.log START=: CLEAN=: sh sanityN.sh + umount $MOUNT2 + else + echo "don't know \$MDSNODE, \$MDSNAME, \$CLIENT" + echo "can't mount2 for '$NAME', skipping sanityN.sh" + fi + + $DEBUG_ON + sh llmountcleanup.sh + #sh llrmount.sh + fi + mount | grep $MOUNT && sh llmountcleanup.sh done if [ "$REPLAY_SINGLE" != "no" ]; then sh replay-single.sh fi -if [ "$SANITYN" != "no" ]; then - export NAME=mount2 - mount | grep $MOUNT || sh llmount.sh - sh sanityN.sh - mount | grep $MOUNT && sh llmountcleanup.sh -fi if [ "$CONF_SANITY" != "no" ]; then sh conf-sanity.sh diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 95dd2768fc871637cf0ce459a4cfa0ce3fa59c05..0f8fe13facd813543a510861c5e41053caacd5f9 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -11,7 +11,7 @@ MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-100000} FSTYPE=${FSTYPE:-ext3} MOUNT=${MOUNT:-/mnt/lustre} -#MOUNT2=${MOUNT2:-${MOUNT}2} +MOUNT2=${MOUNT2:-${MOUNT}2} NETWORKTYPE=${NETWORKTYPE:-tcp} OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`} @@ -20,7 +20,8 @@ OSTSIZE=${OSTSIZE:-200000} # specific journal size for the ost, in MB JSIZE=${JSIZE:-0} [ "$JSIZE" -gt 0 ] && JARG="--journal_size $JSIZE" -MDSISIZE=${MDSISIZE:-128} +MDSISIZE=${MDSISIZE:-0} +[ "$MDSISIZE" -gt 0 ] && IARG="--inode_size $MDSISIZE" STRIPE_BYTES=65536 STRIPES_PER_OBJ=0 # 0 means stripe over all OSTs @@ -30,9 +31,10 @@ rm -f $config # create nodes ${LMC} --add node --node localhost || exit 10 ${LMC} --add net --node localhost --nid `hostname` --nettype $NETWORKTYPE || exit 11 +${LMC} --add net --node client --nid '*' --nettype $NETWORKTYPE || exit 12 # configure mds server -${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE $JARG --mkfsoptions "-I $MDSISIZE" || exit 20 +${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE $JARG $IARG || exit 20 # configure ost ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 @@ -40,4 +42,4 @@ ${LMC} --add ost --nspath /mnt/ost_ns --node localhost --lov lov1 --fstype $FSTY # create client config ${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 -#${LMC} --add mtpt --node localhost --path $MOUNT2 --mds mds1 --lov lov1 || exit 40 +${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 || exit 41 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index 2be5a7429b8241aa338d4c425f2ccf962085d485..ec09598e7a0edf27f415d5c2596e744ad4e38acf 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -6,17 +6,20 @@ export PATH=`dirname $0`/../utils:$PATH config=${1:-lov.xml} -LMC=${LMC:-lmc} +LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-100000} FSTYPE=${FSTYPE:-ext3} +MOUNT=${MOUNT:-/mnt/lustre} +MOUNT2=${MOUNT2:-${MOUNT}2} +NETWORKTYPE=${NETWORKTYPE:-tcp} OSTCOUNT=${OSTCOUNT:-5} # OSTDEVN will still override the device for OST N -OSTSIZE=${OSTSIZE:-100000} +OSTSIZE=${OSTSIZE:-150000} # 1 to config an echo client instead of llite ECHO_CLIENT=${ECHO_CLIENT:-} @@ -28,26 +31,31 @@ JSIZE=${JSIZE:-0} JARG="" [ "$JSIZE" -gt 0 ] && JARG="--journal_size $JSIZE" +rm -f $config + # create nodes -${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp +${LMC} --add node --node localhost || exit 10 +${LMC} --add net --node localhost --nid `hostname` --nettype $NETWORKTYPE || exit 11 +${LMC} --add net --node client --nid '*' --nettype $NETWORKTYPE || exit 12 # configure mds server -${LMC} -m $config --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE +${LMC} --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 # configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 +${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 for num in `seq $OSTCOUNT`; do OST=ost$num DEVPTR=OSTDEV$num eval $DEVPTR=${!DEVPTR:=$TMP/$OST-`hostname`} - ${LMC} -m $config --add ost --node localhost --lov lov1 --ost $OST --fstype $FSTYPE --dev ${!DEVPTR} --size $OSTSIZE $JARG + ${LMC} --add ost --node localhost --lov lov1 --ost $OST --fstype $FSTYPE --dev ${!DEVPTR} --size $OSTSIZE $JARG || exit 30 done if [ -z "$ECHO_CLIENT" ]; then # create client config - ${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 + ${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 + ${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 || exit 41 else - ${LMC} -m $config --add echo_client --node localhost --ost lov1 + ${LMC} --add echo_client --node localhost --ost lov1 || exit 42 fi diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh index 8519dad6769d5c67695385b0ab352cf0b059e643..5a2646a41bc8ffcf30b59686a782d199432bd6b1 100755 --- a/lustre/tests/oos.sh +++ b/lustre/tests/oos.sh @@ -1,46 +1,77 @@ #!/bin/bash -export NAME=${NAME:-local} -export OSTSIZE=10000 +set -e +set -vx +export PATH=`dirname $0`/../utils:$PATH +LFS=${LFS:-lfs} +MOUNT=${MOUNT:-$1} MOUNT=${MOUNT:-/mnt/lustre} +OOS=$MOUNT/oosfile TMP=${TMP:-/tmp} - -echo "mnt.." -sh llmount.sh -echo "done" +LOG=$TMP/ooslog SUCCESS=1 -FREESPACE=`df |grep $MOUNT|tr -s ' '|cut -d ' ' -f4` +rm -f $OOS + +sleep 1 # to ensure we get up-to-date statfs info -rm -f $TMP/oosfile -dd if=/dev/zero of=$MOUNT/oosfile count=$[$FREESPACE + 1] bs=1k 2>$TMP/oosfile +#echo -1 > /proc/sys/portals/debug +#echo 0x40a8 > /proc/sys/portals/subsystem_debug +#lctl clear +#lctl debug_daemon start /r/tmp/debug 1024 -RECORDSOUT=`grep "records out" $TMP/oosfile|cut -d + -f1` +STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -1` +ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -1` +MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))} +if [ $ORIGFREE -gt $MAXFREE ]; then + echo "skipping out-of-space test on $OSC" + echo "reports ${ORIGFREE}kB free, more tham MAXFREE ${MAXFREE}kB" + echo "increase $MAXFREE (or reduce test fs size) to proceed" + exit 0 +fi -[ -z "`grep "No space left on device" $TMP/oosfile`" ] && \ - echo "failed:dd not return ENOSPC" && SUCCESS=0 +export LANG=C LC_LANG=C # for "No space left on device" message -REMAINEDFREE=`df |grep $MOUNT|tr -s ' '|cut -d ' ' -f4` -[ $[$FREESPACE - $REMAINEDFREE ] -lt $RECORDSOUT ] && \ - echo "failed:the space written by dd not equal to available space" && \ - SUCCESS=0 && echo "$FREESPACE - $REMAINEDFREE $RECORDSOUT" +# make sure we stripe over all OSTs to avoid OOS on only a subset of OSTs +$LFS setstripe $OOS 65536 0 $STRIPECOUNT +if dd if=/dev/zero of=$OOS count=$(($ORIGFREE + 100)) bs=1k 2> $LOG; then + echo "ERROR: dd did not fail" + SUCCESS=0 +fi -[ $REMAINEDFREE -gt 100 ] && \ - echo "failed:too many space left $REMAINEDFREE and -ENOSPC returned" &&\ +if [ "`grep -c 'No space left on device' $LOG`" -ne 1 ]; then + echo "ERROR: dd not return ENOSPC" SUCCESS=0 +fi + +# flush cache to OST(s) so avail numbers are correct +sync; sleep 1 ; sync + +for AVAIL in /proc/fs/lustre/osc/OSC*MNT*/kbytesavail; do + [ `cat $AVAIL` -lt 400 ] && OSCFULL=full +done +if [ -z "$OSCFULL" ]; then + echo "no OSTs are close to full" + grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*} + SUCCESS=0 +fi + +RECORDSOUT=`grep "records out" $LOG | cut -d + -f1` -FILESIZE=`ls -l $MOUNT/oosfile|tr -s ' '|cut -d ' ' -f5` -[ $RECORDSOUT -ne $[$FILESIZE/1024] ] && \ - echo "failed:the space written by dd not equal to the size of file" && \ +FILESIZE=`ls -l $OOS | awk '{ print $5 }'` +if [ $RECORDSOUT -ne $(($FILESIZE / 1024)) ]; then + echo "ERROR: blocks written by dd not equal to the size of file" SUCCESS=0 +fi -[ $SUCCESS -eq 1 ] && echo "Success!" +#lctl debug_daemon stop -rm -f $MOUNT/oosfile* -rm -f $TMP/oosfile +rm -f $OOS -echo "" -echo "cln.." -sh llmountcleanup.sh +if [ $SUCCESS -eq 1 ]; then + echo "Success!" +else + exit 1 +fi diff --git a/lustre/tests/oos2.sh b/lustre/tests/oos2.sh new file mode 100644 index 0000000000000000000000000000000000000000..42b55714b97beba93dfedba2df3a804856615150 --- /dev/null +++ b/lustre/tests/oos2.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +set -e +set -vx + +export PATH=`dirname $0`/../utils:$PATH +LFS=${LFS:-lfs} +MOUNT=${MOUNT:-$1} +MOUNT=${MOUNT:-/mnt/lustre} +MOUNT2=${MOUNT2:-$2} +MOUNT2=${MOUNT2:-${MOUNT}2} +OOS=$MOUNT/oosfile +OOS2=$MOUNT2/oosfile2 +TMP=${TMP:-/tmp} +LOG=$TMP/oosfile +LOG2=${LOG}2 + +SUCCESS=1 + +rm -f $OOS $OOS2 $LOG $LOG2 + +sleep 1 # to ensure we get up-to-date statfs info + +STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -1` +ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -1` +MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))} +if [ $ORIGFREE -gt $MAXFREE ]; then + echo "skipping out-of-space test on $OSC" + echo "reports ${ORIGFREE}kB free, more tham MAXFREE ${MAXFREE}kB" + echo "increase $MAXFREE (or reduce test fs size) to proceed" + exit 0 +fi + +export LANG=C LC_LANG=C # for "No space left on device" message + +# make sure we stripe over all OSTs to avoid OOS on only a subset of OSTs +$LFS setstripe $OOS 65536 -1 $STRIPECOUNT +$LFS setstripe $OOS2 65536 -1 $STRIPECOUNT +dd if=/dev/zero of=$OOS count=$((3 * $ORIGFREE / 4 + 100)) bs=1k 2>> $LOG & +DDPID=$! +if dd if=/dev/zero of=$OOS2 count=$((3*$ORIGFREE/4 + 100)) bs=1k 2>> $LOG2; then + echo "ERROR: dd2 did not fail" + SUCCESS=0 +fi +if wait $DDPID; then + echo "ERROR: dd did not fail" + SUCCESS=0 +fi + +if [ "`cat $LOG $LOG2 | grep -c 'No space left on device'`" -ne 2 ]; then + echo "ERROR: dd not return ENOSPC" + SUCCESS=0 +fi + +# flush cache to OST(s) so avail numbers are correct +sync; sleep 1 ; sync + +for AVAIL in /proc/fs/lustre/osc/OSC*MNT*/kbytesavail; do + [ `cat $AVAIL` -lt 400 ] && OSCFULL=full +done +if [ -z "$OSCFULL" ]; then + echo "no OSTs are close to full" + grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*} |tee -a $LOG + SUCCESS=0 +fi + +RECORDSOUT=$((`grep "records out" $LOG | cut -d+ -f 1` + \ + `grep "records out" $LOG2 | cut -d+ -f 1`)) + +FILESIZE=$((`ls -l $OOS | awk '{print $5}'` + `ls -l $OOS2 | awk '{print $5}'`)) +if [ $RECORDSOUT -ne $(($FILESIZE / 1024)) ]; then + echo "ERROR: blocks written by dd not equal to the size of file" + SUCCESS=0 +fi + +rm -f $OOS $OOS2 + +if [ $SUCCESS -eq 1 ]; then + echo "Success!" +else + exit 1 +fi diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index ce4a4a698893ebb724c43034221da2ce096920e1..e4eefd01ad74f69409fa44ace5d0395ab936047b 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -22,10 +22,10 @@ CLIENT=${CLIENT:-mdev8} NETWORKTYPE=${NETWORKTYPE:-tcp} MOUNTPT=${MOUNTPT:-/mnt/lustre} CONFIG=${CONFIG:-recovery-cleanup.xml} -MDSDEV=${MDSDEV:-/tmp/mds-`hostname`} +MDSDEV=${MDSDEV:-/tmp/mds1-`hostname`} MDSSIZE=${MDSSIZE:-100000} FSTYPE=${FSTYPE:-ext3} -OSTDEV=${OSTDEV:-/tmp/ost-`hostname`} +OSTDEV=${OSTDEV:-/tmp/ost1-`hostname`} OSTSIZE=${OSTSIZE:-100000} do_mds() { diff --git a/lustre/tests/runiozone b/lustre/tests/runiozone index c2eec04deff823295c14eb1fa488a3b3ed899dc3..db74c2e4df503d590433b237141516bf74198e94 100755 --- a/lustre/tests/runiozone +++ b/lustre/tests/runiozone @@ -2,7 +2,7 @@ [ -z "$SIZE" ] && SIZE=5g [ -z "$COUNT" ] && COUNT=100 [ -z "$VERIFY" ] && VERIFY="-+d" -[ -z "$ODIR" ] && ODIR="-I" +#[ -z "$ODIR" ] && ODIR="-I" [ -z "$REC" ] && REC=64 [ -z "$FILE" ] && FILE=/mnt/lustre/iozone.$$ [ $1 ] && SIZE=$1 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 7c91dd5200e8e4eb9fe908d100ab6237a6d7ca6a..ec166e4daf1ca6adb3b73f974c8c3ecaa433e8e0 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1414,6 +1414,8 @@ do_dirty_record() { } test_45() { f="$DIR/f45" + # Obtain grants from OST if it supports it + echo blah > ${f}_grant stop_kupdated sync do_dirty_record "echo blah > $f" @@ -1734,6 +1736,17 @@ test_63() { } run_test 63 "Verify osic_wait interruption does not crash ======" +test_64a () { + df $DIR + grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/cur* +} +run_test 64a "verify filter grant calculations (in kernel) ======" + +test_64b () { + sh oos.sh $MOUNT +} +run_test 64b "check out-of-space detection on client ============" + # on the LLNL clusters, runas will still pick up root's $TMP settings, # which will not be writable for the runas user, and then you get a CVS # error message with a corrupt path string (CVS bug) and panic. diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 703d378164b809783ec41d9c5c94553f59310628..37f3c9625fbf6d6da8747c34bd710b003e3ee52f 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -91,7 +91,7 @@ run_test() { [ "$SANITYLOG" ] && rm -f $SANITYLOG || true error () { - log "FAIL: $@" + log "FAIL: $TESTNAME $@" if [ "$SANITYLOG" ]; then echo "FAIL: $TESTNAME $@" >> $SANITYLOG else @@ -284,7 +284,14 @@ test_13() { # bug 2451 - directory coherency } run_test 13 "test directory page revocation ====================" +test_14() { # bug 974 - ENOSPC + env + sh oos2.sh $MOUNT1 $MOUNT2 +} +run_test 14 "test out-of-space with multiple writers ===========" + log "cleanup: ======================================================" rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true + echo '=========================== finished ===============================' [ -f "$SANITYLOG" ] && cat $SANITYLOG && exit 1 || true diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 9225374a7cb79ca6c9b460b83a5e031d6f4eb51d..30f843777e0ebf9a796928179dea0b461b6b0484 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -1352,7 +1352,7 @@ class MDSDEV(Module): self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') - self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mkfsoptions = self.db.get_val('mkfsoptions', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') mds = self.db.lookup(target_uuid) @@ -1588,7 +1588,7 @@ class OSD(Module): self.uuid = target_uuid # modules self.add_lustre_module('ost', 'ost') - # FIXME: should we default to ext3 here? + # FIXME: should we default to ext3 here? if self.fstype: self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) self.add_lustre_module(self.osdtype, self.osdtype) @@ -1760,14 +1760,14 @@ class MDC(Client): Client.__init__(self, db, uuid, 'mdc', fs_name) def permits_inactive(self): - return 0 + return 0 class OSC(Client): def __init__(self, db, uuid, fs_name): Client.__init__(self, db, uuid, 'osc', fs_name) def permits_inactive(self): - return 1 + return 1 def mgmtcli_name_for_uuid(uuid): return 'MGMTCLI_%s' % uuid diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 9de30587ae244e703777e64b10817ac4246a6108..ad043aaed057c46d36f71ca9b0183fa1e0189414 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -171,10 +171,10 @@ char *obdo_print(struct obdo *obd) sprintf(buf, "id: "LPX64"\ngrp: "LPX64"\natime: "LPU64"\nmtime: "LPU64 "\nctime: "LPU64"\nsize: "LPU64"\nblocks: "LPU64 "\nblksize: %u\nmode: %o\nuid: %d\ngid: %d\nflags: %x\n" - "obdflags: %x\nnlink: %d,\nvalid %x\n", + "misc: %x\nnlink: %d,\nvalid %x\n", obd->o_id, obd->o_gr, obd->o_atime, obd->o_mtime, obd->o_ctime, obd->o_size, obd->o_blocks, obd->o_blksize, obd->o_mode, - obd->o_uid, obd->o_gid, obd->o_flags, obd->o_obdflags, + obd->o_uid, obd->o_gid, obd->o_flags, obd->o_misc, obd->o_nlink, obd->o_valid); return strdup(buf); } @@ -1353,7 +1353,7 @@ int jt_obd_test_brw(int argc, char **argv) cmd = write ? OBD_IOC_BRW_WRITE : OBD_IOC_BRW_READ; for (i = 1, next_count = verbose; i <= count; i++) { - data.ioc_obdo1.o_valid &= ~(OBD_MD_FLBLOCKS|OBD_MD_FLRDEV); + data.ioc_obdo1.o_valid &= ~(OBD_MD_FLBLOCKS|OBD_MD_FLGRANT); IOC_PACK(argv[0], data); rc = l2_ioctl(OBD_DEV_ID, cmd, buf); SHMEM_BUMP(); diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index fb90a0f88083eb88ce21a7685981ea839ecd2789..8beb8024f9a5ef7ed527c9cf08f3557e88f299f3 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -101,7 +101,7 @@ check_obdo(void) CHECK_MEMBER(obdo, o_ctime); CHECK_MEMBER(obdo, o_size); CHECK_MEMBER(obdo, o_blocks); - CHECK_MEMBER(obdo, o_rdev); + CHECK_MEMBER(obdo, o_grant); CHECK_MEMBER(obdo, o_blksize); CHECK_MEMBER(obdo, o_mode); CHECK_MEMBER(obdo, o_uid); @@ -110,7 +110,7 @@ check_obdo(void) CHECK_MEMBER(obdo, o_nlink); CHECK_MEMBER(obdo, o_generation); CHECK_MEMBER(obdo, o_valid); - CHECK_MEMBER(obdo, o_obdflags); + CHECK_MEMBER(obdo, o_misc); CHECK_MEMBER(obdo, o_easize); CHECK_MEMBER(obdo, o_inline); @@ -126,7 +126,6 @@ check_obdo(void) CHECK_VALUE(OBD_MD_FLUID); CHECK_VALUE(OBD_MD_FLGID); CHECK_VALUE(OBD_MD_FLFLAGS); - CHECK_VALUE(OBD_MD_FLOBDFLG); CHECK_VALUE(OBD_MD_FLNLINK); CHECK_VALUE(OBD_MD_FLGENER); CHECK_VALUE(OBD_MD_FLINLINE); @@ -145,6 +144,7 @@ check_obdo(void) CHECK_VALUE(OBD_FL_DELORPHAN); CHECK_VALUE(OBD_FL_NORPC); CHECK_VALUE(OBD_FL_IDONLY); + CHECK_VALUE(OBD_FL_RECREATE_OBJS); } void @@ -212,7 +212,6 @@ check_niobuf_remote(void) CHECK_VALUE(OBD_BRW_READ); CHECK_VALUE(OBD_BRW_WRITE); - CHECK_VALUE(OBD_BRW_CREATE); CHECK_VALUE(OBD_BRW_SYNC); CHECK_VALUE(OBD_BRW_FROM_GRANT); } diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index c3704562d4abad4f018180348f917a36473be5e6..9f8bcd0fbf263b720de18061d7890a92d4e1faae 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -164,8 +164,8 @@ void lustre_assert_wire_constants(void) LASSERT((int)sizeof(((struct obdo *)0)->o_size) == 8); LASSERT(offsetof(struct obdo, o_blocks) == 48); LASSERT((int)sizeof(((struct obdo *)0)->o_blocks) == 8); - LASSERT(offsetof(struct obdo, o_rdev) == 56); - LASSERT((int)sizeof(((struct obdo *)0)->o_rdev) == 8); + LASSERT(offsetof(struct obdo, o_grant) == 56); + LASSERT((int)sizeof(((struct obdo *)0)->o_grant) == 8); LASSERT(offsetof(struct obdo, o_blksize) == 64); LASSERT((int)sizeof(((struct obdo *)0)->o_blksize) == 4); LASSERT(offsetof(struct obdo, o_mode) == 68); @@ -182,8 +182,8 @@ void lustre_assert_wire_constants(void) LASSERT((int)sizeof(((struct obdo *)0)->o_generation) == 4); LASSERT(offsetof(struct obdo, o_valid) == 92); LASSERT((int)sizeof(((struct obdo *)0)->o_valid) == 4); - LASSERT(offsetof(struct obdo, o_obdflags) == 96); - LASSERT((int)sizeof(((struct obdo *)0)->o_obdflags) == 4); + LASSERT(offsetof(struct obdo, o_misc) == 96); + LASSERT((int)sizeof(((struct obdo *)0)->o_misc) == 4); LASSERT(offsetof(struct obdo, o_easize) == 100); LASSERT((int)sizeof(((struct obdo *)0)->o_easize) == 4); LASSERT(offsetof(struct obdo, o_inline) == 104); @@ -200,7 +200,6 @@ void lustre_assert_wire_constants(void) LASSERT(OBD_MD_FLUID == 512); LASSERT(OBD_MD_FLGID == 1024); LASSERT(OBD_MD_FLFLAGS == 2048); - LASSERT(OBD_MD_FLOBDFLG == 4096); LASSERT(OBD_MD_FLNLINK == 8192); LASSERT(OBD_MD_FLGENER == 16384); LASSERT(OBD_MD_FLINLINE == 32768); @@ -218,6 +217,7 @@ void lustre_assert_wire_constants(void) LASSERT(OBD_FL_DELORPHAN == 4); LASSERT(OBD_FL_NORPC == 8); LASSERT(OBD_FL_IDONLY == 16); + LASSERT(OBD_FL_RECREATE_OBJS == 32); /* Checks for struct lov_mds_md_v1 */ LASSERT((int)sizeof(struct lov_mds_md_v1) == 32); @@ -293,7 +293,6 @@ void lustre_assert_wire_constants(void) LASSERT((int)sizeof(((struct niobuf_remote *)0)->flags) == 4); LASSERT(OBD_BRW_READ == 1); LASSERT(OBD_BRW_WRITE == 2); - LASSERT(OBD_BRW_CREATE == 4); LASSERT(OBD_BRW_SYNC == 8); LASSERT(OBD_BRW_FROM_GRANT == 32);