From 32e3f2e70b8e7de4fd2bcdbcfbc1ccac74c609e6 Mon Sep 17 00:00:00 2001 From: rread <rread> Date: Thu, 19 Dec 2002 08:29:06 +0000 Subject: [PATCH] * New LMC Interface * and assorted cleanups --- lustre/kernel_patches/README | 104 + .../patches/ext3-xattr-2.5.patch | 2690 ----------------- .../kernel_patches/patches/lustre-2.5.patch | 552 ---- lustre/tests/ba-echo.sh | 8 +- lustre/tests/ba-mount.sh | 14 +- lustre/tests/cobd.sh | 41 + lustre/tests/llecho.sh | 30 +- lustre/tests/local.sh | 9 +- lustre/tests/lov.sh | 14 +- .../tests/mcr-individual-ost-nogw-config.sh | 8 +- lustre/tests/mcr-mds-failover-config.sh | 20 +- lustre/tests/mcr-routed-config.sh | 24 +- lustre/tests/mcr.sh | 18 +- lustre/tests/mcrlov.sh | 23 +- lustre/tests/mount2.sh | 10 +- lustre/tests/mount2fs.sh | 22 +- lustre/tests/uml.sh | 12 +- lustre/utils/lconf.in | 186 +- lustre/utils/lmc | 483 ++- 19 files changed, 614 insertions(+), 3654 deletions(-) create mode 100644 lustre/kernel_patches/README delete mode 100644 lustre/kernel_patches/patches/ext3-xattr-2.5.patch delete mode 100644 lustre/kernel_patches/patches/lustre-2.5.patch create mode 100755 lustre/tests/cobd.sh diff --git a/lustre/kernel_patches/README b/lustre/kernel_patches/README new file mode 100644 index 0000000000..05a04d02e1 --- /dev/null +++ b/lustre/kernel_patches/README @@ -0,0 +1,104 @@ + +Lustre requires changes to the core kernel before it can be compiled against +the core kernel source tree. We use Andrew Morton's 'patch-scripts' utilities +to keep the complexity of managing changes across multiple kernel targets down. +They handle the ordering metadata, application, refreshing, and removal of +patches for us. Please read scripts/docco.txt for a more thorough explanation +of what 'patch-scripts' do. + +We create a thin wrapper around patchscripts with our ./prepare_tree.sh. It +exports two environment variables. PATCHSCRIPTS is a relative path from the +kernel source tree to the checked-out patchscripts repository. 
It is required +for patchscripts to operate on data outside the kernel source tree. It also +puts the absolute path to the scripts/ directory at the front of PATH. +Finally, it creates a 'series' link from the kernel tree back to the proper +series file for that kernel tree. More on that below. + +prepare_tree.sh and the patch-scripts commands are the only interface we should +use on a daily basis. We should never have to manage the patches by hand. +This will save us heart-ache once we're good with the tools. I promise. + +Data to be aware of: + +patches/ + contains all the patch files themselves. We should have a patch per + functional change. + +series/ + the text files that patch-utils use to define the ordering of patches + that are applied to a tree. We have a series file for each kernel + tree variant that requires wildly different patches. (architecture + differences, stock vs. redhat, etc) + +pc/ + control files for patch-utils. These are per tree and should never + be in cvs. + +txt/ + text descriptions of the patches. Nice, but not functionally required. + +First, before anything happens, you need to prep a tree for use with +patch-utils. This means putting a series link in the tree and setting the +environment variable: + + $ eval `./prepare_tree.sh -t /tmp/kernels/linux-2.4.18 -r stock-2.4` + +prepare_tree.sh is careful to output variable assignments to stdout and +everything else to stderr so the eval won't go awry. It also is clever about +resolving the series name, so tab-completed relative paths to the series files +can be used with -r. (it assumes that series/ is under where prepare_tree.sh +was executed from). The series link that is created from the tree back into +the cvs repository is created by force. Don't re-run the command with a +different role. (this should probably be fixed) + +With this in place, the shell that did the eval is ready to wield patch-utils. 
+ +] To apply all the patches to a given tree: + + $ eval `./prepare_tree.sh -t /tmp/kernels/linux-2.4.18 -r stock-2.4` + $ cd /tmp/kernels/linux-2.4.18 + $ pushpatch 100000 + ( the huge number just serves to iterate through the patches ) + +] To refresh the patches against a newer kernel that the series applies to. + +Say the series file 'rh-8.0-dev' corresponds to a CFS policy of tracking the +most recent red hat 8.0 distro kernel. It used to be 2.4.18-14, say, and RH +has now released RH 2.4.18-17.8.0 and CFS has decided to move to it. We +want to update the patches in cvs HEAD to be against 2.4.18-17.8.0 + + $ eval `./prepare_tree.sh -t /tmp/linux-2.4.18-17.8.0 -r rh-8.0-dev` + $ cd /tmp/linux-2.4.18-17.8.0 + $ for a in $NUM_PATCHES_HAVE ; do + pushpatch; + refpatch; + done + +] To add a new series + +Simply add a new empty file to the series/ directory, choosing a descriptive +name for the series. + +] To add a patch into a series + +Ideally a patch can be added to the end of the series. This is most easily +done with patch-utils import_patch. After the patch is imported it still needs +to be applied and refreshed with 'pushpatch' and 'refpatch'. ___remember to +cvs add the patch with -ko___ so that tags in the context of the diff aren't +changed by CVS, rendering the patch unusable. + +It is considered valuable to have a common HEAD which can be checked out to +patch a kernel and build lustre across lots of targets. This creates some +friction in the desire to keep a single canonical set of patches in CVS. We +solve this with the patch-utils scripts by having well-named patches that are +bound to the different series. Say alpha and ia64 kernel trees both need a +common lustre patch. Ideally they'd both have our-functionality.patch in their +series, but perhaps the code path we want to alter is different in the trees +and not in the architecture-dependent part of the kernel. 
For this we'd want +our-functionality-ia64.patch in the ia64 series file, and +our-functionality-alpha.patch in the alpha. This split becomes irritating to +manage as shared changes want to be pushed to all the patches. This will be a +pain as long as the kernel's we're receiving don't share revision control +somehow. At least the patch utils make it relatively painless to 'pushpatch' +the source patch, clean up rejects, test, and 'refpatch' to generate the new +patch for that series. diff --git a/lustre/kernel_patches/patches/ext3-xattr-2.5.patch b/lustre/kernel_patches/patches/ext3-xattr-2.5.patch deleted file mode 100644 index 41798393c9..0000000000 --- a/lustre/kernel_patches/patches/ext3-xattr-2.5.patch +++ /dev/null @@ -1,2690 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# This patch format is intended for GNU patch command version 2.5 or higher. -# This patch includes the following deltas: -# ChangeSet 1.809 -> 1.810 -# fs/ext3/Makefile 1.4 -> 1.5 -# include/linux/ext3_jbd.h 1.5 -> 1.6 -# fs/ext3/ialloc.c 1.17 -> 1.18 -# fs/ext3/symlink.c 1.3 -> 1.4 -# fs/Makefile 1.42 -> 1.43 -# fs/ext3/namei.c 1.22 -> 1.23 -# include/linux/ext3_fs.h 1.11 -> 1.12 -# fs/Config.in 1.39 -> 1.40 -# fs/ext3/inode.c 1.42 -> 1.43 -# fs/Config.help 1.21 -> 1.22 -# fs/ext3/super.c 1.33 -> 1.34 -# fs/ext3/file.c 1.9 -> 1.10 -# (new) -> 1.1 fs/ext3/xattr.h -# (new) -> 1.1 include/linux/mbcache.h -# (new) -> 1.1 fs/ext3/xattr.c -# (new) -> 1.1 fs/mbcache.c -# (new) -> 1.1 fs/ext3/xattr_user.c -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/10/20 braam@clusterfs.com 1.810 -# xattrs for UML bk repository -# -------------------------------------------- -# -diff -Nru a/fs/Config.help b/fs/Config.help ---- a/fs/Config.help Sun Dec 8 02:49:56 2002 -+++ b/fs/Config.help Sun Dec 8 02:49:56 2002 -@@ -154,6 +154,13 @@ - of your root partition (the one containing the directory 
/) cannot - be compiled as a module, and so this may be dangerous. - -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ <http://acl.bestbits.at/> for details). -+ -+ If unsure, say N. -+ - CONFIG_JBD - This is a generic journaling layer for block devices. It is - currently used by the ext3 file system, but it could also be used to -diff -Nru a/fs/Config.in b/fs/Config.in ---- a/fs/Config.in Sun Dec 8 02:49:56 2002 -+++ b/fs/Config.in Sun Dec 8 02:49:56 2002 -@@ -27,6 +27,7 @@ - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS - # CONFIG_JBD could be its own option (even modular), but until there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -180,6 +181,17 @@ - define_tristate CONFIG_ZISOFS_FS $CONFIG_ISO9660_FS - else - define_tristate CONFIG_ZISOFS_FS n -+fi -+ -+# Meta block cache for Extended Attributes (ext2/ext3) -+if [ "$CONFIG_EXT2_FS_XATTR" = "y" -o "$CONFIG_EXT3_FS_XATTR" = "y" ]; then -+ if [ "$CONFIG_EXT2_FS" = "y" -o "$CONFIG_EXT3_FS" = "y" ]; then -+ define_tristate CONFIG_FS_MBCACHE y -+ else -+ if [ "$CONFIG_EXT2_FS" = "m" -o "$CONFIG_EXT3_FS" = "m" ]; then -+ define_tristate CONFIG_FS_MBCACHE m -+ fi -+ fi - fi - - mainmenu_option next_comment -diff -Nru a/fs/Makefile b/fs/Makefile ---- a/fs/Makefile Sun Dec 8 02:49:56 2002 -+++ b/fs/Makefile Sun Dec 8 02:49:56 2002 -@@ -6,7 +6,7 @@ - # - - export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \ -- fcntl.o read_write.o dcookies.o -+ fcntl.o read_write.o dcookies.o mbcache.o - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - bio.o super.o 
block_dev.o char_dev.o stat.o exec.o pipe.o \ -@@ -29,6 +29,8 @@ - obj-y += binfmt_script.o - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o -+ -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o - - obj-$(CONFIG_QUOTA) += dquot.o - obj-$(CONFIG_QFMT_V1) += quota_v1.o -diff -Nru a/fs/ext3/Makefile b/fs/ext3/Makefile ---- a/fs/ext3/Makefile Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/Makefile Sun Dec 8 02:49:56 2002 -@@ -7,4 +7,10 @@ - ext3-objs := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o - -+export-objs += xattr.o -+ -+ifeq ($(CONFIG_EXT3_FS_XATTR),y) -+ext3-objs += xattr.o xattr_user.o -+endif -+ - include $(TOPDIR)/Rules.make -diff -Nru a/fs/ext3/file.c b/fs/ext3/file.c ---- a/fs/ext3/file.c Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/file.c Sun Dec 8 02:49:56 2002 -@@ -23,7 +23,7 @@ - #include <linux/jbd.h> - #include <linux/ext3_fs.h> - #include <linux/ext3_jbd.h> --#include <linux/smp_lock.h> -+#include "xattr.h" - - /* - * Called when an inode is released. Note that this is different -@@ -98,5 +98,9 @@ - struct inode_operations ext3_file_inode_operations = { - .truncate = ext3_truncate, - .setattr = ext3_setattr, -+ .setxattr = ext3_setxattr, -+ .getxattr = ext3_getxattr, -+ .listxattr = ext3_listxattr, -+ .removexattr = ext3_removexattr, - }; - -diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c ---- a/fs/ext3/ialloc.c Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/ialloc.c Sun Dec 8 02:49:56 2002 -@@ -25,6 +25,8 @@ - #include <asm/bitops.h> - #include <asm/byteorder.h> - -+#include "xattr.h" -+ - /* - * ialloc.c contains the inodes allocation and deallocation routines - */ -@@ -118,6 +120,7 @@ - * as writing the quota to disk may need the lock as well. 
- */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - -diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c ---- a/fs/ext3/inode.c Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/inode.c Sun Dec 8 02:49:56 2002 -@@ -42,6 +42,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = EXT3_I(inode)->i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -51,7 +63,7 @@ - * still needs to be revoked. - */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -167,9 +179,7 @@ - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1979,6 +1989,8 @@ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2130,8 +2142,6 @@ - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2263,10 +2273,7 @@ - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - 
inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - if (ext3_should_writeback_data(inode)) -@@ -2277,18 +2284,20 @@ - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - if (ext3_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext3_writeback_aops; - else - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; - if (ei->i_flags & EXT3_APPEND_FL) -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/namei.c Sun Dec 8 02:49:56 2002 -@@ -36,6 +36,7 @@ - #include <linux/quotaops.h> - #include <linux/buffer_head.h> - #include <linux/smp_lock.h> -+#include "xattr.h" - - - /* -@@ -1654,7 +1655,7 @@ - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1662,7 +1663,6 @@ - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? 
*/ -@@ -1689,9 +1689,6 @@ - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2068,7 +2065,7 @@ - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - if (ext3_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext3_writeback_aops; - else -@@ -2284,4 +2281,17 @@ - .rmdir = ext3_rmdir, - .mknod = ext3_mknod, - .rename = ext3_rename, -+ .setxattr = ext3_setxattr, -+ .getxattr = ext3_getxattr, -+ .listxattr = ext3_listxattr, -+ .removexattr = ext3_removexattr, - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ .setxattr = ext3_setxattr, -+ .getxattr = ext3_getxattr, -+ .listxattr = ext3_listxattr, -+ .removexattr = ext3_removexattr, -+}; -+ -+ -diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c ---- a/fs/ext3/super.c Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/super.c Sun Dec 8 02:49:56 2002 -@@ -30,6 +30,7 @@ - #include <linux/smp_lock.h> - #include <linux/buffer_head.h> - #include <asm/uaccess.h> -+#include "xattr.h" - - #ifdef CONFIG_JBD_DEBUG - static int ext3_ro_after; /* Make fs read-only after this many jiffies */ -@@ -405,6 +406,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -554,6 +556,7 @@ - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -566,6 +569,13 @@ - continue; - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR 
-+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -982,6 +992,12 @@ - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR -+ set_opt(sbi->s_mount_opt, XATTR_USER); -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) - goto out_fail; - -@@ -1820,7 +1836,10 @@ - - static int __init init_ext3_fs(void) - { -- int err = init_inodecache(); -+ int err = init_ext3_xattr(); -+ if (err) -+ return err; -+ err = init_inodecache(); - if (err) - goto out1; - err = register_filesystem(&ext3_fs_type); -@@ -1830,6 +1849,7 @@ - out: - destroy_inodecache(); - out1: -+ exit_ext3_xattr(); - return err; - } - -@@ -1837,6 +1857,7 @@ - { - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); -+ exit_ext3_xattr(); - } - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -diff -Nru a/fs/ext3/symlink.c b/fs/ext3/symlink.c ---- a/fs/ext3/symlink.c Sun Dec 8 02:49:56 2002 -+++ b/fs/ext3/symlink.c Sun Dec 8 02:49:56 2002 -@@ -20,6 +20,7 @@ - #include <linux/fs.h> - #include <linux/jbd.h> - #include <linux/ext3_fs.h> -+#include "xattr.h" - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ - return vfs_follow_link(nd, (char*)ei->i_data); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ .readlink = page_readlink, -+ .follow_link = page_follow_link, -+ .setxattr = ext3_setxattr, -+ .getxattr = ext3_getxattr, -+ .listxattr = ext3_listxattr, -+ .removexattr = ext3_removexattr, -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { -- .readlink = 
ext3_readlink, /* BKL not held. Don't need */ -+ .readlink = ext3_readlink, /* BKL not held. Don't need */ - .follow_link = ext3_follow_link, /* BKL not held. Don't need */ -+ .setxattr = ext3_setxattr, -+ .getxattr = ext3_getxattr, -+ .listxattr = ext3_listxattr, -+ .removexattr = ext3_removexattr, - }; -diff -Nru a/fs/ext3/xattr.c b/fs/ext3/xattr.c ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/fs/ext3/xattr.c Sun Dec 8 02:49:56 2002 -@@ -0,0 +1,1127 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> -+ * -+ * Fix by Harrison Xing <harrison@mountainviewdata.com>. -+ * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>. -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko <luka.renko@hermes.si>. -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * ¦ entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. 
They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS holdsinode->i_sem semaphore when any of the xattr inode -+ * operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ */ -+ -+#include <linux/init.h> -+#include <linux/fs.h> -+#include <linux/slab.h> -+#include <linux/ext3_jbd.h> -+#include <linux/ext3_fs.h> -+#include <linux/mbcache.h> -+#include <linux/quotaops.h> -+#include <asm/semaphore.h> -+#include "xattr.h" -+ -+#define EXT3_EA_USER "user." -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) 
-+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+static DECLARE_MUTEX(ext3_xattr_sem); -+static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. 
-+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) { -+ if (ext3_xattr_handlers[i]) { -+ const char *n = strcmp_prefix(*name, -+ ext3_xattr_handlers[i]->prefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -EOPNOTSUPP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -EOPNOTSUPP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * 
Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -EOPNOTSUPP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENODATA; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; 
-+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENODATA; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len) + 1; -+ } -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ *buf++ = '\0'; -+ } -+ } -+ error = size; -+ -+cleanup: -+ 
brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. 
-+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (EXT3_I(inode)->i_file_acl) { -+ /* The inode already has an extended attribute block. */ -+ int block = EXT3_I(inode)->i_file_acl; -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENODATA; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext3_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (DQUOT_ALLOC_BLOCK(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int block; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ block = ext3_new_block(handle, inode, goal, 0, -+ 0, &error); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: -+ ext3_free_blocks(handle, inode, block, 1); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ set_buffer_uptodate(new_bh); -+ unlock_buffer(new_bh); -+ ext3_xattr_cache_insert(new_bh); -+ -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. 
-+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ DQUOT_FREE_BLOCK(inode, 1); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. 
-+ */ -+void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext3_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_free_blocks(handle, inode, block, 1); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ DQUOT_FREE_BLOCK(inode, 1); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+ mb_cache_shrink(ext3_xattr_cache, sb->s_bdev); -+} -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. 
-+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. 
-+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_bdev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, (unsigned long) ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d", -+ (unsigned long) ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_bdev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, -+ bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ int err; -+ -+ err = ext3_xattr_register(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler); -+ if (err) -+ return err; -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 6); -+ if (!ext3_xattr_cache) { -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler); -+ return -ENOMEM; -+ } 
-+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler); -+} -+ -diff -Nru a/fs/ext3/xattr.h b/fs/ext3/xattr.h ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/fs/ext3/xattr.h Sun Dec 8 02:49:56 2002 -@@ -0,0 +1,133 @@ -+/* -+ File: fs/ext3/xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> -+*/ -+ -+#include <linux/config.h> -+#include <linux/xattr.h> -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS) -+#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1) -+#define EXT3_XATTR_LEN(name_len) \ -+ (((name_len) + EXT3_XATTR_ROUND + \ -+ sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND) -+#define EXT3_XATTR_NEXT(entry) \ -+ ( (struct ext3_xattr_entry *)( \ 
-+ (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void); -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, 
size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+extern struct ext3_xattr_handler ext3_xattr_user_handler; -diff -Nru a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/fs/ext3/xattr_user.c Sun Dec 8 02:49:56 2002 -@@ -0,0 +1,99 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> -+ */ -+ -+#include <linux/module.h> -+#include <linux/string.h> -+#include <linux/fs.h> -+#include <linux/smp_lock.h> -+#include <linux/ext3_jbd.h> -+#include <linux/ext3_fs.h> -+#include "xattr.h" -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include <linux/ext3_acl.h> -+#endif -+ -+#define XATTR_USER_PREFIX "user." 
-+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ } -+ return prefix_len + name_len; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -EOPNOTSUPP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -EOPNOTSUPP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ lock_kernel(); -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -diff -Nru a/fs/mbcache.c b/fs/mbcache.c ---- /dev/null 
Wed Dec 31 16:00:00 1969 -+++ b/fs/mbcache.c Sun Dec 8 02:49:56 2002 -@@ -0,0 +1,702 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org> -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired (at compile time) -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+#include <linux/hash.h> -+#include <linux/fs.h> -+#include <linux/mm.h> -+#include <linux/slab.h> -+#include <linux/sched.h> -+#include <linux/init.h> -+#include <linux/mbcache.h> -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) 
do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+MODULE_LICENSE("GPL"); -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. (The lru list is -+ * global across all mbcaches.) -+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+static struct shrinker *mb_shrinker; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. -+ */ -+ -+static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask); -+ -+static inline void -+__mb_cache_entry_takeout_lru(struct mb_cache_entry *ce) -+{ -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_into_lru(struct mb_cache_entry *ce) -+{ -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+} -+ -+ -+static inline int -+__mb_cache_entry_in_lru(struct mb_cache_entry *ce) -+{ -+ return (!list_empty(&ce->e_lru_list)); -+} -+ -+ -+/* -+ * Insert the cache entry into all hashes. 
-+ */ -+static inline void -+__mb_cache_entry_link(struct mb_cache_entry *ce) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket; -+ int n; -+ -+ bucket = hash_long((unsigned long)ce->e_bdev + -+ (ce->e_block & 0xffffff), cache->c_bucket_bits); -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; n<mb_cache_indexes(cache); n++) { -+ bucket = hash_long(ce->e_indexes[n].o_key, -+ cache->c_bucket_bits); -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+} -+ -+ -+/* -+ * Remove the cache entry from all hashes. -+ */ -+static inline void -+__mb_cache_entry_unlink(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ list_del_init(&ce->e_block_list); -+ for (n = 0; n < mb_cache_indexes(ce->e_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+} -+ -+ -+static inline int -+__mb_cache_entry_is_linked(struct mb_cache_entry *ce) -+{ -+ return (!list_empty(&ce->e_block_list)); -+} -+ -+ -+static inline struct mb_cache_entry * -+__mb_cache_entry_read(struct mb_cache_entry *ce) -+{ -+ __mb_cache_entry_takeout_lru(ce); -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ atomic_dec(&cache->c_entry_count); -+ if (cache->c_op.free) -+ cache->c_op.free(ce); -+ kmem_cache_free(cache->c_entry_cache, ce); -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (!__mb_cache_entry_is_linked(ce)) -+ goto forget; -+ __mb_cache_entry_into_lru(ce); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ return; -+forget: -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce); -+} -+ -+ -+/* -+ * mb_cache_shrink_fn() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. 
-+ * -+ * @nr_to_scan: Number of objects to scan -+ * @gfp_mask: (ignored) -+ * -+ * Returns the number of objects which are present in the cache. -+ */ -+static int -+mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_prev(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d entries", nr_to_scan); -+ if (nr_to_scan == 0) { -+ spin_unlock(&mb_cache_spinlock); -+ goto out; -+ } -+ while (nr_to_scan && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.prev, -+ struct mb_cache_entry, e_lru_list); -+ list_move(&ce->e_lru_list, &free_list); -+ if (__mb_cache_entry_is_linked(ce)) -+ __mb_cache_entry_unlink(ce); -+ nr_to_scan--; -+ } -+ spin_unlock(&mb_cache_spinlock); -+ l = free_list.prev; -+ while (l != &free_list) { -+ struct mb_cache_entry *ce = list_entry(l, -+ struct mb_cache_entry, e_lru_list); -+ l = l->prev; -+ __mb_cache_entry_forget(ce); -+ count--; -+ } -+out: -+ mb_debug("%d remaining entries ", count); -+ return count; -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. 
-+ * @bucket_bits: log2(number of hash buckets) -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_bits) -+{ -+ int m=0, n, bucket_count = 1 << bucket_bits; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ else -+ cache->c_op.free = NULL; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_bits = bucket_bits; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; n<bucket_count; n++) -+ INIT_LIST_HEAD(&cache->c_block_hash[n]); -+ for (m=0; m<indexes_count; m++) { -+ cache->c_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; n<bucket_count; n++) -+ INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ if (list_empty(&mb_cache_list)) { -+ if (mb_shrinker) { -+ printk(KERN_ERR "%s: already have a shrinker!\n", -+ __FUNCTION__); -+ remove_shrinker(mb_shrinker); -+ } -+ mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn); -+ } -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if 
(cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. All others -+ * are freed. -+ * -+ * @cache: which cache to shrink -+ * @bdev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, struct block_device *bdev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l; -+ -+ spin_lock(&mb_cache_spinlock); -+ l = mb_cache_lru_list.prev; -+ while (l != &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ l = l->prev; -+ if (ce->e_bdev == bdev) { -+ list_move(&ce->e_lru_list, &free_list); -+ if (__mb_cache_entry_is_linked(ce)) -+ __mb_cache_entry_unlink(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ l = free_list.prev; -+ while (l != &free_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ l = l->prev; -+ __mb_cache_entry_forget(ce); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. 
-+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ l = mb_cache_lru_list.prev; -+ while (l != &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ l = l->prev; -+ if (ce->e_cache == cache) { -+ list_move(&ce->e_lru_list, &free_list); -+ if (__mb_cache_entry_is_linked(ce)) -+ __mb_cache_entry_unlink(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ if (list_empty(&mb_cache_list) && mb_shrinker) { -+ remove_shrinker(mb_shrinker); -+ mb_shrinker = 0; -+ } -+ spin_unlock(&mb_cache_spinlock); -+ -+ l = free_list.prev; -+ while (l != &free_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ l = l->prev; -+ __mb_cache_entry_forget(ce); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+ kmem_cache_destroy(cache->c_entry_cache); -+ -+ for (n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ -+ kfree(cache); -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. 
-+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, but when another process -+ * has inserted the same cache entry in the meantime). -+ * -+ * @bdev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). 
-+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, -+ sector_t block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), -+ cache->c_bucket_bits); -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_prev(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_bdev == bdev && ce->e_block == block) -+ goto out; -+ } -+ mb_assert(!__mb_cache_entry_is_linked(ce)); -+ ce->e_bdev = bdev; -+ ce->e_block = block; -+ for (n=0; n<mb_cache_indexes(cache); n++) -+ ce->e_indexes[n].o_key = keys[n]; -+ __mb_cache_entry_link(ce); -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(!__mb_cache_entry_in_lru(ce)); -+ if (__mb_cache_entry_is_linked(ce)) -+ __mb_cache_entry_unlink(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). 
-+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(!__mb_cache_entry_in_lru(ce)); -+ if (__mb_cache_entry_is_linked(ce)) -+ __mb_cache_entry_unlink(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, -+ sector_t block) -+{ -+ unsigned int bucket; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), -+ cache->c_bucket_bits); -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_bdev == bdev && ce->e_block == block) { -+ ce = __mb_cache_entry_read(ce); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, struct block_device *bdev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_bdev == bdev && -+ ce->e_indexes[index].o_key == key) { -+ ce = __mb_cache_entry_read(ce); -+ if (ce) -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * 
mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=index<indexes_count) -+ * @bdev: the device the cache entry should belong to -+ * @key: the key in the index -+ */ -+struct mb_cache_entry * -+mb_cache_entry_find_first(struct mb_cache *cache, int index, -+ struct block_device *bdev, unsigned int key) -+{ -+ unsigned int bucket = hash_long(key, cache->c_bucket_bits); -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, bdev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... 
-+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=index<indexes_count) -+ * @bdev: the device the cache entry should belong to -+ * @key: the key in the index -+ */ -+struct mb_cache_entry * -+mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, -+ struct block_device *bdev, unsigned int key) -+{ -+ struct mb_cache *cache = prev->e_cache; -+ unsigned int bucket = hash_long(key, cache->c_bucket_bits); -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, bdev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h ---- a/include/linux/ext3_fs.h Sun Dec 8 02:49:56 2002 -+++ b/include/linux/ext3_fs.h Sun Dec 8 02:49:56 2002 -@@ -64,8 +64,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -95,7 +93,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -130,28 +127,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 
aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -347,6 +322,7 @@ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -529,7 +505,7 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -713,6 +689,7 @@ - - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -781,8 +758,10 @@ - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - -diff 
-Nru a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h ---- a/include/linux/ext3_jbd.h Sun Dec 8 02:49:56 2002 -+++ b/include/linux/ext3_jbd.h Sun Dec 8 02:49:56 2002 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - -diff -Nru a/include/linux/mbcache.h b/include/linux/mbcache.h ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/include/linux/mbcache.h Sun Dec 8 02:49:56 2002 -@@ -0,0 +1,72 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ void (*free)(struct mb_cache_entry *); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_bits; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct mb_cache *e_cache; -+ atomic_t e_used; -+ struct block_device *e_bdev; 
-+ sector_t e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, struct block_device *); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *, -+ sector_t, unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, -+ struct block_device *, -+ sector_t); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ struct block_device *, -+ unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ struct block_device *, -+ unsigned int); -+#endif diff --git a/lustre/kernel_patches/patches/lustre-2.5.patch b/lustre/kernel_patches/patches/lustre-2.5.patch deleted file mode 100644 index 5e88d35579..0000000000 --- a/lustre/kernel_patches/patches/lustre-2.5.patch +++ /dev/null @@ -1,552 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# This patch format is intended for GNU patch command version 2.5 or higher. 
-# This patch includes the following deltas: -# ChangeSet 1.810 -> 1.811 -# kernel/ksyms.c 1.149 -> 1.150 -# fs/driverfs/inode.c 1.52 -> 1.53 -# include/linux/fs.h 1.175 -> 1.176 -# include/linux/namei.h 1.3 -> 1.4 -# fs/namei.c 1.56 -> 1.57 -# fs/nfsd/vfs.c 1.44 -> 1.45 -# arch/um/kernel/mem.c 1.5 -> 1.6 -# net/unix/af_unix.c 1.29 -> 1.30 -# mm/slab.c 1.33 -> 1.34 -# fs/sysfs/inode.c 1.55 -> 1.56 -# include/linux/slab.h 1.13 -> 1.14 -# include/linux/dcache.h 1.19 -> 1.20 -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/10/20 braam@clusterfs.com 1.811 -# Changes for Lustre -# -------------------------------------------- -# -diff -Nru a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c ---- a/arch/um/kernel/mem.c Sun Dec 8 02:49:38 2002 -+++ b/arch/um/kernel/mem.c Sun Dec 8 02:49:38 2002 -@@ -656,6 +656,22 @@ - return(phys_mem_map(pte_val(pte))); - } - -+struct page *check_get_page(unsigned long kaddr) -+{ -+ struct page *page; -+ struct mem_region *mr; -+ unsigned long phys = __pa(kaddr); -+ unsigned int n = phys_region_index(phys); -+ -+ if(regions[n] == NULL) -+ return NULL; -+ -+ mr = regions[n]; -+ page = (struct page *) mr->mem_map; -+ return page + ((phys_addr(phys)) >> PAGE_SHIFT); -+} -+ -+ - struct mem_region *page_region(struct page *page, int *index_out) - { - int i; -@@ -743,7 +759,7 @@ - (addr <= region->start + region->len)) - return(mk_phys(addr - region->start, i)); - } -- panic("region_pa : no region for virtual address"); -+ //panic("region_pa : no region for virtual address"); - return(0); - } - -diff -Nru a/fs/driverfs/inode.c b/fs/driverfs/inode.c ---- a/fs/driverfs/inode.c Sun Dec 8 02:49:38 2002 -+++ b/fs/driverfs/inode.c Sun Dec 8 02:49:38 2002 -@@ -523,7 +523,7 @@ - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); -- return lookup_hash(&qstr,parent); -+ return lookup_hash(&qstr,parent, NULL); - } - - /** -diff -Nru a/fs/namei.c b/fs/namei.c ---- 
a/fs/namei.c Sun Dec 8 02:49:38 2002 -+++ b/fs/namei.c Sun Dec 8 02:49:38 2002 -@@ -265,6 +265,9 @@ - - void path_release(struct nameidata *nd) - { -+ if (nd->dentry && nd->dentry->d_op && -+ nd->dentry->d_op->d_intent_release) -+ nd->dentry->d_op->d_intent_release(nd->dentry, &nd->it); - dput(nd->dentry); - mntput(nd->mnt); - } -@@ -273,10 +276,18 @@ - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -351,7 +362,7 @@ - * make sure that nobody added the entry to the dcache in the meantime.. 
- * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -@@ -369,7 +380,10 @@ - struct dentry * dentry = d_alloc(parent, name); - result = ERR_PTR(-ENOMEM); - if (dentry) { -- result = dir->i_op->lookup(dir, dentry); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); -+ else -+ result = dir->i_op->lookup(dir, dentry); - if (result) - dput(dentry); - else { -@@ -391,6 +405,12 @@ - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if (!result->d_op->d_revalidate2(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ result = ERR_PTR(-ENOENT); -+ } - } - return result; - } -@@ -534,7 +554,7 @@ - unlock_nd(nd); - - need_lookup: -- dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE, &nd->it); - if (IS_ERR(dentry)) - goto fail; - mntget(mnt); -@@ -684,7 +704,7 @@ - nd->dentry = next.dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) - break; - continue; - /* here ends the main loop */ -@@ -737,7 +757,8 @@ - break; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) - break; - } - goto return_base; -@@ -886,7 +907,8 @@ - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. 
- */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -909,13 +931,16 @@ - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; -- dentry = inode->i_op->lookup(inode, new); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); -+ else -+ dentry = inode->i_op->lookup(inode, new); - if (!dentry) { - dentry = new; - security_ops->inode_post_lookup(inode, dentry); -@@ -927,7 +952,7 @@ - } - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -947,11 +972,16 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -@@ -1268,7 +1298,7 @@ - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); - - do_last: - error = PTR_ERR(dentry); -@@ -1370,7 +1400,7 @@ - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); - putname(nd->last.name); - goto do_last; - } -@@ -1384,7 +1414,7 @@ - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, 
nd->dentry, &nd->it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1614,7 +1644,7 @@ - goto exit1; - } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, &nd.it); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1675,7 +1705,7 @@ - if (nd.last_type != LAST_NORM) - goto exit1; - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, &nd.it); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1949,7 +1979,8 @@ - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); -@@ -2020,7 +2051,7 @@ - - trap = lock_rename(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash(&oldnd.last, old_dir, &oldnd.it); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -2040,7 +2071,7 @@ - error = -EINVAL; - if (old_dentry == trap) - goto exit4; -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash(&newnd.last, new_dir, &newnd.it); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; -@@ -2050,7 +2081,7 @@ - goto exit5; - - error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); - exit5: - dput(new_dentry); - exit4: -diff -Nru a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c ---- a/fs/nfsd/vfs.c Sun Dec 8 02:49:38 2002 -+++ b/fs/nfsd/vfs.c Sun Dec 8 02:49:38 2002 -@@ -1292,7 +1292,7 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = 
vfs_rename(fdir, odentry, tdir, ndentry, NULL); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -diff -Nru a/fs/sysfs/inode.c b/fs/sysfs/inode.c ---- a/fs/sysfs/inode.c Sun Dec 8 02:49:39 2002 -+++ b/fs/sysfs/inode.c Sun Dec 8 02:49:39 2002 -@@ -471,7 +471,7 @@ - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); -- return lookup_hash(&qstr,parent); -+ return lookup_hash(&qstr,parent,NULL); - } - - /** -diff -Nru a/include/linux/dcache.h b/include/linux/dcache.h ---- a/include/linux/dcache.h Sun Dec 8 02:49:39 2002 -+++ b/include/linux/dcache.h Sun Dec 8 02:49:39 2002 -@@ -9,6 +9,24 @@ - #include <linux/spinlock.h> - #include <asm/page.h> /* for BUG() */ - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_MKDIR (1<<2) -+#define IT_LINK (1<<3) -+#define IT_LINK2 (1<<4) -+#define IT_SYMLINK (1<<5) -+#define IT_UNLINK (1<<6) -+#define IT_RMDIR (1<<7) -+#define IT_RENAME (1<<8) -+#define IT_RENAME2 (1<<9) -+#define IT_READDIR (1<<10) -+#define IT_GETATTR (1<<11) -+#define IT_SETATTR (1<<12) -+#define IT_READLINK (1<<13) -+#define IT_MKNOD (1<<14) -+#define IT_LOOKUP (1<<15) -+ -+ - /* - * linux/include/linux/dcache.h - * -@@ -30,6 +48,8 @@ - unsigned int hash; - }; - -+#include <linux/namei.h> -+ - struct dentry_stat_t { - int nr_dentry; - int nr_unused; -@@ -79,6 +99,7 @@ - struct list_head d_subdirs; /* our children */ - struct list_head d_alias; /* inode alias list */ - int d_mounted; -+ struct lookup_intent *d_it; - struct qstr d_name; - unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; -@@ -96,6 +117,8 @@ - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); - }; - - /* the dentry parameter passed to d_hash and d_compare is the 
parent -diff -Nru a/include/linux/fs.h b/include/linux/fs.h ---- a/include/linux/fs.h Sun Dec 8 02:49:38 2002 -+++ b/include/linux/fs.h Sun Dec 8 02:49:38 2002 -@@ -700,7 +700,7 @@ - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct lookup_intent *it); - - /* - * File types -@@ -769,6 +769,8 @@ - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, -+ struct lookup_intent *); - int (*link) (struct dentry *,struct inode *,struct dentry *); - int (*unlink) (struct inode *,struct dentry *); - int (*symlink) (struct inode *,struct dentry *,const char *); -@@ -995,6 +997,7 @@ - extern int unregister_filesystem(struct file_system_type *); - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - extern long do_mount(char *, char *, char *, unsigned long, void *); - - #define kern_umount mntput -diff -Nru a/include/linux/namei.h b/include/linux/namei.h ---- a/include/linux/namei.h Sun Dec 8 02:49:38 2002 -+++ b/include/linux/namei.h Sun Dec 8 02:49:38 2002 -@@ -5,6 +5,17 @@ - - struct vfsmount; - -+struct lookup_intent { -+ int it_op; -+ int it_mode; -+ int it_disposition; -+ int it_status; -+ struct iattr *it_iattr; -+ __u64 it_lock_handle[2]; -+ int it_lock_mode; -+ void *it_data; -+}; -+ - struct nameidata { - struct dentry *dentry; - struct vfsmount *mnt; -@@ -13,6 +24,7 @@ - int last_type; - struct dentry *old_dentry; - struct vfsmount *old_mnt; -+ struct 
lookup_intent it; - }; - - /* -@@ -46,7 +58,7 @@ - extern void path_release(struct nameidata *); - - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); --extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -+extern struct dentry * lookup_hash(struct qstr *, struct dentry *, struct lookup_intent *); - - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); -diff -Nru a/include/linux/slab.h b/include/linux/slab.h ---- a/include/linux/slab.h Sun Dec 8 02:49:39 2002 -+++ b/include/linux/slab.h Sun Dec 8 02:49:39 2002 -@@ -56,6 +56,7 @@ - extern int kmem_cache_shrink(kmem_cache_t *); - extern void *kmem_cache_alloc(kmem_cache_t *, int); - extern void kmem_cache_free(kmem_cache_t *, void *); -+extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); - extern unsigned int kmem_cache_size(kmem_cache_t *); - - extern void *kmalloc(size_t, int); -diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c ---- a/kernel/ksyms.c Sun Dec 8 02:49:38 2002 -+++ b/kernel/ksyms.c Sun Dec 8 02:49:38 2002 -@@ -365,6 +365,13 @@ - EXPORT_SYMBOL(tty_get_baud_rate); - EXPORT_SYMBOL(do_SAK); - -+/* lustre */ -+EXPORT_SYMBOL(panic_notifier_list); -+//EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+EXPORT_SYMBOL(exit_files); -+EXPORT_SYMBOL(kmem_cache_validate); -+ - /* filesystem registration */ - EXPORT_SYMBOL(register_filesystem); - EXPORT_SYMBOL(unregister_filesystem); -diff -Nru a/mm/slab.c b/mm/slab.c ---- a/mm/slab.c Sun Dec 8 02:49:39 2002 -+++ b/mm/slab.c Sun Dec 8 02:49:39 2002 -@@ -1236,6 +1236,59 @@ - * Called with the cache-lock held. 
- */ - -+extern struct page *check_get_page(unsigned long kaddr); -+struct page *page_mem_map(struct page *page); -+static int kmem_check_cache_obj (kmem_cache_t * cachep, -+ slab_t *slabp, void * objp) -+{ -+ int i; -+ unsigned int objnr; -+ -+#if DEBUG -+ if (cachep->flags & SLAB_RED_ZONE) { -+ objp -= BYTES_PER_WORD; -+ if ( *(unsigned long *)objp != RED_MAGIC2) -+ /* Either write before start, or a double free. */ -+ return 0; -+ if (*(unsigned long *)(objp+cachep->objsize - -+ BYTES_PER_WORD) != RED_MAGIC2) -+ /* Either write past end, or a double free. */ -+ return 0; -+ } -+#endif -+ -+ objnr = (objp-slabp->s_mem)/cachep->objsize; -+ if (objnr >= cachep->num) -+ return 0; -+ if (objp != slabp->s_mem + objnr*cachep->objsize) -+ return 0; -+ -+ /* Check slab's freelist to see if this obj is there. */ -+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { -+ if (i == objnr) -+ return 0; -+ } -+ return 1; -+} -+ -+ -+int kmem_cache_validate(kmem_cache_t *cachep, void *objp) -+{ -+ struct page *page = check_get_page((unsigned long)objp); -+ -+ if (!page_mem_map(page)) -+ return 0; -+ -+ if (!PageSlab(page)) -+ return 0; -+ -+ /* XXX check for freed slab objects ? */ -+ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp)) -+ return 0; -+ -+ return (cachep == GET_PAGE_CACHE(page)); -+} -+ - #if DEBUG - static int kmem_extra_free_checks (kmem_cache_t * cachep, - slab_t *slabp, void * objp) -diff -Nru a/net/unix/af_unix.c b/net/unix/af_unix.c ---- a/net/unix/af_unix.c Sun Dec 8 02:49:38 2002 -+++ b/net/unix/af_unix.c Sun Dec 8 02:49:38 2002 -@@ -715,7 +715,7 @@ - /* - * Do the final lookup. 
- */ -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_unlock; diff --git a/lustre/tests/ba-echo.sh b/lustre/tests/ba-echo.sh index a3b97cde45..9f31edc742 100644 --- a/lustre/tests/ba-echo.sh +++ b/lustre/tests/ba-echo.sh @@ -22,17 +22,17 @@ save_cmd() { [ -f $config ] && rm $config # Client node -${LMC} --node $CLIENT --tcpbuf $TCPBUF --net '*' tcp +${LMC} --add net --node $CLIENT --tcpbuf $TCPBUF --nid '*' --nettype tcp OBD_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` [ "$OBD_UUID" ] && OBD_UUID="--obduuid=$OBD_UUID" || echo "$OST: no UUID" # server node -${LMC} --node $OST --tcpbuf $TCPBUF --net $OST tcp -${LMC} --node $OST --obdtype=obdecho $OBD_UUID --ost +${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp +${LMC} --add ost --node $OST --obd obd1 --obdtype=obdecho -obduuid $OBD_UUID # osc on client -${LMC} --node $CLIENT --osc OSC_$OST +${LMC} --add oscref --node $CLIENT --echo_client obd1 $LMC_REAL --batch $BATCH rm -f $BATCH diff --git a/lustre/tests/ba-mount.sh b/lustre/tests/ba-mount.sh index b81455f2a2..91cd7802d6 100644 --- a/lustre/tests/ba-mount.sh +++ b/lustre/tests/ba-mount.sh @@ -32,22 +32,22 @@ save_cmd() { [ -f $config ] && rm $config # MDS/client node -${LMC} --node $MDS --tcpbuf $TCPBUF --net $MDS tcp -${LMC} --node $MDS --mds mds1 /tmp/mds1 50000 +${LMC} --add net --node $MDS --tcpbuf $TCPBUF --nid $MDS --nettype tcp +${LMC} --add mds --node $MDS --mds mds1 --dev /tmp/mds1 --size 50000 OBD_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` [ "$OBD_UUID" ] && OBD_UUID="--obduuid=$OBD_UUID" || echo "$OST: no UUID" # server node -${LMC} --node $OST --tcpbuf $TCPBUF --net $OST tcp -${LMC} --node $OST $OBD_UUID --ost bluearc +${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp +${LMC} --add ost --node $OST -obd obd1 --obduuid $OBD_UUID --dev bluearc # mount point on the MDS/client -${LMC} --node $MDS 
--mtpt /mnt/lustre mds1 OSC_$OST +${LMC} --add mtpt --node $MDS --path /mnt/lustre --mds mds1 --lov obd1 # other clients -${LMC} --node client --tcpbuf $TCPBUF --net '*' tcp -${LMC} --node client --mtpt /mnt/lustre mds1 OSC_$OST +${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp +${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov obd1 $LMC_REAL --batch $BATCH rm -f $BATCH diff --git a/lustre/tests/cobd.sh b/lustre/tests/cobd.sh new file mode 100755 index 0000000000..3f6521a193 --- /dev/null +++ b/lustre/tests/cobd.sh @@ -0,0 +1,41 @@ +#!/bin/bash + + +config=${1:-$(basename $0 .sh)}.xml + +LMC=${LMC:-../utils/lmc -m $config} +TMP=${TMP:-/tmp} + +MDSDEV=$TMP/mds1 +MDSSIZE=50000 + +OSTDEV=$TMP/ost1 +OSTSIZE=200000 + +kver=`uname -r | cut -d "." -f 1,2` + +case $kver in + 2.4) FSTYPE="--fstype=extN" ;; + 2.5) FSTYPE="--fstype=ext3" ;; + *) echo "Kernel version $kver not supported" + exit 1 + ;; +esac + +rm -f $config +# create nodes +${LMC} --add node --node localhost || exit 10 +${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11 + +# configure mds server +${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20 + +# configure ost +${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30 +# configure ost +${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30 + +${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2 + +# create client config +# ${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --obd obd1 || exit 40 diff --git a/lustre/tests/llecho.sh b/lustre/tests/llecho.sh index a47c088276..82b2c1ea5f 100644 --- a/lustre/tests/llecho.sh +++ b/lustre/tests/llecho.sh @@ -23,24 +23,30 @@ while [ "$1" ]; do shift done +rm -f $config # create nodes -$LMC -o $config --node $SERVER --net $SERVER tcp || exit 1 +$LMC -o $config --add node --node $SERVER || exit 1 +$LMC -m $config --add net --node $SERVER --nid 
$SERVER --nettype tcp || exit 2 if (($LOV)); then - $LMC -m $config --node $SERVER --mds mds1 $MDSDEV $MDSSIZE || exit 10 - $LMC -m $config --lov lov1 mds1 $STRIPE_BYTES $STRIPES_PER_OBJ 0 || exit 11 - $LMC -m $config --node $SERVER --lov lov1 --obdtype=obdecho --ost || exit 12 - $LMC -m $config --node $SERVER --lov lov1 --obdtype=obdecho --ost || exit 13 - - $LMC -m $config --node $CLIENT --echo_client lov1 || exit 3 + $LMC -m $config --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10 + $LMC -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11 + $LMC -m $config --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 12 + $LMC -m $config --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 13 + OBD_NAME=lov1 else - $LMC -m $config --node $SERVER --obdtype=obdecho --ost || exit 2 - # force the osc to be configured (this is normally done when it is mounted) - $LMC -m $config --node $CLIENT --osc OSC_$SERVER || exit 3 - $LMC -m $config --node $CLIENT --echo_client OSC_${SERVER} || exit 3 + $LMC -m $config --add ost --obd obd1 --node $SERVER --obdtype=obdecho || exit 2 + OBD_NAME=obd1 +fi + +if [ "$SERVER" != "$CLIENT" ]; then + $LMC -m $config --add node --node $CLIENT || exit 1 + $LMC -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 2 fi -$LCONF --gdb $OPTS $config || exit 4 +$LMC -m $config --add echo_client --node $CLIENT --obd ${OBD_NAME} || exit 3 + +$LCONF --reformat --gdb $OPTS $config || exit 4 cat <<EOF diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 222ac30625..7d369f4ece 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -23,13 +23,14 @@ esac # create nodes -${LMC} -o $config --node localhost --net localhost tcp || exit 1 +${LMC} -o $config --add node --node localhost || exit 10 +${LMC} -m $config --add net --node localhost --nid localhost --nettype tcp || exit 11 # configure mds server
-${LMC} -m $config --format --node localhost $FSTYPE --mds mds1 $MDSDEV $MDSSIZE || exit 2 +${LMC} -m $config --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20 # configure ost -${LMC} -m $config --format --node localhost --ost $OSTDEV $OSTSIZE || exit 3 +${LMC} -m $config --add ost --node localhost --obd obd1 --dev $OSTDEV --size $OSTSIZE || exit 30 # create client config -${LMC} -m $config --node localhost --mtpt /mnt/lustre mds1 OSC_localhost || exit 4 +${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --obd obd1 || exit 40 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index 76f8374ce7..54d4c6656c 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -17,16 +17,16 @@ STRIPE_BYTES=65536 STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs # create nodes -${LMC} -o $config --node localhost --net localhost tcp || exit 1 +${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1 # configure mds server -${LMC} -m $config --format --node localhost --mds mds1 $MDSDEV $MDSSIZE || exit 10 +${LMC} -m $config --format --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10 # configure ost -${LMC} -m $config --lov lov1 mds1 $STRIPE_BYTES $STRIPES_PER_OBJ 0 || exit 20 -${LMC} -m $config --node localhost --lov lov1 --ost $OSTDEV1 $OSTSIZE || exit 21 -${LMC} -m $config --node localhost --lov lov1 --ost $OSTDEV2 $OSTSIZE || exit 22 -${LMC} -m $config --node localhost --lov lov1 --ost $OSTDEV3 $OSTSIZE || exit 23 +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 +${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21 +${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22 +${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV3 --size $OSTSIZE || exit 23 # create client config 
-${LMC} -m $config --node localhost --mtpt /mnt/lustre mds1 lov1 || exit 30 +${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 diff --git a/lustre/tests/mcr-individual-ost-nogw-config.sh b/lustre/tests/mcr-individual-ost-nogw-config.sh index 0aae2819f1..cd569b6103 100755 --- a/lustre/tests/mcr-individual-ost-nogw-config.sh +++ b/lustre/tests/mcr-individual-ost-nogw-config.sh @@ -22,7 +22,7 @@ save_cmd() { [ -f $config ] && rm $config # Client node -${LMC} --node client --tcpbuf $TCPBUF --net '*' tcp || exit 1 +${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp || exit 1 # this is crude, but effective let server_per_gw=($SERVER_CNT / $GW_CNT ) @@ -34,11 +34,11 @@ do echo "server: $server" OST=ba$server # server node - ${LMC} --node $OST --tcpbuf $TCPBUF --net $OST tcp || exit 1 + ${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp || exit 1 # the device on the server - ${LMC} --node $OST --obdtype=obdecho --ost || exit 3 + ${LMC} --add ost --node $OST --obd obd_$OST --obdtype=obdecho || exit 3 # osc on client - ${LMC} --node client --osc OSC_$OST + ${LMC} --add oscref --node client --osc OSC_obd_$OST let server=$server+1 done diff --git a/lustre/tests/mcr-mds-failover-config.sh b/lustre/tests/mcr-mds-failover-config.sh index 8a42c3d8ff..d0320fd5ed 100755 --- a/lustre/tests/mcr-mds-failover-config.sh +++ b/lustre/tests/mcr-mds-failover-config.sh @@ -27,22 +27,22 @@ h2ip () { # create client node -$LMC -o $CONFIG --node client --net '*' elan -$LMC -m $CONFIG --router --node mcr21 --tcpbuf $TCPBUF --net `h2ip $GW_NODE` tcp -$LMC -m $CONFIG --router --node mcr21 --net `h2elan $GW_NODE` elan -$LMC -m $CONFIG --node $GW_NODE --route elan `h2elan $GW_NODE` $CLIENT_ELAN +$LMC -o $CONFIG --add net --node client --nid '*' --nettype elan +$LMC -m $CONFIG --add net --router --node mcr21 --tcpbuf $TCPBUF --nid `h2ip $GW_NODE` --nettype tcp +$LMC -m $CONFIG --add net --router --node mcr21 
--nid `h2elan $GW_NODE` --nettype elan +$LMC -m $CONFIG --add route --node $GW_NODE --nettype elan --gw `h2elan $GW_NODE` --lo $CLIENT_ELAN # create MDS node entries for mds in $MDSNODES; do elanaddr=`$LUSTRE_QUERY -h emcri -s id=$mds -e` - $LMC -m $CONFIG --node $mds --net $elanaddr elan - $LMC -m $CONFIG --node $mds --mds mds_$mds $MDS_DEVICE $MDS_SIZE + $LMC -m $CONFIG --add net --node $mds --nid $elanaddr --nettype elan + $LMC -m $CONFIG --add mds --node $mds --mds mds_$mds --dev $MDS_DEVICE --size $MDS_SIZE done # create OST node entry -$LMC -m $CONFIG --node $OST_BA --tcpbuf $TCPBUF --net $OST_BA tcp -$LMC -m $CONFIG --node $OST_BA --obduuid $OST_UUID --ost bluearc -$LMC -m $CONFIG --node $GW_NODE --route tcp `h2ip $GW_NODE` $OST_BA +$LMC -m $CONFIG --add net --node $OST_BA --tcpbuf $TCPBUF --nid $OST_BA --nettype tcp +$LMC -m $CONFIG --add ost --node $OST_BA --obd obd_$OST_BA --obduuid $OST_UUID --dev bluearc +$LMC -m $CONFIG --add route --node $GW_NODE --nettype tcp --gw `h2ip $GW_NODE` --lo $OST_BA # mount -$LMC -m $CONFIG --node client --mtpt /mnt/lustre mds_$ACTIVEMDS OSC_$OST_BA +$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --mds mds_$ACTIVEMDS --lov obd_$OST_BA diff --git a/lustre/tests/mcr-routed-config.sh b/lustre/tests/mcr-routed-config.sh index f66335e92d..eb2bec177a 100755 --- a/lustre/tests/mcr-routed-config.sh +++ b/lustre/tests/mcr-routed-config.sh @@ -44,14 +44,14 @@ gw2node() { [ -f $config ] && rm $config -${LMC} --node $MDS --net `h2elan $MDS` elan || exit 1 -${LMC} --node $MDS --mds mds1 /tmp/mds1 100000 || exit 1 -${LMC} --lov lov1 mds1 65536 1 0 +${LMC} --add net --node $MDS --nid `h2elan $MDS` --nettype elan || exit 1 +${LMC} --add mds --node $MDS --mds mds1 --dev /tmp/mds1 --size 100000 || exit 1 +${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 1 --stripe_pattern 0 # Client node -#${LMC} --node client --tcpbuf $TCPBUF --net '*' tcp || exit 1 -${LMC} --node client --net '*' elan || exit 1 -${LMC} 
--node client --mtpt /mnt/lustre mds1 lov1 +#${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp || exit 1 +${LMC} --add net --node client --nid '*' --nettype elan || exit 1 +${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1 # this is crude, but effective let server_per_gw=($SERVER_CNT / $GW_CNT ) @@ -65,9 +65,9 @@ while (( $gw < $GW_CNT + GW_START )); do gwnode=$BASE`gw2node $gw` echo "Router: $gwnode" - ${LMC} --router --node $gwnode --tcpbuf $TCPBUF --net `h2ip $gwnode` tcp || exit 1 - ${LMC} --node $gwnode --net `h2elan $gwnode` elan|| exit 1 - ${LMC} --node $gwnode --route elan `h2elan $gwnode` `h2elan $CLIENT_LO` `h2elan $CLIENT_HI` || exit 2 + ${LMC} --add net --router --node $gwnode --tcpbuf $TCPBUF --nid `h2ip $gwnode` --nettype tcp || exit 1 + ${LMC} --add net --node $gwnode --nid `h2elan $gwnode` --nettype elan || exit 1 + ${LMC} --add route --node $gwnode --nettype elan --gw `h2elan $gwnode` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` || exit 2 let i=0 while (( $i < $server_per_gw )); @@ -77,11 +77,11 @@ do OBD_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` [ "$OBD_UUID" ] && OBD_UUID="--obduuid=$OBD_UUID" || echo "$OST: no UUID" # server node - ${LMC} --node $OST --tcpbuf $TCPBUF --net $OST tcp || exit 1 + ${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp || exit 1 # the device on the server - ${LMC} --lov lov1 --node $OST $OBD_UUID --ost bluearc || exit 3 + ${LMC} --add ost --lov lov1 --node $OST $OBD_UUID --dev bluearc || exit 3 # route to server - ${LMC} --node $gwnode --route tcp `h2ip $gwnode` $OST || exit 2 + ${LMC} --add route --node $gwnode --nettype tcp --gw `h2ip $gwnode` --lo $OST || exit 2 let server=$server+1 let i=$i+1 done diff --git a/lustre/tests/mcr.sh b/lustre/tests/mcr.sh index c7f7919df3..f4e30ebbee 100755 --- a/lustre/tests/mcr.sh +++ b/lustre/tests/mcr.sh @@ -26,20 +26,20 @@ h2ip () { [ -f $config ] && rm $config # Client node -${LMC} --node client
--net '*' elan || exit 1 +${LMC} --add net --node client --nid '*' --nettype elan || exit 1 # Router node -${LMC} --router --node $ROUTER --tcpbuf $TCPBUF --net `h2ip $ROUTER` tcp || exit 1 -${LMC} --node $ROUTER --net `h2elan $ROUTER` elan|| exit 1 -${LMC} --node $ROUTER --route elan `h2elan $ROUTER` `h2elan $CLIENT_LO` `h2elan $CLIENT_HI` || exit 2 +${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2ip $ROUTER` --nettype tcp || exit 1 +${LMC} --add net --node $ROUTER --nid `h2elan $ROUTER` --nettype elan|| exit 1 +${LMC} -m $config --add route --node $ROUTER --nettype elan --gw `h2elan $ROUTER` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` || exit 2 for s in $SERVERS do # server node - ${LMC} --node $s --tcpbuf $TCPBUF --net $s tcp || exit 1 + ${LMC} --add net --node $s --tcpbuf $TCPBUF --nid $s --nettype tcp || exit 1 # route to server - ${LMC} --node $ROUTER --route tcp `h2ip $ROUTER` $s || exit 2 + ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2ip $ROUTER` --lo $s || exit 2 # the device on the server - ${LMC} --node $s --obdtype=obdecho --ost || exit 3 - # attach to the device on the client (this would normally be a moun) - ${LMC} --node client --osc OSC_$s || exit 4 + ${LMC} --add ost --node $s --obd obd_$s --obdtype=obdecho || exit 3 + # attach to the device on the client (this would normally be a mount) + ${LMC} --add oscref --node client --osc OSC_obd_$s || exit 4 done diff --git a/lustre/tests/mcrlov.sh b/lustre/tests/mcrlov.sh index 35ba323eb8..735034358f 100755 --- a/lustre/tests/mcrlov.sh +++ b/lustre/tests/mcrlov.sh @@ -28,24 +28,25 @@ h2ip () { [ -f $config ] && rm $config # Client node -${LMC} --node client --net '*' elan || exit 1 +${LMC} --add net --node client --nid '*' --nettype elan || exit 1 # Router node -${LMC} --router --node $ROUTER --tcpbuf $TCPBUF --net `h2ip $ROUTER` tcp || exit 1 -${LMC} --node $ROUTER --net `h2elan $ROUTER` elan|| exit 1 -${LMC} --node $ROUTER --route elan `h2elan $ROUTER` `h2elan 
$CLIENT_LO` `h2elan $CLIENT_HI` || exit 2 +${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2ip $ROUTER` --nettype tcp || exit 1 +${LMC} --add net --node $ROUTER --nid `h2elan $ROUTER` --nettype elan|| exit 1 +${LMC} --add route --node $ROUTER --gw `h2elan $ROUTER` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` --nettype elan || exit 2 -${LMC} --node $MDS --net `h2elan $MDS` elan || exit 1 -${LMC} --node $MDS --mds mds1 $TMP/mds1 100000 || exit 1 -${LMC} --lov lov1 mds1 65536 0 0 +${LMC} --add net --node $MDS --nid `h2elan $MDS` --nettype elan || exit 1 +${LMC} --add mds --node $MDS --mds mds1 --dev $TMP/mds1 --size 100000 || exit 1 +${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 1 -${LMC} --node client --mtpt /mnt/lustre mds1 lov1 +${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1 for s in $SERVERS do # server node - ${LMC} --node $s --tcpbuf $TCPBUF --net $s tcp || exit 1 + ${LMC} --add net --node $s --tcpbuf $TCPBUF --nid $s --nettype tcp || exit 1 # route to server - ${LMC} --node $ROUTER --route tcp `h2ip $ROUTER` $s || exit 2 + ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2ip $ROUTER` --lo $s || exit 2 # the device on the server - ${LMC} --format --lov lov1 --node $s --ost bluearc || exit 3 + #${LMC} --format --lov lov1 --node $s --ost bluearc || exit 3 + ${LMC} --add ost --lov lov1 --node $s --dev bluearc --format || exit 3 done diff --git a/lustre/tests/mount2.sh b/lustre/tests/mount2.sh index 6e5aa0b56f..6ae6e70b56 100644 --- a/lustre/tests/mount2.sh +++ b/lustre/tests/mount2.sh @@ -22,14 +22,14 @@ case $kver in esac # create nodes -${LMC} -o $config --node localhost --net localhost tcp || exit 1 +${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1 # configure mds server -${LMC} -m $config --format --node localhost $FSTYPE --mds mds1 $MDSDEV $MDSSIZE || exit 2 +${LMC} -m $config --add mds --format --node localhost 
$FSTYPE --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 2 # configure ost -${LMC} -m $config --format --node localhost $FSTYPE --ost $OSTDEV $OSTSIZE || exit 3 +${LMC} -m $config --add ost --format --obd obd1 --node localhost $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 3 # create client config -${LMC} -m $config --node localhost --mtpt /mnt/lustre1 mds1 OSC_localhost || exit 4 -${LMC} -m $config --node localhost --mtpt /mnt/lustre2 mds1 OSC_localhost || exit 4 +${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --obd obd1 || exit 4 +${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --obd obd1 || exit 4 diff --git a/lustre/tests/mount2fs.sh b/lustre/tests/mount2fs.sh index 9e766cbfd7..27b570dfb4 100644 --- a/lustre/tests/mount2fs.sh +++ b/lustre/tests/mount2fs.sh @@ -20,23 +20,23 @@ OSTNODE=uml2 CLIENT=uml3 # create nodes -${LMC} -o $config --node $MDSNODE --net $MDSNODE tcp || exit 1 -${LMC} -m $config --node $OSTNODE --net $OSTNODE tcp || exit 2 -${LMC} -m $config --node $CLIENT --net $CLIENT tcp || exit 3 +${LMC} -o $config --add net --node $MDSNODE --nid $MDSNODE --nettype tcp || exit 1 +${LMC} -m $config --add net --node $OSTNODE --nid $OSTNODE --nettype tcp || exit 2 +${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3 # configure mds server -${LMC} -m $config --format --node $MDSNODE --mds mds1 $MDSDEV $MDSSIZE ||exit 10 -${LMC} -m $config --format --node $MDSNODE --mds mds2 $MDSDEV2 $MDSSIZE ||exit 10 +${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10 +${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --dev $MDSDEV2 --size $MDSSIZE ||exit 10 # configure ost -${LMC} -m $config --lov lov1 mds1 65536 0 0 || exit 20 -${LMC} -m $config --lov lov2 mds2 65536 0 0 || exit 20 -${LMC} -m $config --node $OSTNODE --lov lov1 --ost $OSTDEV1 $OSTSIZE || exit 21 -${LMC} -m $config --node $OSTNODE --lov lov2 --ost $OSTDEV2 
$OSTSIZE || exit 22 +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 +${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 +${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21 +${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --dev $OSTDEV2 --size $OSTSIZE || exit 22 # create client config -${LMC} -m $config --node $CLIENT --mtpt /mnt/lustre mds1 lov1 || exit 30 -${LMC} -m $config --node $CLIENT --mtpt /mnt/lustre2 mds2 lov2 || exit 30 +${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 +${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30 diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index 7d8da8f743..a8a381b8f4 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -44,20 +44,20 @@ rm -f $config # create nodes for NODE in $MDSNODE $OSTNODE $CLIENTS; do eval [ \$$NODE ] && continue - ${LMC} -m $config --node $NODE --net $NODE tcp || exit 1 + ${LMC} -m $config --add net --node $NODE --nid $NODE --nettype tcp || exit 1 eval "$NODE=done" done # configure mds server -${LMC} -m $config --format --node $MDSNODE --mds mds1 $MDSDEV $MDSSIZE ||exit 10 +${LMC} -m $config --add mds --format --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10 # configure ost -${LMC} -m $config --lov lov1 mds1 65536 0 0 || exit 20 -${LMC} -m $config --node $OSTNODE --lov lov1 --ost $OSTDEV1 $OSTSIZE || exit 21 -${LMC} -m $config --node $OSTNODE --lov lov1 --ost $OSTDEV2 $OSTSIZE || exit 22 +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 +${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21 +${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22 # create 
client config(s) for NODE in $CLIENTS; do - ${LMC} -m $config --node $NODE --mtpt /mnt/lustre mds1 lov1 || exit 30 + ${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 done diff --git a/lustre/utils/lconf.in b/lustre/utils/lconf.in index 6960a498ed..0f39037f9d 100755 --- a/lustre/utils/lconf.in +++ b/lustre/utils/lconf.in @@ -75,14 +75,16 @@ config.xml Lustre configuration in xml format. 50 - mdc, osc 60 - lov, lovconfig 70 - mountpoint, echo_client +--lustre=src_dir Base directory of lustre sources. This parameter will cause lconf + to load modules from a source tree. +--portals=src_dir Portals source directory. If this is a relative path, then it is + assumed to be relative to lustre. + """ TODO = """ --ldap server LDAP server with lustre config database --makeldiff Translate xml source to LDIFF This are perhaps not needed: ---lustre="src dir" Base directory of lustre sources. Used to search - for modules. ---portals=src Portals source """ sys.exit() @@ -106,7 +108,8 @@ class Config: self._gdb_script = '/tmp/ogdb' self._debug_path = '/tmp/lustre-log' self._dump_file = None - self._src_dir = '' + self._lustre_dir = '' + self._portals_dir = '' self._minlevel = 0 self._maxlevel = 100 @@ -162,10 +165,6 @@ class Config: else: return self._debug_path - def src_dir(self, val = None): - if val: self._src_dir = val - return self._src_dir - def dump_file(self, val = None): if val: self._dump_file = val return self._dump_file @@ -178,6 +177,13 @@ class Config: if val: self._maxlevel = int(val) return self._maxlevel + def portals_dir(self, val = None): + if val: self._portals_dir = val + return self._portals_dir + + def lustre_dir(self, val = None): + if val: self._lustre_dir = val + return self._lustre_dir config = Config() @@ -471,10 +477,11 @@ def find_prog(cmd): syspath = string.split(os.environ['PATH'], ':') cmdpath = os.path.dirname(sys.argv[0]) syspath.insert(0, cmdpath); - syspath.insert(0, os.path.join(cmdpath, 
PORTALS_DIR+'/linux/utils/')) + if config.portals_dir(): + syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/')) for d in syspath: prog = os.path.join(d,cmd) - debug(prog) + debug(prog) if os.access(prog, os.X_OK): return prog return '' @@ -491,10 +498,9 @@ def do_find_file(base, mod): if module: return module -def find_module(dev_dir, modname): +def find_module(src_dir, dev_dir, modname): mod = '%s.o' % (modname) - - module = dev_dir +'/'+ mod + module = src_dir +'/'+ dev_dir +'/'+ mod try: if os.access(module, os.R_OK): return module @@ -572,6 +578,8 @@ def init_loop(file, size, fstype): print 'WARNING file:', file, 'already mapped to', dev return dev if config.reformat() or not os.access(file, os.R_OK | os.W_OK): + if size < 8000: + error(file, "size must be larger than 8MB") run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file)) loop = loop_base() # find next free loop @@ -669,6 +677,20 @@ def is_prepared(uuid): e.dump() return 0 +def fs_is_mounted(path): + """Return true if path is a mounted lustre filesystem""" + try: + fp = open('/proc/mounts') + lines = fp.readlines() + fp.close() + for l in lines: + a = string.split(l) + if a[1] == path and a[2] == 'lustre_lite': + return 1 + except IOError, e: + log(e) + return 0 + # ============================================================ # Classes to prepare and cleanup the various objects @@ -719,9 +741,13 @@ class Module: e.dump() cleanup_error(e.rc) - def add_module(self, dev_dir, modname): + def add_portals_module(self, dev_dir, modname): """Append a module to list of modules to load.""" - self.kmodule_list.append((dev_dir, modname)) + self.kmodule_list.append((config.portals_dir(), dev_dir, modname)) + + def add_lustre_module(self, dev_dir, modname): + """Append a module to list of modules to load.""" + self.kmodule_list.append((config.lustre_dir(), dev_dir, modname)) def mod_loaded(self, modname): """Check if a module is already loaded. 
Look in /proc/modules for it.""" @@ -735,13 +761,13 @@ class Module: def load_module(self): """Load all the modules in the list in the order they appear.""" - for dev_dir, mod in self.kmodule_list: + for src_dir, dev_dir, mod in self.kmodule_list: # (rc, out) = run ('/sbin/lsmod | grep -s', mod) if self.mod_loaded(mod) and not config.noexec(): continue log ('loading module:', mod) - if config.src_dir(): - module = find_module(dev_dir, mod) + if src_dir: + module = find_module(src_dir, dev_dir, mod) if not module: panic('module not found:', mod) (rc, out) = run('/sbin/insmod', module) @@ -756,7 +782,7 @@ class Module: """Unload the modules in the list in reverse order.""" rev = self.kmodule_list rev.reverse() - for dev_dir, mod in rev: + for src_dir, dev_dir, mod in rev: if not self.mod_loaded(mod): continue # debug hack @@ -785,19 +811,19 @@ class Network(Module): panic("unable to set nid for", self.net_type, self.nid) debug("nid:", self.nid) - self.add_module(PORTALS_DIR+"/linux/oslib", 'portals') + self.add_portals_module("linux/oslib", 'portals') if node_needs_router(): - self.add_module(PORTALS_DIR+"/linux/router", 'kptlrouter') + self.add_portals_module("linux/router", 'kptlrouter') if self.net_type == 'tcp': - self.add_module(PORTALS_DIR+"/linux/socknal", 'ksocknal') + self.add_portals_module("linux/socknal", 'ksocknal') if self.net_type == 'toe': - self.add_module(PORTALS_DIR+"/linux/toenal", 'ktoenal') + self.add_portals_module("linux/toenal", 'ktoenal') if self.net_type == 'elan': - self.add_module(PORTALS_DIR+"/linux/rqswnal", 'kqswnal') + self.add_portals_module("linux/rqswnal", 'kqswnal') if self.net_type == 'gm': - self.add_module(PORTALS_DIR+"/linux/gmnal", 'kgmnal') - self.add_module(config.src_dir()+'obdclass', 'obdclass') - self.add_module(config.src_dir()+'ptlrpc', 'ptlrpc') + self.add_portals_module("linux/gmnal", 'kgmnal') + self.add_lustre_module('obdclass', 'obdclass') + self.add_lustre_module('ptlrpc', 'ptlrpc') def prepare(self):
self.info(self.net_type, self.nid, self.port) @@ -871,7 +897,7 @@ class Network(Module): class LDLM(Module): def __init__(self,dom_node): Module.__init__(self, 'LDLM', dom_node) - self.add_module(config.src_dir()+'ldlm', 'ldlm') + self.add_lustre_module('ldlm', 'ldlm') def prepare(self): if is_prepared(self.uuid): return @@ -893,8 +919,8 @@ class LOV(Module): self.pattern = get_attr_int(dev_node, 'pattern', 0) self.devlist = get_all_refs(dev_node, 'osc') self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist)) - self.add_module(config.src_dir()+'mdc', 'mdc') - self.add_module(config.src_dir()+'lov', 'lov') + self.add_lustre_module('mdc', 'mdc') + self.add_lustre_module('lov', 'lov') def prepare(self): if is_prepared(self.uuid): @@ -982,9 +1008,9 @@ class MDS(Module): # FIXME: if fstype not set, then determine based on kernel version self.format = get_text(dom_node, 'autoformat', "no") if self.fstype == 'extN': - self.add_module(config.src_dir()+'extN', 'extN') - self.add_module(config.src_dir()+'mds', 'mds') - self.add_module(config.src_dir()+'obdclass', 'fsfilt_%s'%(self.fstype)) + self.add_lustre_module('extN', 'extN') + self.add_lustre_module('mds', 'mds') + self.add_lustre_module('obdclass', 'fsfilt_%s'%(self.fstype)) def prepare(self): if is_prepared(self.uuid): @@ -1026,7 +1052,7 @@ class MDC(Module): int(random.random() * 1048576)) self.lookup_server(self.mds.uuid) - self.add_module(config.src_dir()+'mdc', 'mdc') + self.add_lustre_module('mdc', 'mdc') def prepare(self): if is_prepared(self.uuid): @@ -1046,9 +1072,10 @@ class OBD(Module): # FIXME: if fstype not set, then determine based on kernel version self.format = get_text(dom_node, 'autoformat', 'yes') if self.fstype == 'extN': - self.add_module(config.src_dir()+'extN', 'extN') - self.add_module(config.src_dir()+'' + self.obdtype, self.obdtype) - self.add_module(config.src_dir()+'obdclass' , 'fsfilt_%s' % (self.fstype)) + self.add_lustre_module('extN', 'extN') + 
self.add_lustre_module(self.obdtype, self.obdtype) + if self.fstype: + self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype)) # need to check /proc/mounts and /etc/mtab before # formatting anything. @@ -1070,11 +1097,28 @@ class OBD(Module): if not self.obdtype == 'obdecho': clean_loop(self.devname) +class COBD(Module): + def __init__(self, dom_node): + Module.__init__(self, 'COBD', dom_node) + self.real_uuid = get_first_ref(dom_node, 'real_obd') + self.cache_uuid = get_first_ref(dom_node, 'cache_obd') + self.add_lustre_module('cobd' , 'cobd') + + # need to check /proc/mounts and /etc/mtab before + # formatting anything. + # FIXME: check if device is already formatted. + def prepare(self): + if is_prepared(self.uuid): + return + self.info(self.real_uuid, self.cache_uuid) + lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid), + setup ="%s %s" %(self.real_uuid, self.cache_uuid)) + class OST(Module): def __init__(self,dom_node): Module.__init__(self, 'OST', dom_node) self.obd_uuid = get_first_ref(dom_node, 'obd') - self.add_module(config.src_dir()+'ost', 'ost') + self.add_lustre_module('ost', 'ost') def prepare(self): if is_prepared(self.uuid): @@ -1108,7 +1152,7 @@ class OSC(Module): self.obd_uuid = get_first_ref(dom_node, 'obd') self.ost_uuid = get_first_ref(dom_node, 'ost') self.lookup_server(self.ost_uuid) - self.add_module(config.src_dir()+'osc', 'osc') + self.add_lustre_module('osc', 'osc') def prepare(self, ignore_connect_failure = 0): if is_prepared(self.uuid): @@ -1153,7 +1197,7 @@ class OSC(Module): class ECHO_CLIENT(Module): def __init__(self,dom_node): Module.__init__(self, 'ECHO_CLIENT', dom_node) - self.add_module('lustre/obdecho', 'obdecho') + self.add_lustre_module('obdecho', 'obdecho') self.lov_uuid = get_first_ref(dom_node, 'osc') l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc = VOSC(l) @@ -1186,8 +1230,8 @@ class Mountpoint(Module): self.path = get_text(dom_node, 'path') self.mds_uuid = get_first_ref(dom_node, 'mds') 
self.lov_uuid = get_first_ref(dom_node, 'osc') - self.add_module(config.src_dir()+'mdc', 'mdc') - self.add_module(config.src_dir()+'llite', 'llite') + self.add_lustre_module('mdc', 'mdc') + self.add_lustre_module('llite', 'llite') l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc = VOSC(l) @@ -1204,12 +1248,17 @@ class Mountpoint(Module): def cleanup(self): self.info(self.path, self.mds_uuid,self.lov_uuid) - if config.force(): - (rc, out) = run("umount -f", self.path) - else: - (rc, out) = run("umount", self.path) - if rc: - log("umount failed, cleanup will most likely not work.") + if fs_is_mounted(self.path): + if config.force(): + (rc, out) = run("umount", "-f", self.path) + else: + (rc, out) = run("umount", self.path) + if rc: + raise CommandError('umount', out, rc) + + if fs_is_mounted(self.path): + panic("fs is still mounted:", self.path) + l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc.cleanup() cleanup_mdc(self.dom_node.parentNode, self.mds_uuid) @@ -1225,6 +1274,8 @@ class Mountpoint(Module): # ============================================================ # XML processing and query # TODO: Change query funcs to use XPath, which is muc cleaner +# Or not. Originally both lconf and lmc used XPath, but it was many +# orders of magnitute slower, and lmc was unusable. 
- robert def get_device(obd): list = obd.getElementsByTagName('device') @@ -1350,7 +1401,7 @@ def getServiceLevel(dom_node): ret = 10 elif type in ('device', 'ldlm'): ret = 20 - elif type in ('obd', 'mdd'): + elif type in ('obd', 'mdd', 'cobd'): ret = 30 elif type in ('mds','ost'): ret = 40 @@ -1370,7 +1421,7 @@ def getServiceLevel(dom_node): # [(level, dom_node),] def getServices(lustreNode, profileNode): list = [] - for n in profileNode.childNodes: + for n in profileNode.childNodes: if n.nodeType == n.ELEMENT_NODE: servNode = lookup(lustreNode, getRef(n)) if not servNode: @@ -1513,6 +1564,8 @@ def startService(dom_node, module_flag): n = Network(dom_node) elif type == 'obd': n = OBD(dom_node) + elif type == 'cobd': + n = COBD(dom_node) elif type == 'ost': n = OST(dom_node) elif type == 'mds': @@ -1571,7 +1624,7 @@ def doHost(lustreNode, hosts): if dom_node: break if not dom_node: - print 'lconf: No host entry found in '+sys.argv[1] + print 'No host entry found.' return if not get_attr(dom_node, 'router'): @@ -1612,12 +1665,13 @@ def parse_cmdline(argv): "dump=", "force", "minlevel=", "maxlevel="] opts = [] args = [] + try: opts, args = getopt.getopt(argv, short_opts, long_opts) except getopt.error: print "invalid opt" usage() - + for o, a in opts: if o in ("-h", "--help"): usage() @@ -1629,9 +1683,9 @@ def parse_cmdline(argv): config.noexec(1) config.verbose(1) if o == "--portals": - config.portals = a + config.portals_dir(a) if o == "--lustre": - config.lustre = a + config.lustre_dir(a) if o == "--reformat": config.reformat(1) if o == "--node": @@ -1650,7 +1704,6 @@ def parse_cmdline(argv): config.minlevel(a) if o in ("--maxlevel",): config.maxlevel(a) - return args def fetch(url): @@ -1663,13 +1716,23 @@ def fetch(url): usage() return data -def setupModulePath(cmd): - global PORTALS_DIR +def setupModulePath(cmd, portals_dir = PORTALS_DIR): base = os.path.dirname(cmd) if os.access(base+"/Makefile", os.R_OK): - config.src_dir(base + "/../") - if PORTALS_DIR[0] != 
'/': - PORTALS_DIR= config.src_dir()+PORTALS_DIR + if not config.lustre_dir(): + config.lustre_dir(os.path.join(base, "..")) + # normalize the portals dir, using command line arg if set + if config.portals_dir(): + portals_dir = config.portals_dir() + dir = os.path.join(config.lustre_dir(), portals_dir) + config.portals_dir(dir) + elif config.lustre_dir() and config.portals_dir(): + # production mode + # if --lustre and --portals, normalize portals + # can ignore POTRALS_DIR here, since it is probly useless here + dir = config.portals_dir() + dir = os.path.join(config.lustre_dir(), dir) + config.portals_dir(dir) def sys_set_debug_path(): debug("debug path: ", config.debug_path()) @@ -1726,7 +1789,6 @@ def sanitise_path(): # def main(): global TCP_ACCEPTOR, lctl, MAXTCPBUF - setupModulePath(sys.argv[0]) host = socket.gethostname() @@ -1767,6 +1829,8 @@ def main(): config._debug_path = config._debug_path + '-' + host config._gdb_script = config._gdb_script + '-' + host + setupModulePath(sys.argv[0]) + TCP_ACCEPTOR = find_prog('acceptor') if not TCP_ACCEPTOR: if config.noexec(): diff --git a/lustre/utils/lmc b/lustre/utils/lmc index 3d7c7bf210..b4f92eabd0 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -20,98 +20,30 @@ """ lmc - lustre configurtion data manager - - Basic plan for lmc usage: -# create nodes -./lmc --output config.xml --node server --net server1 tcp -./lmc --merge config.xml --node client --net client1 tcp -./lmc --merge config.xml --node client --route gw lo [hi] -./lmc --merge config.xml --router --node gw1 --net gw1 tcp -./lmc --merge config.xml --node gw1 --net 1 elan -./lmc --merge config.xml --route elan 1 1 100 -./lmc --merge config.xml --route tcp gw1 ba1 - - - -# configure server -./lmc --merge config.xml --node server --mds mds1 /tmp/mds1 50000 - -# create lov -./lmc --merge config.xml --lov lov1 mds1 65536 0 0 -./lmc --merge config.xml --node server --lov lov1 --ost /tmp/ost1 100000 -./lmc --merge config.xml --node server --lov lov1 
--ost /tmp/ost2 100000 - -# create client config -./lmc --merge config.xml --node client --mtpt /mnt/lustre mds1 lov1 + See lustre book for documentation for lmc. """ -import sys, os, getopt, string +import sys, os, getopt, string, exceptions import xml.dom.minidom from xml.dom.ext import PrettyPrint - -DEFAULT_PORT = 988 # XXX What is the right default acceptor port to use? +DEFAULT_PORT = 988 def usage(): - print """usage: lmc [--node --ost | --mtpt | --lov] args -Commands: ---node node_name - Node_name by itself it will create a new node. If the --router - option is used when creating a new node, then that node will also - be configured as a router. When used with other commands it - specifies the node to modify. - ---net hostname nettype [port, recv_buf, send_buf] - Nettype is either tcp, toe, elan, or gm. - Requires --node - ---route net gw lo [hi] - This command is used to create routes. NET is the - network type this route will be used on. The GW is an address of - one of the local interfaces. LO and HI represent a range of - addresses that can be reached through the gateway. If HI is not - set, then a route to the specific host in LO is created. - ---mds device [size] - Create a MDS using the device - Requires --node - ---lov lov_name [mds_name stripe_sz sub_stripe_count pattern] - Creates a logical volume - When used with other commands, it specifics the lov to modify - ---ost device [size] - Creates an OBD/OST/OSC configuration triplet for a new device. - When used on "host", the device will be initialized and the OST - will be enabled. On client nodes, the OSC will be avaiable. - Requires --node - Optional --obduuid Specifies the UUID used for the obd. - If --lov lov_name is used, this device is added to lov. - ---mtpt /mnt/point mds_name lov_name|osc_name - Creates a client mount point. 
- Requires --node - -Options: ---merge="xml file" Add the new objects to an existing file ---format Format the partitions if unformated - NB: The autoformat option has been disabled until a safe - method is implemented to determine if a block device has a - filesystem. ---reformat Reformat partitions (this should be an lconf arg, - I think) ---obdtype="obdtype" Specifiy obdtype: valid ones are obdecho and obdfilter. - This is only useful for the --ost command. - The device parameters are ignored for the obdecho type. -""" + print """usage: lmc --add object [object parameters]""" sys.exit(1) def error(*args): msg = string.join(map(str,args)) - print "Error: ", msg + raise OptionError("Error: " + msg) + +def panic(cmd, msg): + print "! " + cmd + print msg sys.exit(1) + def warning(*args): msg = string.join(map(str,args)) @@ -184,8 +116,8 @@ class GenConfig: def newService(self, tag, name, uuid): """ create a new service elmement, which requires name and uuid attributes """ new = self.doc.createElement(tag) - new.setAttribute("name", name); new.setAttribute("uuid", uuid); + new.setAttribute("name", name); return new def addText(self, node, str): @@ -247,6 +179,12 @@ class GenConfig: self.addElement(obd, "autoformat", format) return obd + def cobd(self, name, uuid, real_uuid, cache_uuid): + cobd = self.newService("cobd", name, uuid) + cobd.appendChild(self.ref("real_obd",real_uuid)) + cobd.appendChild(self.ref("cache_obd",cache_uuid)) + return cobd + def osc(self, name, uuid, obd_uuid, net_uuid): osc = self.newService("osc", name, uuid) osc.appendChild(self.ref("ost", net_uuid)) @@ -259,12 +197,12 @@ class GenConfig: ost.appendChild(self.ref("obd", obd_uuid)) return ost - def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_count, pattern): + def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern): lov = self.newService("lov", name, uuid) lov.appendChild(self.ref("mds", mds_uuid)) devs = self.addElement(lov, "devices" ) devs.setAttribute("stripesize", 
stripe_sz) - devs.setAttribute("stripecount", stripe_count) + devs.setAttribute("stripecount", stripe_cnt) devs.setAttribute("pattern", pattern) return lov @@ -403,12 +341,10 @@ def do_add_node(gen, lustre, options, node_name): return node -def add_node(gen, lustre, options, args): +def add_node(gen, lustre, options): """ create a node with a network config """ - if len(args) > 1: - usage() - node_name = options['node'] + node_name = get_option(options, 'node') ret = findByName(lustre, node_name, "node") if ret: @@ -417,26 +353,19 @@ def add_node(gen, lustre, options, args): do_add_node(gen, lustre, options, node_name) -def add_net(gen, lustre, options, args): +def add_net(gen, lustre, options): """ create a node with a network config """ - if len(args) < 2: - usage() - - node_name = options['node'] - nid = args[0] - net_type = args[1] - port = 0 - tcpbuf = 0 - - if net_type in ('tcp', 'toe'): - if len(args) > 2: - port = int(args[2]) - else: - port = DEFAULT_PORT - if options.has_key('tcpbuf'): - tcpbuf = int(options['tcpbuf']) + + node_name = get_option(options, 'node') + nid = get_option(options, 'nid') + net_type = get_option(options, 'nettype') + + if net_type == 'tcp': + port = get_option_int(options, 'port', DEFAULT_PORT) + tcpbuf = get_option_int(options, 'tcpbuf', 0) elif net_type in ('elan', 'gm'): port = 0 + tcpbuf = 0 else: print "Unknown net_type: ", net_type sys.exit(2) @@ -452,19 +381,14 @@ def add_net(gen, lustre, options, args): node_add_profile(gen, node, "network", net_uuid) -def add_route(gen, lustre, options, args): +def add_route(gen, lustre, options): """ create a node with a network config """ - if len(args) < 3: - usage() - - node_name = options['node'] - net_type= args[0] - gw = args[1] - lo = args[2] - hi = '' - if len(args) > 3: - hi = args[3] + node_name = get_option(options, 'node') + net_type = get_option(options, 'nettype') + gw = get_option(options, 'gw') + lo = get_option(options, 'lo') + hi = get_option(options, 'hi', '') node = 
findByName(lustre, node_name, "node") if not node: @@ -480,28 +404,15 @@ def add_route(gen, lustre, options, args): rtbl.appendChild(gen.route(net_type, gw, lo, hi)) -def add_mds(gen, lustre, options, args): - fstype = 'extN' - - if len(args) < 1: - usage() - - if options.has_key('node'): - node_name = options['node'] - else: - error("--mds requires a --node argument") - - if options.has_key('fstype'): - fstype = options['fstype'] - - mds_name = new_name(options['mds']) - if mds_name != options['mds']: - warning("name:", options['mds'], "already used. using:", mds_name) - devname = args[0] - if len(args) > 1: - size = args[1] - else: - size = 0 +def add_mds(gen, lustre, options): + node_name = get_option(options, 'node') + mds_orig = get_option(options, 'mds') + mds_name = new_name(mds_orig) + if mds_name != mds_orig: + warning("name:", mds_orig, "already used. using:", mds_name) + devname = get_option(options, 'dev') + size = get_option(options, 'size', 0) + fstype = get_option(options, 'fstype', 'extN') mds_uuid = new_uuid(mds_name) @@ -518,37 +429,25 @@ def add_mds(gen, lustre, options, args): lustre.appendChild(mds) -def add_ost(gen, lustre, options, args): - lovname = '' - obdtype = 'obdfilter' - devname = '' - size = 0 - fstype = 'extN' - - if options.has_key('node'): - node_name = options['node'] - else: - error("--ost requires a --node argument") +def add_ost(gen, lustre, options): + node_name = get_option(options, 'node') + lovname = get_option(options, 'lov', '') + obdtype = get_option(options, 'obdtype', 'obdfilter') - if options.has_key('lov'): - lovname = options['lov'] - - if options.has_key('obdtype'): - obdtype = options['obdtype'] - if options.has_key('fstype'): - fstype = options['fstype'] if obdtype == 'obdecho': fstype = '' + devname = '' + size = 0 + fstype = '' else: - if len(args) < 1: - usage() - devname = args[0] - if len(args) > 1: - size = args[1] + devname = get_option(options, 'dev', '') # can be unset for bluearcs + size = 
get_option(options, 'size', 0) + fstype = get_option(options, 'fstype', 'extN') - obdname = new_name('OBD_'+ node_name) - oscname = new_name('OSC_'+ node_name) - ostname = new_name('OST_'+ node_name) + obdname = get_option(options, 'obd', 'OBD_'+ node_name) + obdname = new_name(obdname) + oscname = new_name('OSC_'+ obdname) + ostname = new_name('OST_'+ obdname) if options.has_key('obduuid'): obd_uuid = options['obduuid'] obd = lookup(lustre, obd_uuid) @@ -582,32 +481,32 @@ def add_ost(gen, lustre, options, args): lustre.appendChild(ost) -# this is generally only used by llecho.sh -def add_osc(gen, lustre, options, args): - """ add the osc to the profile for this node. """ - if len(args) < 1: - usage() - osc_name = args[0] - if options.has_key('node'): - node_name = options['node'] - else: - error("--osc requires a --node argument") - osc_uuid = name2uuid(lustre, osc_name) # either 'osc' or 'lov' +def add_cobd(gen, lustre, options): + node_name = get_option(options, 'node') + name = new_name('COBD_' + node_name) + uuid = new_uuid(name) + + real_name = get_option(options, 'real_obd') + cache_name = get_option(options, 'cache_obd') + # temp hack until merged with b_recover and OSC is removed + real_name = 'OSC_' + real_name + cache_name = 'OSC_' + cache_name + + real_uuid = name2uuid(lustre, real_name, tag='osc') + cache_uuid = name2uuid(lustre, cache_name, tag='osc') + node = findByName(lustre, node_name, "node") - node_add_profile(gen, node, 'osc', osc_uuid) + node_add_profile(gen, node, "cobd", uuid) + cobd = gen.cobd(name, uuid, real_uuid, cache_uuid) + lustre.appendChild(cobd) -#ditto -def add_echo_client(gen, lustre, options, args): +def add_echo_client(gen, lustre, options): """ add an echo client to the profile for this node. 
""" - if len(args) < 1: - usage() - lov_name = args[0] - if options.has_key('node'): - node_name = options['node'] - else: - error("--echo_client requires a --node argument") - node = findByName(lustre, node_name, "node") + node_name = get_option(options, 'node') + lov_name = get_option(options, 'obd') + + node = findByName(lustre, node_name, 'node') echoname = new_name('ECHO_'+ node_name) echo_uuid = new_uuid(echoname) @@ -615,25 +514,26 @@ def add_echo_client(gen, lustre, options, args): lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0) if not lov_uuid: + # remove this hack when the osc uuids are removed + lov_name = 'OSC_' + lov_name lov_uuid = name2uuid(lustre, lov_name, tag='osc', fatal=1) echo = gen.echo_client(echoname, echo_uuid, lov_uuid) lustre.appendChild(echo) -def add_lov(gen, lustre, options, args): +def add_lov(gen, lustre, options): """ create a lov """ - if len(args) < 4: - usage() - name = new_name(options['lov']) - if name != options['lov']: - warning("name:", options['lov'], "already used. using:", name) + lov_orig = get_option(options, 'lov') + name = new_name(lov_orig) + if name != lov_orig: + warning("name:", lov_orig, "already used. 
using:", name) - mds_name = args[0] - stripe_sz = args[1] - stripe_count = args[2] - pattern = args[3] + mds_name = get_option(options, 'mds') + stripe_sz = get_option(options, 'stripe_sz') + stripe_cnt = get_option(options, 'stripe_cnt', 0) + pattern = get_option(options, 'stripe_pattern', 0) uuid = new_uuid(name) ret = findByName(lustre, name, "lov") @@ -641,7 +541,7 @@ def add_lov(gen, lustre, options, args): error("LOV: ", name, " already exists.") mds_uuid = name2uuid(lustre, mds_name, 'mds') - lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_count, pattern) + lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern) lustre.appendChild(lov) # add an lovconfig entry to the mds profile @@ -653,20 +553,17 @@ def add_lov(gen, lustre, options, args): lustre.appendChild(lovconfig) - -def add_mtpt(gen, lustre, options, args): +def add_mtpt(gen, lustre, options): """ create mtpt on a node """ - if len(args) < 3: - usage() + node_name = get_option(options, 'node') - if options.has_key('node'): - node_name = options['node'] - else: - error("--mtpt requires a --node argument") - - path = args[0] - mds_name = args[1] - lov_name = args[2] + path = get_option(options, 'path') + mds_name = get_option(options, 'mds') + lov_name = get_option(options, 'lov', '') + if lov_name == '': + lov_name = get_option(options, 'obd', '') + if lov_name == '': + error("--add mtpt requires either --lov lov_name or --obd obd_name") name = new_name('MNT_'+ node_name) @@ -677,6 +574,8 @@ def add_mtpt(gen, lustre, options, args): mds_uuid = name2uuid(lustre, mds_name, tag='mds') lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0) if not lov_uuid: + # remove this hack when OSC is removed + lov_name = 'OSC_' + lov_name lov_uuid = name2uuid(lustre, lov_name, tag='osc', fatal=1) uuid = new_uuid(name) @@ -687,57 +586,125 @@ def add_mtpt(gen, lustre, options, args): node_add_profile(gen, node, "mountpoint", uuid) lustre.appendChild(mtpt) +def add_oscref(gen, lustre, options): + 
""" create mtpt on a node """ + node_name = get_option(options, 'node') + osc_name = get_option(options, 'osc') + + osc_uuid = name2uuid(lustre, osc_name, tag='osc') + node = findByName(lustre, node_name, "node") + if not node: + error('node:', node_name, "not found") + node_add_profile(gen, node, "osc",osc_uuid) ############################################################ # Command line processing # +class OptionError (exceptions.Exception): + def __init__(self, args): + self.args = args + +def get_option(options, tag, default = None): + """Look for tag in options hash and return the value if set. If not + set, then if return default it is set, otherwise exception.""" + if options.has_key(tag): + return options[tag] + elif default != None: + return default + else: + raise OptionError("--add %s requires --%s value" % (options['add'], tag)) + # this exception should print an error like '--add blah requires --<tag> value' + +def get_option_int(options, tag, default = None): + """Return an integer option. 
Raise exception if the value is not an int""" + val = get_option(options, tag, default) + return int(val) + def parse_cmdline(argv): short_opts = "ho:i:m:" - long_opts = ["ost", "osc", "mtpt", "lov=", "node=", "mds=", "net", - "echo_client", "tcpbuf=", - "route", "router", "merge=", "format", "reformat", "output=", - "obdtype=", "fstype=", "obduuid=", "in=", "help", "batch="] + long_opts = ["add=", "node=", "nettype=", "nid=", "tcpbuf=", "port=", + "echo_client=", "stripe_sz=", "stripe_cnt=", "stripe_pattern=", + "mds=", "route", "router", "merge=", "format", "reformat", "output=", + "dev=", "size=", "obd=", "obdtype=", "obduuid=", "in=", + "path=", "help", "batch=", "lov=", "gw=", "lo=", "hi=", + "oscref", "osc=", "real_obd=", "cache_obd=", "fstype="] opts = [] args = [] options = {} try: opts, args = getopt.getopt(argv, short_opts, long_opts) - except getopt.error: - print "invalid opt" - usage() + except getopt.error, e: + panic(string.join(sys.argv), e) for o, a in opts: # Commands to create new devices - if o == "--ost": - options['ost'] = 1 - if o == "--osc": - options['osc'] = 1 - if o == "--echo_client": - options['echo_client'] = 1 + if o == "--add": + options['add'] = a + + if o == "--node": + options['node'] = a + + # devices names + if o == "--lov": + options['lov'] = a if o == "--mds": options['mds'] = a + if o == "--obd": + options['obd'] = a + + # network options + if o == "--nid": + options['nid'] = a + if o == "--nettype": + options['nettype'] = a if o == "--net": - options['net'] = 1 + options[''] = a + if o == "--tcpbuf": + options['tcpbuf'] = a + if o == "--port": + options['port'] = a if o == "--mtpt": options['mtpt'] = 1 - if o == "--node": - options['node'] = a if o == "--route": options['route'] = 1 if o == "--router": options['router'] = 1 - if o == "--lov": - options['lov'] = a - # Options for commands + # ost options + if o == "--dev": + options['dev'] = a + if o == "--size": + options['size'] = a + if o == "--path": + options['path'] = a 
+ if o == "--osc": + options['osc'] = a if o == "--obdtype": options['obdtype'] = a if o == "--fstype": options['fstype'] = a if o == "--obduuid": options['obduuid'] = a - if o == "--tcpbuf": - options['tcpbuf'] = a + + # lov options + if o == "--stripe_sz": + options['stripe_sz'] = a + if o == "--stripe_cnt": + options['stripe_cnt'] = a + if o == "--stripe_pattern": + options['stripe_pattern'] = a + if o == "--gw": + options['gw'] = a + if o == "--lo": + options['lo'] = a + if o == "--hi": + options['hi'] = a + + # cobd + if o == "--cache_obd": + options['cache_obd'] = a + if o == "--real_obd": + options['real_obd'] = a # lmc options if o in ("-h", "--help"): @@ -777,31 +744,43 @@ class chrono: str = '%s: %g secs' % (msg, d) print str + + ############################################################ # Main # + +def add(devtype, gen, lustre, options): + if devtype == 'net': + add_net(gen, lustre, options) + elif devtype =='osc': + add_osc(gen, lustre, options) + elif devtype == 'mtpt': + add_mtpt(gen, lustre, options) + elif devtype == 'mds': + add_mds(gen, lustre, options) + elif devtype == 'ost': + add_ost(gen, lustre, options) + elif devtype == 'lov': + add_lov(gen, lustre, options) + elif devtype == 'route': + add_route(gen, lustre, options) + elif devtype == 'node': + add_node(gen, lustre, options) + elif devtype == 'echo_client': + add_echo_client(gen, lustre, options) + elif devtype == 'oscref': + add_oscref(gen, lustre, options) + elif devtype == 'cobd': + add_cobd(gen, lustre, options) + else: + error("unknown device type:", devtype) + def do_command(gen, lustre, options, args): - if options.has_key('ost'): - add_ost(gen, lustre, options, args) - elif options.has_key('osc'): - add_osc(gen, lustre, options, args) - elif options.has_key('echo_client'): - add_echo_client(gen, lustre, options, args) - elif options.has_key('mtpt'): - add_mtpt(gen, lustre, options, args) - elif options.has_key('mds'): - add_mds(gen, lustre, options, args) - elif 
options.has_key('net'): - add_net(gen, lustre, options, args) - elif options.has_key('lov'): - add_lov(gen, lustre, options, args) - elif options.has_key('route'): - add_route(gen, lustre, options, args) - elif options.has_key('node'): - add_node(gen, lustre, options, args) + if options.has_key('add'): + add(options['add'], gen, lustre, options) else: - print "Missing command" - usage() + error("Missing command") def main(): options, args = parse_cmdline(sys.argv[1:]) @@ -835,9 +814,15 @@ def main(): fp.close() for cmd in batchCommands: options, args = parse_cmdline(string.split(cmd)) - do_command(gen, lustre, options, args) + try: + do_command(gen, lustre, options, args) + except OptionError, e: + panic(cmd, e) else: - do_command(gen, lustre, options, args) + try: + do_command(gen, lustre, options, args) + except OptionError, e: + panic(string.join(sys.argv),e) if outFile == '-': PrettyPrint(doc) -- GitLab