Skip to content
Snippets Groups Projects
Commit 255a6ea1 authored by Yang Sheng's avatar Yang Sheng
Browse files

Branch HEAD

b=13397
i=adilger
i=johann

Add support for vanilla-2.6.22 kernel.
parent ae076da0
No related branches found
No related tags found
No related merge requests found
Showing
with 10811 additions and 0 deletions
tbd Cluster File Systems, Inc. <info@clusterfs.com>
* version 3.0.4
Severity : normal
Bugzilla : 13397
Description: Add support for vanilla-2.6.22 kernel.
--------------------------------------------------------------------------------
2007-10-26 Cluster File Systems, Inc. <info@clusterfs.com>
......
......@@ -97,6 +97,7 @@ case $LINUXRELEASE in
2.6.18-*el5*) LDISKFS_SERIES="2.6-rhel5.series";;
2.6.18-*prep*) LDISKFS_SERIES="2.6-rhel5.series";;
2.6.18*) LDISKFS_SERIES="2.6.18-vanilla.series";;
2.6.22*) LDISKFS_SERIES="2.6.22-vanilla.series";;
*) AC_MSG_WARN([Unknown kernel version $LINUXRELEASE, fix ldiskfs/configure.ac])
esac
AC_MSG_RESULT([$LDISKFS_SERIES])
......
This diff is collapsed.
Index: linux-2.6.18/fs/ext3/ioctl.c
===================================================================
--- linux-2.6.18.orig/fs/ext3/ioctl.c
+++ linux-2.6.18/fs/ext3/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <linux/namei.h>
+#include "fiemap.h"
int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
unsigned long arg)
@@ -272,6 +272,9 @@ flags_err:
return err;
}
+ case EXT3_IOC_FIEMAP: {
+ return ext3_fiemap(inode, filp, cmd, arg);
+ }
default:
Index: linux-2.6.18/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.18.orig/include/linux/ext3_fs.h
+++ linux-2.6.18/include/linux/ext3_fs.h
@@ -249,7 +249,6 @@ struct ext3_new_group_data {
__u32 free_blocks_count;
};
-
/*
* ioctl commands
*/
@@ -257,15 +256,16 @@ struct ext3_new_group_data {
#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
-#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
+#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
+#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
+#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
+#define EXT3_IOC_FIEMAP _IOWR('f', 10, struct fiemap)
#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD_DEBUG
#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
-#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
-#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
/*
* ioctl commands in 32 bit emulation
@@ -1117,6 +1117,8 @@ ext3_get_blocks_wrap(handle_t *handle, s
bh->b_size = (ret << inode->i_blkbits);
return ret;
}
+extern int ext3_fiemap(struct inode *, struct file *, unsigned int,
+ unsigned long);
#endif /* __KERNEL__ */
Index: linux-2.6.18/include/linux/ext3_extents.h
===================================================================
--- linux-2.6.18.orig/include/linux/ext3_extents.h
+++ linux-2.6.18/include/linux/ext3_extents.h
@@ -142,8 +142,9 @@ struct ext3_ext_path {
* callback must return valid extent (passed or newly created)
*/
typedef int (*ext_prepare_callback)(struct inode *, struct ext3_ext_path *,
- struct ext3_ext_cache *,
- void *);
+ struct ext3_ext_cache *,
+ struct ext3_extent *, void *);
+#define HAVE_EXT_PREPARE_CB_EXTENT
#define EXT_CONTINUE 0
#define EXT_BREAK 1
@@ -152,6 +152,26 @@ typedef int (*ext_prepare_callback)(stru
#define EXT_MAX_BLOCK 0xffffffff
+/*
+ * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
+ * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
+ * MSB of ee_len field in the extent datastructure to signify if this
+ * particular extent is an initialized extent or an uninitialized (i.e.
+ * preallocated).
+ * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
+ * uninitialized extent.
+ * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
+ * uninitialized one. In other words, if MSB of ee_len is set, it is an
+ * uninitialized extent with only one special scenario when ee_len = 0x8000.
+ * In this case we can not have an uninitialized extent of zero length and
+ * thus we make it as a special case of initialized extent with 0x8000 length.
+ * This way we get better extent-to-group alignment for initialized extents.
+ * Hence, the maximum number of blocks we can have in an *initialized*
+ * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ */
+#define EXT_INIT_MAX_LEN (1UL << 15)
+#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
+
#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
#define EXT_HDR_GEN_BITS 24
#define EXT_HDR_GEN_MASK ((1 << EXT_HDR_GEN_BITS) - 1)
@@ -219,6 +239,13 @@ ext3_ext_invalidate_cache(struct inode *
EXT3_I(inode)->i_cached_extent.ec_type = EXT3_EXT_CACHE_NO;
}
+static inline int ext3_ext_is_uninitialized(struct ext3_extent *ext)
+{
+	unsigned int raw_len = le16_to_cpu(ext->ee_len);
+
+	/*
+	 * A length of exactly 0x8000 (EXT_INIT_MAX_LEN) still denotes an
+	 * initialized extent; only strictly larger values are uninitialized.
+	 */
+	return raw_len > EXT_INIT_MAX_LEN;
+}
+
+
extern int ext3_ext_search_left(struct inode *, struct ext3_ext_path *, unsigned long *, unsigned long *);
extern int ext3_ext_search_right(struct inode *, struct ext3_ext_path *, unsigned long *, unsigned long *);
extern int ext3_extent_tree_init(handle_t *, struct inode *);
Index: linux-2.6.18/fs/ext3/extents.c
===================================================================
--- linux-2.6.18.orig/fs/ext3/extents.c
+++ linux-2.6.18/fs/ext3/extents.c
@@ -42,7 +42,7 @@
#include <linux/slab.h>
#include <linux/ext3_extents.h>
#include <asm/uaccess.h>
-
+#include "fiemap.h"
static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
{
@@ -1477,7 +1477,7 @@ int ext3_ext_walk_space(struct inode *in
}
BUG_ON(cbex.ec_len == 0);
- err = func(inode, path, &cbex, cbdata);
+ err = func(inode, path, &cbex, ex, cbdata);
ext3_ext_drop_refs(path);
if (err < 0)
@@ -2289,6 +2289,143 @@ int ext3_ext_writepage_trans_blocks(stru
return needed;
}
+struct fiemap_internal {
+ struct fiemap *fiemap_s; /* kernel copy of the caller's struct fiemap request */
+ struct fiemap_extent fm_extent; /* scratch record filled per extent, then copied to user */
+ size_t tot_mapping_len; /* running sum of fe_length over reported extents */
+ char *cur_ext_ptr; /* user address of the next fiemap_extent slot to fill */
+ int current_extent; /* number of extents counted so far */
+ int err; /* set to -EFAULT by the callback when copy_to_user fails */
+};
+
+/*
+ * Callback function called for each extent to gather fiemap information.
+ *
+ * @inode: inode being mapped
+ * @path:  extent tree path supplied by the walker (not used here)
+ * @newex: extent or gap currently being reported
+ * @ex:    extent passed by the walker for @newex; a NULL @ex is treated as
+ *         "not uninitialized" (NOTE(review): presumably the walker passes
+ *         NULL when a gap has no backing extent - confirm against
+ *         ext3_ext_walk_space)
+ * @data:  struct fiemap_internal describing the request and user buffer
+ *
+ * Returns EXT_CONTINUE to keep walking or EXT_BREAK to stop.  A failed
+ * copy to user space is reported through fiemap_i->err (-EFAULT).
+ */
+int ext3_ext_fiemap_cb(struct inode *inode, struct ext3_ext_path *path,
+		       struct ext3_ext_cache *newex, struct ext3_extent *ex,
+		       void *data)
+{
+	struct fiemap_internal *fiemap_i = data;
+	struct fiemap *fiemap_s = fiemap_i->fiemap_s;
+	struct fiemap_extent *fm_extent = &fiemap_i->fm_extent;
+	int current_extent = fiemap_i->current_extent;
+	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+	/*
+	 * Byte offset just past this extent.  Widen before adding so the
+	 * block sum cannot wrap in 32 bits before it is shifted.
+	 */
+	u64 end_off = ((u64)newex->ec_block + newex->ec_len) << blksize_bits;
+	/* Never dereference a missing extent. */
+	int uninit = ex != NULL && ext3_ext_is_uninitialized(ex);
+
+	/*
+	 * ext3_ext_walk_space returns a hole for extents that have not been
+	 * allocated yet.
+	 */
+	if (end_off >= inode->i_size && !uninit &&
+	    newex->ec_type == EXT3_EXT_CACHE_GAP)
+		return EXT_BREAK;
+
+	/*
+	 * We only need to return number of extents.
+	 */
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_NUM_EXTENTS)
+		goto count_extents;
+
+	/* The user buffer is full. */
+	if (current_extent >= fiemap_s->fm_extent_count)
+		return EXT_BREAK;
+
+	memset(fm_extent, 0, sizeof(*fm_extent));
+	fm_extent->fe_offset = (__u64)newex->ec_start << blksize_bits;
+	fm_extent->fe_length = (__u64)newex->ec_len << blksize_bits;
+	fiemap_i->tot_mapping_len += fm_extent->fe_length;
+
+	if (newex->ec_type == EXT3_EXT_CACHE_GAP)
+		fm_extent->fe_flags |= FIEMAP_EXTENT_HOLE;
+
+	if (uninit)
+		fm_extent->fe_flags |= (FIEMAP_EXTENT_DELALLOC |
+					FIEMAP_EXTENT_UNMAPPED);
+
+	/*
+	 * Mark this fiemap_extent as FIEMAP_EXTENT_EOF if it's past the end
+	 * of file.
+	 */
+	if (end_off >= inode->i_size)
+		fm_extent->fe_flags |= FIEMAP_EXTENT_EOF;
+
+	if (copy_to_user(fiemap_i->cur_ext_ptr, fm_extent,
+			 sizeof(struct fiemap_extent))) {
+		fiemap_i->err = -EFAULT;
+		return EXT_BREAK;
+	}
+	fiemap_i->cur_ext_ptr += sizeof(struct fiemap_extent);
+
+count_extents:
+	fiemap_i->current_extent++;
+
+	/*
+	 * Stop if we are beyond requested mapping size but return complete last
+	 * extent.
+	 */
+	if (end_off >= fiemap_s->fm_length)
+		return EXT_BREAK;
+
+	return EXT_CONTINUE;
+}
+
+/*
+ * EXT3_IOC_FIEMAP handler: report the extent layout of @inode to user space.
+ *
+ * @inode: file to map; must have EXT3_EXTENTS_FL set
+ * @filp:  open file, used only when FIEMAP_FLAG_SYNC is requested
+ * @cmd:   ioctl command (unused)
+ * @arg:   user address of a struct fiemap followed by fm_extent_count
+ *         struct fiemap_extent slots
+ *
+ * Returns 0 on success, -EOPNOTSUPP for non-extent files or unknown
+ * incompatible flags, -ENOMEM if the request copy cannot be allocated,
+ * -EFAULT if user memory cannot be read or written, or the error returned
+ * by ext3_ext_walk_space().
+ */
+int ext3_fiemap(struct inode *inode, struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct fiemap *fiemap_s;
+	struct fiemap_internal fiemap_i;
+	ext3_fsblk_t start_blk;
+	__u32 requested;
+	int err;
+
+	if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
+		return -EOPNOTSUPP;
+
+	fiemap_s = kmalloc(sizeof(*fiemap_s), GFP_KERNEL);
+	if (fiemap_s == NULL)
+		return -ENOMEM;
+
+	/* From here on every exit goes through out_free to release fiemap_s. */
+	if (copy_from_user(fiemap_s, (struct fiemap __user *)arg,
+			   sizeof(*fiemap_s))) {
+		err = -EFAULT;
+		goto out_free;
+	}
+
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_INCOMPAT) {
+		err = -EOPNOTSUPP;
+		goto out_free;
+	}
+
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC)
+		ext3_sync_file(filp, filp->f_dentry, 1);
+
+	/* Remember the buffer capacity before the field is reused for output. */
+	requested = fiemap_s->fm_extent_count;
+
+	start_blk = (fiemap_s->fm_start + inode->i_sb->s_blocksize - 1) >>
+			inode->i_sb->s_blocksize_bits;
+	fiemap_i.fiemap_s = fiemap_s;
+	fiemap_i.tot_mapping_len = 0;
+	fiemap_i.cur_ext_ptr = (char *)(arg + sizeof(*fiemap_s));
+	fiemap_i.current_extent = 0;
+	fiemap_i.err = 0;
+
+	/*
+	 * Walk the extent tree gathering extent information
+	 */
+	mutex_lock(&EXT3_I(inode)->truncate_mutex);
+	err = ext3_ext_walk_space(inode, start_blk, EXT_MAX_BLOCK - start_blk,
+				  ext3_ext_fiemap_cb, &fiemap_i);
+	mutex_unlock(&EXT3_I(inode)->truncate_mutex);
+	if (err)
+		goto out_free;
+
+	/* The callback signals a failed user copy only through ->err. */
+	if (fiemap_i.err) {
+		err = fiemap_i.err;
+		goto out_free;
+	}
+
+	fiemap_s->fm_extent_count = fiemap_i.current_extent;
+	fiemap_s->fm_length = fiemap_i.tot_mapping_len;
+
+	/*
+	 * Flag the final reported extent as FIEMAP_EXTENT_LAST and rewrite it
+	 * in the user buffer (cur_ext_ptr points just past it).  Skip this
+	 * when the buffer was filled completely, because more extents may
+	 * follow that did not fit.
+	 * NOTE(review): the walk can also stop once fm_length is covered;
+	 * confirm whether LAST should be reported in that case.
+	 */
+	if (fiemap_i.current_extent != 0 &&
+	    fiemap_i.current_extent < requested &&
+	    !(fiemap_s->fm_flags & FIEMAP_FLAG_NUM_EXTENTS)) {
+		fiemap_i.fm_extent.fe_flags |= FIEMAP_EXTENT_LAST;
+		if (copy_to_user(fiemap_i.cur_ext_ptr -
+					sizeof(struct fiemap_extent),
+				 &fiemap_i.fm_extent,
+				 sizeof(struct fiemap_extent))) {
+			err = -EFAULT;
+			goto out_free;
+		}
+	}
+
+	/* copy_to_user returns the bytes left uncopied, not an errno. */
+	if (copy_to_user((void *)arg, fiemap_s, sizeof(*fiemap_s)))
+		err = -EFAULT;
+
+out_free:
+	kfree(fiemap_s);
+	return err;
+}
+
EXPORT_SYMBOL(ext3_mark_inode_dirty);
EXPORT_SYMBOL(ext3_ext_invalidate_cache);
EXPORT_SYMBOL(ext3_ext_insert_extent);
Index: linux-2.6.18/fs/ext3/fiemap.h
===================================================================
--- /dev/null
+++ linux-2.6.18/fs/ext3/fiemap.h
@@ -0,0 +1,49 @@
+/*
+ * linux/fs/ext3/fiemap.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Kalpak Shah <kalpak@clusterfs.com>
+ */
+
+#ifndef _LINUX_EXT3_FIEMAP_H
+#define _LINUX_EXT3_FIEMAP_H
+
+struct fiemap_extent {
+ __u64 fe_offset; /* offset in bytes for the start of the extent */
+ __u64 fe_length; /* length in bytes for the extent */
+ __u32 fe_flags; /* returned FIEMAP_EXTENT_* flags for the extent */
+ __u32 fe_lun; /* logical device number for extent (starting at 0);
+ * ext3_ext_fiemap_cb leaves it zero */
+};
+
+/*
+ * fiemap is not ext3-specific and should be moved into fs.h eventually.
+ */
+
+struct fiemap {
+ __u64 fm_start; /* logical starting byte offset (in/out) */
+ __u64 fm_length; /* logical length of map (in/out) */
+ __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
+ __u32 fm_extent_count; /* number of extents in fm_extents (in/out) */
+ __u64 fm_unused; /* not interpreted by ext3_fiemap() */
+ struct fiemap_extent fm_extents[0]; /* extent records follow the header in
+ * the caller's buffer */
+};
+
+#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
+#define FIEMAP_FLAG_HSM_READ 0x00000002 /* get data from HSM before map */
+#define FIEMAP_FLAG_NUM_EXTENTS 0x00000004 /* return only number of extents */
+#define FIEMAP_FLAG_INCOMPAT 0xff000000 /* error for unknown flags in here */
+
+#define FIEMAP_EXTENT_HOLE 0x00000001 /* has no data or space allocation */
+#define FIEMAP_EXTENT_UNWRITTEN 0x00000002 /* space allocated, but no data */
+#define FIEMAP_EXTENT_UNMAPPED 0x00000004 /* has data but no space allocation*/
+#define FIEMAP_EXTENT_ERROR 0x00000008 /* mapping error, errno in fe_offset
+ * NOTE(review): comment used to say
+ * fe_start, a field struct
+ * fiemap_extent does not have */
+#define FIEMAP_EXTENT_NO_DIRECT 0x00000010 /* cannot access data directly */
+#define FIEMAP_EXTENT_LAST 0x00000020 /* last extent in the file */
+#define FIEMAP_EXTENT_DELALLOC 0x00000040 /* has data but not yet written,
+ * must have EXTENT_UNKNOWN set
+ * NOTE(review): no EXTENT_UNKNOWN
+ * flag is defined in this header;
+ * presumably EXTENT_UNMAPPED is
+ * meant - confirm */
+#define FIEMAP_EXTENT_SECONDARY 0x00000080 /* data (also) in secondary storage,
+ * not in primary if EXTENT_UNKNOWN*/
+#define FIEMAP_EXTENT_EOF 0x00000100 /* if fm_start+fm_len is beyond EOF*/
+
+#endif /* _LINUX_EXT3_FIEMAP_H */
Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/ialloc.c 2005-05-16 14:10:54.000000000 -0600
+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/ialloc.c 2005-05-16 14:18:29.000000000 -0600
@@ -352,13 +352,17 @@
return -1;
}
-static int find_group_other(struct super_block *sb, struct inode *parent)
+static int find_group_other(struct super_block *sb, struct inode *parent,
+ int mode)
{
int parent_group = EXT3_I(parent)->i_block_group;
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
int ngroups = EXT3_SB(sb)->s_groups_count;
struct ext3_group_desc *desc;
struct buffer_head *bh;
int group, i;
+ int best_group = -1;
+ int avefreeb, freeb, best_group_freeb = 0;
/*
* Try to place the inode in its parent directory
@@ -366,9 +370,9 @@
group = parent_group;
desc = ext3_get_group_desc (sb, group, &bh);
if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
- le16_to_cpu(desc->bg_free_blocks_count))
+ (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count)))
return group;
-
+ avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups;
/*
* We're going to place this inode in a different blockgroup from its
* parent. We want to cause files in a common directory to all land in
@@ -381,33 +385,47 @@
group = (group + parent->i_ino) % ngroups;
/*
- * Use a quadratic hash to find a group with a free inode and some free
- * blocks.
+ * Use a quadratic hash to find a group with a free inode and
+ * average number of free blocks.
*/
for (i = 1; i < ngroups; i <<= 1) {
group += i;
if (group >= ngroups)
group -= ngroups;
desc = ext3_get_group_desc (sb, group, &bh);
- if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
- le16_to_cpu(desc->bg_free_blocks_count))
+ if (!desc || !desc->bg_free_inodes_count)
+ continue;
+ if (!S_ISREG(mode))
+ return group;
+ if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb)
return group;
}
/*
- * That failed: try linear search for a free inode, even if that group
- * has no free blocks.
+ * That failed: start from last group used to allocate inode
+ * try linear search for a free inode and preferably
+ * free blocks.
*/
- group = parent_group;
+ group = sbi->s_last_alloc_group;
+ if (group == -1)
+ group = parent_group;
+
for (i = 0; i < ngroups; i++) {
if (++group >= ngroups)
group = 0;
desc = ext3_get_group_desc (sb, group, &bh);
- if (desc && le16_to_cpu(desc->bg_free_inodes_count))
- return group;
+ if (!desc || !desc->bg_free_inodes_count)
+ continue;
+ freeb = le16_to_cpu(desc->bg_free_blocks_count);
+ if (freeb > best_group_freeb) {
+ best_group_freeb = freeb;
+ best_group = group;
+ if (freeb >= avefreeb || !S_ISREG(mode))
+ break;
+ }
}
-
- return -1;
+ sbi->s_last_alloc_group = best_group;
+ return best_group;
}
/*
@@ -454,7 +472,7 @@
else
group = find_group_orlov(sb, dir);
} else
- group = find_group_other(sb, dir);
+ group = find_group_other(sb, dir, mode);
err = -ENOSPC;
if (group == -1)
Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/super.c
===================================================================
--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/super.c 2005-05-16 14:10:54.000000000 -0600
+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/super.c 2005-05-16 14:17:14.000000000 -0600
@@ -1297,6 +1297,7 @@
percpu_counter_init(&sbi->s_dirs_counter);
bgl_lock_init(&sbi->s_blockgroup_lock);
+ sbi->s_last_alloc_group = -1;
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/ext3_fs_sb.h 2005-05-16 14:10:54.000000000 -0600
+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/ext3_fs_sb.h 2005-05-16 14:17:14.000000000 -0600
@@ -59,6 +59,8 @@
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct blockgroup_lock s_blockgroup_lock;
+ /* Last group used to allocate inode */
+ int s_last_alloc_group;
/* root of the per fs reservation window tree */
spinlock_t s_rsv_window_lock;
This diff is collapsed.
This diff is collapsed.
Index: linux-2.6.18/fs/ext3/super.c
===================================================================
--- linux-2.6.18.orig/fs/ext3/super.c
+++ linux-2.6.18/fs/ext3/super.c
@@ -35,6 +35,8 @@
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
+#include <linux/kthread.h>
+#include <linux/utsname.h>
#include <asm/uaccess.h>
@@ -435,6 +437,9 @@ static void ext3_put_super (struct super
invalidate_bdev(sbi->journal_bdev, 0);
ext3_blkdev_remove(sbi);
}
+ if (sbi->s_mmp_tsk)
+ kthread_stop(sbi->s_mmp_tsk);
+
sb->s_fs_info = NULL;
kfree(sbi);
return;
@@ -1528,6 +1533,313 @@ static ext3_fsblk_t descriptor_loc(struc
return (has_super + ext3_group_first_block_no(sb, bg));
}
+/*
+ * Submit the MMP buffer with WRITE_SYNC, so it reaches the disk sooner, and
+ * wait for the I/O to complete.
+ *
+ * Returns 0 on success, or 1 if the buffer is not uptodate once the write
+ * has finished.
+ */
+static int write_mmp_block(struct buffer_head *bh)
+{
+	int write_failed;
+
+	mark_buffer_dirty(bh);
+	lock_buffer(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	/* Extra reference taken for the in-flight I/O. */
+	get_bh(bh);
+	submit_bh(WRITE_SYNC, bh);
+	wait_on_buffer(bh);
+
+	write_failed = !buffer_uptodate(bh);
+	return unlikely(write_failed) ? 1 : 0;
+}
+
+/*
+ * Fetch the MMP block.  Any buffer already held in *bh has its uptodate
+ * flag cleared and is released first, so the contents must come from disk
+ * rather than from the cached copy.
+ *
+ * On return *bh holds the newly read buffer (NULL if the read failed).
+ * Returns 0 on success, -EIO if the block cannot be read, or -EINVAL if
+ * the block does not carry EXT3_MMP_MAGIC.
+ */
+static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
+			  unsigned long mmp_block)
+{
+	struct buffer_head *stale = *bh;
+	struct buffer_head *fresh;
+	struct mmp_struct *mmp;
+
+	if (stale != NULL)
+		clear_buffer_uptodate(stale);
+	brelse(stale);
+
+	fresh = sb_bread(sb, mmp_block);
+	*bh = fresh;
+	if (fresh == NULL) {
+		ext3_warning(sb, __FUNCTION__,
+			     "Error while reading MMP block %lu", mmp_block);
+		return -EIO;
+	}
+
+	mmp = (struct mmp_struct *)fresh->b_data;
+	return le32_to_cpu(mmp->mmp_magic) == EXT3_MMP_MAGIC ? 0 : -EINVAL;
+}
+
+/*
+ * Dump as much information as possible to help the admin.
+ *
+ * @sb:       filesystem the warning is issued for
+ * @mmp:      MMP block contents to report (as read from disk)
+ * @function: caller name forwarded to ext3_warning()
+ * @msg:      plain message text; it is printed verbatim, never interpreted
+ *            as a format string
+ */
+static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
+			 const char *function, const char *msg)
+{
+	/* Pass msg as an argument so a stray '%' in it cannot be expanded. */
+	ext3_warning(sb, function, "%s", msg);
+	/*
+	 * The name fields are fixed-size arrays taken from disk and may lack
+	 * a terminating NUL, so bound each print by its array size.
+	 */
+	ext3_warning(sb, function, "MMP failure info: last update time: %llu, "
+		     "last update node: %.*s, last update device: %.*s\n",
+		     (unsigned long long)le64_to_cpu(mmp->mmp_time),
+		     (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
+		     (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
+}
+
+/*
+ * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
+ *
+ * Kernel thread body started by ext3_multi_mount_protect().  @data is the
+ * super_block.  Each iteration bumps mmp_seq, stamps the time and rewrites
+ * the MMP block.  The thread stops on kthread_stop(), or on its own when
+ * the MMP feature is cleared, the filesystem becomes read-only, or the MMP
+ * block cannot be re-read; in those self-exit cases s_mmp_tsk is cleared.
+ * On kthread_stop() a final block with EXT3_MMP_SEQ_CLEAN is written.
+ *
+ * Returns the status of the last MMP block read or write.
+ */
+static int kmmpd(void *data)
+{
+	struct super_block *sb = (struct super_block *) data;
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	struct buffer_head *bh = NULL;
+	struct mmp_struct *mmp;
+	unsigned long mmp_block;
+	u32 seq = 0;
+	unsigned long failed_writes = 0;
+	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
+	unsigned mmp_check_interval;
+	unsigned long update_jiffies;
+	unsigned long last_update_time;
+	unsigned long diff;
+	int retval;
+
+	/*
+	 * A zero interval in the superblock would make the loop below spin
+	 * without ever sleeping; fall back to the default.
+	 */
+	if (mmp_update_interval == 0)
+		mmp_update_interval = EXT3_MMP_UPDATE_INTERVAL;
+	update_jiffies = (unsigned long)mmp_update_interval * HZ;
+
+	mmp_block = le64_to_cpu(es->s_mmp_block);
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+
+	mmp = (struct mmp_struct *)(bh->b_data);
+	mmp->mmp_time = cpu_to_le64(get_seconds());
+	/*
+	 * Start with the higher mmp_check_interval and reduce it if
+	 * the MMP block is being updated on time.
+	 */
+	mmp_check_interval = max(5 * mmp_update_interval,
+				 EXT3_MMP_MIN_CHECK_INTERVAL);
+	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
+	bdevname(bh->b_bdev, mmp->mmp_bdevname);
+
+	down_read(&uts_sem);
+	memcpy(mmp->mmp_nodename, utsname()->nodename,
+	       sizeof(mmp->mmp_nodename));
+	up_read(&uts_sem);
+
+	while (!kthread_should_stop()) {
+		if (++seq > EXT3_MMP_SEQ_MAX)
+			seq = 1;
+
+		mmp->mmp_seq = cpu_to_le32(seq);
+		mmp->mmp_time = cpu_to_le64(get_seconds());
+		last_update_time = jiffies;
+
+		retval = write_mmp_block(bh);
+		/*
+		 * Don't spew too many error messages. Print one every
+		 * (s_mmp_update_interval * 60) seconds.  Count every failed
+		 * write, not only the reported ones, or the counter would
+		 * stick at 1 and the message would never be repeated.
+		 */
+		if (retval) {
+			if ((failed_writes % 60) == 0)
+				ext3_error(sb, __FUNCTION__,
+					   "Error writing to MMP block");
+			failed_writes++;
+		}
+
+		if (!(le32_to_cpu(es->s_feature_incompat) &
+		      EXT3_FEATURE_INCOMPAT_MMP)) {
+			ext3_warning(sb, __FUNCTION__, "kmmpd being stopped "
+				     "since MMP feature has been disabled.");
+			EXT3_SB(sb)->s_mmp_tsk = NULL;
+			goto failed;
+		}
+
+		if (sb->s_flags & MS_RDONLY) {
+			ext3_warning(sb, __FUNCTION__, "kmmpd being stopped "
+				     "since filesystem has been remounted as "
+				     "readonly.");
+			EXT3_SB(sb)->s_mmp_tsk = NULL;
+			goto failed;
+		}
+
+		/*
+		 * Sleep for the rest of the update interval.  The guard and
+		 * the sleep length use the same value, so the unsigned
+		 * subtraction cannot underflow.
+		 */
+		diff = jiffies - last_update_time;
+		if (diff < update_jiffies)
+			schedule_timeout_interruptible(update_jiffies - diff);
+
+		/*
+		 * We need to make sure that more than mmp_check_interval
+		 * seconds have not passed since writing. If that has happened
+		 * we need to check if the MMP block is as we left it.
+		 */
+		diff = jiffies - last_update_time;
+		if (diff > mmp_check_interval * HZ) {
+			struct buffer_head *bh_check = NULL;
+			struct mmp_struct *mmp_check;
+
+			/*
+			 * NOTE(review): bh_check starts out NULL, so
+			 * read_mmp_block() does not clear any uptodate flag
+			 * here; presumably sb_bread() may return the cached
+			 * buffer instead of re-reading the disk - confirm.
+			 */
+			retval = read_mmp_block(sb, &bh_check, mmp_block);
+			if (retval) {
+				/* bh_check is still held on -EINVAL. */
+				brelse(bh_check);
+				EXT3_SB(sb)->s_mmp_tsk = NULL;
+				goto failed;
+			}
+
+			mmp_check = (struct mmp_struct *)(bh_check->b_data);
+			if (mmp->mmp_time != mmp_check->mmp_time ||
+			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
+				   sizeof(mmp->mmp_nodename)))
+				dump_mmp_msg(sb, mmp_check, __FUNCTION__,
+					     "Error while updating MMP info. "
+					     "The filesystem seems to have "
+					     "been multiply mounted.");
+
+			put_bh(bh_check);
+		}
+
+		/*
+		 * Adjust the mmp_check_interval depending on how much time
+		 * it took for the MMP block to be written.
+		 */
+		mmp_check_interval = max(5 * diff / HZ,
+				(unsigned long) EXT3_MMP_MIN_CHECK_INTERVAL);
+		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
+	}
+
+	/*
+	 * Unmount seems to be clean.
+	 */
+	mmp->mmp_seq = cpu_to_le32(EXT3_MMP_SEQ_CLEAN);
+	mmp->mmp_time = cpu_to_le64(get_seconds());
+
+	retval = write_mmp_block(bh);
+
+failed:
+	brelse(bh);
+	return retval;
+}
+
+/*
+ * Draw random 32-bit values until one falls within the valid sequence
+ * range, i.e. is not greater than EXT3_MMP_SEQ_MAX, and return it.
+ */
+static unsigned int mmp_new_seq(void)
+{
+	u32 candidate;
+
+	for (;;) {
+		get_random_bytes(&candidate, sizeof(u32));
+		if (candidate <= EXT3_MMP_SEQ_MAX)
+			return candidate;
+	}
+}
+
+/*
+ * Protect the filesystem from being mounted more than once.
+ *
+ * @sb:        filesystem being mounted read-write
+ * @mmp_block: block number of the MMP block, taken from the superblock
+ *
+ * Unless the MMP block shows a clean unmount, wait and verify that its
+ * sequence number is not changing.  Then write a fresh random sequence
+ * number, wait again and verify it is still ours, and finally start the
+ * kmmpd thread that keeps the block updated.
+ *
+ * Returns 0 when the mount may proceed and 1 on any failure (invalid MMP
+ * block, I/O error, fsck in progress, device active elsewhere, or kmmpd
+ * could not be started).
+ */
+static int ext3_multi_mount_protect(struct super_block *sb,
+				    unsigned long mmp_block)
+{
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	struct buffer_head *bh = NULL;
+	struct mmp_struct *mmp = NULL;
+	u32 seq;
+	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
+	int retval;
+
+	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
+	    mmp_block >= le32_to_cpu(es->s_blocks_count)) {
+		ext3_warning(sb, __FUNCTION__,
+			     "Invalid MMP block in superblock");
+		goto failed;
+	}
+
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+
+	mmp = (struct mmp_struct *)(bh->b_data);
+
+	if (mmp_check_interval < EXT3_MMP_MIN_CHECK_INTERVAL)
+		mmp_check_interval = EXT3_MMP_MIN_CHECK_INTERVAL;
+
+	/*
+	 * If check_interval in MMP block is larger, use that instead of
+	 * update_interval from the superblock.  The on-disk field is
+	 * little-endian and must be converted before it is compared.
+	 */
+	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
+		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
+
+	seq = le32_to_cpu(mmp->mmp_seq);
+	if (seq == EXT3_MMP_SEQ_CLEAN)
+		goto skip;
+
+	if (seq == EXT3_MMP_SEQ_FSCK) {
+		dump_mmp_msg(sb, mmp, __FUNCTION__,
+			     "fsck is running on the filesystem");
+		goto failed;
+	}
+
+	schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1));
+
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+	mmp = (struct mmp_struct *)(bh->b_data);
+	if (seq != le32_to_cpu(mmp->mmp_seq)) {
+		dump_mmp_msg(sb, mmp, __FUNCTION__,
+			     "Device is already active on another node.");
+		goto failed;
+	}
+
+skip:
+	/*
+	 * write a new random sequence number.  Keep seq in CPU byte order:
+	 * it is compared against le32_to_cpu(mmp->mmp_seq) below, and only
+	 * the on-disk copy is converted.
+	 */
+	seq = mmp_new_seq();
+	mmp->mmp_seq = cpu_to_le32(seq);
+
+	retval = write_mmp_block(bh);
+	if (retval)
+		goto failed;
+
+	/*
+	 * wait for MMP interval and check mmp_seq.
+	 */
+	schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1));
+
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+	mmp = (struct mmp_struct *)(bh->b_data);
+	if (seq != le32_to_cpu(mmp->mmp_seq)) {
+		dump_mmp_msg(sb, mmp, __FUNCTION__,
+			     "Device is already active on another node.");
+		goto failed;
+	}
+
+	/*
+	 * Start a kernel thread to update the MMP block periodically.
+	 */
+	EXT3_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x",
+					     MAJOR(sb->s_dev),
+					     MINOR(sb->s_dev));
+	if (IS_ERR(EXT3_SB(sb)->s_mmp_tsk)) {
+		/* Never leave an ERR_PTR behind for ext3_put_super(). */
+		EXT3_SB(sb)->s_mmp_tsk = NULL;
+		ext3_warning(sb, __FUNCTION__, "Unable to create kmmpd thread "
+			     "for %s.", sb->s_id);
+		goto failed;
+	}
+
+	brelse(bh);
+	return 0;
+
+failed:
+	brelse(bh);
+	return 1;
+}
+
static int ext3_fill_super (struct super_block *sb, void *data, int silent)
{
@@ -1844,6 +2156,11 @@ static int ext3_fill_super (struct super
EXT3_HAS_INCOMPAT_FEATURE(sb,
EXT3_FEATURE_INCOMPAT_RECOVER));
+ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MMP) &&
+ !(sb->s_flags & MS_RDONLY))
+ if (ext3_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
+ goto failed_mount3;
+
/*
* The first inode we look at is the journal inode. Don't try
* root first: it may be modified in the journal!
@@ -2446,7 +2763,7 @@ static int ext3_remount (struct super_bl
ext3_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext3_mount_options old_opts;
- int err;
+ int err = 0;
#ifdef CONFIG_QUOTA
int i;
#endif
@@ -2530,6 +2847,11 @@ static int ext3_remount (struct super_bl
}
if (!ext3_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
+ if (EXT3_HAS_INCOMPAT_FEATURE(sb,
+ EXT3_FEATURE_INCOMPAT_MMP))
+ if (ext3_multi_mount_protect(sb,
+ le64_to_cpu(es->s_mmp_block)))
+ goto restore_opts;
}
}
#ifdef CONFIG_QUOTA
Index: linux-2.6.18/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.18.orig/include/linux/ext3_fs.h
+++ linux-2.6.18/include/linux/ext3_fs.h
@@ -593,13 +593,17 @@ struct ext3_super_block {
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */
- __le32 s_blocks_count_hi; /* Blocks count high 32 bits */
+/*150*/ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */
__le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/
__le32 s_free_blocks_count_hi; /* Free blocks count high 32 bits */
__le16 s_min_extra_isize; /* All inodes have at least # bytes */
__le16 s_want_extra_isize; /* New inodes should reserve # bytes */
- __le32 s_flags; /* Miscellaneous flags */
- __u32 s_reserved[167]; /* Padding to the end of the block */
+/*160*/ __le32 s_flags; /* Miscellaneous flags */
+ __le16 s_raid_stride; /* RAID stride */
+ __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
+ __le64 s_mmp_block; /* Block for multi-mount protection */
+/*170*/ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
+ __le32 s_reserved[163]; /* Padding to the end of the block */
};
#ifdef __KERNEL__
@@ -702,12 +706,14 @@ static inline int ext3_valid_inum(struct
#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
#define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
+#define EXT3_FEATURE_INCOMPAT_MMP 0x0100
#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
EXT3_FEATURE_INCOMPAT_RECOVER| \
EXT3_FEATURE_INCOMPAT_META_BG| \
- EXT3_FEATURE_INCOMPAT_EXTENTS)
+ EXT3_FEATURE_INCOMPAT_EXTENTS| \
+ EXT3_FEATURE_INCOMPAT_MMP)
#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -870,6 +876,39 @@ ext3_group_first_block_no(struct super_b
#define ERR_BAD_DX_DIR -75000
/*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the block number saved in the s_mmp_block field in the
+ * superblock. Programs that check MMP should assume that if
+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
+ * to use the filesystem, regardless of how old the timestamp is.
+ */
+#define EXT3_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */
+#define EXT3_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
+#define EXT3_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */
+#define EXT3_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */
+
+struct mmp_struct {
+ __le32 mmp_magic;
+ __le32 mmp_seq;
+ __le64 mmp_time;
+ char mmp_nodename[64];
+ char mmp_bdevname[32];
+ __le16 mmp_check_interval;
+ __le16 mmp_pad1;
+ __le32 mmp_pad2[227];
+};
+
+/*
+ * Default interval in seconds to update the MMP sequence number.
+ */
+#define EXT3_MMP_UPDATE_INTERVAL 1
+
+/*
+ * Minimum interval for MMP checking in seconds.
+ */
+#define EXT3_MMP_MIN_CHECK_INTERVAL 5
+
+/*
* Function prototypes
*/
Index: linux-2.6.18/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.18.orig/include/linux/ext3_fs_sb.h
+++ linux-2.6.18/include/linux/ext3_fs_sb.h
@@ -151,6 +151,7 @@ struct ext3_sb_info {
/* locality groups */
struct ext3_locality_group *s_locality_groups;
+ struct task_struct *s_mmp_tsk; /* Kernel thread for multiple mount protection */
};
#define EXT3_GROUP_INFO(sb, group) \
Index: linux-2.6.18.8/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/ialloc.c 2007-06-20 18:54:59.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/ialloc.c 2007-06-20 18:54:59.000000000 +0200
@@ -729,7 +729,8 @@ got:
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blksize = PAGE_SIZE;
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+ ext3_current_time(inode);
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
@@ -761,9 +762,8 @@ got:
spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT3_STATE_NEW;
- ei->i_extra_isize =
- (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
+
+ ei->i_extra_isize = EXT3_SB(sb)->s_want_extra_isize;
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
Index: linux-2.6.18.8/fs/ext3/inode.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/inode.c 2007-06-20 18:54:52.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/inode.c 2007-06-20 18:54:59.000000000 +0200
@@ -727,7 +727,7 @@ static int ext3_splice_branch(handle_t *
/* We are done with atomic stuff, now do the rest of housekeeping */
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);
/* had we spliced it onto indirect block? */
@@ -2375,7 +2375,7 @@ do_indirects:
ext3_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_ctime = ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);
/*
@@ -2611,10 +2611,6 @@ void ext3_read_inode(struct inode * inod
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
- inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
- inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
- inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
ei->i_state = 0;
ei->i_dir_start_lookup = 0;
@@ -2689,6 +2685,11 @@ void ext3_read_inode(struct inode * inod
} else
ei->i_extra_isize = 0;
+ EXT3_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+ EXT3_INODE_GET_XTIME(i_mtime, inode, raw_inode);
+ EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode);
+ EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
@@ -2769,9 +2770,12 @@ static int ext3_do_update_inode(handle_t
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+
+ EXT3_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+ EXT3_INODE_SET_XTIME(i_mtime, inode, raw_inode);
+ EXT3_INODE_SET_XTIME(i_atime, inode, raw_inode);
+ EXT3_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
+
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
Index: linux-2.6.18.8/fs/ext3/ioctl.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/ioctl.c 2007-06-20 18:42:05.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/ioctl.c 2007-06-20 18:54:59.000000000 +0200
@@ -120,7 +120,7 @@ int ext3_ioctl (struct inode * inode, st
ei->i_flags = flags;
ext3_set_inode_flags(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
@@ -157,7 +157,7 @@ flags_err:
return PTR_ERR(handle);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
inode->i_generation = generation;
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
}
Index: linux-2.6.18.8/fs/ext3/namei.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/namei.c 2007-06-20 18:54:53.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/namei.c 2007-06-20 18:54:59.000000000 +0200
@@ -1287,7 +1287,7 @@ static int add_dirent_to_buf(handle_t *h
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ dir->i_mtime = dir->i_ctime = ext3_current_time(dir);
ext3_update_dx_flag(dir);
dir->i_version++;
ext3_mark_inode_dirty(handle, dir);
@@ -2079,7 +2079,7 @@ static int ext3_rmdir (struct inode * di
inode->i_version++;
inode->i_nlink = 0;
ext3_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);
ext3_dec_count(handle, dir);
ext3_update_dx_flag(dir);
@@ -2129,13 +2129,13 @@ static int ext3_unlink(struct inode * di
retval = ext3_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ dir->i_ctime = dir->i_mtime = ext3_current_time(dir);
ext3_update_dx_flag(dir);
ext3_mark_inode_dirty(handle, dir);
ext3_dec_count(handle, inode);
if (!inode->i_nlink)
ext3_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime;
+ inode->i_ctime = ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);
retval = 0;
@@ -2237,8 +2237,8 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
- inc_nlink(inode);
+ ext3_inc_count(handle, inode);
atomic_inc(&inode->i_count);
err = ext3_add_link(handle, dentry, inode);
@@ -2340,7 +2340,7 @@ static int ext3_rename (struct inode * o
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = CURRENT_TIME_SEC;
+ old_inode->i_ctime = ext3_current_time(old_inode);
ext3_mark_inode_dirty(handle, old_inode);
/*
@@ -2373,9 +2373,9 @@ static int ext3_rename (struct inode * o
if (new_inode) {
ext3_dec_count(handle, new_inode);
- new_inode->i_ctime = CURRENT_TIME_SEC;
+ new_inode->i_ctime = ext3_current_time(new_inode);
}
- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+ old_dir->i_ctime = old_dir->i_mtime = ext3_current_time(old_dir);
ext3_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
Index: linux-2.6.18.8/fs/ext3/super.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/super.c 2007-06-20 18:54:59.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/super.c 2007-06-20 18:54:59.000000000 +0200
@@ -1713,6 +1713,8 @@ static int ext3_fill_super (struct super
sbi->s_inode_size);
goto failed_mount;
}
+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE)
+ sb->s_time_gran = 1 << (EXT3_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
@@ -1917,6 +1919,32 @@ static int ext3_fill_super (struct super
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+
+ /* determine the minimum size of new large inodes, if present */
+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) -
+ EXT3_GOOD_OLD_INODE_SIZE;
+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_want_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_want_extra_isize);
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_min_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_min_extra_isize);
+ }
+ }
+	/*
+	 * Check if enough inode space is available: the wanted extra size
+	 * must fit behind the good-old inode inside s_inode_size.  If it
+	 * does not, fall back to the extra fields this kernel's
+	 * struct ext3_inode defines and tell the administrator.
+	 */
+	if (EXT3_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+	    sbi->s_inode_size) {
+		sbi->s_want_extra_isize = sizeof(struct ext3_inode) -
+					  EXT3_GOOD_OLD_INODE_SIZE;
+		/* Keep the space before the split: literals are concatenated. */
+		printk(KERN_INFO "EXT3-fs: required extra inode space not "
+		       "available.\n");
+	}
+
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
Index: linux-2.6.18.8/fs/ext3/xattr.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/xattr.c 2007-06-20 18:54:52.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/xattr.c 2007-06-20 18:54:59.000000000 +0200
@@ -1007,7 +1007,7 @@ ext3_xattr_set_handle(handle_t *handle,
}
if (!error) {
ext3_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext3_mark_iloc_dirty, even with
Index: linux-2.6.18.8/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.18.8.orig/include/linux/ext3_fs.h 2007-06-20 18:54:59.000000000 +0200
+++ linux-2.6.18.8/include/linux/ext3_fs.h 2007-06-20 18:54:59.000000000 +0200
@@ -288,7 +288,7 @@ struct ext3_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
- __le32 i_ctime; /* Creation time */
+ __le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
@@ -337,10 +337,73 @@ struct ext3_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */
};
#define i_size_high i_dir_acl
+#define EXT3_EPOCH_BITS 2 /* low bits of an *_extra field that carry the epoch */
+#define EXT3_EPOCH_MASK ((1 << EXT3_EPOCH_BITS) - 1) /* selects the epoch bits */
+#define EXT3_NSEC_MASK (~0UL << EXT3_EPOCH_BITS) /* selects the bits above the epoch (nsec << 2) */
+
+/*
+ * Nonzero when 'field' of the raw inode ends at or before
+ * EXT3_GOOD_OLD_INODE_SIZE + (einode)->i_extra_isize, i.e. the field
+ * lies inside the part of the on-disk inode covered by i_extra_isize.
+ * 'ext3_inode' is only used for its type and is expanded twice.
+ */
+#define EXT3_FITS_IN_INODE(ext3_inode, einode, field) \
+	((offsetof(typeof(*(ext3_inode)), field) + \
+	  sizeof((ext3_inode)->field)) \
+	 <= (EXT3_GOOD_OLD_INODE_SIZE + \
+	     (einode)->i_extra_isize))
+
+/*
+ * Build the on-disk *_extra word for 'time'.
+ *
+ * The epoch (bits 32 and up of tv_sec, when tv_sec is wider than 32 bits)
+ * is stored in the low EXT3_EPOCH_BITS bits and tv_nsec is stored shifted
+ * left above them.  The result is returned already converted to
+ * little-endian.
+ */
+static inline __le32 ext3_encode_extra_time(struct timespec *time)
+{
+	/*
+	 * Mask the epoch: without it a negative (sign-extended) or very
+	 * large tv_sec would spill into and corrupt the nanosecond bits.
+	 */
+	return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
+			    (time->tv_sec >> 32) & EXT3_EPOCH_MASK : 0) |
+			   ((time->tv_nsec << EXT3_EPOCH_BITS) & EXT3_NSEC_MASK));
+}
+
+/*
+ * Unpack an on-disk *_extra word into 'time'.
+ *
+ * When tv_sec is wider than 32 bits the epoch bits are OR-ed into tv_sec
+ * starting at bit 32, so tv_sec must already hold the low 32 bits of the
+ * seconds.  tv_nsec is always overwritten with the nanosecond part.
+ */
+static inline void ext3_decode_extra_time(struct timespec *time, __le32 extra)
+{
+	__u32 raw = le32_to_cpu(extra);
+
+	if (sizeof(time->tv_sec) > 4)
+		time->tv_sec |= (__u64)(raw & EXT3_EPOCH_MASK) << 32;
+	time->tv_nsec = (raw & EXT3_NSEC_MASK) >> 2;
+}
+
+/*
+ * Copy timestamp 'xtime' (e.g. i_ctime) from the VFS inode into the raw
+ * on-disk inode.  The 32-bit seconds field is always written; the
+ * xtime_extra word is written only when EXT3_FITS_IN_INODE() reports that
+ * it lies inside this inode's extra space.  'inode' and 'raw_inode' are
+ * expanded more than once, so pass side-effect-free expressions.
+ */
+#define EXT3_INODE_SET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext3_encode_extra_time(&(inode)->xtime); \
+} while (0)
+
+/*
+ * Copy timestamp 'xtime' (e.g. i_crtime) from the ext3 in-memory inode
+ * 'einode' into the raw on-disk inode.  Unlike EXT3_INODE_SET_XTIME, the
+ * seconds field itself is also written only when EXT3_FITS_IN_INODE()
+ * says it fits; the same check guards the xtime_extra word.  'einode'
+ * and 'raw_inode' are expanded more than once.
+ */
+#define EXT3_EINODE_SET_XTIME(xtime, einode, raw_inode)\
+do { \
+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext3_encode_extra_time(&(einode)->xtime); \
+} while (0)
+
+/*
+ * Load timestamp 'xtime' (e.g. i_ctime) of the VFS inode from the raw
+ * on-disk inode.  The seconds are always read from the 32-bit field.
+ * When the xtime_extra word fits in this inode it supplies the epoch
+ * bits and nanoseconds; otherwise tv_nsec is explicitly cleared so the
+ * in-memory inode never keeps a stale nanosecond value.  'inode' and
+ * 'raw_inode' are expanded more than once.
+ */
+#define EXT3_INODE_GET_XTIME(xtime, inode, raw_inode) \
+do { \
+	(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
+	if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra)) \
+		ext3_decode_extra_time(&(inode)->xtime, \
+				       (raw_inode)->xtime ## _extra); \
+	else \
+		(inode)->xtime.tv_nsec = 0; \
+} while (0)
+
+/*
+ * Load timestamp 'xtime' (e.g. i_crtime) of the ext3 in-memory inode
+ * 'einode' from the raw on-disk inode.  Each of the seconds field and
+ * the xtime_extra word is read only when EXT3_FITS_IN_INODE() says it
+ * fits; a part that does not fit is set to zero instead of being left
+ * with whatever the in-memory inode held before.  'einode' and
+ * 'raw_inode' are expanded more than once.
+ */
+#define EXT3_EINODE_GET_XTIME(xtime, einode, raw_inode) \
+do { \
+	if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \
+		(einode)->xtime.tv_sec = \
+			(signed)le32_to_cpu((raw_inode)->xtime); \
+	else \
+		(einode)->xtime.tv_sec = 0; \
+	if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+		ext3_decode_extra_time(&(einode)->xtime, \
+				       (raw_inode)->xtime ## _extra); \
+	else \
+		(einode)->xtime.tv_nsec = 0; \
+} while (0)
+
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
@@ -520,11 +583,19 @@ struct ext3_super_block {
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
- __u8 s_reserved_char_pad;
- __u16 s_reserved_word_pad;
+ __u8 s_jnl_backup_type; /* Default type of journal backup */
+ __le16 s_desc_size; /* Group desc. size: INCOMPAT_64BIT */
__le32 s_default_mount_opts;
- __le32 s_first_meta_bg; /* First metablock block group */
- __u32 s_reserved[190]; /* Padding to the end of the block */
+ __le32 s_first_meta_bg; /* First metablock block group */
+ __le32 s_mkfs_time; /* When the filesystem was created */
+ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
+ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */
+ __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/
+ __le32 s_free_blocks_count_hi; /* Free blocks count high 32 bits */
+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */
+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
+ __le32 s_flags; /* Miscellaneous flags */
+ __u32 s_reserved[167]; /* Padding to the end of the block */
};
#ifdef __KERNEL__
@@ -539,6 +610,13 @@ static inline struct ext3_inode_info *EX
return container_of(inode, struct ext3_inode_info, vfs_inode);
}
+/*
+ * Current time to stamp on 'inode'.  If the superblock's s_time_gran is
+ * below one second the value comes from current_fs_time(); otherwise the
+ * whole-second CURRENT_TIME_SEC is returned.
+ */
+static inline struct timespec ext3_current_time(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (sb->s_time_gran >= NSEC_PER_SEC)
+		return CURRENT_TIME_SEC;
+	return current_fs_time(sb);
+}
+
+
static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT3_ROOT_INO ||
@@ -611,6 +689,8 @@ static inline int ext3_valid_inum(struct
#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
+
#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -628,6 +708,7 @@ static inline int ext3_valid_inum(struct
EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \
EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
/*
Index: linux-2.6.18.8/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.18.8.orig/include/linux/ext3_fs_sb.h 2007-06-20 18:54:54.000000000 +0200
+++ linux-2.6.18.8/include/linux/ext3_fs_sb.h 2007-06-20 18:54:59.000000000 +0200
@@ -68,6 +68,9 @@ struct ext3_sb_info {
/* Last group used to allocate inode */
int s_last_alloc_group;
+ /* New inodes should reserve # bytes */
+ unsigned int s_want_extra_isize;
+
/* root of the per fs reservation window tree */
spinlock_t s_rsv_window_lock;
struct rb_root s_rsv_window_root;
Index: linux-2.6.18.8/include/linux/ext3_fs_i.h
===================================================================
--- linux-2.6.18.8.orig/include/linux/ext3_fs_i.h 2007-06-20 18:54:57.000000000 +0200
+++ linux-2.6.18.8/include/linux/ext3_fs_i.h 2007-06-20 18:54:59.000000000 +0200
@@ -140,6 +140,8 @@ struct ext3_inode_info {
/* on-disk additional length */
__u16 i_extra_isize;
+ struct timespec i_crtime;
+
/*
* truncate_mutex is for serialising ext3_truncate() against
* ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
This diff is collapsed.
Index: linux-2.6.18.8/fs/ext3/super.c
===================================================================
--- linux-2.6.18.8.orig/fs/ext3/super.c 2007-07-20 16:51:14.000000000 +0200
+++ linux-2.6.18.8/fs/ext3/super.c 2007-07-20 16:54:17.000000000 +0200
@@ -2572,19 +2572,19 @@ static int ext3_statfs (struct dentry *
struct super_block *sb = dentry->d_sb;
struct ext3_sb_info *sbi = EXT3_SB(sb);
struct ext3_super_block *es = sbi->s_es;
- ext3_fsblk_t overhead;
- int i;
u64 fsid;
- if (test_opt (sb, MINIX_DF))
- overhead = 0;
- else {
- unsigned long ngroups;
- ngroups = EXT3_SB(sb)->s_groups_count;
+ if (test_opt(sb, MINIX_DF)) {
+ sbi->s_overhead_last = 0;
+ } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+ unsigned long ngroups = sbi->s_groups_count, i;
+ ext3_fsblk_t overhead = 0;
smp_rmb();
/*
- * Compute the overhead (FS structures)
+ * Compute the overhead (FS structures). This is constant
+ * for a given filesystem unless the number of block groups
+ * changes so we cache the previous value until it does.
*/
/*
@@ -2605,18 +2605,23 @@ static int ext3_statfs (struct dentry *
* Every block group has an inode bitmap, a block
* bitmap, and an inode table.
*/
- overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
+ overhead += ngroups * (2 + sbi->s_itb_per_group);
+ sbi->s_overhead_last = overhead;
+ smp_wmb();
+ sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
}
buf->f_type = EXT3_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
- buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
+ buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
+ es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
buf->f_bavail = 0;
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
+ es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
buf->f_namelen = EXT3_NAME_LEN;
return 0;
}
Index: linux-2.6.18.8/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.18.8.orig/include/linux/ext3_fs_sb.h 2007-07-20 16:51:23.000000000 +0200
+++ linux-2.6.18.8/include/linux/ext3_fs_sb.h 2007-07-20 16:51:43.000000000 +0200
@@ -45,6 +45,8 @@ struct ext3_sb_info {
unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */
unsigned long s_groups_count; /* Number of groups in the fs */
+ unsigned long s_overhead_last; /* Last calculated overhead */
+ unsigned long s_blocks_last; /* Last seen block count */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
struct buffer_head ** s_group_desc;
This diff is collapsed.
This diff is collapsed.
ext3-wantedi-2.6-rhel4.patch
iopen-2.6.22-vanilla.patch
ext3-map_inode_page-2.6.18.patch
export-ext3-2.6-rhel4.patch
ext3-include-fixes-2.6-rhel4.patch
ext3-extents-2.6.22-vanilla.patch
ext3-mballoc3-core-2.6.22-vanilla.patch
ext3-mballoc3-2.6.22.patch
ext3-nlinks-2.6.22-vanilla.patch
ext3-ialloc-2.6.22-vanilla.patch
ext3-remove-cond_resched-calls-2.6.12.patch
ext3-filterdata-sles10.patch
ext3-uninit-2.6.22-vanilla.patch
ext3-nanosecond-2.6.22-vanilla.patch
ext3-inode-version-2.6.18-vanilla.patch
ext3-mmp-2.6.22-vanilla.patch
ext3-fiemap-2.6.22-vanilla.patch
ext3-block-bitmap-validation-2.6-rhel5.patch
ext3-statfs-2.6.22.patch
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment