From 4d5b2e7e9f7e877bd51c5d3d9b36077a88f05dd1 Mon Sep 17 00:00:00 2001 From: kalpak <kalpak> Date: Tue, 31 Jul 2007 09:55:43 +0000 Subject: [PATCH] b=6334 i=adilger i=girish Multiple mount protection patches for ldiskfs. --- .../patches/ext3-mmp-2.6-rhel4.patch | 455 +++++++++++++++++ .../patches/ext3-mmp-2.6-sles10.patch | 463 ++++++++++++++++++ .../patches/ext3-mmp-2.6.18-vanilla.patch | 463 ++++++++++++++++++ .../series/ldiskfs-2.6-rhel4.series | 1 + .../series/ldiskfs-2.6-sles10.series | 1 + .../series/ldiskfs-2.6.18-vanilla.series | 1 + 6 files changed, 1384 insertions(+) create mode 100644 ldiskfs/kernel_patches/patches/ext3-mmp-2.6-rhel4.patch create mode 100644 ldiskfs/kernel_patches/patches/ext3-mmp-2.6-sles10.patch create mode 100644 ldiskfs/kernel_patches/patches/ext3-mmp-2.6.18-vanilla.patch diff --git a/ldiskfs/kernel_patches/patches/ext3-mmp-2.6-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-mmp-2.6-rhel4.patch new file mode 100644 index 0000000000..3a86d41a42 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-mmp-2.6-rhel4.patch @@ -0,0 +1,455 @@ +Index: linux-2.6.9/fs/ext3/super.c +=================================================================== +--- linux-2.6.9.orig/fs/ext3/super.c ++++ linux-2.6.9/fs/ext3/super.c +@@ -35,6 +35,8 @@ + #include <linux/mount.h> + #include <linux/namei.h> + #include <linux/quotaops.h> ++#include <linux/kthread.h> ++#include <linux/utsname.h> + #include <asm/uaccess.h> + #include "xattr.h" + #include "acl.h" +@@ -440,6 +442,9 @@ void ext3_put_super (struct super_block + invalidate_bdev(sbi->journal_bdev, 0); + ext3_blkdev_remove(sbi); + } ++ if (sbi->s_mmp_tsk) ++ kthread_stop(sbi->s_mmp_tsk); ++ + sb->s_fs_info = NULL; + kfree(sbi); + return; +@@ -1431,6 +1436,314 @@ static unsigned long descriptor_loc(stru + return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); + } + ++/* ++ * Write the MMP block using WRITE_SYNC to try to get the block on-disk ++ * faster. 
++ */ ++static int write_mmp_block(struct buffer_head *bh) ++{ ++ mark_buffer_dirty(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_write_sync; ++ get_bh(bh); ++ submit_bh(WRITE_SYNC, bh); ++ wait_on_buffer(bh); ++ if (unlikely(!buffer_uptodate(bh))) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Read the MMP block. It _must_ be read from disk and hence we clear the ++ * uptodate flag on the buffer. ++ */ ++static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ++ unsigned long mmp_block) ++{ ++ struct mmp_struct *mmp; ++ ++ if (*bh) ++ clear_buffer_uptodate(*bh); ++ ++ brelse(*bh); ++ ++ *bh = sb_bread(sb, mmp_block); ++ if (!*bh) { ++ ext3_warning(sb, __FUNCTION__, ++ "Error while reading MMP block %lu", mmp_block); ++ return -EIO; ++ } ++ ++ mmp = (struct mmp_struct *)((*bh)->b_data); ++ if (le32_to_cpu(mmp->mmp_magic) != EXT3_MMP_MAGIC) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/* ++ * Dump as much information as possible to help the admin. ++ */ ++static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, ++ const char *function, const char *msg) ++{ ++ ext3_warning(sb, function, "%s", msg); ++ ext3_warning(sb, function, "MMP failure info: last update time: %llu, " ++ "last update node: %s, last update device: %s\n", ++ le64_to_cpu(mmp->mmp_time), mmp->mmp_nodename, ++ mmp->mmp_bdevname); ++} ++ ++/* ++ * kmmpd will update the MMP sequence every s_mmp_update_interval seconds ++ */ ++static int kmmpd(void *data) ++{ ++ struct super_block *sb = (struct super_block *) data; ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ struct buffer_head *bh = NULL; ++ struct mmp_struct *mmp; ++ unsigned long mmp_block; ++ u32 seq = 0; ++ unsigned long failed_writes = 0; ++ int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); ++ unsigned mmp_check_interval; ++ unsigned long last_update_time; ++ unsigned long diff; ++ int retval; ++ ++ mmp_block = le64_to_cpu(es->s_mmp_block); ++ retval = read_mmp_block(sb, &bh, mmp_block); 
++ if (retval) ++ goto failed; ++ ++ mmp = (struct mmp_struct *)(bh->b_data); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ /* ++ * Start with the higher mmp_check_interval and reduce it if ++ * the MMP block is being updated on time. ++ */ ++ mmp_check_interval = max(5 * mmp_update_interval, ++ EXT3_MMP_MIN_CHECK_INTERVAL); ++ mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); ++ bdevname(bh->b_bdev, mmp->mmp_bdevname); ++ ++ down_read(&uts_sem); ++ memcpy(mmp->mmp_nodename, system_utsname.nodename, ++ sizeof(mmp->mmp_nodename)); ++ up_read(&uts_sem); ++ ++ while (!kthread_should_stop()) { ++ if (++seq > EXT3_MMP_SEQ_MAX) ++ seq = 1; ++ ++ mmp->mmp_seq = cpu_to_le32(seq); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ last_update_time = jiffies; ++ ++ retval = write_mmp_block(bh); ++ /* ++ * Don't spew too many error messages. Count every failed ++ * write but report only each 60th one. ++ */ ++ if (retval) { ++ if ((failed_writes % 60) == 0) ++ ext3_error(sb, __FUNCTION__, "Error writing to MMP block"); ++ failed_writes++; ++ } ++ ++ if (!(le32_to_cpu(es->s_feature_incompat) & ++ EXT3_FEATURE_INCOMPAT_MMP)) { ++ ext3_warning(sb, __FUNCTION__, "kmmpd being stopped " ++ "since MMP feature has been disabled."); ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ if (sb->s_flags & MS_RDONLY) { ++ ext3_warning(sb, __FUNCTION__, "kmmpd being stopped " ++ "since filesystem has been remounted as " ++ "readonly."); ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ diff = jiffies - last_update_time; ++ if (diff < mmp_update_interval * HZ) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(mmp_update_interval * HZ - diff); ++ } ++ ++ /* ++ * We need to make sure that more than mmp_check_interval ++ * seconds have not passed since writing. If that has happened ++ * we need to check if the MMP block is as we left it. 
++ */ ++ diff = jiffies - last_update_time; ++ if (diff > mmp_check_interval * HZ) { ++ struct buffer_head *bh_check = NULL; ++ struct mmp_struct *mmp_check; ++ ++ retval = read_mmp_block(sb, &bh_check, mmp_block); ++ if (retval) { ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ mmp_check = (struct mmp_struct *)(bh_check->b_data); ++ if (mmp->mmp_time != mmp_check->mmp_time || ++ memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, ++ sizeof(mmp->mmp_nodename))) ++ dump_mmp_msg(sb, mmp_check, __FUNCTION__, ++ "Error while updating MMP info. " ++ "The filesystem seems to have " ++ "been multiply mounted."); ++ ++ put_bh(bh_check); ++ } ++ ++ /* ++ * Adjust the mmp_check_interval depending on how much time ++ * it took for the MMP block to be written. ++ */ ++ mmp_check_interval = max(5 * diff / HZ, ++ (unsigned long) EXT3_MMP_MIN_CHECK_INTERVAL); ++ mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); ++ } ++ ++ /* ++ * Unmount seems to be clean. ++ */ ++ mmp->mmp_seq = cpu_to_le32(EXT3_MMP_SEQ_CLEAN); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ ++ retval = write_mmp_block(bh); ++ ++failed: ++ brelse(bh); ++ return retval; ++} ++ ++/* ++ * Get a random new sequence number but make sure it is not greater than ++ * EXT3_MMP_SEQ_MAX. ++ */ ++static unsigned int mmp_new_seq(void) ++{ ++ u32 new_seq; ++ ++ do { ++ get_random_bytes(&new_seq, sizeof(u32)); ++ } while (new_seq > EXT3_MMP_SEQ_MAX); ++ ++ return new_seq; ++} ++ ++/* ++ * Protect the filesystem from being mounted more than once. 
++ */ ++static int ext3_multi_mount_protect(struct super_block *sb, ++ unsigned long mmp_block) ++{ ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ struct buffer_head *bh = NULL; ++ struct mmp_struct *mmp = NULL; ++ u32 seq; ++ unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); ++ int retval; ++ ++ if (mmp_block < le32_to_cpu(es->s_first_data_block) || ++ mmp_block >= le32_to_cpu(es->s_blocks_count)) { ++ ext3_warning(sb, __FUNCTION__, ++ "Invalid MMP block in superblock"); ++ goto failed; ++ } ++ ++ retval = read_mmp_block(sb, &bh, mmp_block); ++ if (retval) ++ goto failed; ++ ++ mmp = (struct mmp_struct *)(bh->b_data); ++ ++ if (mmp_check_interval < EXT3_MMP_MIN_CHECK_INTERVAL) ++ mmp_check_interval = EXT3_MMP_MIN_CHECK_INTERVAL; ++ ++ /* ++ * If check_interval in MMP block (little-endian on disk) is larger, ++ * use that instead of update_interval from the superblock. ++ */ ++ if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) ++ mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); ++ ++ seq = le32_to_cpu(mmp->mmp_seq); ++ if (seq == EXT3_MMP_SEQ_CLEAN) ++ goto skip; ++ ++ if (seq == EXT3_MMP_SEQ_FSCK) { ++ dump_mmp_msg(sb, mmp, __FUNCTION__, ++ "fsck is running on the filesystem"); ++ goto failed; ++ } ++ ++ schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1)); ++ ++ retval = read_mmp_block(sb, &bh, mmp_block); ++ if (retval) ++ goto failed; ++ mmp = (struct mmp_struct *)(bh->b_data); ++ if (seq != le32_to_cpu(mmp->mmp_seq)) { ++ dump_mmp_msg(sb, mmp, __FUNCTION__, ++ "Device is already active on another node."); ++ goto failed; ++ } ++ ++skip: ++ /* Write a new random sequence number. seq itself stays in CPU ++ * byte order so it can be compared after the re-read below. */ ++ seq = mmp_new_seq(); ++ mmp->mmp_seq = cpu_to_le32(seq); ++ ++ retval = write_mmp_block(bh); ++ if (retval) ++ goto failed; ++ ++ /* ++ * wait for MMP interval and check mmp_seq. 
++ */ ++ schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1)); ++ ++ retval = read_mmp_block(sb, &bh, mmp_block); ++ if (retval) ++ goto failed; ++ mmp = (struct mmp_struct *)(bh->b_data); ++ if (seq != le32_to_cpu(mmp->mmp_seq)) { ++ dump_mmp_msg(sb, mmp, __FUNCTION__, ++ "Device is already active on another node."); ++ goto failed; ++ } ++ ++ /* ++ * Start a kernel thread to update the MMP block periodically. ++ */ ++ EXT3_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x", ++ MAJOR(sb->s_dev), ++ MINOR(sb->s_dev)); ++ if (IS_ERR(EXT3_SB(sb)->s_mmp_tsk)) { ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ ext3_warning(sb, __FUNCTION__, "Unable to create kmmpd thread " ++ "for %s.", sb->s_id); ++ goto failed; ++ } ++ ++ brelse(bh); ++ return 0; ++ ++failed: ++ brelse(bh); ++ return 1; ++} ++ + + static int ext3_fill_super (struct super_block *sb, void *data, int silent) + { +@@ -1738,6 +2051,11 @@ static int ext3_fill_super (struct super + EXT3_HAS_INCOMPAT_FEATURE(sb, + EXT3_FEATURE_INCOMPAT_RECOVER)); + ++ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MMP) && ++ !(sb->s_flags & MS_RDONLY)) ++ if (ext3_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) ++ goto failed_mount3; ++ + /* + * The first inode we look at is the journal inode. Don't try + * root first: it may be modified in the journal! 
+@@ -2371,6 +2689,11 @@ int ext3_remount (struct super_block * s + return ret; + if (!ext3_setup_super (sb, es, 0)) + sb->s_flags &= ~MS_RDONLY; ++ if (EXT3_HAS_INCOMPAT_FEATURE(sb, ++ EXT3_FEATURE_INCOMPAT_MMP)) ++ if (ext3_multi_mount_protect(sb, ++ le64_to_cpu(es->s_mmp_block))) ++ return -EROFS; + } + } + return 0; +Index: linux-2.6.9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.9.orig/include/linux/ext3_fs.h ++++ linux-2.6.9/include/linux/ext3_fs.h +@@ -577,13 +577,17 @@ struct ext3_super_block { + __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ +- __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ ++/*150*/ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ + __le32 s_free_blocks_hi; /* Free blocks count high 32 bits */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ +- __le32 s_flags; /* Miscellaneous flags */ +- __u32 s_reserved[167]; /* Padding to the end of the block */ ++/*160*/ __le32 s_flags; /* Miscellaneous flags */ ++ __le16 s_raid_stride; /* RAID stride */ ++ __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ ++ __le64 s_mmp_block; /* Block for multi-mount protection */ ++/*170*/ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ ++ __le32 s_reserved[163]; /* Padding to the end of the block */ + }; + + #ifdef __KERNEL__ +@@ -667,12 +671,14 @@ static inline struct ext3_inode_info *EX + #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ + #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 + #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ ++#define EXT3_FEATURE_INCOMPAT_MMP 0x0100 + + #define EXT3_FEATURE_COMPAT_SUPP 
EXT2_FEATURE_COMPAT_EXT_ATTR + #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER| \ + EXT3_FEATURE_INCOMPAT_META_BG| \ +- EXT3_FEATURE_INCOMPAT_EXTENTS) ++ EXT3_FEATURE_INCOMPAT_EXTENTS| \ ++ EXT3_FEATURE_INCOMPAT_MMP) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ +@@ -832,6 +838,39 @@ struct dir_private_info { + #define ERR_BAD_DX_DIR -75000 + + /* ++ * This structure will be used for multiple mount protection. It will be ++ * written into the block number saved in the s_mmp_block field in the ++ * superblock. Programs that check MMP should assume that if ++ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe ++ * to use the filesystem, regardless of how old the timestamp is. ++ */ ++#define EXT3_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ ++#define EXT3_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ ++#define EXT3_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ ++#define EXT3_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ ++ ++struct mmp_struct { ++ __le32 mmp_magic; ++ __le32 mmp_seq; ++ __le64 mmp_time; ++ char mmp_nodename[64]; ++ char mmp_bdevname[32]; ++ __le16 mmp_check_interval; ++ __le16 mmp_pad1; ++ __le32 mmp_pad2[227]; ++}; ++ ++/* ++ * Default interval in seconds to update the MMP sequence number. ++ */ ++#define EXT3_MMP_UPDATE_INTERVAL 1 ++ ++/* ++ * Minimum interval for MMP checking in seconds. 
++ */ ++#define EXT3_MMP_MIN_CHECK_INTERVAL 5 ++ ++/* + * Function prototypes + */ + +Index: linux-2.6.9/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.9.orig/include/linux/ext3_fs_sb.h ++++ linux-2.6.9/include/linux/ext3_fs_sb.h +@@ -143,6 +143,7 @@ struct ext3_sb_info { + /* locality groups */ + struct ext3_locality_group *s_locality_groups; + ++ struct task_struct *s_mmp_tsk; /* Kernel thread for multiple mount protection */ + }; + + #define EXT3_GROUP_INFO(sb, group) \ diff --git a/ldiskfs/kernel_patches/patches/ext3-mmp-2.6-sles10.patch b/ldiskfs/kernel_patches/patches/ext3-mmp-2.6-sles10.patch new file mode 100644 index 0000000000..c9637f6da0 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-mmp-2.6-sles10.patch @@ -0,0 +1,463 @@ +Index: linux-2.6.16.46-0.14/fs/ext3/super.c +=================================================================== +--- linux-2.6.16.46-0.14.orig/fs/ext3/super.c ++++ linux-2.6.16.46-0.14/fs/ext3/super.c +@@ -36,6 +36,8 @@ + #include <linux/namei.h> + #include <linux/quotaops.h> + #include <linux/seq_file.h> ++#include <linux/kthread.h> ++#include <linux/utsname.h> + + #include <asm/uaccess.h> + +@@ -436,6 +438,9 @@ static void ext3_put_super (struct super + invalidate_bdev(sbi->journal_bdev, 0); + ext3_blkdev_remove(sbi); + } ++ if (sbi->s_mmp_tsk) ++ kthread_stop(sbi->s_mmp_tsk); ++ + sb->s_fs_info = NULL; + kfree(sbi); + return; +@@ -1521,6 +1526,313 @@ static unsigned long descriptor_loc(stru + return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); + } + ++/* ++ * Write the MMP block using WRITE_SYNC to try to get the block on-disk ++ * faster. 
++ */ ++static int write_mmp_block(struct buffer_head *bh) ++{ ++ mark_buffer_dirty(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_write_sync; ++ get_bh(bh); ++ submit_bh(WRITE_SYNC, bh); ++ wait_on_buffer(bh); ++ if (unlikely(!buffer_uptodate(bh))) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Read the MMP block. It _must_ be read from disk and hence we clear the ++ * uptodate flag on the buffer. ++ */ ++static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ++ unsigned long mmp_block) ++{ ++ struct mmp_struct *mmp; ++ ++ if (*bh) ++ clear_buffer_uptodate(*bh); ++ ++ brelse(*bh); ++ ++ *bh = sb_bread(sb, mmp_block); ++ if (!*bh) { ++ ext3_warning(sb, __FUNCTION__, ++ "Error while reading MMP block %lu", mmp_block); ++ return -EIO; ++ } ++ ++ mmp = (struct mmp_struct *)((*bh)->b_data); ++ if (le32_to_cpu(mmp->mmp_magic) != EXT3_MMP_MAGIC) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/* ++ * Dump as much information as possible to help the admin. ++ */ ++static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, ++ const char *function, const char *msg) ++{ ++ ext3_warning(sb, function, "%s", msg); ++ ext3_warning(sb, function, "MMP failure info: last update time: %llu, " ++ "last update node: %s, last update device: %s\n", ++ le64_to_cpu(mmp->mmp_time), mmp->mmp_nodename, ++ mmp->mmp_bdevname); ++} ++ ++/* ++ * kmmpd will update the MMP sequence every s_mmp_update_interval seconds ++ */ ++static int kmmpd(void *data) ++{ ++ struct super_block *sb = (struct super_block *) data; ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ struct buffer_head *bh = NULL; ++ struct mmp_struct *mmp; ++ unsigned long mmp_block; ++ u32 seq = 0; ++ unsigned long failed_writes = 0; ++ int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); ++ unsigned mmp_check_interval; ++ unsigned long last_update_time; ++ unsigned long diff; ++ int retval; ++ ++ mmp_block = le64_to_cpu(es->s_mmp_block); ++ retval = read_mmp_block(sb, &bh, mmp_block); 
++ if (retval) ++ goto failed; ++ ++ mmp = (struct mmp_struct *)(bh->b_data); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ /* ++ * Start with the higher mmp_check_interval and reduce it if ++ * the MMP block is being updated on time. ++ */ ++ mmp_check_interval = max(5 * mmp_update_interval, ++ EXT3_MMP_MIN_CHECK_INTERVAL); ++ mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); ++ bdevname(bh->b_bdev, mmp->mmp_bdevname); ++ ++ down_read(&uts_sem); ++ memcpy(mmp->mmp_nodename, system_utsname.nodename, ++ sizeof(mmp->mmp_nodename)); ++ up_read(&uts_sem); ++ ++ while (!kthread_should_stop()) { ++ if (++seq > EXT3_MMP_SEQ_MAX) ++ seq = 1; ++ ++ mmp->mmp_seq = cpu_to_le32(seq); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ last_update_time = jiffies; ++ ++ retval = write_mmp_block(bh); ++ /* ++ * Don't spew too many error messages. Count every failed ++ * write but report only each 60th one. ++ */ ++ if (retval) { ++ if ((failed_writes % 60) == 0) ++ ext3_error(sb, __FUNCTION__, "Error writing to MMP block"); ++ failed_writes++; ++ } ++ ++ if (!(le32_to_cpu(es->s_feature_incompat) & ++ EXT3_FEATURE_INCOMPAT_MMP)) { ++ ext3_warning(sb, __FUNCTION__, "kmmpd being stopped " ++ "since MMP feature has been disabled."); ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ if (sb->s_flags & MS_RDONLY) { ++ ext3_warning(sb, __FUNCTION__, "kmmpd being stopped " ++ "since filesystem has been remounted as " ++ "readonly."); ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ diff = jiffies - last_update_time; ++ if (diff < mmp_update_interval * HZ) ++ schedule_timeout_interruptible(mmp_update_interval * ++ HZ - diff); ++ ++ /* ++ * We need to make sure that more than mmp_check_interval ++ * seconds have not passed since writing. If that has happened ++ * we need to check if the MMP block is as we left it. 
++ */ ++ diff = jiffies - last_update_time; ++ if (diff > mmp_check_interval * HZ) { ++ struct buffer_head *bh_check = NULL; ++ struct mmp_struct *mmp_check; ++ ++ retval = read_mmp_block(sb, &bh_check, mmp_block); ++ if (retval) { ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ mmp_check = (struct mmp_struct *)(bh_check->b_data); ++ if (mmp->mmp_time != mmp_check->mmp_time || ++ memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, ++ sizeof(mmp->mmp_nodename))) ++ dump_mmp_msg(sb, mmp_check, __FUNCTION__, ++ "Error while updating MMP info. " ++ "The filesystem seems to have " ++ "been multiply mounted."); ++ ++ put_bh(bh_check); ++ } ++ ++ /* ++ * Adjust the mmp_check_interval depending on how much time ++ * it took for the MMP block to be written. ++ */ ++ mmp_check_interval = max(5 * diff / HZ, ++ (unsigned long) EXT3_MMP_MIN_CHECK_INTERVAL); ++ mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); ++ } ++ ++ /* ++ * Unmount seems to be clean. ++ */ ++ mmp->mmp_seq = cpu_to_le32(EXT3_MMP_SEQ_CLEAN); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ ++ retval = write_mmp_block(bh); ++ ++failed: ++ brelse(bh); ++ return retval; ++} ++ ++/* ++ * Get a random new sequence number but make sure it is not greater than ++ * EXT3_MMP_SEQ_MAX. ++ */ ++static unsigned int mmp_new_seq(void) ++{ ++ u32 new_seq; ++ ++ do { ++ get_random_bytes(&new_seq, sizeof(u32)); ++ } while (new_seq > EXT3_MMP_SEQ_MAX); ++ ++ return new_seq; ++} ++ ++/* ++ * Protect the filesystem from being mounted more than once. 
++ */ ++static int ext3_multi_mount_protect(struct super_block *sb, ++ unsigned long mmp_block) ++{ ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ struct buffer_head *bh = NULL; ++ struct mmp_struct *mmp = NULL; ++ u32 seq; ++ unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); ++ int retval; ++ ++ if (mmp_block < le32_to_cpu(es->s_first_data_block) || ++ mmp_block >= le32_to_cpu(es->s_blocks_count)) { ++ ext3_warning(sb, __FUNCTION__, ++ "Invalid MMP block in superblock"); ++ goto failed; ++ } ++ ++ retval = read_mmp_block(sb, &bh, mmp_block); ++ if (retval) ++ goto failed; ++ ++ mmp = (struct mmp_struct *)(bh->b_data); ++ ++ if (mmp_check_interval < EXT3_MMP_MIN_CHECK_INTERVAL) ++ mmp_check_interval = EXT3_MMP_MIN_CHECK_INTERVAL; ++ ++ /* ++ * If check_interval in MMP block (little-endian on disk) is larger, ++ * use that instead of update_interval from the superblock. ++ */ ++ if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) ++ mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); ++ ++ seq = le32_to_cpu(mmp->mmp_seq); ++ if (seq == EXT3_MMP_SEQ_CLEAN) ++ goto skip; ++ ++ if (seq == EXT3_MMP_SEQ_FSCK) { ++ dump_mmp_msg(sb, mmp, __FUNCTION__, ++ "fsck is running on the filesystem"); ++ goto failed; ++ } ++ ++ schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1)); ++ ++ retval = read_mmp_block(sb, &bh, mmp_block); ++ if (retval) ++ goto failed; ++ mmp = (struct mmp_struct *)(bh->b_data); ++ if (seq != le32_to_cpu(mmp->mmp_seq)) { ++ dump_mmp_msg(sb, mmp, __FUNCTION__, ++ "Device is already active on another node."); ++ goto failed; ++ } ++ ++skip: ++ /* Write a new random sequence number. seq itself stays in CPU ++ * byte order so it can be compared after the re-read below. */ ++ seq = mmp_new_seq(); ++ mmp->mmp_seq = cpu_to_le32(seq); ++ ++ retval = write_mmp_block(bh); ++ if (retval) ++ goto failed; ++ ++ /* ++ * wait for MMP interval and check mmp_seq. 
++ */ ++ schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1)); ++ ++ retval = read_mmp_block(sb, &bh, mmp_block); ++ if (retval) ++ goto failed; ++ mmp = (struct mmp_struct *)(bh->b_data); ++ if (seq != le32_to_cpu(mmp->mmp_seq)) { ++ dump_mmp_msg(sb, mmp, __FUNCTION__, ++ "Device is already active on another node."); ++ goto failed; ++ } ++ ++ /* ++ * Start a kernel thread to update the MMP block periodically. ++ */ ++ EXT3_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x", ++ MAJOR(sb->s_dev), ++ MINOR(sb->s_dev)); ++ if (IS_ERR(EXT3_SB(sb)->s_mmp_tsk)) { ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ ext3_warning(sb, __FUNCTION__, "Unable to create kmmpd thread " ++ "for %s.", sb->s_id); ++ goto failed; ++ } ++ ++ brelse(bh); ++ return 0; ++ ++failed: ++ brelse(bh); ++ return 1; ++} ++ + + static int ext3_fill_super (struct super_block *sb, void *data, int silent) + { +@@ -1828,6 +2140,11 @@ static int ext3_fill_super (struct super + EXT3_HAS_INCOMPAT_FEATURE(sb, + EXT3_FEATURE_INCOMPAT_RECOVER)); + ++ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MMP) && ++ !(sb->s_flags & MS_RDONLY)) ++ if (ext3_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) ++ goto failed_mount3; ++ + /* + * The first inode we look at is the journal inode. Don't try + * root first: it may be modified in the journal! 
+@@ -2438,7 +2755,7 @@ static int ext3_remount (struct super_bl + unsigned long n_blocks_count = 0; + unsigned long old_sb_flags; + struct ext3_mount_options old_opts; +- int err; ++ int err = 0; + #ifdef CONFIG_QUOTA + int i; + #endif +@@ -2522,6 +2839,11 @@ static int ext3_remount (struct super_bl + } + if (!ext3_setup_super (sb, es, 0)) + sb->s_flags &= ~MS_RDONLY; ++ if (EXT3_HAS_INCOMPAT_FEATURE(sb, ++ EXT3_FEATURE_INCOMPAT_MMP)) ++ if (ext3_multi_mount_protect(sb, ++ le64_to_cpu(es->s_mmp_block))) ++ goto restore_opts; + } + } + #ifdef CONFIG_QUOTA +Index: linux-2.6.16.46-0.14/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.16.46-0.14.orig/include/linux/ext3_fs.h ++++ linux-2.6.16.46-0.14/include/linux/ext3_fs.h +@@ -599,13 +599,17 @@ struct ext3_super_block { + __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ +- __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ ++/*150*/ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ + __le32 s_free_blocks_hi; /* Free blocks count high 32 bits */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ +- __le32 s_flags; /* Miscellaneous flags */ +- __u32 s_reserved[167]; /* Padding to the end of the block */ ++/*160*/ __le32 s_flags; /* Miscellaneous flags */ ++ __le16 s_raid_stride; /* RAID stride */ ++ __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ ++ __le64 s_mmp_block; /* Block for multi-mount protection */ ++/*170*/ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ ++ __le32 s_reserved[163]; /* Padding to the end of the block */ + }; + + #ifdef __KERNEL__ +@@ -699,12 +703,14 @@ static inline int ext3_valid_inum(struct + 
#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ + #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 + #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ ++#define EXT3_FEATURE_INCOMPAT_MMP 0x0100 + + #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR + #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER| \ + EXT3_FEATURE_INCOMPAT_META_BG| \ +- EXT3_FEATURE_INCOMPAT_EXTENTS) ++ EXT3_FEATURE_INCOMPAT_EXTENTS| \ ++ EXT3_FEATURE_INCOMPAT_MMP) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ +@@ -864,6 +870,39 @@ struct dir_private_info { + #define ERR_BAD_DX_DIR -75000 + + /* ++ * This structure will be used for multiple mount protection. It will be ++ * written into the block number saved in the s_mmp_block field in the ++ * superblock. Programs that check MMP should assume that if ++ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe ++ * to use the filesystem, regardless of how old the timestamp is. ++ */ ++#define EXT3_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ ++#define EXT3_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ ++#define EXT3_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ ++#define EXT3_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ ++ ++struct mmp_struct { ++ __le32 mmp_magic; ++ __le32 mmp_seq; ++ __le64 mmp_time; ++ char mmp_nodename[64]; ++ char mmp_bdevname[32]; ++ __le16 mmp_check_interval; ++ __le16 mmp_pad1; ++ __le32 mmp_pad2[227]; ++}; ++ ++/* ++ * Default interval in seconds to update the MMP sequence number. ++ */ ++#define EXT3_MMP_UPDATE_INTERVAL 1 ++ ++/* ++ * Minimum interval for MMP checking in seconds. 
++ */ ++#define EXT3_MMP_MIN_CHECK_INTERVAL 5 ++ ++/* + * Function prototypes + */ + +Index: linux-2.6.16.46-0.14/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.16.46-0.14.orig/include/linux/ext3_fs_sb.h ++++ linux-2.6.16.46-0.14/include/linux/ext3_fs_sb.h +@@ -140,6 +140,7 @@ struct ext3_sb_info { + /* locality groups */ + struct ext3_locality_group *s_locality_groups; + ++ struct task_struct *s_mmp_tsk; /* Kernel thread for multiple mount protection */ + }; + + #define EXT3_GROUP_INFO(sb, group) \ diff --git a/ldiskfs/kernel_patches/patches/ext3-mmp-2.6.18-vanilla.patch b/ldiskfs/kernel_patches/patches/ext3-mmp-2.6.18-vanilla.patch new file mode 100644 index 0000000000..b7c4a1c952 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-mmp-2.6.18-vanilla.patch @@ -0,0 +1,463 @@ +Index: linux-2.6.18/fs/ext3/super.c +=================================================================== +--- linux-2.6.18.orig/fs/ext3/super.c ++++ linux-2.6.18/fs/ext3/super.c +@@ -35,6 +35,8 @@ + #include <linux/namei.h> + #include <linux/quotaops.h> + #include <linux/seq_file.h> ++#include <linux/kthread.h> ++#include <linux/utsname.h> + + #include <asm/uaccess.h> + +@@ -435,6 +437,9 @@ static void ext3_put_super (struct super + invalidate_bdev(sbi->journal_bdev, 0); + ext3_blkdev_remove(sbi); + } ++ if (sbi->s_mmp_tsk) ++ kthread_stop(sbi->s_mmp_tsk); ++ + sb->s_fs_info = NULL; + kfree(sbi); + return; +@@ -1528,6 +1533,313 @@ static ext3_fsblk_t descriptor_loc(struc + return (has_super + ext3_group_first_block_no(sb, bg)); + } + ++/* ++ * Write the MMP block using WRITE_SYNC to try to get the block on-disk ++ * faster. 
++ */ ++static int write_mmp_block(struct buffer_head *bh) ++{ ++ mark_buffer_dirty(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_write_sync; ++ get_bh(bh); ++ submit_bh(WRITE_SYNC, bh); ++ wait_on_buffer(bh); ++ if (unlikely(!buffer_uptodate(bh))) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Read the MMP block. It _must_ be read from disk and hence we clear the ++ * uptodate flag on the buffer. ++ */ ++static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ++ unsigned long mmp_block) ++{ ++ struct mmp_struct *mmp; ++ ++ if (*bh) ++ clear_buffer_uptodate(*bh); ++ ++ brelse(*bh); ++ ++ *bh = sb_bread(sb, mmp_block); ++ if (!*bh) { ++ ext3_warning(sb, __FUNCTION__, ++ "Error while reading MMP block %lu", mmp_block); ++ return -EIO; ++ } ++ ++ mmp = (struct mmp_struct *)((*bh)->b_data); ++ if (le32_to_cpu(mmp->mmp_magic) != EXT3_MMP_MAGIC) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/* ++ * Dump as much information as possible to help the admin. ++ */ ++static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, ++ const char *function, const char *msg) ++{ ++ ext3_warning(sb, function, "%s", msg); ++ ext3_warning(sb, function, "MMP failure info: last update time: %llu, " ++ "last update node: %s, last update device: %s\n", ++ le64_to_cpu(mmp->mmp_time), mmp->mmp_nodename, ++ mmp->mmp_bdevname); ++} ++ ++/* ++ * kmmpd will update the MMP sequence every s_mmp_update_interval seconds ++ */ ++static int kmmpd(void *data) ++{ ++ struct super_block *sb = (struct super_block *) data; ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ struct buffer_head *bh = NULL; ++ struct mmp_struct *mmp; ++ unsigned long mmp_block; ++ u32 seq = 0; ++ unsigned long failed_writes = 0; ++ int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); ++ unsigned mmp_check_interval; ++ unsigned long last_update_time; ++ unsigned long diff; ++ int retval; ++ ++ mmp_block = le64_to_cpu(es->s_mmp_block); ++ retval = read_mmp_block(sb, &bh, mmp_block); 
++ if (retval) ++ goto failed; ++ ++ mmp = (struct mmp_struct *)(bh->b_data); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ /* ++ * Start with the higher mmp_check_interval and reduce it if ++ * the MMP block is being updated on time. ++ */ ++ mmp_check_interval = max(5 * mmp_update_interval, ++ EXT3_MMP_MIN_CHECK_INTERVAL); ++ mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); ++ bdevname(bh->b_bdev, mmp->mmp_bdevname); ++ ++ down_read(&uts_sem); ++ memcpy(mmp->mmp_nodename, system_utsname.nodename, ++ sizeof(mmp->mmp_nodename)); ++ up_read(&uts_sem); ++ ++ while (!kthread_should_stop()) { ++ if (++seq > EXT3_MMP_SEQ_MAX) ++ seq = 1; ++ ++ mmp->mmp_seq = cpu_to_le32(seq); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ last_update_time = jiffies; ++ ++ retval = write_mmp_block(bh); ++ /* ++ * Don't spew too many error messages. Count every failed ++ * write but report only each 60th one. ++ */ ++ if (retval) { ++ if ((failed_writes % 60) == 0) ++ ext3_error(sb, __FUNCTION__, "Error writing to MMP block"); ++ failed_writes++; ++ } ++ ++ if (!(le32_to_cpu(es->s_feature_incompat) & ++ EXT3_FEATURE_INCOMPAT_MMP)) { ++ ext3_warning(sb, __FUNCTION__, "kmmpd being stopped " ++ "since MMP feature has been disabled."); ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ if (sb->s_flags & MS_RDONLY) { ++ ext3_warning(sb, __FUNCTION__, "kmmpd being stopped " ++ "since filesystem has been remounted as " ++ "readonly."); ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ diff = jiffies - last_update_time; ++ if (diff < mmp_update_interval * HZ) ++ schedule_timeout_interruptible(mmp_update_interval * ++ HZ - diff); ++ ++ /* ++ * We need to make sure that more than mmp_check_interval ++ * seconds have not passed since writing. If that has happened ++ * we need to check if the MMP block is as we left it. 
++ */ ++ diff = jiffies - last_update_time; ++ if (diff > mmp_check_interval * HZ) { ++ struct buffer_head *bh_check = NULL; ++ struct mmp_struct *mmp_check; ++ ++ retval = read_mmp_block(sb, &bh_check, mmp_block); ++ if (retval) { ++ EXT3_SB(sb)->s_mmp_tsk = 0; ++ goto failed; ++ } ++ ++ mmp_check = (struct mmp_struct *)(bh_check->b_data); ++ if (mmp->mmp_time != mmp_check->mmp_time || ++ memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, ++ sizeof(mmp->mmp_nodename))) ++ dump_mmp_msg(sb, mmp_check, __FUNCTION__, ++ "Error while updating MMP info. " ++ "The filesystem seems to have " ++ "been multiply mounted."); ++ ++ put_bh(bh_check); ++ } ++ ++ /* ++ * Adjust the mmp_check_interval depending on how much time ++ * it took for the MMP block to be written. ++ */ ++ mmp_check_interval = max(5 * diff / HZ, ++ (unsigned long) EXT3_MMP_MIN_CHECK_INTERVAL); ++ mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); ++ } ++ ++ /* ++ * Unmount seems to be clean. ++ */ ++ mmp->mmp_seq = cpu_to_le32(EXT3_MMP_SEQ_CLEAN); ++ mmp->mmp_time = cpu_to_le64(get_seconds()); ++ ++ retval = write_mmp_block(bh); ++ ++failed: ++ brelse(bh); ++ return retval; ++} ++ ++/* ++ * Get a random new sequence number but make sure it is not greater than ++ * EXT3_MMP_SEQ_MAX. ++ */ ++static unsigned int mmp_new_seq(void) ++{ ++ u32 new_seq; ++ ++ do { ++ get_random_bytes(&new_seq, sizeof(u32)); ++ } while (new_seq > EXT3_MMP_SEQ_MAX); ++ ++ return new_seq; ++} ++ ++/* ++ * Protect the filesystem from being mounted more than once. 
++ * Returns 0 once the MMP block is ours and kmmpd has been started, else 1.
++ */
++static int ext3_multi_mount_protect(struct super_block *sb,
++				    unsigned long mmp_block)
++{
++	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
++	struct buffer_head *bh = NULL;
++	struct mmp_struct *mmp = NULL;
++	u32 seq;
++	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
++	int retval;
++
++	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
++	    mmp_block >= le32_to_cpu(es->s_blocks_count)) {
++		ext3_warning(sb, __FUNCTION__,
++			     "Invalid MMP block in superblock");
++		goto failed;
++	}
++
++	retval = read_mmp_block(sb, &bh, mmp_block);
++	if (retval)
++		goto failed;
++
++	mmp = (struct mmp_struct *)(bh->b_data);
++
++	if (mmp_check_interval < EXT3_MMP_MIN_CHECK_INTERVAL)
++		mmp_check_interval = EXT3_MMP_MIN_CHECK_INTERVAL;
++
++	/*
++	 * If check_interval in MMP block is larger, use that instead of
++	 * update_interval from the superblock.
++	 */
++	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
++		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
++
++	seq = le32_to_cpu(mmp->mmp_seq);
++	if (seq == EXT3_MMP_SEQ_CLEAN)
++		goto skip;
++
++	if (seq == EXT3_MMP_SEQ_FSCK) {
++		dump_mmp_msg(sb, mmp, __FUNCTION__,
++			     "fsck is running on the filesystem");
++		goto failed;
++	}
++
++	/* Not clean: wait and see whether somebody else changes mmp_seq. */
++	schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1));
++	retval = read_mmp_block(sb, &bh, mmp_block);
++	if (retval)
++		goto failed;
++	mmp = (struct mmp_struct *)(bh->b_data);
++	if (seq != le32_to_cpu(mmp->mmp_seq)) {
++		dump_mmp_msg(sb, mmp, __FUNCTION__,
++			     "Device is already active on another node.");
++		goto failed;
++	}
++
++skip:
++	/* Write a new random sequence number; seq stays in CPU byte order. */
++	seq = mmp_new_seq();
++	mmp->mmp_seq = cpu_to_le32(seq);
++
++	retval = write_mmp_block(bh);
++	if (retval)
++		goto failed;
++
++	/*
++	 * Wait again, then check that nobody overwrote the mmp_seq we wrote.
++	 */
++	schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1));
++
++	retval = read_mmp_block(sb, &bh, mmp_block);
++	if (retval)
++		goto failed;
++	mmp = (struct mmp_struct *)(bh->b_data);
++	if (seq != le32_to_cpu(mmp->mmp_seq)) {
++		dump_mmp_msg(sb, mmp, __FUNCTION__,
++			     "Device is already active on another node.");
++		goto failed;
++	}
++
++	/*
++	 * Start a kernel thread to update the MMP block periodically.
++	 */
++	EXT3_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x",
++					     MAJOR(sb->s_dev),
++					     MINOR(sb->s_dev));
++	if (IS_ERR(EXT3_SB(sb)->s_mmp_tsk)) {
++		EXT3_SB(sb)->s_mmp_tsk = NULL;
++		ext3_warning(sb, __FUNCTION__, "Unable to create kmmpd thread "
++			     "for %s.", sb->s_id);
++		goto failed;
++	}
++
++	brelse(bh);
++	return 0;
++
++failed:
++	brelse(bh);
++	return 1;
++}
++
+ 
+ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
+ {
+@@ -1844,6 +2156,11 @@ static int ext3_fill_super (struct super
+ 		EXT3_HAS_INCOMPAT_FEATURE(sb,
+ 		    EXT3_FEATURE_INCOMPAT_RECOVER));
+ 
++	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MMP) &&
++	    !(sb->s_flags & MS_RDONLY))
++		if (ext3_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
++			goto failed_mount3;
++
+ 	/*
+ 	 * The first inode we look at is the journal inode. Don't try
+ 	 * root first: it may be modified in the journal!
+@@ -2446,7 +2763,7 @@ static int ext3_remount (struct super_bl
+ 	ext3_fsblk_t n_blocks_count = 0;
+ 	unsigned long old_sb_flags;
+ 	struct ext3_mount_options old_opts;
+-	int err;
++	int err = 0;
+ #ifdef CONFIG_QUOTA
+ 	int i;
+ #endif
+@@ -2530,6 +2847,11 @@ static int ext3_remount (struct super_bl
+ 			}
+ 			if (!ext3_setup_super (sb, es, 0))
+ 				sb->s_flags &= ~MS_RDONLY;
++			if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MMP) &&
++			    ext3_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) {
++				err = -EROFS;	/* do not report success */
++				goto restore_opts;
++			}
+ 		}
+ 	}
+ #ifdef CONFIG_QUOTA
+Index: linux-2.6.18/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.18.orig/include/linux/ext3_fs.h
++++ linux-2.6.18/include/linux/ext3_fs.h
+@@ -593,13 +593,17 @@ struct ext3_super_block {
+ 	__le32	s_first_meta_bg;	/* First metablock block group */
+ 	__le32	s_mkfs_time;		/* When the filesystem was created */
+ 	__le32	s_jnl_blocks[17];	/* Backup of the journal inode */
+-	__le32	s_blocks_count_hi;	/* Blocks count high 32 bits */
++/*150*/	__le32	s_blocks_count_hi;	/* Blocks count high 32 bits */
+ 	__le32	s_r_blocks_count_hi;	/* Reserved blocks count high 32 bits*/
+ 	__le32	s_free_blocks_count_hi;	/* Free blocks count high 32 bits */
+ 	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
+ 	__le16	s_want_extra_isize;	/* New inodes should reserve # bytes */
+-	__le32	s_flags;		/* Miscellaneous flags */
+-	__u32	s_reserved[167];	/* Padding to the end of the block */
++/*160*/	__le32	s_flags;		/* Miscellaneous flags */
++	__le16	s_raid_stride;		/* RAID stride */
++	__le16	s_mmp_update_interval;	/* # seconds to wait in MMP checking */
++	__le64	s_mmp_block;		/* Block for multi-mount protection */
++/*170*/	__le32	s_raid_stripe_width;	/* blocks on all data disks (N*stride)*/
++	__le32	s_reserved[163];	/* Padding to the end of the block */
+ };
+ 
+ #ifdef __KERNEL__
+@@ -702,12 +706,14 @@ static inline int ext3_valid_inum(struct
+ #define
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008 /* Journal device */
+ #define EXT3_FEATURE_INCOMPAT_META_BG		0x0010
+ #define EXT3_FEATURE_INCOMPAT_EXTENTS		0x0040 /* extents support */
++#define EXT3_FEATURE_INCOMPAT_MMP		0x0100 /* multiple mount protection */
+ 
+ #define EXT3_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
+ #define EXT3_FEATURE_INCOMPAT_SUPP	(EXT3_FEATURE_INCOMPAT_FILETYPE| \
+ 					 EXT3_FEATURE_INCOMPAT_RECOVER| \
+ 					 EXT3_FEATURE_INCOMPAT_META_BG| \
+-					 EXT3_FEATURE_INCOMPAT_EXTENTS)
++					 EXT3_FEATURE_INCOMPAT_EXTENTS| \
++					 EXT3_FEATURE_INCOMPAT_MMP)
+ #define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+ 					 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
+ 					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+@@ -870,6 +876,39 @@ ext3_group_first_block_no(struct super_b
+ #define ERR_BAD_DX_DIR	-75000
+ 
+ /*
++ * This structure will be used for multiple mount protection. It will be
++ * written into the block number saved in the s_mmp_block field in the
++ * superblock. Programs that check MMP should assume that if
++ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
++ * to use the filesystem, regardless of how old the timestamp is.
++ */
++#define EXT3_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
++#define EXT3_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
++#define EXT3_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
++#define EXT3_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
++
++struct mmp_struct {
++	__le32	mmp_magic;		/* EXT3_MMP_MAGIC */
++	__le32	mmp_seq;		/* sequence number or EXT3_MMP_SEQ_* marker */
++	__le64	mmp_time;		/* get_seconds() at the last update */
++	char	mmp_nodename[64];	/* nodename of the last updating host */
++	char	mmp_bdevname[32];	/* bdevname() of the device on that host */
++	__le16	mmp_check_interval;	/* check interval in use, in seconds */
++	__le16	mmp_pad1;		/* padding */
++	__le32	mmp_pad2[227];		/* pads the structure to 1024 bytes */
++};
++
++/*
++ * Default interval in seconds to update the MMP sequence number.
++ */
++#define EXT3_MMP_UPDATE_INTERVAL   1
++
++/*
++ * Minimum interval for MMP checking in seconds.
++ */
++#define EXT3_MMP_MIN_CHECK_INTERVAL        5
++
++/*
+  * Function prototypes
+  */
+ 
+Index: linux-2.6.18/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.6.18.orig/include/linux/ext3_fs_sb.h
++++ linux-2.6.18/include/linux/ext3_fs_sb.h
+@@ -151,6 +151,7 @@ struct ext3_sb_info {
+ 	/* locality groups */
+ 	struct ext3_locality_group *s_locality_groups;
+ 
++	struct task_struct *s_mmp_tsk;  /* kmmpd thread for multiple mount protection, NULL if none */
+ };
+ 
+ #define EXT3_GROUP_INFO(sb, group)					   \
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
index bbb7b56462..3c7f1cbae3 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
@@ -20,3 +20,4 @@ ext3-check-jbd-errors-2.6.9.patch
 ext3-uninit-2.6.9.patch
 ext3-nanosecond-2.6-rhel4.patch
 ext3-unlink-race.patch
+ext3-mmp-2.6-rhel4.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series
index 04768bd677..213fff30ed 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series
@@ -18,3 +18,4 @@ ext3-disable-write-bar-by-default-2.6-sles10.patch
 ext3-uninit-2.6-sles10.patch
 ext3-nanosecond-2.6-sles10.patch
 ext3-inode-version-2.6-sles10.patch
+ext3-mmp-2.6-sles10.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
index 3fc9e90753..b7b2ff189d 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
@@ -13,3 +13,4 @@ ext3-filterdata-sles10.patch
 ext3-uninit-2.6.18.patch
 ext3-nanosecond-2.6.18-vanilla.patch
 ext3-inode-version-2.6.18-vanilla.patch
+ext3-mmp-2.6.18-vanilla.patch
-- 
GitLab