Index: mmp/fs/ext3/al.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ mmp/fs/ext3/al.h	2006-07-24 10:39:26.000000000 +0800
@@ -0,0 +1,23 @@
+/*
+ * (C) 2006  Qi Yong <qiyong@clusterfs.com>
+ */
+
+#ifndef _EXT3_AL_H
+#define _EXT3_AL_H
+
+#include <linux/types.h>
+
+#define	ALIVE_MAGIC	0xA1153C29
+
+/*
+ * On-disk layout of the alive block: the first data block of the
+ * ".alive" file in the root directory.  All integers are little-endian.
+ */
+struct alive_struct {
+	__le32	al_magic;	/* ALIVE_MAGIC */
+	__le32	al_seq;		/* bumped on every heartbeat; 0 = released */
+	__le32	al_time;	/* get_seconds() at the last write */
+	char	al_nodename[65];	/* utsname nodename of the last writer */
+};
+
+#endif /* _EXT3_AL_H */
Index: mmp/fs/ext3/namei.c
===================================================================
--- mmp.orig/fs/ext3/namei.c	2006-07-24 10:34:41.000000000 +0800
+++ mmp/fs/ext3/namei.c	2006-07-24 10:39:26.000000000 +0800
@@ -805,7 +805,7 @@ static inline int search_dirblock(struct
  * The returned buffer_head has ->b_count elevated.  The caller is expected
  * to brelse() it when appropriate.
  */
-static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+struct buffer_head * ext3_find_entry (struct dentry *dentry,
 					struct ext3_dir_entry_2 ** res_dir)
 {
 	struct super_block * sb;
Index: mmp/fs/ext3/super.c
===================================================================
--- mmp.orig/fs/ext3/super.c	2006-07-24 10:34:41.000000000 +0800
+++ mmp/fs/ext3/super.c	2006-07-24 10:45:19.000000000 +0800
@@ -36,12 +36,14 @@
 #include <linux/namei.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
+#include "al.h"
 
 static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
 			     unsigned long journal_devnum);
@@ -62,6 +64,8 @@ static int ext3_statfs (struct super_blo
 static void ext3_unlockfs(struct super_block *sb);
 static void ext3_write_super (struct super_block * sb);
 static void ext3_write_super_lockfs(struct super_block *sb);
+struct buffer_head * ext3_find_entry (struct dentry *dentry,
+					struct ext3_dir_entry_2 ** res_dir);
 
 /* 
  * Wrappers for journal_start/end.
@@ -435,6 +439,9 @@ static void ext3_put_super (struct super
 		invalidate_bdev(sbi->journal_bdev, 0);
 		ext3_blkdev_remove(sbi);
 	}
+	if (sbi->s_alive_tsk)
+		kthread_stop(sbi->s_alive_tsk);
+
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	return;
@@ -1369,6 +1376,261 @@ static unsigned long descriptor_loc(stru
 	return (first_data_block + has_super + (bg * sbi->s_blocks_per_group));
 }
 
+/* Write @bh out and wait for it; returns 1 if it is not uptodate after. */
+static int write_alive(struct buffer_head * bh)
+{
+	lock_buffer(bh);
+	get_bh(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	submit_bh(WRITE, bh);
+	wait_on_buffer(bh);
+
+	return unlikely(!buffer_uptodate(bh)) ? 1 : 0;
+}
+
+/* Re-read @bh from disk; on failure brelse()s @bh and returns 1. */
+static int read_alive_again(struct buffer_head * bh)
+{
+	lock_buffer(bh);
+	get_bh(bh);
+	bh->b_end_io = end_buffer_read_sync;
+	submit_bh(READ, bh);
+	wait_on_buffer(bh);
+	if (buffer_uptodate(bh))
+		return 0;
+	brelse(bh);
+	return 1;
+}
+
+/*
+ * Heartbeat thread.  @data is the alive block's buffer_head; the caller
+ * must hold a ref on it, which is dropped here when the thread exits.
+ */
+static int kalived(void *data)
+{
+	struct buffer_head * bh = data;
+	struct alive_struct * alive = (struct alive_struct *)(bh->b_data);
+	char b[BDEVNAME_SIZE];
+	u32 seq = 0;
+
+	bdevname(bh->b_bdev, b);
+	alive->al_magic = cpu_to_le32(ALIVE_MAGIC);
+
+	down_read(&uts_sem);
+	memcpy(alive->al_nodename, system_utsname.nodename,
+		sizeof(alive->al_nodename));
+	up_read(&uts_sem);
+
+	while (!kthread_should_stop()) {
+		if (++seq == 0)	/* 0 is written on exit, skip it on wrap */
+			++seq;
+
+		alive->al_seq = cpu_to_le32(seq);
+		alive->al_time = cpu_to_le32(get_seconds());
+
+		if (unlikely(write_alive(bh))) {
+			/* panic here? */
+			printk(KERN_ERR "Alive (device %s): "
+				"can't write alive block\n", b);
+			/* retry soon, but don't spin and flood the log */
+			schedule_timeout_interruptible(HZ);
+			continue;
+		}
+
+		schedule_timeout_interruptible(5 * HZ);
+	}
+
+	alive->al_seq = 0;	/* lets the next check_alive() skip its wait */
+	alive->al_time = cpu_to_le32(get_seconds());
+
+	if (unlikely(write_alive(bh)))
+		printk(KERN_ERR "Alive (device %s): "
+			"can't reset alive block\n", b);
+	brelse(bh);
+	return 0;
+}
+
+/*
+ * Look up ".alive" in the root directory.  Returns its inode number,
+ * or 0 if the root dentry can't be set up or the entry is not found.
+ */
+static unsigned long get_alive_ino(struct super_block *sb)
+{
+	struct inode * root_inode = iget(sb, EXT3_ROOT_INO);
+	struct dentry * root = d_alloc_root(root_inode);
+	struct ext3_dir_entry_2 * de;
+	struct buffer_head * bh;
+	struct dentry alive;	/* on-stack key for ext3_find_entry() */
+	unsigned long ino;
+
+	if (!root) {
+		printk(KERN_ERR "Alive (device %s): get root inode failed\n",
+			sb->s_id);
+		iput(root_inode);
+		return 0;
+	}
+
+	alive.d_parent = root;
+	alive.d_name.name = ".alive";
+	alive.d_name.len = 6;
+
+	bh = ext3_find_entry(&alive, &de);
+	dput(root);
+	if (!bh) {
+		printk(KERN_WARNING "Alive (device %s): alive lookup failed\n",
+			sb->s_id);
+		return 0;
+	}
+
+	ino = le32_to_cpu(de->inode);
+	brelse(bh);
+	pr_debug("Alive (device %s): alive_ino=%lu\n", sb->s_id, ino);
+	return ino;
+}
+
+/*
+ * Check the ".alive" file to make sure no other node is using this fs,
+ * then start kalived on the alive block.  Returns 0 on success, 1 on
+ * failure; sbi->s_alive_tsk is only ever set to a valid task.
+ */
+static int check_alive(struct super_block *sb, struct ext3_sb_info *sbi)
+{
+	unsigned long		ino;
+	struct buffer_head	* bh;
+	struct inode		* alive_inode;
+	struct alive_struct	* alive;
+	struct task_struct	* tsk;
+	u32 alive_block;
+	u32 seq;
+
+	ino = get_alive_ino(sb);
+	if (!ino)
+		goto failed;
+
+	alive_inode = iget(sb, ino);
+	if (!alive_inode) {
+		printk(KERN_ERR "Alive (device %s): get alive inode failed\n",
+			sb->s_id);
+		goto failed;
+	}
+	if (!alive_inode->i_nlink) {
+		make_bad_inode(alive_inode);
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): alive inode is deleted\n",
+			sb->s_id);
+		goto failed;
+	}
+	/* a bad inode reports S_IFREG, so it must be tested for explicitly */
+	if (is_bad_inode(alive_inode) || !S_ISREG(alive_inode->i_mode)) {
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): invalid alive inode\n",
+			sb->s_id);
+		goto failed;
+	}
+	if (EXT3_I(alive_inode)->i_flags & EXT3_EXTENTS_FL) {
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): invalid alive inode, "
+			"in extents format\n", sb->s_id);
+		goto failed;
+	}
+
+	/* i_data[] keeps block numbers in on-disk (little-endian) order */
+	alive_block = le32_to_cpu(EXT3_I(alive_inode)->i_data[0]);
+	iput(alive_inode);
+
+	pr_debug("Alive (device %s): read in alive block #%u\n",
+			sb->s_id, alive_block);
+
+	/* first read */
+	bh = sb_bread(sb, alive_block);
+	if (!bh) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't read alive block #%u\n", sb->s_id, alive_block);
+		goto failed;
+	}
+
+	alive = (struct alive_struct *)(bh->b_data);
+	if (le32_to_cpu(alive->al_magic) != ALIVE_MAGIC) {
+		printk(KERN_ERR "Alive (device %s): "
+			"magic mismatch\n", sb->s_id);
+		goto failed_brelse;
+	}
+
+	seq = le32_to_cpu(alive->al_seq);
+	pr_debug("Alive (device %s): seq=%u\n", sb->s_id, seq);
+	pr_info ("Alive (device %s): last touched by node: %s, "
+		"%li seconds ago\n", sb->s_id, alive->al_nodename,
+		get_seconds() - le32_to_cpu(alive->al_time));
+
+	if (seq == 0)	/* released cleanly: nothing to wait for */
+		goto skip;
+
+	pr_info("Alive (device %s): wait for 8 seconds...\n", sb->s_id);
+	schedule_timeout_uninterruptible(HZ * 8);
+
+	/* read again; read_alive_again() releases bh itself on failure */
+	if (read_alive_again(bh)) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't read alive block #%u\n", sb->s_id, alive_block);
+		goto failed;
+	}
+
+	pr_debug("Alive (device %s): seq=%u\n",
+		sb->s_id, le32_to_cpu(alive->al_seq));
+	if (seq != le32_to_cpu(alive->al_seq)) {
+		printk(KERN_WARNING "Alive (device %s): "
+			"still active on node %s\n",
+			sb->s_id, alive->al_nodename);
+		goto failed_brelse;
+	}
+skip:
+	/* write a new random seq */
+	get_random_bytes(&seq, sizeof(u32));
+	alive->al_seq = cpu_to_le32(seq);
+	if (unlikely(write_alive(bh))) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't write alive block\n", sb->s_id);
+		goto failed_brelse;	/* write_alive() leaves our ref alone */
+	}
+	pr_debug("Alive (device %s): write random seq=%u\n", sb->s_id, seq);
+
+	pr_info("Alive (device %s): wait for 6 seconds...\n", sb->s_id);
+	schedule_timeout_uninterruptible(HZ * 6);
+
+	/* read again: if our seq was overwritten, somebody else is alive */
+	if (read_alive_again(bh)) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't read alive block #%u\n", sb->s_id, alive_block);
+		goto failed;
+	}
+
+	pr_debug("Alive (device %s): seq=%u\n",
+		sb->s_id, le32_to_cpu(alive->al_seq));
+	if (seq != le32_to_cpu(alive->al_seq)) {
+		printk(KERN_WARNING "Alive (device %s): "
+			"still active on node %s\n",
+			sb->s_id, alive->al_nodename);
+		goto failed_brelse;
+	}
+
+	pr_info("Alive (device %s): alive check passed!\n", sb->s_id);
+	tsk = kthread_run(kalived, bh, "kalived");
+	if (IS_ERR(tsk)) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't start kalived\n", sb->s_id);
+		goto failed_brelse;
+	}
+	sbi->s_alive_tsk = tsk;	/* kalived now owns our ref on bh */
+	return 0;
+
+failed_brelse:
+	brelse(bh);
+failed:
+	printk(KERN_WARNING "Alive (device %s): alive check failed!\n",
+		sb->s_id);
+	return 1;
+}
+
 
 static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 {
@@ -1668,6 +1930,10 @@ static int ext3_fill_super (struct super
 			  EXT3_HAS_INCOMPAT_FEATURE(sb,
 				    EXT3_FEATURE_INCOMPAT_RECOVER));
 
+	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_ALIVE))
+		if (check_alive(sb, sbi))
+			goto failed_mount2;
+
 	/*
 	 * The first inode we look at is the journal inode.  Don't try
 	 * root first: it may be modified in the journal!
@@ -1785,6 +2051,8 @@ cantfind_ext3:
 
 failed_mount3:
 	journal_destroy(sbi->s_journal);
+	if (sbi->s_alive_tsk)
+		kthread_stop(sbi->s_alive_tsk);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
Index: mmp/include/linux/ext3_fs.h
===================================================================
--- mmp.orig/include/linux/ext3_fs.h	2006-07-24 10:34:41.000000000 +0800
+++ mmp/include/linux/ext3_fs.h	2006-07-24 10:39:26.000000000 +0800
@@ -581,12 +581,14 @@ static inline struct ext3_inode_info *EX
 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008 /* Journal device */
 #define EXT3_FEATURE_INCOMPAT_META_BG		0x0010
 #define EXT3_FEATURE_INCOMPAT_EXTENTS		0x0040 /* extents support */
+#define EXT3_FEATURE_INCOMPAT_ALIVE		0x0080 /* mount-time alive check */
 
 #define EXT3_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT3_FEATURE_INCOMPAT_SUPP	(EXT3_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT3_FEATURE_INCOMPAT_RECOVER| \
 					 EXT3_FEATURE_INCOMPAT_META_BG| \
-					 EXT3_FEATURE_INCOMPAT_EXTENTS)
+					 EXT3_FEATURE_INCOMPAT_EXTENTS| \
+					 EXT3_FEATURE_INCOMPAT_ALIVE)
 #define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
Index: mmp/include/linux/ext3_fs_sb.h
===================================================================
--- mmp.orig/include/linux/ext3_fs_sb.h	2006-07-24 10:34:41.000000000 +0800
+++ mmp/include/linux/ext3_fs_sb.h	2006-07-24 10:39:26.000000000 +0800
@@ -86,6 +86,7 @@ struct ext3_sb_info {
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
 #endif
+	struct task_struct * s_alive_tsk;
 
 	/* for buddy allocator */
 	struct ext3_group_info **s_group_info;