From 9055078c15c82bd17d10130b06e026fc25ab4b9a Mon Sep 17 00:00:00 2001
From: kalpak <kalpak>
Date: Tue, 15 May 2007 09:42:01 +0000
Subject: [PATCH] b=10768 i=adilger i=girish

Add an on-disk 64-bit inode version to track changes made to the inode. This is required for version-based recovery.
---
 .../series/ldiskfs-2.6-sles10.series          |   1 +
 .../series/ldiskfs-2.6.18-vanilla.series      |   1 +
 lustre/ChangeLog                              |   7 +
 .../ext3-inode-version-2.6-sles10.patch       | 426 ++++++++++++++++++
 .../ext3-inode-version-2.6.18-vanilla.patch   | 426 ++++++++++++++++++
 .../series/ldiskfs-2.6-sles10.series          |   1 +
 .../series/ldiskfs-2.6.18-vanilla.series      |   1 +
 .../series/ldiskfs2-2.6-sles10.series         |   1 +
 .../series/ldiskfs2-2.6.18-vanilla.series     |   1 +
 9 files changed, 865 insertions(+)
 create mode 100644 lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch

diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series
index 11f62b04fb..e9f3f1fb5a 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series
@@ -12,3 +12,4 @@ ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-filterdata-2.6.15.patch
 ext3-disable-write-bar-by-default-2.6-sles10.patch
 ext3-nanosecond-2.6-sles10.patch
+ext3-inode-version-2.6-sles10.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
index d5a8733974..350067db39 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
@@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch
 ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-filterdata-2.6.15.patch
 ext3-nanosecond-2.6.18-vanilla.patch
+ext3-inode-version-2.6.18-vanilla.patch
diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 58f4891ccf..b4d6ddb743 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -88,6 +88,13 @@ Details    : the default for reading the health_check proc file has changed
 	     is possible to return to the previous behaviour during configure
 	     with --enable-health-write.
 
+Severity   : enhancement
+Bugzilla   : 10768
+Description: 64-bit inode version
+Details:   : Add a on-disk 64-bit inode version for ext3 to track changes made
+	     to the inode. This will be required for version-based recovery.	      
+
+
 --------------------------------------------------------------------------------
 
 2007-05-03  Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch
new file mode 100644
index 0000000000..7b6b1b8890
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch
@@ -0,0 +1,426 @@
+Index: linux-2.6.16-sles10/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.16-sles10.orig/fs/ext3/inode.c
++++ linux-2.6.16-sles10/fs/ext3/inode.c
+@@ -2558,6 +2558,13 @@ void ext3_read_inode(struct inode * inod
+ 	EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode);
+ 	EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+ 
++	ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version);
++	if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
++		if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi))
++			ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi))
++									 << 32;
++	}
++
+ 	if (S_ISREG(inode->i_mode)) {
+ 		inode->i_op = &ext3_file_inode_operations;
+ 		inode->i_fop = &ext3_file_operations;
+@@ -2696,8 +2703,14 @@ static int ext3_do_update_inode(handle_t
+ 	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
+ 		raw_inode->i_block[block] = ei->i_data[block];
+ 
+-	if (ei->i_extra_isize)
++	raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version);
++	if (ei->i_extra_isize) {
++		if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) {
++			raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version
++									>> 32);
++		}
+ 		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
++	}
+ 
+ 	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+ 	rc = ext3_journal_dirty_metadata(handle, bh);
+@@ -2971,10 +2984,32 @@ ext3_reserve_inode_write(handle_t *handl
+ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
+ {
+ 	struct ext3_iloc iloc;
+-	int err;
++	int err, ret;
++	static int expand_message;
+ 
+ 	might_sleep();
+ 	err = ext3_reserve_inode_write(handle, inode, &iloc);
++	if (EXT3_I(inode)->i_extra_isize <
++	    EXT3_SB(inode->i_sb)->s_want_extra_isize &&
++	    !(EXT3_I(inode)->i_state & EXT3_STATE_NO_EXPAND)) {
++		/* We need extra buffer credits since we may write into EA block
++		 * with this same handle */
++		if ((ext3_journal_extend(handle,
++			     EXT3_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
++			ret = ext3_expand_extra_isize(inode,
++  					EXT3_SB(inode->i_sb)->s_want_extra_isize,
++					iloc, handle);
++			if (ret) {
++				EXT3_I(inode)->i_state |= EXT3_STATE_NO_EXPAND;
++				if (!expand_message) {
++					ext3_warning(inode->i_sb, __FUNCTION__,
++					"Unable to expand inode %lu. Delete some"
++					" EAs or run e2fsck.", inode->i_ino);
++					expand_message = 1;
++				}
++			}
++		}
++	}
+ 	if (!err)
+ 		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ 	return err;
+Index: linux-2.6.16-sles10/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.16-sles10.orig/include/linux/ext3_fs.h
++++ linux-2.6.16-sles10/include/linux/ext3_fs.h
+@@ -205,6 +205,7 @@ struct ext3_group_desc
+ #define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
+ #define EXT3_STATE_NEW			0x00000002 /* inode is newly created */
+ #define EXT3_STATE_XATTR		0x00000004 /* has in-inode xattrs */
++#define EXT3_STATE_NO_EXPAND		0x00000008 /* No space for expansion */
+ 
+ /* Used to pass group descriptor data when online resize is done */
+ struct ext3_new_group_input {
+@@ -281,7 +282,7 @@ struct ext3_inode {
+ 	__le32	i_flags;	/* File flags */
+ 	union {
+ 		struct {
+-			__u32  l_i_reserved1;
++			__u32  l_i_version;
+ 		} linux1;
+ 		struct {
+ 			__u32  h_i_translator;
+@@ -326,6 +327,7 @@ struct ext3_inode {
+ 	__le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
+ 	__le32  i_crtime;       /* File Creation time */
+ 	__le32  i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */
++	__le32	i_version_hi;	/* high 32 bits for 64-bit version */
+ };
+ 
+ #define i_size_high	i_dir_acl
+@@ -388,6 +390,8 @@ do {								     	 \
+ 				      raw_inode->xtime ## _extra);       \
+ } while (0)
+ 
++#define i_disk_version osd1.linux1.l_i_version
++
+ #if defined(__KERNEL__) || defined(__linux__)
+ #define i_reserved1	osd1.linux1.l_i_reserved1
+ #define i_frag		osd2.linux2.l_i_frag
+Index: linux-2.6.16-sles10/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.6.16-sles10.orig/include/linux/ext3_fs_i.h
++++ linux-2.6.16-sles10/include/linux/ext3_fs_i.h
+@@ -20,6 +20,8 @@
+ #include <linux/rbtree.h>
+ #include <linux/seqlock.h>
+ 
++#define HAVE_DISK_INODE_VERSION
++
+ struct ext3_reserve_window {
+ 	__u32			_rsv_start;	/* First byte reserved */
+ 	__u32			_rsv_end;	/* Last byte reserved or 0 */
+@@ -138,6 +140,8 @@ struct ext3_inode_info {
+ 	__u32 i_cached_extent[4];
+ 
+ 	void *i_filterdata;
++
++	__u64 i_fs_version;
+ };
+ 
+ #endif	/* _LINUX_EXT3_FS_I */
+Index: linux-2.6.16-sles10/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.16-sles10.orig/fs/ext3/xattr.c
++++ linux-2.6.16-sles10/fs/ext3/xattr.c
+@@ -505,6 +505,20 @@ ext3_xattr_release_block(handle_t *handl
+ 	}
+ }
+ 
++static inline size_t ext3_xattr_free_space(struct ext3_xattr_entry *last,
++				    size_t *min_offs, void *base, int *total)
++{
++	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
++		*total += EXT3_XATTR_LEN(last->e_name_len);
++		if (!last->e_value_block && last->e_value_size) {
++			size_t offs = le16_to_cpu(last->e_value_offs);
++			if (offs < *min_offs)
++				*min_offs = offs;
++		}
++	}
++	return (*min_offs - ((void *)last - base) - sizeof(__u32));
++}
++
+ struct ext3_xattr_info {
+ 	int name_index;
+ 	const char *name;
+@@ -1007,6 +1021,8 @@ ext3_xattr_set_handle(handle_t *handle, 
+ 	if (!error) {
+ 		ext3_xattr_update_super_block(handle, inode->i_sb);
+ 		inode->i_ctime = ext3_current_time(inode);
++		if (!value)
++			EXT3_I(inode)->i_state &= ~EXT3_STATE_NO_EXPAND;
+ 		ext3_mark_inode_dirty(handle, inode);
+ 		/*
+ 		 * The bh is consumed by ext3_mark_iloc_dirty, even with
+@@ -1059,6 +1075,249 @@ retry:
+ 	return error;
+ }
+ 
++static void ext3_xattr_shift_entries(struct ext3_xattr_entry *entry,
++				     int value_offs_shift, void *to,
++				     void *from, size_t n, int blocksize)
++{
++	struct ext3_xattr_entry *last = entry;
++	int new_offs;
++
++	/* Adjust the value offsets of the entries */
++	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
++		if (!last->e_value_block && last->e_value_size) {
++			new_offs = le16_to_cpu(last->e_value_offs) +
++							value_offs_shift;
++			BUG_ON(new_offs + le32_to_cpu(last->e_value_size) >
++			       blocksize);
++			last->e_value_offs = cpu_to_le16(new_offs);
++		}
++	}
++	/* Shift the entries by n bytes */
++	memmove(to, from, n);
++}
++
++/* Expand an inode by new_extra_isize bytes.
++ * Returns 0 on success or negative error number on failure.
++ */
++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize,
++			    struct ext3_iloc iloc, handle_t *handle)
++{
++	struct ext3_inode *raw_inode;
++	struct ext3_xattr_ibody_header *header;
++	struct ext3_xattr_entry *entry, *last, *first;
++	struct buffer_head *bh = NULL;
++	struct ext3_xattr_ibody_find *is = NULL;
++	struct ext3_xattr_block_find *bs = NULL;
++	char *buffer = NULL, *b_entry_name = NULL;
++	size_t min_offs, free;
++	int total_ino, total_blk;
++	void *base, *start, *end;
++	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
++	int s_min_extra_isize = EXT3_SB(inode->i_sb)->s_es->s_min_extra_isize;
++
++	down_write(&EXT3_I(inode)->xattr_sem);
++
++retry:
++	if (EXT3_I(inode)->i_extra_isize >= new_extra_isize) {
++		up_write(&EXT3_I(inode)->xattr_sem);
++		return 0;
++	}
++
++	raw_inode = ext3_raw_inode(&iloc);
++
++	header = IHDR(inode, raw_inode);
++	entry = IFIRST(header);
++
++	/* No extended attributes present */
++	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR) ||
++	    header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC)) {
++		memset((void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE, 0,
++		       new_extra_isize);
++		EXT3_I(inode)->i_extra_isize = new_extra_isize;
++		goto cleanup;
++	}
++
++	/*
++	 * Check if enough free space is available in the inode to shift the
++	 * entries ahead by new_extra_isize.
++	 */
++
++	base = start = entry;
++	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
++	min_offs = end - base;
++	last = entry;
++	total_ino = sizeof(struct ext3_xattr_ibody_header);
++
++	free = ext3_xattr_free_space(last, &min_offs, base, &total_ino);
++	if (free >= new_extra_isize) {
++		entry = IFIRST(header);
++		ext3_xattr_shift_entries(entry,	EXT3_I(inode)->i_extra_isize -
++				new_extra_isize, (void *)raw_inode +
++				EXT3_GOOD_OLD_INODE_SIZE + new_extra_isize,
++				(void *)header, total_ino,
++				inode->i_sb->s_blocksize);
++		EXT3_I(inode)->i_extra_isize = new_extra_isize;
++		error = 0;
++		goto cleanup;
++	}
++
++	/*
++	 * Enough free space isn't available in the inode, check if
++	 * EA block can hold new_extra_isize bytes.
++	 */
++	if (EXT3_I(inode)->i_file_acl) {
++		bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
++		error = -EIO;
++		if (!bh)
++			goto cleanup;
++		if (ext3_xattr_check_block(bh)) {
++			ext3_error(inode->i_sb, __FUNCTION__,
++				"inode %lu: bad block %d", inode->i_ino,
++				EXT3_I(inode)->i_file_acl);
++			error = -EIO;
++			goto cleanup;
++		}
++		base = BHDR(bh);
++		first = BFIRST(bh);
++		end = bh->b_data + bh->b_size;
++		min_offs = end - base;
++		free = ext3_xattr_free_space(first, &min_offs, base,
++					     &total_blk);
++		if (free < new_extra_isize) {
++			if (!tried_min_extra_isize && s_min_extra_isize) {
++				tried_min_extra_isize++;
++				new_extra_isize = s_min_extra_isize;
++				goto retry;
++			}
++			error = -1;
++			goto cleanup;
++		}
++	} else {
++		free = inode->i_sb->s_blocksize;
++	}
++
++	while (new_extra_isize > 0) {
++		size_t offs, size, entry_size;
++		struct ext3_xattr_entry *small_entry = NULL;
++		struct ext3_xattr_info i = {
++			.value = NULL,
++			.value_len = 0,
++		};
++		unsigned int total_size, shift_bytes, temp = ~0U;
++
++		is = (struct ext3_xattr_ibody_find *) kmalloc(sizeof(struct
++					 ext3_xattr_ibody_find), GFP_KERNEL);
++		bs = (struct ext3_xattr_block_find *) kmalloc(sizeof(struct
++					 ext3_xattr_block_find), GFP_KERNEL);
++		memset((void *)is, 0, sizeof(struct ext3_xattr_ibody_find));
++		memset((void *)bs, 0, sizeof(struct ext3_xattr_block_find));
++
++		is->s.not_found = bs->s.not_found = -ENODATA;
++		is->iloc.bh = NULL;
++		bs->bh = NULL;
++
++		last = IFIRST(header);
++		/* Find the entry best suited to be pushed into EA block */
++		entry = NULL;
++		for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
++			total_size = EXT3_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
++	    			    	EXT3_XATTR_LEN(last->e_name_len);
++			if (total_size <= free && total_size < temp) {
++				if (total_size < new_extra_isize) {
++					small_entry = last;
++				} else {
++					entry = last;
++					temp = total_size;
++				}
++			}
++		}
++
++		if (entry == NULL) {
++			if (small_entry) {
++				entry = small_entry;
++			} else {
++				if (!tried_min_extra_isize &&
++				    s_min_extra_isize) {
++					tried_min_extra_isize++;
++					new_extra_isize = s_min_extra_isize;
++					goto retry;
++				}
++				error = -1;
++				goto cleanup;
++			}
++		}
++		offs = le16_to_cpu(entry->e_value_offs);
++		size = le32_to_cpu(entry->e_value_size);
++		entry_size = EXT3_XATTR_LEN(entry->e_name_len);
++		i.name_index = entry->e_name_index,
++		buffer = kmalloc(EXT3_XATTR_SIZE(size), GFP_KERNEL);
++		b_entry_name = kmalloc(entry->e_name_len + 1, GFP_KERNEL);
++		/* Save the entry name and the entry value */
++		memcpy((void *)buffer, (void *)IFIRST(header) + offs,
++		       EXT3_XATTR_SIZE(size));
++		memcpy((void *)b_entry_name, (void *)entry->e_name,
++		       entry->e_name_len);
++		b_entry_name[entry->e_name_len] = '\0';
++		i.name = b_entry_name;
++
++		error = ext3_get_inode_loc(inode, &is->iloc);
++		if (error)
++			goto cleanup;
++
++		error = ext3_xattr_ibody_find(inode, &i, is);
++		if (error)
++			goto cleanup;
++
++		/* Remove the chosen entry from the inode */
++		error = ext3_xattr_ibody_set(handle, inode, &i, is);
++
++		entry = IFIRST(header);
++		if (entry_size + EXT3_XATTR_SIZE(size) >= new_extra_isize)
++			shift_bytes = new_extra_isize;
++		else
++			shift_bytes = entry_size + size;
++		/* Adjust the offsets and shift the remaining entries ahead */
++		ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize -
++			shift_bytes, (void *)raw_inode +
++			EXT3_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
++			(void *)header, total_ino - entry_size,
++			inode->i_sb->s_blocksize);
++
++		extra_isize += shift_bytes;
++		new_extra_isize -= shift_bytes;
++		EXT3_I(inode)->i_extra_isize = extra_isize;
++
++		i.name = b_entry_name;
++		i.value = buffer;
++		i.value_len = cpu_to_le32(size);
++		error = ext3_xattr_block_find(inode, &i, bs);
++		if (error)
++			goto cleanup;
++
++		/* Add entry which was removed from the inode into the block */
++		error = ext3_xattr_block_set(handle, inode, &i, bs);
++		if (error)
++			goto cleanup;
++	}
++
++cleanup:
++	if (b_entry_name)
++		kfree(b_entry_name);
++	if (buffer)
++		kfree(buffer);
++	if (is) {
++		brelse(is->iloc.bh);
++		kfree(is);
++	}
++	if (bs)
++		kfree(bs);
++	brelse(bh);
++	up_write(&EXT3_I(inode)->xattr_sem);
++	return error;
++}
++
++
++
+ /*
+  * ext3_xattr_delete_inode()
+  *
+Index: linux-2.6.16-sles10/fs/ext3/xattr.h
+===================================================================
+--- linux-2.6.16-sles10.orig/fs/ext3/xattr.h
++++ linux-2.6.16-sles10/fs/ext3/xattr.h
+@@ -75,6 +75,9 @@ extern int ext3_xattr_set_handle(handle_
+ extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
+ extern void ext3_xattr_put_super(struct super_block *);
+ 
++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize,
++			    struct ext3_iloc iloc, handle_t *handle);
++
+ extern int init_ext3_xattr(void);
+ extern void exit_ext3_xattr(void);
+ 
diff --git a/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch
new file mode 100644
index 0000000000..26f71acf45
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch
@@ -0,0 +1,426 @@
+Index: linux-2.6.18/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.18.orig/fs/ext3/inode.c
++++ linux-2.6.18/fs/ext3/inode.c
+@@ -2703,6 +2703,13 @@ void ext3_read_inode(struct inode * inod
+ 	EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode);
+ 	EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+ 
++	ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version);
++	if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
++		if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi))
++			ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi))
++									 << 32;
++	}
++
+ 	if (S_ISREG(inode->i_mode)) {
+ 		inode->i_op = &ext3_file_inode_operations;
+ 		inode->i_fop = &ext3_file_operations;
+@@ -2841,8 +2848,14 @@ static int ext3_do_update_inode(handle_t
+ 	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
+ 		raw_inode->i_block[block] = ei->i_data[block];
+ 
+-	if (ei->i_extra_isize)
++	raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version);
++	if (ei->i_extra_isize) {
++		if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) {
++			raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version
++									>> 32);
++		}
+ 		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
++	}
+ 
+ 	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+ 	rc = ext3_journal_dirty_metadata(handle, bh);
+@@ -3116,10 +3129,32 @@ ext3_reserve_inode_write(handle_t *handl
+ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
+ {
+ 	struct ext3_iloc iloc;
+-	int err;
++	int err, ret;
++	static int expand_message;
+ 
+ 	might_sleep();
+ 	err = ext3_reserve_inode_write(handle, inode, &iloc);
++	if (EXT3_I(inode)->i_extra_isize <
++	    EXT3_SB(inode->i_sb)->s_want_extra_isize &&
++	    !(EXT3_I(inode)->i_state & EXT3_STATE_NO_EXPAND)) {
++		/* We need extra buffer credits since we may write into EA block
++		 * with this same handle */
++		if ((ext3_journal_extend(handle,
++			     EXT3_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
++			ret = ext3_expand_extra_isize(inode,
++  					EXT3_SB(inode->i_sb)->s_want_extra_isize,
++					iloc, handle);
++			if (ret) {
++				EXT3_I(inode)->i_state |= EXT3_STATE_NO_EXPAND;
++				if (!expand_message) {
++					ext3_warning(inode->i_sb, __FUNCTION__,
++					"Unable to expand inode %lu. Delete some"
++					" EAs or run e2fsck.", inode->i_ino);
++					expand_message = 1;
++				}
++			}
++		}
++	}
+ 	if (!err)
+ 		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ 	return err;
+Index: linux-2.6.18/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.18.orig/include/linux/ext3_fs.h
++++ linux-2.6.18/include/linux/ext3_fs.h
+@@ -201,6 +201,7 @@ struct ext3_group_desc
+ #define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
+ #define EXT3_STATE_NEW			0x00000002 /* inode is newly created */
+ #define EXT3_STATE_XATTR		0x00000004 /* has in-inode xattrs */
++#define EXT3_STATE_NO_EXPAND		0x00000008 /* No space for expansion */
+ 
+ /* Used to pass group descriptor data when online resize is done */
+ struct ext3_new_group_input {
+@@ -277,7 +278,7 @@ struct ext3_inode {
+ 	__le32	i_flags;	/* File flags */
+ 	union {
+ 		struct {
+-			__u32  l_i_reserved1;
++			__u32  l_i_version;
+ 		} linux1;
+ 		struct {
+ 			__u32  h_i_translator;
+@@ -322,6 +323,7 @@ struct ext3_inode {
+ 	__le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
+ 	__le32  i_crtime;       /* File Creation time */
+ 	__le32  i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */
++	__le32	i_version_hi;	/* high 32 bits for 64-bit version */
+ };
+ 
+ #define i_size_high	i_dir_acl
+@@ -384,6 +386,8 @@ do {									       \
+ 				       raw_inode->xtime ## _extra);	       \
+ } while (0)
+ 
++#define i_disk_version osd1.linux1.l_i_version
++
+ #if defined(__KERNEL__) || defined(__linux__)
+ #define i_reserved1	osd1.linux1.l_i_reserved1
+ #define i_frag		osd2.linux2.l_i_frag
+Index: linux-2.6.18/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.6.18.orig/include/linux/ext3_fs_i.h
++++ linux-2.6.18/include/linux/ext3_fs_i.h
+@@ -21,6 +21,8 @@
+ #include <linux/seqlock.h>
+ #include <linux/mutex.h>
+ 
++#define HAVE_DISK_INODE_VERSION
++
+ /* data type for block offset of block group */
+ typedef int ext3_grpblk_t;
+ 
+@@ -147,6 +149,8 @@ struct ext3_inode_info {
+ 	struct timespec i_crtime;
+ 
+ 	void *i_filterdata;
++
++	__u64 i_fs_version;
+ };
+ 
+ #endif	/* _LINUX_EXT3_FS_I */
+Index: linux-2.6.18/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.18.orig/fs/ext3/xattr.c
++++ linux-2.6.18/fs/ext3/xattr.c
+@@ -505,6 +505,20 @@ ext3_xattr_release_block(handle_t *handl
+ 	}
+ }
+ 
++static inline size_t ext3_xattr_free_space(struct ext3_xattr_entry *last,
++				    size_t *min_offs, void *base, int *total)
++{
++	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
++		*total += EXT3_XATTR_LEN(last->e_name_len);
++		if (!last->e_value_block && last->e_value_size) {
++			size_t offs = le16_to_cpu(last->e_value_offs);
++			if (offs < *min_offs)
++				*min_offs = offs;
++		}
++	}
++	return (*min_offs - ((void *)last - base) - sizeof(__u32));
++}
++
+ struct ext3_xattr_info {
+ 	int name_index;
+ 	const char *name;
+@@ -1008,6 +1022,8 @@ ext3_xattr_set_handle(handle_t *handle, 
+ 	if (!error) {
+ 		ext3_xattr_update_super_block(handle, inode->i_sb);
+ 		inode->i_ctime = ext3_current_time(inode);
++		if (!value)
++			EXT3_I(inode)->i_state &= ~EXT3_STATE_NO_EXPAND;
+ 		error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
+ 		/*
+ 		 * The bh is consumed by ext3_mark_iloc_dirty, even with
+@@ -1060,6 +1076,249 @@ retry:
+ 	return error;
+ }
+ 
++static void ext3_xattr_shift_entries(struct ext3_xattr_entry *entry,
++				     int value_offs_shift, void *to,
++				     void *from, size_t n, int blocksize)
++{
++	struct ext3_xattr_entry *last = entry;
++	int new_offs;
++
++	/* Adjust the value offsets of the entries */
++	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
++		if (!last->e_value_block && last->e_value_size) {
++			new_offs = le16_to_cpu(last->e_value_offs) +
++							value_offs_shift;
++			BUG_ON(new_offs + le32_to_cpu(last->e_value_size) >
++			       blocksize);
++			last->e_value_offs = cpu_to_le16(new_offs);
++		}
++	}
++	/* Shift the entries by n bytes */
++	memmove(to, from, n);
++}
++
++/* Expand an inode by new_extra_isize bytes.
++ * Returns 0 on success or negative error number on failure.
++ */
++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize,
++			    struct ext3_iloc iloc, handle_t *handle)
++{
++	struct ext3_inode *raw_inode;
++	struct ext3_xattr_ibody_header *header;
++	struct ext3_xattr_entry *entry, *last, *first;
++	struct buffer_head *bh = NULL;
++	struct ext3_xattr_ibody_find *is = NULL;
++	struct ext3_xattr_block_find *bs = NULL;
++	char *buffer = NULL, *b_entry_name = NULL;
++	size_t min_offs, free;
++	int total_ino, total_blk;
++	void *base, *start, *end;
++	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
++	int s_min_extra_isize = EXT3_SB(inode->i_sb)->s_es->s_min_extra_isize;
++
++	down_write(&EXT3_I(inode)->xattr_sem);
++
++retry:
++	if (EXT3_I(inode)->i_extra_isize >= new_extra_isize) {
++		up_write(&EXT3_I(inode)->xattr_sem);
++		return 0;
++	}
++
++	raw_inode = ext3_raw_inode(&iloc);
++
++	header = IHDR(inode, raw_inode);
++	entry = IFIRST(header);
++
++	/* No extended attributes present */
++	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR) ||
++	    header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC)) {
++		memset((void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE, 0,
++		       new_extra_isize);
++		EXT3_I(inode)->i_extra_isize = new_extra_isize;
++		goto cleanup;
++	}
++
++	/*
++	 * Check if enough free space is available in the inode to shift the
++	 * entries ahead by new_extra_isize.
++	 */
++
++	base = start = entry;
++	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
++	min_offs = end - base;
++	last = entry;
++	total_ino = sizeof(struct ext3_xattr_ibody_header);
++
++	free = ext3_xattr_free_space(last, &min_offs, base, &total_ino);
++	if (free >= new_extra_isize) {
++		entry = IFIRST(header);
++		ext3_xattr_shift_entries(entry,	EXT3_I(inode)->i_extra_isize -
++				new_extra_isize, (void *)raw_inode +
++				EXT3_GOOD_OLD_INODE_SIZE + new_extra_isize,
++				(void *)header, total_ino,
++				inode->i_sb->s_blocksize);
++		EXT3_I(inode)->i_extra_isize = new_extra_isize;
++		error = 0;
++		goto cleanup;
++	}
++
++	/*
++	 * Enough free space isn't available in the inode, check if
++	 * EA block can hold new_extra_isize bytes.
++	 */
++	if (EXT3_I(inode)->i_file_acl) {
++		bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
++		error = -EIO;
++		if (!bh)
++			goto cleanup;
++		if (ext3_xattr_check_block(bh)) {
++			ext3_error(inode->i_sb, __FUNCTION__,
++				"inode %lu: bad block "E3FSBLK, inode->i_ino,
++				EXT3_I(inode)->i_file_acl);
++			error = -EIO;
++			goto cleanup;
++		}
++		base = BHDR(bh);
++		first = BFIRST(bh);
++		end = bh->b_data + bh->b_size;
++		min_offs = end - base;
++		free = ext3_xattr_free_space(first, &min_offs, base,
++					     &total_blk);
++		if (free < new_extra_isize) {
++			if (!tried_min_extra_isize && s_min_extra_isize) {
++				tried_min_extra_isize++;
++				new_extra_isize = s_min_extra_isize;
++				goto retry;
++			}
++			error = -1;
++			goto cleanup;
++		}
++	} else {
++		free = inode->i_sb->s_blocksize;
++	}
++
++	while (new_extra_isize > 0) {
++		size_t offs, size, entry_size;
++		struct ext3_xattr_entry *small_entry = NULL;
++		struct ext3_xattr_info i = {
++			.value = NULL,
++			.value_len = 0,
++		};
++		unsigned int total_size, shift_bytes, temp = ~0U;
++
++		is = (struct ext3_xattr_ibody_find *) kmalloc(sizeof(struct
++					 ext3_xattr_ibody_find), GFP_KERNEL);
++		bs = (struct ext3_xattr_block_find *) kmalloc(sizeof(struct
++					 ext3_xattr_block_find), GFP_KERNEL);
++		memset((void *)is, 0, sizeof(struct ext3_xattr_ibody_find));
++		memset((void *)bs, 0, sizeof(struct ext3_xattr_block_find));
++
++		is->s.not_found = bs->s.not_found = -ENODATA;
++		is->iloc.bh = NULL;
++		bs->bh = NULL;
++
++		last = IFIRST(header);
++		/* Find the entry best suited to be pushed into EA block */
++		entry = NULL;
++		for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
++			total_size = EXT3_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
++	    			    	EXT3_XATTR_LEN(last->e_name_len);
++			if (total_size <= free && total_size < temp) {
++				if (total_size < new_extra_isize) {
++					small_entry = last;
++				} else {
++					entry = last;
++					temp = total_size;
++				}
++			}
++		}
++
++		if (entry == NULL) {
++			if (small_entry) {
++				entry = small_entry;
++			} else {
++				if (!tried_min_extra_isize &&
++				    s_min_extra_isize) {
++					tried_min_extra_isize++;
++					new_extra_isize = s_min_extra_isize;
++					goto retry;
++				}
++				error = -1;
++				goto cleanup;
++			}
++		}
++		offs = le16_to_cpu(entry->e_value_offs);
++		size = le32_to_cpu(entry->e_value_size);
++		entry_size = EXT3_XATTR_LEN(entry->e_name_len);
++		i.name_index = entry->e_name_index,
++		buffer = kmalloc(EXT3_XATTR_SIZE(size), GFP_KERNEL);
++		b_entry_name = kmalloc(entry->e_name_len + 1, GFP_KERNEL);
++		/* Save the entry name and the entry value */
++		memcpy((void *)buffer, (void *)IFIRST(header) + offs,
++		       EXT3_XATTR_SIZE(size));
++		memcpy((void *)b_entry_name, (void *)entry->e_name,
++		       entry->e_name_len);
++		b_entry_name[entry->e_name_len] = '\0';
++		i.name = b_entry_name;
++
++		error = ext3_get_inode_loc(inode, &is->iloc);
++		if (error)
++			goto cleanup;
++
++		error = ext3_xattr_ibody_find(inode, &i, is);
++		if (error)
++			goto cleanup;
++
++		/* Remove the chosen entry from the inode */
++		error = ext3_xattr_ibody_set(handle, inode, &i, is);
++
++		entry = IFIRST(header);
++		if (entry_size + EXT3_XATTR_SIZE(size) >= new_extra_isize)
++			shift_bytes = new_extra_isize;
++		else
++			shift_bytes = entry_size + size;
++		/* Adjust the offsets and shift the remaining entries ahead */
++		ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize -
++			shift_bytes, (void *)raw_inode +
++			EXT3_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
++			(void *)header, total_ino - entry_size,
++			inode->i_sb->s_blocksize);
++
++		extra_isize += shift_bytes;
++		new_extra_isize -= shift_bytes;
++		EXT3_I(inode)->i_extra_isize = extra_isize;
++
++		i.name = b_entry_name;
++		i.value = buffer;
++		i.value_len = cpu_to_le32(size);
++		error = ext3_xattr_block_find(inode, &i, bs);
++		if (error)
++			goto cleanup;
++
++		/* Add entry which was removed from the inode into the block */
++		error = ext3_xattr_block_set(handle, inode, &i, bs);
++		if (error)
++			goto cleanup;
++	}
++
++cleanup:
++	if (b_entry_name)
++		kfree(b_entry_name);
++	if (buffer)
++		kfree(buffer);
++	if (is) {
++		brelse(is->iloc.bh);
++		kfree(is);
++	}
++	if (bs)
++		kfree(bs);
++	brelse(bh);
++	up_write(&EXT3_I(inode)->xattr_sem);
++	return error;
++}
++
++
++
+ /*
+  * ext3_xattr_delete_inode()
+  *
+Index: linux-2.6.18/fs/ext3/xattr.h
+===================================================================
+--- linux-2.6.18.orig/fs/ext3/xattr.h
++++ linux-2.6.18/fs/ext3/xattr.h
+@@ -74,6 +74,9 @@ extern int ext3_xattr_set_handle(handle_
+ extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
+ extern void ext3_xattr_put_super(struct super_block *);
+ 
++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize,
++			    struct ext3_iloc iloc, handle_t *handle);
++
+ extern int init_ext3_xattr(void);
+ extern void exit_ext3_xattr(void);
+ 
diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series b/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series
index 11f62b04fb..e9f3f1fb5a 100644
--- a/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series
+++ b/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series
@@ -12,3 +12,4 @@ ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-filterdata-2.6.15.patch
 ext3-disable-write-bar-by-default-2.6-sles10.patch
 ext3-nanosecond-2.6-sles10.patch
+ext3-inode-version-2.6-sles10.patch
diff --git a/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series b/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
index d5a8733974..350067db39 100644
--- a/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
+++ b/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series
@@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch
 ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-filterdata-2.6.15.patch
 ext3-nanosecond-2.6.18-vanilla.patch
+ext3-inode-version-2.6.18-vanilla.patch
diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series b/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series
index 3c7197b3f2..982bc45d7b 100644
--- a/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series
+++ b/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series
@@ -17,3 +17,4 @@ ext3-filterdata-sles10.patch
 ext3-disable-write-bar-by-default-2.6-sles10.patch
 ext3-uninit-2.6-sles10.patch
 ext3-nanosecond-2.6-sles10.patch
+ext3-inode-version-2.6-sles10.patch
diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series b/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series
index d5a8733974..350067db39 100644
--- a/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series
+++ b/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series
@@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch
 ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-filterdata-2.6.15.patch
 ext3-nanosecond-2.6.18-vanilla.patch
+ext3-inode-version-2.6.18-vanilla.patch
-- 
GitLab