diff --git a/lustre/kernel_patches/patches/raid5-zerocopy.patch b/lustre/kernel_patches/patches/raid5-zerocopy.patch
new file mode 100644
index 0000000000000000000000000000000000000000..09863437aceb9acafd2c7a21b16b8f6ac31ba93d
--- /dev/null
+++ b/lustre/kernel_patches/patches/raid5-zerocopy.patch
@@ -0,0 +1,374 @@
+diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c
+--- linux-2.6.9.orig/drivers/md/raid5.c	2007-07-09 02:43:33.000000000 -0600
++++ linux-2.6.9/drivers/md/raid5.c	2007-07-13 00:39:15.000000000 -0600
+@@ -412,6 +412,7 @@ static int raid5_end_read_request (struc
+ 		clear_buffer_uptodate(bh);
+ 	}
+ #endif
++	BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
+ 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ 	set_bit(STRIPE_HANDLE, &sh->state);
+ 	release_stripe(sh);
+@@ -450,6 +451,10 @@ static int raid5_end_write_request (stru
+ 
+ 	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
+ 	
++	if (test_bit(R5_Direct, &sh->dev[i].flags)) {
++		BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
++		sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
++	}
+ 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ 	set_bit(STRIPE_HANDLE, &sh->state);
+ 	__release_stripe(conf, sh);
+@@ -621,6 +626,25 @@ static sector_t compute_blocknr(struct s
+ }
+ 
+ 
++static struct page *zero_copy_data(struct bio *bio, sector_t sector)
++{
++	sector_t bi_sector = bio->bi_sector;
++	struct page *page;
++	struct bio_vec *bvl;
++	int i;
++
++	bio_for_each_segment(bvl, bio, i) {
++		if (sector > bi_sector) {
++			bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9;
++			continue;
++		}
++		BUG_ON(sector != bi_sector);
++		page = bio_iovec_idx(bio, i)->bv_page;
++		return PageConstant(page) ? page : NULL;
++	}
++	BUG();
++	return NULL;
++}
+ 
+ /*
+  * Copy data between a page in the stripe cache, and one or more bion
+@@ -716,8 +740,9 @@ static void compute_parity(struct stripe
+ {
+ 	raid5_conf_t *conf = sh->raid_conf;
+ 	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
+-	void *ptr[MAX_XOR_BLOCKS];
++	void *ptr[MAX_XOR_BLOCKS], *h_ptr[2];
+ 	struct bio *chosen;
++	struct page *page;
+ 
+ 	PRINTK("compute_parity, stripe %llu, method %d\n",
+ 		(unsigned long long)sh->sector, method);
+@@ -744,13 +769,14 @@ static void compute_parity(struct stripe
+ 		break;
+ 	case RECONSTRUCT_WRITE:
+ 		memset(ptr[0], 0, STRIPE_SIZE);
+-		for (i= disks; i-- ;)
++		for (i= disks; i-- ;) {
+ 			if (i!=pd_idx && sh->dev[i].towrite) {
+ 				chosen = sh->dev[i].towrite;
+ 				sh->dev[i].towrite = NULL;
+ 				if (sh->dev[i].written) BUG();
+ 				sh->dev[i].written = chosen;
+ 			}
++		}
+ 		break;
+ 	case CHECK_PARITY:
+ 		break;
+@@ -760,34 +786,88 @@ static void compute_parity(struct stripe
+ 		count = 1;
+ 	}
+ 	
+-	for (i = disks; i--;)
+-		if (sh->dev[i].written) {
+-			sector_t sector = sh->dev[i].sector;
+-			struct bio *wbi = sh->dev[i].written;
+-			while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+-				copy_data(1, wbi, sh->dev[i].page, sector);
+-				wbi = r5_next_bio(wbi, sector);
++	for (i = disks; i--;) {
++		struct bio *wbi = sh->dev[i].written;
++		sector_t sector;
++
++		if (!wbi)
++			continue;
++
++		sector = sh->dev[i].sector;
++		set_bit(R5_LOCKED, &sh->dev[i].flags);
++		BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
++
++		/* check if it's covered by a single page
++		   and whole stripe is written at once.
++		 * in this case we can avoid memcpy() */
++		if (!wbi->bi_next && test_bit(R5_OVERWRITE, &sh->dev[i].flags) &&
++		    test_bit(R5_Insync, &sh->dev[i].flags)) {
++			page = zero_copy_data(wbi, sector);
++			if (page) {
++				atomic_inc(&conf->writes_zcopy);
++				sh->dev[i].req.bi_io_vec[0].bv_page = page;
++				set_bit(R5_Direct, &sh->dev[i].flags);
++				clear_bit(R5_UPTODATE, &sh->dev[i].flags);
++				clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
++				continue;
+ 			}
++		}
+ 
+-			set_bit(R5_LOCKED, &sh->dev[i].flags);
+-			set_bit(R5_UPTODATE, &sh->dev[i].flags);
++		atomic_inc(&conf->writes_copied);
++		test_and_clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
++		set_bit(R5_UPTODATE, &sh->dev[i].flags);
++		while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
++			copy_data(1, wbi, sh->dev[i].page, sector);
++			wbi = r5_next_bio(wbi, sector);
+ 		}
++	}
+ 
++	h_ptr[0] = ptr[0];
+ 	switch(method) {
+ 	case RECONSTRUCT_WRITE:
+ 	case CHECK_PARITY:
+-		for (i=disks; i--;)
+-			if (i != pd_idx) {
+-				ptr[count++] = page_address(sh->dev[i].page);
+-				check_xor();
++		for (i=disks; i--;) {
++			if (i == pd_idx)
++				continue;
++			if (test_bit(R5_Direct, &sh->dev[i].flags))
++				page = sh->dev[i].req.bi_io_vec[0].bv_page;
++			else
++				page = sh->dev[i].page;
++
++			/* have to compute the parity immediately for
++			 * a highmem page. it would happen for zerocopy. -jay
++			 */
++			if (PageHighMem(page)) {
++				h_ptr[1] = kmap_atomic(page, KM_USER0);
++				xor_block(2, STRIPE_SIZE, h_ptr);
++				kunmap_atomic(h_ptr[1], KM_USER0);
++			} else {
++				ptr[count++] = page_address(page);
+ 			}
++			check_xor();
++		}
+ 		break;
+ 	case READ_MODIFY_WRITE:
+-		for (i = disks; i--;)
+-			if (sh->dev[i].written) {
+-				ptr[count++] = page_address(sh->dev[i].page);
+-				check_xor();
++		for (i = disks; i--;) {
++			if (!sh->dev[i].written)
++				continue;
++			if (test_bit(R5_Direct, &sh->dev[i].flags))
++				page = sh->dev[i].req.bi_io_vec[0].bv_page;
++			else
++				page = sh->dev[i].page;
++
++			/* have to compute the parity immediately for
++			 * a highmem page. it would happen for zerocopy. -jay
++			 */
++			if (PageHighMem(page)) {
++				h_ptr[1] = kmap_atomic(page, KM_USER0);
++				xor_block(2, STRIPE_SIZE, h_ptr);
++				kunmap_atomic(h_ptr[1], KM_USER0);
++			} else {
++				ptr[count++] = page_address(page);
+ 			}
++			check_xor();
++		}
+ 	}
+ 	if (count != 1)
+ 		xor_block(count, STRIPE_SIZE, ptr);
+@@ -1059,13 +1139,15 @@ static void handle_stripe(struct stripe_
+ 		if (sh->dev[i].written) {
+ 		    dev = &sh->dev[i];
+ 		    if (!test_bit(R5_LOCKED, &dev->flags) &&
+-			 test_bit(R5_UPTODATE, &dev->flags) ) {
++			 (test_bit(R5_UPTODATE, &dev->flags) ||
++			  	test_bit(R5_Direct, &dev->flags)) ) {
+ 			/* We can return any write requests */
+ 			    struct bio *wbi, *wbi2;
+ 			    PRINTK("Return write for disc %d\n", i);
+ 			    spin_lock_irq(&conf->device_lock);
+ 			    wbi = dev->written;
+ 			    dev->written = NULL;
++			    test_and_clear_bit(R5_Direct, &dev->flags);
+ 			    while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+ 				    wbi2 = r5_next_bio(wbi, dev->sector);
+ 				    if (--wbi->bi_phys_segments == 0) {
+@@ -1831,6 +1913,7 @@ memory = conf->max_nr_stripes * (sizeof(
+ 		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+ 			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+ 	}
++	mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONST_WRITE;
+ 
+ 	/* Ok, everything is just fine now */
+ 	mddev->array_size =  mddev->size * (mddev->raid_disks - 1);
+@@ -1918,9 +2001,11 @@ static void status (struct seq_file *seq
+ 			atomic_read(&conf->handled_in_raid5d),
+ 			atomic_read(&conf->out_of_stripes),
+ 			atomic_read(&conf->handle_called));
+-	seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",
++	seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u",
+ 			atomic_read(&conf->reads_for_rmw),
+-			atomic_read(&conf->reads_for_rcw));
++			atomic_read(&conf->reads_for_rcw),
++			atomic_read(&conf->writes_zcopy),
++			atomic_read(&conf->writes_copied));
+ 	seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n",
+ 			atomic_read(&conf->delayed),
+ 			atomic_read(&conf->active_stripes),
+diff -pru linux-2.6.9.orig/include/linux/backing-dev.h linux-2.6.9/include/linux/backing-dev.h
+--- linux-2.6.9.orig/include/linux/backing-dev.h	2004-10-18 15:53:46.000000000 -0600
++++ linux-2.6.9/include/linux/backing-dev.h	2007-07-13 00:12:46.000000000 -0600
+@@ -30,8 +30,11 @@ struct backing_dev_info {
+ 	void *congested_data;	/* Pointer to aux data for congested func */
+ 	void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
+ 	void *unplug_io_data;
++	unsigned int capabilities;
+ };
+ 
++#define BDI_CAP_PAGE_CONST_WRITE      0x00000001
++
+ extern struct backing_dev_info default_backing_dev_info;
+ void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page);
+ 
+@@ -62,4 +65,7 @@ static inline int bdi_rw_congested(struc
+ 				  (1 << BDI_write_congested));
+ }
+ 
++#define mapping_cap_page_constant_write(mapping) \
++	((mapping)->backing_dev_info->capabilities & BDI_CAP_PAGE_CONST_WRITE)
++
+ #endif		/* _LINUX_BACKING_DEV_H */
+diff -pru linux-2.6.9.orig/include/linux/page-flags.h linux-2.6.9/include/linux/page-flags.h
+--- linux-2.6.9.orig/include/linux/page-flags.h	2004-10-18 15:54:39.000000000 -0600
++++ linux-2.6.9/include/linux/page-flags.h	2007-07-13 00:12:46.000000000 -0600
+@@ -74,6 +74,7 @@
+ #define PG_swapcache		16	/* Swap page: swp_entry_t in private */
+ #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
+ #define PG_reclaim		18	/* To be reclaimed asap */
++#define PG_constant		19  /* Page contents held constant during I/O */
+ 
+ 
+ /*
+@@ -298,6 +299,11 @@ extern unsigned long __read_page_state(u
+ #define PageSwapCache(page)	0
+ #endif
+ 
++#define PageConstant(page) test_bit(PG_constant, &(page)->flags)
++#define SetPageConstant(page) set_bit(PG_constant, &(page)->flags)
++#define ClearPageConstant(page) clear_bit(PG_constant, &(page)->flags)
++#define TestSetPageConstant(page) test_and_set_bit(PG_constant, &(page)->flags)
++
+ struct page;	/* forward declaration */
+ 
+ int test_clear_page_dirty(struct page *page);
+diff -pru linux-2.6.9.orig/include/linux/pagemap.h linux-2.6.9/include/linux/pagemap.h
+--- linux-2.6.9.orig/include/linux/pagemap.h	2004-10-18 15:53:06.000000000 -0600
++++ linux-2.6.9/include/linux/pagemap.h	2007-07-13 00:12:46.000000000 -0600
+@@ -191,6 +191,19 @@ static inline void wait_on_page_writebac
+ 
+ extern void end_page_writeback(struct page *page);
+ 
++extern int set_page_constant(struct page *page);
++extern void clear_page_constant(struct page *);
++static inline int set_page_constant_lock(struct page *page)
++{
++        BUG_ON(PageLocked(page));
++        lock_page(page);
++        if (set_page_constant(page)) {
++                unlock_page(page);
++                return 1;
++        }
++        return 0;
++}
++
+ /*
+  * Fault a userspace page into pagetables.  Return non-zero on a fault.
+  *
+diff -pru linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h
+--- linux-2.6.9.orig/include/linux/raid/raid5.h	2007-07-09 02:43:33.000000000 -0600
++++ linux-2.6.9/include/linux/raid/raid5.h	2007-07-13 00:39:15.000000000 -0600
+@@ -153,6 +153,7 @@ struct stripe_head {
+ #define	R5_Wantread	4	/* want to schedule a read */
+ #define	R5_Wantwrite	5
+ #define	R5_Syncio	6	/* this io need to be accounted as resync io */
++#define	R5_Direct	7	/* use page from passed bio to avoid memcpy */
+ 
+ /*
+  * Write method
+@@ -234,6 +235,8 @@ struct raid5_private_data {
+ 	atomic_t		out_of_stripes;
+ 	atomic_t		reads_for_rmw;
+ 	atomic_t		reads_for_rcw;
++	atomic_t 		writes_zcopy;
++	atomic_t		writes_copied;
+ 	atomic_t		handle_called;
+ 	atomic_t		delayed;
+ 	atomic_t		in_reqs_in_queue;
+diff -pru linux-2.6.9.orig/mm/filemap.c linux-2.6.9/mm/filemap.c
+--- linux-2.6.9.orig/mm/filemap.c	2007-07-09 02:43:33.000000000 -0600
++++ linux-2.6.9/mm/filemap.c	2007-07-13 00:12:46.000000000 -0600
+@@ -27,6 +27,8 @@
+ #include <linux/pagevec.h>
+ #include <linux/blkdev.h>
+ #include <linux/security.h>
++#include <linux/rmap.h>
++
+ /*
+  * This is needed for the following functions:
+  *  - try_to_release_page
+@@ -486,11 +488,52 @@ void end_page_writeback(struct page *pag
+ 			BUG();
+ 		smp_mb__after_clear_bit();
+ 	}
++	clear_page_constant(page);
+ 	wake_up_page(page);
+ }
+ 
+ EXPORT_SYMBOL(end_page_writeback);
+ 
++/* Mark a page in bio to be constant, page must be locked */
++int set_page_constant(struct page *page)
++{
++	BUG_ON(!PageLocked(page));
++
++	/* If it's an anonymous page and haven't been added to swap cache, 
++	 * do it here.
++	 */
++	if (PageAnon(page) && !PageSwapCache(page))
++		return 1;
++
++	BUG_ON(!PageUptodate(page));
++
++	/* I have to clear page uptodate before trying to remove
++	 * it from user's page table because otherwise, the page may be
++	 * reinstalled by a page access which happens between try_to_unmap()
++	 * and ClearPageUptodate(). -jay
++	 */
++	ClearPageUptodate(page);
++	if (page_mapped(page) && try_to_unmap(page) != SWAP_SUCCESS) {
++		SetPageUptodate(page);
++		return 1;
++	}
++	SetPageConstant(page);
++	return 0;
++}
++
++void clear_page_constant(struct page *page)
++{
++	if (PageConstant(page)) {
++		BUG_ON(!PageLocked(page));
++		BUG_ON(PageUptodate(page));
++		ClearPageConstant(page);
++		SetPageUptodate(page);
++		unlock_page(page);
++	}
++}
++EXPORT_SYMBOL(set_page_constant);
++EXPORT_SYMBOL(clear_page_constant);
++
+ /*
+  * Get a lock on the page, assuming we need to sleep to get it.
+  *