diff --git a/lustre/kernel_patches/patches/raid5-zerocopy.patch b/lustre/kernel_patches/patches/raid5-zerocopy.patch index 5095906dfbdca8659d00da10eab46fbf93bd9675..f3c1e041c9955655ac8145fd31f58f4340ffa7b4 100644 --- a/lustre/kernel_patches/patches/raid5-zerocopy.patch +++ b/lustre/kernel_patches/patches/raid5-zerocopy.patch @@ -1,15 +1,17 @@ -diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c ---- linux-2.6.9.orig/drivers/md/raid5.c 2007-07-09 02:43:33.000000000 -0600 -+++ linux-2.6.9/drivers/md/raid5.c 2007-07-13 00:39:15.000000000 -0600 -@@ -412,6 +412,7 @@ static int raid5_end_read_request (struc +diff -pur linux-2.6.9-67.orig/drivers/md/raid5.c linux-2.6.9-67/drivers/md/raid5.c +--- linux-2.6.9-67.orig/drivers/md/raid5.c 2009-02-15 10:11:54.000000000 +0800 ++++ linux-2.6.9-67/drivers/md/raid5.c 2009-02-15 10:22:51.000000000 +0800 +@@ -412,6 +412,9 @@ static int raid5_end_read_request (struc clear_buffer_uptodate(bh); } #endif -+ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)); ++ /* Read on a Directing write is allowable */ ++ /* BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)) */ ++ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page != sh->dev[i].page); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); -@@ -450,6 +451,10 @@ static int raid5_end_write_request (stru +@@ -450,6 +453,10 @@ static int raid5_end_write_request (stru rdev_dec_pending(conf->disks[i].rdev, conf->mddev); @@ -20,7 +22,7 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); __release_stripe(conf, sh); -@@ -620,7 +625,27 @@ static sector_t compute_blocknr(struct s +@@ -620,7 +627,27 @@ static sector_t compute_blocknr(struct s return r_sector; } @@ -48,7 +50,7 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c /* * Copy data between a page in the stripe cache, and one or more bion -@@ -716,8 +741,9 @@ static void compute_parity(struct stripe +@@ -716,8 +743,9 @@ static void compute_parity(struct stripe { raid5_conf_t *conf = sh->raid_conf; int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; @@ -59,7 +61,7 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c PRINTK("compute_parity, stripe %llu, method %d\n", (unsigned long long)sh->sector, method); -@@ -744,13 +770,14 @@ static void compute_parity(struct stripe +@@ -744,13 +772,14 @@ static void compute_parity(struct stripe break; case RECONSTRUCT_WRITE: memset(ptr[0], 0, STRIPE_SIZE); @@ -75,7 +77,7 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c break; case CHECK_PARITY: break; -@@ -760,34 +787,88 @@ static void compute_parity(struct stripe +@@ -760,34 +789,90 @@ static void compute_parity(struct stripe count = 1; } @@ -105,6 +107,8 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c + page = zero_copy_data(wbi, sector); + if (page) { + atomic_inc(&conf->writes_zcopy); ++ /* The pointer must be restored whenever the LOCKED ++ * gets cleared. */ + sh->dev[i].req.bi_io_vec[0].bv_page = page; + set_bit(R5_Direct, &sh->dev[i].flags); + clear_bit(R5_UPTODATE, &sh->dev[i].flags); @@ -181,7 +185,7 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c } if (count != 1) xor_block(count, STRIPE_SIZE, ptr); -@@ -1059,13 +1140,15 @@ static void handle_stripe(struct stripe_ +@@ -1061,13 +1146,15 @@ static void handle_stripe(struct stripe_ if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) && @@ -198,7 +202,23 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) { -@@ -1831,6 +1914,7 @@ memory = conf->max_nr_stripes * (sizeof( +@@ -1337,6 +1424,15 @@ static void handle_stripe(struct stripe_ + } else { + PRINTK("skip op %ld on disc %d for sector %llu\n", + bi->bi_rw, i, (unsigned long long)sh->sector); ++ ++ if (test_bit(R5_Direct, &sh->dev[i].flags)) { ++ /* restore the page pointer of req, otherwise, ++ * no any read is permitted on this stripe, this is ++ * not what we want. -jay */ ++ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page); ++ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page; ++ } ++ + clear_bit(R5_LOCKED, &sh->dev[i].flags); + set_bit(STRIPE_HANDLE, &sh->state); + } +@@ -1835,6 +1931,7 @@ memory = conf->max_nr_stripes * (sizeof( if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -206,7 +226,7 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c /* Ok, everything is just fine now */ mddev->array_size = mddev->size * (mddev->raid_disks - 1); -@@ -1918,9 +2002,11 @@ static void status (struct seq_file *seq +@@ -1922,9 +2019,11 @@ static void status (struct seq_file *seq atomic_read(&conf->handled_in_raid5d), atomic_read(&conf->out_of_stripes), atomic_read(&conf->handle_called)); @@ -220,9 +240,9 @@ diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", atomic_read(&conf->delayed), atomic_read(&conf->active_stripes), -diff -pru linux-2.6.9.orig/include/linux/backing-dev.h linux-2.6.9/include/linux/backing-dev.h ---- linux-2.6.9.orig/include/linux/backing-dev.h 2004-10-18 15:53:46.000000000 -0600 -+++ linux-2.6.9/include/linux/backing-dev.h 2007-07-13 00:12:46.000000000 -0600 +diff -pur linux-2.6.9-67.orig/include/linux/backing-dev.h linux-2.6.9-67/include/linux/backing-dev.h +--- linux-2.6.9-67.orig/include/linux/backing-dev.h 2009-02-15 10:11:54.000000000 +0800 ++++ linux-2.6.9-67/include/linux/backing-dev.h 2009-02-15 10:22:40.000000000 +0800 @@ -30,8 +30,11 @@ struct backing_dev_info { void *congested_data; /* Pointer to aux data for congested func */ void (*unplug_io_fn)(struct backing_dev_info *, struct page *); @@ -243,9 +263,9 @@ diff -pru linux-2.6.9.orig/include/linux/backing-dev.h linux-2.6.9/include/linux + ((mapping)->backing_dev_info->capabilities & BDI_CAP_PAGE_CONST_WRITE) + #endif /* _LINUX_BACKING_DEV_H */ -diff -pru linux-2.6.9.orig/include/linux/page-flags.h linux-2.6.9/include/linux/page-flags.h ---- linux-2.6.9.orig/include/linux/page-flags.h 2004-10-18 15:54:39.000000000 -0600 -+++ linux-2.6.9/include/linux/page-flags.h 2007-07-13 00:12:46.000000000 -0600 +diff -pur linux-2.6.9-67.orig/include/linux/page-flags.h linux-2.6.9-67/include/linux/page-flags.h +--- linux-2.6.9-67.orig/include/linux/page-flags.h 2009-02-15 10:11:54.000000000 +0800 ++++ linux-2.6.9-67/include/linux/page-flags.h 2009-02-15 10:22:40.000000000 +0800 @@ -74,6 +74,7 @@ #define PG_swapcache 16 /* Swap page: swp_entry_t in private */ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ @@ -266,9 +286,9 @@ diff -pru linux-2.6.9.orig/include/linux/page-flags.h linux-2.6.9/include/linux/ struct page; /* forward declaration */ int test_clear_page_dirty(struct page *page); -diff -pru linux-2.6.9.orig/include/linux/pagemap.h linux-2.6.9/include/linux/pagemap.h ---- linux-2.6.9.orig/include/linux/pagemap.h 2004-10-18 15:53:06.000000000 -0600 -+++ linux-2.6.9/include/linux/pagemap.h 2007-07-13 00:12:46.000000000 -0600 +diff -pur linux-2.6.9-67.orig/include/linux/pagemap.h linux-2.6.9-67/include/linux/pagemap.h +--- linux-2.6.9-67.orig/include/linux/pagemap.h 2009-02-15 10:11:54.000000000 +0800 ++++ linux-2.6.9-67/include/linux/pagemap.h 2009-02-15 10:22:40.000000000 +0800 @@ -191,6 +191,19 @@ static inline void wait_on_page_writebac extern void end_page_writeback(struct page *page); @@ -289,9 +309,9 @@ diff -pru linux-2.6.9.orig/include/linux/pagemap.h linux-2.6.9/include/linux/pag /* * Fault a userspace page into pagetables. Return non-zero on a fault. * -diff -pru linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h ---- linux-2.6.9.orig/include/linux/raid/raid5.h 2007-07-09 02:43:33.000000000 -0600 -+++ linux-2.6.9/include/linux/raid/raid5.h 2007-07-13 00:39:15.000000000 -0600 +diff -pur linux-2.6.9-67.orig/include/linux/raid/raid5.h linux-2.6.9-67/include/linux/raid/raid5.h +--- linux-2.6.9-67.orig/include/linux/raid/raid5.h 2009-02-15 10:11:54.000000000 +0800 ++++ linux-2.6.9-67/include/linux/raid/raid5.h 2009-02-15 10:22:40.000000000 +0800 @@ -153,6 +153,7 @@ struct stripe_head { #define R5_Wantread 4 /* want to schedule a read */ #define R5_Wantwrite 5 @@ -309,9 +329,9 @@ diff -pru linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/ atomic_t handle_called; atomic_t delayed; atomic_t in_reqs_in_queue; -diff -pru linux-2.6.9.orig/mm/filemap.c linux-2.6.9/mm/filemap.c ---- linux-2.6.9.orig/mm/filemap.c 2007-07-09 02:43:33.000000000 -0600 -+++ linux-2.6.9/mm/filemap.c 2007-07-13 00:12:46.000000000 -0600 +diff -pur linux-2.6.9-67.orig/mm/filemap.c linux-2.6.9-67/mm/filemap.c +--- linux-2.6.9-67.orig/mm/filemap.c 2009-02-15 10:11:55.000000000 +0800 ++++ linux-2.6.9-67/mm/filemap.c 2009-02-15 10:22:40.000000000 +0800 @@ -27,6 +27,8 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> @@ -321,7 +341,7 @@ diff -pru linux-2.6.9.orig/mm/filemap.c linux-2.6.9/mm/filemap.c /* * This is needed for the following functions: * - try_to_release_page -@@ -486,11 +488,52 @@ void end_page_writeback(struct page *pag +@@ -485,11 +487,52 @@ void end_page_writeback(struct page *pag BUG(); smp_mb__after_clear_bit(); } diff --git a/lustre/kernel_patches/patches/raid6-zerocopy.patch b/lustre/kernel_patches/patches/raid6-zerocopy.patch index 95b713d465fa12a952bbb438e73f67ce514a52e6..27d7f153f90e5fbc80f99c76dcbe724d606f8dbd 100644 --- a/lustre/kernel_patches/patches/raid6-zerocopy.patch +++ b/lustre/kernel_patches/patches/raid6-zerocopy.patch @@ -1,15 +1,17 @@ -diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c ---- linux-2.6.9.orig/drivers/md/raid6main.c 2008-01-10 14:02:08.000000000 +0800 -+++ linux-2.6.9/drivers/md/raid6main.c 2008-01-10 14:01:56.000000000 +0800 -@@ -430,6 +430,7 @@ static int raid6_end_read_request (struc +diff -pur linux-2.6.9-67.orig/drivers/md/raid6main.c linux-2.6.9-67/drivers/md/raid6main.c +--- linux-2.6.9-67.orig/drivers/md/raid6main.c 2009-02-15 10:24:30.000000000 +0800 ++++ linux-2.6.9-67/drivers/md/raid6main.c 2009-02-15 10:26:17.000000000 +0800 +@@ -430,6 +430,9 @@ static int raid6_end_read_request (struc clear_buffer_uptodate(bh); } #endif -+ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)); ++ /* Read on a Directing write is allowable */ ++ /* BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)) */ ++ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page != sh->dev[i].page); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); -@@ -468,6 +469,10 @@ static int raid6_end_write_request (stru +@@ -468,6 +471,10 @@ static int raid6_end_write_request (stru rdev_dec_pending(conf->disks[i].rdev, conf->mddev); @@ -20,7 +22,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); __release_stripe(conf, sh); -@@ -664,7 +669,27 @@ static sector_t compute_blocknr(struct s +@@ -664,7 +671,27 @@ static sector_t compute_blocknr(struct s return r_sector; } @@ -48,7 +50,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma /* * Copy data between a page in the stripe cache, and one or more bion -@@ -731,6 +756,7 @@ static void compute_parity(struct stripe +@@ -731,6 +758,7 @@ static void compute_parity(struct stripe raid6_conf_t *conf = sh->raid_conf; int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count; struct bio *chosen; @@ -56,7 +58,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma /**** FIX THIS: This could be very bad if disks is close to 256 ****/ void *ptrs[disks]; -@@ -761,18 +787,46 @@ static void compute_parity(struct stripe +@@ -761,18 +789,46 @@ static void compute_parity(struct stripe BUG(); /* Not implemented yet */ } @@ -112,7 +114,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma // switch(method) { // case RECONSTRUCT_WRITE: -@@ -783,7 +837,10 @@ static void compute_parity(struct stripe +@@ -783,7 +839,10 @@ static void compute_parity(struct stripe count = 0; i = d0_idx; do { @@ -124,7 +126,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma i = raid6_next_disk(i, disks); } while ( i != d0_idx ); -@@ -1185,7 +1242,8 @@ static void handle_stripe(struct stripe_ +@@ -1185,7 +1244,8 @@ static void handle_stripe(struct stripe_ if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) && @@ -134,7 +136,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma /* We can return any write requests */ struct bio *wbi, *wbi2; PRINTK("Return write for stripe %llu disc %d\n", -@@ -1193,6 +1251,7 @@ static void handle_stripe(struct stripe_ +@@ -1193,6 +1253,7 @@ static void handle_stripe(struct stripe_ spin_lock_irq(&conf->device_lock); wbi = dev->written; dev->written = NULL; @@ -142,7 +144,23 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) { -@@ -2008,6 +2067,7 @@ static int run (mddev_t *mddev) +@@ -1503,6 +1564,15 @@ static void handle_stripe(struct stripe_ + } else { + PRINTK("skip op %ld on disc %d for sector %llu\n", + bi->bi_rw, i, (unsigned long long)sh->sector); ++ ++ if (test_bit(R5_Direct, &sh->dev[i].flags)) { ++ /* restore the page pointer of req, otherwise, ++ * no any read is permitted on this stripe, this is ++ * not what we want. -jay */ ++ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page); ++ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page; ++ } ++ + clear_bit(R5_LOCKED, &sh->dev[i].flags); + set_bit(STRIPE_HANDLE, &sh->state); + atomic_dec(&conf->delayed); +@@ -2008,6 +2078,7 @@ static int run (mddev_t *mddev) if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -150,7 +168,7 @@ diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6ma /* Ok, everything is just fine now */ mddev->array_size = mddev->size * (mddev->raid_disks - 2); -@@ -2095,9 +2155,11 @@ static void status (struct seq_file *seq +@@ -2095,9 +2166,11 @@ static void status (struct seq_file *seq atomic_read(&conf->handled_in_raid5d), atomic_read(&conf->out_of_stripes), atomic_read(&conf->handle_called));