Commit 79e1fac6 authored by Jinshan Xiong

Remove specialized patch for titech, as Scjody suggested.
parent 574014b8
In the case of full-stripe writes, don't copy data into the internal stripe cache.
This optimization reduces CPU load by roughly 30%.
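
For orientation, here is a minimal C sketch of the decision this patch adds to compute_parity(): when every data disk in the stripe has exactly one bio that fully overwrites its stripe unit (a full-stripe write), parity can be XORed directly from the bio's own pages and the memcpy() into the stripe cache is skipped. The types and names below (struct mem_bio, struct stripe_dev, compute_parity_sketch) are simplified stand-ins for illustration, not the kernel's structures.

/*
 * Illustrative sketch only: struct mem_bio and struct stripe_dev are
 * simplified stand-ins for the kernel's struct bio and struct r5dev.
 */
#include <stddef.h>
#include <string.h>

#define STRIPE_SIZE 4096
#define NDISKS      5            /* e.g. 4 data disks + 1 parity */

struct mem_bio {                 /* stand-in for struct bio */
	unsigned char *page;     /* single page covering the stripe unit */
	struct mem_bio *next;    /* bi_next chain */
	int overwrite;           /* fully overwrites the unit (R5_OVERWRITE) */
};

struct stripe_dev {              /* stand-in for struct r5dev */
	struct mem_bio *towrite;
	unsigned char cache[STRIPE_SIZE];
};

/* XOR one source page into the parity page, as xor_block() does. */
static void xor_into(unsigned char *dst, const unsigned char *src)
{
	size_t i;

	for (i = 0; i < STRIPE_SIZE; i++)
		dst[i] ^= src[i];
}

/*
 * RECONSTRUCT_WRITE parity, sketched: if every data disk has exactly
 * one bio that overwrites its whole stripe unit, XOR the bio pages
 * directly (zero-copy); otherwise fall back to copying into the cache.
 */
static void compute_parity_sketch(struct stripe_dev *dev, int pd_idx,
				  unsigned char *parity)
{
	int zerocopy = 1, i;

	for (i = 0; i < NDISKS; i++) {
		struct mem_bio *b = dev[i].towrite;

		if (i != pd_idx && (!b || b->next || !b->overwrite))
			zerocopy = 0;   /* partial write: must use the cache */
	}

	memset(parity, 0, STRIPE_SIZE);
	for (i = 0; i < NDISKS; i++) {
		if (i == pd_idx)
			continue;
		if (zerocopy) {
			/* Full-stripe write: XOR straight from the bio page. */
			xor_into(parity, dev[i].towrite->page);
		} else {
			/* Copy new data (if any) into the cache, then XOR. */
			if (dev[i].towrite)
				memcpy(dev[i].cache, dev[i].towrite->page,
				       STRIPE_SIZE);
			xor_into(parity, dev[i].cache);
		}
	}
}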
Index: linux-2.6.9/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.9.orig/include/linux/raid/raid5.h 2006-05-21 17:57:25.000000000 +0400
+++ linux-2.6.9/include/linux/raid/raid5.h 2006-05-22 00:10:04.000000000 +0400
@@ -152,6 +152,7 @@ struct stripe_head {
#define R5_Wantread 4 /* want to schedule a read */
#define R5_Wantwrite 5
#define R5_Syncio 6 /* this io need to be accounted as resync io */
+#define R5_Direct 7 /* use page from passed bio to avoid memcpy */
/*
* Write method
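
The new flag is simply one more bit in each r5dev's flags word; the raid5.c hunks below manipulate it with the kernel's test_bit()/set_bit()/clear_bit() bitops. A trivial userspace illustration of the same bit arithmetic, in plain C rather than the atomic kernel helpers:

#include <stdio.h>

#define R5_Direct 7	/* bit number from the header hunk above */

int main(void)
{
	unsigned long flags = 0;

	flags |= 1UL << R5_Direct;		/* set_bit(R5_Direct, &flags)   */
	if (flags & (1UL << R5_Direct))		/* test_bit(R5_Direct, &flags)  */
		printf("R5_Direct is set\n");
	flags &= ~(1UL << R5_Direct);		/* clear_bit(R5_Direct, &flags) */
	return 0;
}
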
Index: linux-2.6.9/drivers/md/raid5.c
===================================================================
--- linux-2.6.9.orig/drivers/md/raid5.c 2006-05-22 00:10:01.000000000 +0400
+++ linux-2.6.9/drivers/md/raid5.c 2006-05-22 00:10:04.000000000 +0400
@@ -411,6 +411,8 @@ static int raid5_end_read_request (struc
clear_buffer_uptodate(bh);
}
#endif
+ if (test_bit(R5_Direct, &sh->dev[i].flags))
+ printk("R5_Direct for READ ?!\n");
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
@@ -449,6 +451,10 @@ static int raid5_end_write_request (stru
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
+ if (test_bit(R5_Direct, &sh->dev[i].flags)) {
+ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
+ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
+ }
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
__release_stripe(conf, sh);
@@ -673,6 +679,49 @@ static void copy_data(int frombio, struc
}
}
+static struct page *zero_copy_data(struct bio *bio, sector_t sector)
+{
+ struct bio_vec *bvl;
+ int i;
+
+ for (;bio && bio->bi_sector < sector+STRIPE_SECTORS;
+ bio = r5_next_bio(bio, sector) ) {
+ int page_offset;
+ if (bio->bi_sector >= sector)
+ page_offset = (signed)(bio->bi_sector - sector) * 512;
+ else
+ page_offset = (signed)(sector - bio->bi_sector) * -512;
+ bio_for_each_segment(bvl, bio, i) {
+ int len = bio_iovec_idx(bio,i)->bv_len;
+ int clen;
+ int b_offset = 0;
+
+ if (page_offset < 0) {
+ b_offset = -page_offset;
+ page_offset += b_offset;
+ len -= b_offset;
+ }
+
+ if (len > 0 && page_offset + len > STRIPE_SIZE)
+ clen = STRIPE_SIZE - page_offset;
+ else clen = len;
+
+ if (clen > 0) {
+ BUG_ON(clen < STRIPE_SIZE);
+ /*printk(" sector %lu: page %p from index %u\n",
+ (unsigned long) sector,
+ bio_iovec_idx(bio, i)->bv_page, i);*/
+ return bio_iovec_idx(bio, i)->bv_page;
+ }
+ if (clen < len) /* hit end of page */
+ break;
+ page_offset += len;
+ }
+ }
+ BUG();
+ return NULL;
+}
+
#define check_xor() do { \
if (count == MAX_XOR_BLOCKS) { \
xor_block(count, STRIPE_SIZE, ptr); \
@@ -717,6 +766,8 @@ static void compute_parity(struct stripe
int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
void *ptr[MAX_XOR_BLOCKS];
struct bio *chosen;
+ struct page *page;
+ int zerocopy = 0;
PRINTK("compute_parity, stripe %llu, method %d\n",
(unsigned long long)sh->sector, method);
@@ -743,13 +794,17 @@ static void compute_parity(struct stripe
break;
case RECONSTRUCT_WRITE:
memset(ptr[0], 0, STRIPE_SIZE);
- for (i= disks; i-- ;)
+ zerocopy = 1;
+ for (i= disks; i-- ;) {
+ if (i != pd_idx && !sh->dev[i].towrite)
+ zerocopy = 0;
if (i!=pd_idx && sh->dev[i].towrite) {
chosen = sh->dev[i].towrite;
sh->dev[i].towrite = NULL;
if (sh->dev[i].written) BUG();
sh->dev[i].written = chosen;
}
+ }
break;
case CHECK_PARITY:
break;
@@ -759,34 +814,62 @@ static void compute_parity(struct stripe
count = 1;
}
- for (i = disks; i--;)
- if (sh->dev[i].written) {
- sector_t sector = sh->dev[i].sector;
- struct bio *wbi = sh->dev[i].written;
- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
- copy_data(1, wbi, sh->dev[i].page, sector);
- wbi = r5_next_bio(wbi, sector);
- }
+ for (i = disks; i--;) {
+ struct bio *wbi = sh->dev[i].written;
+ sector_t sector;
+
+ if (!wbi)
+ continue;
+
+ sector = sh->dev[i].sector;
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
+
+ /* check if it's covered by a single page
+ * and the whole stripe is written at once;
+ * in this case we can avoid memcpy() */
+ if (zerocopy && wbi && wbi->bi_next == NULL &&
+ test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
+ page = zero_copy_data(wbi, sector);
+ BUG_ON(PageHighMem(page));
+ sh->dev[i].req.bi_io_vec[0].bv_page = page;
+ set_bit(R5_Direct, &sh->dev[i].flags);
+ clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+ continue;
+ }
- set_bit(R5_LOCKED, &sh->dev[i].flags);
- set_bit(R5_UPTODATE, &sh->dev[i].flags);
+ set_bit(R5_UPTODATE, &sh->dev[i].flags);
+ while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+ copy_data(1, wbi, sh->dev[i].page, sector);
+ wbi = r5_next_bio(wbi, sector);
}
+ }
switch(method) {
case RECONSTRUCT_WRITE:
case CHECK_PARITY:
- for (i=disks; i--;)
- if (i != pd_idx) {
- ptr[count++] = page_address(sh->dev[i].page);
- check_xor();
- }
+ for (i=disks; i--;) {
+ if (i == pd_idx)
+ continue;
+ if (test_bit(R5_Direct, &sh->dev[i].flags))
+ page = sh->dev[i].req.bi_io_vec[0].bv_page;
+ else
+ page = sh->dev[i].page;
+ ptr[count++] = page_address(page);
+ check_xor();
+ }
break;
case READ_MODIFY_WRITE:
- for (i = disks; i--;)
- if (sh->dev[i].written) {
- ptr[count++] = page_address(sh->dev[i].page);
- check_xor();
- }
+ for (i = disks; i--;) {
+ if (!sh->dev[i].written)
+ continue;
+ if (test_bit(R5_Direct, &sh->dev[i].flags))
+ page = sh->dev[i].req.bi_io_vec[0].bv_page;
+ else
+ page = sh->dev[i].page;
+ ptr[count++] = page_address(page);
+ check_xor();
+ }
}
if (count != 1)
xor_block(count, STRIPE_SIZE, ptr);
@@ -1012,7 +1094,7 @@ static void handle_stripe(struct stripe_
dev = &sh->dev[sh->pd_idx];
if ( written &&
( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
- test_bit(R5_UPTODATE, &dev->flags))
+ (test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Direct, &dev->flags)))
|| (failed == 1 && failed_num == sh->pd_idx))
) {
/* any written block on an uptodate or failed drive can be returned.
@@ -1023,13 +1105,16 @@ static void handle_stripe(struct stripe_
if (sh->dev[i].written) {
dev = &sh->dev[i];
if (!test_bit(R5_LOCKED, &dev->flags) &&
- test_bit(R5_UPTODATE, &dev->flags) ) {
+ (test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Direct, &dev->flags)) ) {
/* We can return any write requests */
struct bio *wbi, *wbi2;
PRINTK("Return write for disc %d\n", i);
spin_lock_irq(&conf->device_lock);
wbi = dev->written;
dev->written = NULL;
+ if (test_bit(R5_Direct, &dev->flags))
+ clear_bit(R5_Direct, &dev->flags);
while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev->sector);
if (--wbi->bi_phys_segments == 0) {
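
Taken together, the raid5.c hunks manage R5_Direct across the whole I/O lifecycle: compute_parity() points the per-device request bio at the caller's page and clears R5_UPTODATE (the stripe cache page no longer mirrors what is on disk), raid5_end_write_request() restores the cache page into the request once the write completes, and handle_stripe() clears the flag when the write is returned. A schematic sketch of that lifecycle, again with simplified stand-in types rather than the kernel's struct r5dev:

#include <stdbool.h>

struct dev_sketch {
	void *cache_page;	/* sh->dev[i].page: stripe cache page */
	void *req_page;		/* req.bi_io_vec[0].bv_page: page the
				 * on-disk request actually uses */
	bool direct;		/* R5_Direct */
	bool uptodate;		/* R5_UPTODATE */
};

/* Issue side: point the disk request at the caller's page. */
static void start_direct_write(struct dev_sketch *d, void *bio_page)
{
	d->req_page = bio_page;	/* skip the memcpy into cache_page */
	d->direct   = true;
	d->uptodate = false;	/* cache_page no longer matches disk */
}

/* Completion side: put the cache page back into the request. */
static void end_direct_write(struct dev_sketch *d)
{
	if (d->direct)
		d->req_page = d->cache_page;
	/* R5_Direct itself is cleared later, in handle_stripe(),
	 * when the write is returned to the upper layer. */
}
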
lustre_version.patch
vfs_intent-2.6-rhel4.patch
vfs_races-2.6-rhel4.patch
iopen-misc-2.6-suse.patch
export_symbols-2.6-rhel4.patch
dev_read_only-2.6-suse.patch
export-log-2.6-rhel4.patch
lookup_bdev_init_intent.patch
remove-suid-2.6-suse.patch
export-show_task-2.6-vanilla.patch
sd_iostats-2.6-rhel4.patch
blkdev_tunables-2.6-suse.patch
fsprivate-2.6.patch
export_symbol_numa.patch
qsnet-rhel4-2.6.patch
linux-2.6-binutils-2.16.patch
vm-tunables-rhel4.patch
tcp-rto_proc-2.6.9.patch
raid5-stats.patch
raid5-configurable-cachesize.patch
raid5-large-io.patch
raid5-stripe-by-stripe-handling.patch
raid5-optimize-memcpy.patch
raid5-merge-ios.patch
raid5-serialize-ovelapping-reqs.patch
jbd-stats-2.6.9.patch
bitops_ext2_find_next_le_bit-2.6.patch
quota-deadlock-on-pagelock-core.patch
quota-umount-race-fix.patch
quota-deadlock-on-pagelock-ext3.patch