From 6faeeda6c372cfd63f7b4a543becf4aee497f50f Mon Sep 17 00:00:00 2001
From: yangsheng <yangsheng>
Date: Thu, 27 Mar 2008 07:15:10 +0000
Subject: [PATCH] Branch HEAD b=13397 i=adilger i=shadow Add kernel patches for
 vanilla-2.6.22.14.

---
 .../dev_read_only-2.6.22-vanilla.patch        | 142 +++++
 .../export_symbols-2.6.22-vanilla.patch       |  51 ++
 .../patches/iopen-misc-2.6.22-vanilla.patch   |  64 +++
 .../patches/sd_iostats-2.6.22-vanilla.patch   | 484 ++++++++++++++++++
 .../patches/vfs_races-2.6.22-vanilla.patch    |  58 +++
 .../series/2.6.22-vanilla.series              |  13 +
 6 files changed, 812 insertions(+)
 create mode 100644 lustre/kernel_patches/patches/dev_read_only-2.6.22-vanilla.patch
 create mode 100644 lustre/kernel_patches/patches/export_symbols-2.6.22-vanilla.patch
 create mode 100644 lustre/kernel_patches/patches/iopen-misc-2.6.22-vanilla.patch
 create mode 100644 lustre/kernel_patches/patches/sd_iostats-2.6.22-vanilla.patch
 create mode 100644 lustre/kernel_patches/patches/vfs_races-2.6.22-vanilla.patch
 create mode 100644 lustre/kernel_patches/series/2.6.22-vanilla.series

diff --git a/lustre/kernel_patches/patches/dev_read_only-2.6.22-vanilla.patch b/lustre/kernel_patches/patches/dev_read_only-2.6.22-vanilla.patch
new file mode 100644
index 0000000000..a6e7351f05
--- /dev/null
+++ b/lustre/kernel_patches/patches/dev_read_only-2.6.22-vanilla.patch
@@ -0,0 +1,142 @@
+diff -urp linux-2.6.18.1.orig/block/ll_rw_blk.c linux-2.6.18.1/block/ll_rw_blk.c
+--- linux-2.6.18.1.orig/block/ll_rw_blk.c	2006-10-14 06:34:03.000000000 +0300
++++ linux-2.6.18.1/block/ll_rw_blk.c	2007-05-29 14:50:46.000000000 +0300
+@@ -2993,6 +2993,8 @@ static void handle_bad_sector(struct bio
+ 	set_bit(BIO_EOF, &bio->bi_flags);
+ }
+ 
++int dev_check_rdonly(struct block_device *bdev);
++
+ /**
+  * generic_make_request: hand a buffer to its device driver for I/O
+  * @bio:  The bio describing the location in memory and on the device.
+@@ -3076,6 +3078,12 @@ end_io:
+ 
+ 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ 			goto end_io;
++		/* cfs's dev_rdonly check: end the write with error 0 and drop it */
++		if (bio_rw(bio) == WRITE &&	/* mask off sync/barrier flag bits */
++				dev_check_rdonly(bio->bi_bdev)) {
++			bio_endio(bio, bio->bi_size, 0);
++			break;
++		}
+ 
+ 		/*
+ 		 * If this device has partitions, remap block n
+@@ -3675,6 +3683,91 @@ void swap_io_context(struct io_context *
+ 	*ioc2 = temp;
+ }
+ EXPORT_SYMBOL(swap_io_context);
++ /*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently).  This is for filesystem crash/recovery testing.
++ */
++struct deventry {
++	dev_t dev;
++	struct deventry *next;
++};
++
++static struct deventry *devlist = NULL;
++static DEFINE_SPINLOCK(devlock);	/* protects devlist */
++
++int dev_check_rdonly(struct block_device *bdev)	/* 1 if bdev->bd_dev is on devlist, else 0 */
++{
++	struct deventry *cur;
++	if (!bdev) return 0;
++	spin_lock(&devlock);
++	cur = devlist;
++	while(cur) {
++		if (bdev->bd_dev == cur->dev) {
++			spin_unlock(&devlock);
++			return 1;
++		}
++		cur = cur->next;
++	}
++	spin_unlock(&devlock);
++	return 0;
++}
++
++void dev_set_rdonly(struct block_device *bdev)	/* add bdev->bd_dev to devlist unless present */
++{
++	struct deventry *newdev, *cur;
++
++	if (!bdev)
++		return;
++	newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL);
++	if (!newdev)	/* allocation failure is silent: device stays writable */
++		return;
++
++	spin_lock(&devlock);
++	cur = devlist;
++	while(cur) {
++		if (bdev->bd_dev == cur->dev) {
++			spin_unlock(&devlock);
++			kfree(newdev);
++			return;
++		}
++		cur = cur->next;
++	}
++	newdev->dev = bdev->bd_dev;
++	newdev->next = devlist;
++	devlist = newdev;
++	spin_unlock(&devlock);
++	printk(KERN_WARNING "Turning device %s (%#x) read-only\n",
++	       bdev->bd_disk ? bdev->bd_disk->disk_name : "", bdev->bd_dev);
++}
++
++void dev_clear_rdonly(struct block_device *bdev)	/* unlink and free bdev's devlist entry, if any */
++{
++	struct deventry *cur, *last = NULL;
++	if (!bdev) return;
++	spin_lock(&devlock);
++	cur = devlist;
++	while(cur) {
++		if (bdev->bd_dev == cur->dev) {
++			if (last)
++				last->next = cur->next;
++			else
++				devlist = cur->next;
++			spin_unlock(&devlock);
++			kfree(cur);
++			printk(KERN_WARNING "Removing read-only on %s (%#x)\n",
++			       bdev->bd_disk ? bdev->bd_disk->disk_name :
++			       "unknown block", bdev->bd_dev);
++			return;
++		}
++		last = cur;
++		cur = cur->next;
++	}
++	spin_unlock(&devlock);
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
+ 
+ /*
+  * sysfs parts below
+diff -urp linux-2.6.18.1.orig/fs/block_dev.c linux-2.6.18.1/fs/block_dev.c
+--- linux-2.6.18.1.orig/fs/block_dev.c	2006-10-14 06:34:03.000000000 +0300
++++ linux-2.6.18.1/fs/block_dev.c	2007-05-29 14:53:38.000000000 +0300
+@@ -58,6 +58,7 @@ static void kill_bdev(struct block_devic
+ {
+ 	invalidate_bdev(bdev, 1);
+ 	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
++	dev_clear_rdonly(bdev);
+ }
+ 
+ int set_blocksize(struct block_device *bdev, int size)
+diff -urp linux-2.6.18.1.orig/include/linux/fs.h linux-2.6.18.1/include/linux/fs.h
+--- linux-2.6.18.1.orig/include/linux/fs.h	2006-10-14 06:34:03.000000000 +0300
++++ linux-2.6.18.1/include/linux/fs.h	2007-05-29 14:50:46.000000000 +0300
+@@ -1632,6 +1632,10 @@ extern void file_kill(struct file *f);
+ extern void submit_bio(int, struct bio *);
+ extern int bdev_read_only(struct block_device *);
+ #endif
++#define HAVE_CLEAR_RDONLY_ON_PUT
++extern void dev_set_rdonly(struct block_device *bdev);
++extern int dev_check_rdonly(struct block_device *bdev);
++extern void dev_clear_rdonly(struct block_device *bdev);
+ extern int set_blocksize(struct block_device *, int);
+ extern int sb_set_blocksize(struct super_block *, int);
+ extern int sb_min_blocksize(struct super_block *, int);
diff --git
a/lustre/kernel_patches/patches/export_symbols-2.6.22-vanilla.patch b/lustre/kernel_patches/patches/export_symbols-2.6.22-vanilla.patch
new file mode 100644
index 0000000000..bdc49c7ffe
--- /dev/null
+++ b/lustre/kernel_patches/patches/export_symbols-2.6.22-vanilla.patch
@@ -0,0 +1,51 @@
+Index: linux-2.6/fs/filesystems.c
+===================================================================
+--- linux-2.6.orig/fs/filesystems.c	2006-07-15 16:08:35.000000000 +0800
++++ linux-2.6/fs/filesystems.c	2006-07-15 16:14:19.000000000 +0800
+@@ -29,7 +29,9 @@
+  */
+ 
+ static struct file_system_type *file_systems;
+-static DEFINE_RWLOCK(file_systems_lock);
++DEFINE_RWLOCK(file_systems_lock);	/* no longer static: exported below, declared in linux/fs.h */
++
++EXPORT_SYMBOL(file_systems_lock);
+ 
+ /* WARNING: This can be used only if we _already_ own a reference */
+ void get_filesystem(struct file_system_type *fs)
+Index: linux-2.6/include/linux/fs.h
+===================================================================
+--- linux-2.6.orig/include/linux/fs.h	2006-07-15 16:10:37.000000000 +0800
++++ linux-2.6/include/linux/fs.h	2006-07-15 16:14:19.000000000 +0800
+@@ -1768,6 +1768,7 @@ static inline ssize_t blockdev_direct_IO
+ 
+ extern const struct file_operations generic_ro_fops;
+ 
++extern rwlock_t file_systems_lock;
+ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
+ 
+ extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
+Index: linux-2.6/fs/namespace.c
+===================================================================
+--- linux-2.6.orig/fs/namespace.c	2006-07-15 16:10:33.000000000 +0800
++++ linux-2.6/fs/namespace.c	2006-07-15 16:14:19.000000000 +0800
+@@ -1641,6 +1641,7 @@ void set_fs_pwd(struct fs_struct *fs, st
+ 		mntput(old_pwdmnt);
+ 	}
+ }
++EXPORT_SYMBOL(set_fs_pwd);
+ 
+ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
+ {
+Index: linux-2.6/fs/dcache.c
+===================================================================
+--- linux-2.6.orig/fs/dcache.c	2006-07-15 16:14:00.000000000 +0800
++++ linux-2.6/fs/dcache.c	2006-07-15 16:14:19.000000000 +0800
+@@ -1628,6 +1628,7 @@ int is_subdir(struct dentry * new_dentry
+ 
+ 	return result;
+ }
++EXPORT_SYMBOL(is_subdir);
+ 
+ void d_genocide(struct dentry *root)
+ {
diff --git a/lustre/kernel_patches/patches/iopen-misc-2.6.22-vanilla.patch b/lustre/kernel_patches/patches/iopen-misc-2.6.22-vanilla.patch
new file mode 100644
index 0000000000..050bd03849
--- /dev/null
+++ b/lustre/kernel_patches/patches/iopen-misc-2.6.22-vanilla.patch
@@ -0,0 +1,64 @@
+Index: linux-2.6/Documentation/filesystems/ext2.txt
+===================================================================
+--- linux-2.6.orig/Documentation/filesystems/ext2.txt	2006-04-03 22:46:38.000000000 +0800
++++ linux-2.6/Documentation/filesystems/ext2.txt	2006-07-15 12:54:06.000000000 +0800
+@@ -58,6 +58,22 @@ nobh				Do not attach buffer_heads to fi
+ 
+ xip				Use execute in place (no caching) if possible
+ 
++iopen				Makes an invisible pseudo-directory called
++				__iopen__ available in the root directory
++				of the filesystem.  Allows open-by-inode-
++				number.  i.e., inode 3145 can be accessed
++				via /mntpt/__iopen__/3145
++
++iopen_nopriv			This option makes the iopen directory be
++				world-readable.  This may be safer since it
++				allows daemons to run as an unprivileged user,
++				however it significantly changes the security
++				model of a Unix filesystem, since previously
++				all files under a mode 700 directory were not
++				generally available even if the
++				permissions on the file itself are
++				world-readable.
++
+ grpquota,noquota,quota,usrquota	Quota options are silently ignored by ext2.
+ 
+ 
+Index: linux-2.6/fs/dcache.c
+===================================================================
+--- linux-2.6.orig/fs/dcache.c	2006-07-15 12:48:18.000000000 +0800
++++ linux-2.6/fs/dcache.c	2006-07-15 12:54:06.000000000 +0800
+@@ -1608,6 +1608,12 @@
+ 	spin_unlock(&dcache_lock);
+ }
+ 
++void __d_move(struct dentry * dentry, struct dentry * target)
++{
++	d_move_locked(dentry, target);	/* thin exported wrapper; takes no lock of its own */
++}
++EXPORT_SYMBOL(__d_move);
++
+ /*
+  * Helper that returns 1 if p1 is a parent of p2, else 0
+  */
+Index: linux-2.6/include/linux/dcache.h
+===================================================================
+--- linux-2.6.orig/include/linux/dcache.h	2006-07-15 12:48:41.000000000 +0800
++++ linux-2.6/include/linux/dcache.h	2006-07-15 12:54:06.000000000 +0800
+@@ -257,6 +257,7 @@ extern int have_submounts(struct dentry 
+  * This adds the entry to the hash queues.
+  */
+ extern void d_rehash(struct dentry *);
++extern void d_rehash_cond(struct dentry *, int lock);	/* defined by vfs_races-2.6.22-vanilla.patch */
+ 
+ /**
+  * d_add - add dentry to hash queues
+@@ -292,6 +293,7 @@ static inline struct dentry *d_add_uniqu
+ 
+ /* used for rename() and baskets */
+ extern void d_move(struct dentry *, struct dentry *);
++extern void __d_move(struct dentry *, struct dentry *);
+ 
+ /* appendix may either be NULL or be used for transname suffixes */
+ extern struct dentry * d_lookup(struct dentry *, struct qstr *);
diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6.22-vanilla.patch b/lustre/kernel_patches/patches/sd_iostats-2.6.22-vanilla.patch
new file mode 100644
index 0000000000..a16c9578fa
--- /dev/null
+++ b/lustre/kernel_patches/patches/sd_iostats-2.6.22-vanilla.patch
@@ -0,0 +1,484 @@
+Index: linux-2.6.22.5/drivers/scsi/Kconfig
+===================================================================
+--- linux-2.6.22.5.orig/drivers/scsi/Kconfig
++++ linux-2.6.22.5/drivers/scsi/Kconfig
+@@ -76,6 +76,14 @@ config BLK_DEV_SD
+ 	  In this case, do not compile the driver for your SCSI host adapter
+ 	  (below) as a module either.
+ 
++config SD_IOSTATS
++   bool "Enable SCSI disk I/O stats"
++   depends on BLK_DEV_SD
++   default y
++   ---help---
++     This enables SCSI disk I/O stats collection.  You must also enable
++     /proc file system support if you want this feature.
++
+ config CHR_DEV_ST
+ 	tristate "SCSI tape support"
+ 	depends on SCSI
+Index: linux+rhel4+chaos/drivers/scsi/sd.c
+===================================================================
+--- linux+rhel4+chaos.orig/drivers/scsi/sd.c
++++ linux+rhel4+chaos/drivers/scsi/sd.c
+@@ -63,6 +63,38 @@
+ 
+ #include "scsi_logging.h"
+ 
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++# include <linux/proc_fs.h>
++# include <linux/seq_file.h>
++
++typedef struct {
++	unsigned long long iostat_size;
++	unsigned long long iostat_count;
++} iostat_counter_t;
++
++#define IOSTAT_NCOUNTERS 16
++typedef struct {
++	iostat_counter_t	iostat_read_histogram[IOSTAT_NCOUNTERS];
++	iostat_counter_t	iostat_write_histogram[IOSTAT_NCOUNTERS];
++	struct timeval		iostat_timeval;
++} iostat_stats_t;
++
++iostat_stats_t	      **sd_iostats;
++spinlock_t		sd_iostats_lock;
++struct proc_dir_entry  *sd_iostats_procdir;
++char			sd_iostats_procdir_name[] = "sd_iostats";
++
++extern void sd_iostats_init(void);
++extern void sd_iostats_init_disk(struct gendisk *);
++extern void sd_iostats_fini(void);
++extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
++#else
++static inline void sd_iostats_init(void) {}
++static inline void sd_iostats_init_disk(struct gendisk *disk) {}
++static inline void sd_iostats_fini(void) {}
++static inline void sd_iostats_bump(int disk, unsigned int nsect, int iswrite) {}
++#endif
++
+ MODULE_AUTHOR("Eric Youngdale");
+ MODULE_DESCRIPTION("SCSI disk (sd) driver");
+ MODULE_LICENSE("GPL");
+@@ -89,6 +121,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
+ static DEFINE_IDR(sd_index_idr);
+ static DEFINE_SPINLOCK(sd_index_lock);
+ 
++#define SD_STATS 256
+ /* This semaphore is used to mediate the 0->1 reference get in the
+  * face of object destruction (i.e. we can't allow a get on an
+  * object after last put) */
+@@ -368,6 +401,9 @@ static int sd_init_command(struct scsi_c
+ 	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
+ 					(unsigned long long)block));
+ 
++	sd_iostats_bump(scsi_disk(disk)->index, this_count,
++			rq_data_dir(SCpnt->request) == WRITE);
++
+ 	/*
+ 	 * If we have a 1K hardware sectorsize, prevent access to single
+ 	 * 512 byte sectors.  In theory we could handle this - in fact
+@@ -575,6 +611,7 @@ static int sd_open(struct inode *inode, 
+ 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
+ 	}
+ 
++	sd_iostats_init_disk(disk);
+ 	return 0;
+ 
+ error_out:
+@@ -601,8 +638,20 @@ static int sd_release(struct inode *inod
+ 
+ 	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
+ 
+-	if (!--sdkp->openers && sdev->removable) {
+-		if (scsi_block_when_processing_errors(sdev))
++	if (!--sdkp->openers) {
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++		/*
++		 * Remove sd_iostats information about this disk
++		 */
++		if (sd_iostats_procdir != NULL)
++			remove_proc_entry(disk->disk_name, sd_iostats_procdir);
++		if (sd_iostats != NULL &&
++		    sdkp->index < SD_STATS) {	/* stay inside the stats table */
++			kfree (sd_iostats[sdkp->index]);
++			sd_iostats[sdkp->index] = NULL;
++		}
++#endif
++		if (sdev->removable && scsi_block_when_processing_errors(sdev))
+ 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
+ 	}
+ 
+@@ -1563,6 +1612,342 @@ static int sd_revalidate_disk(struct gen
+ 	return 0;
+ }
+ 
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++static int
++sd_iostats_seq_show(struct seq_file *seq, void *v)
++{
++	struct timeval now;
++	struct gendisk *disk;
++	iostat_stats_t *stats;
++	unsigned long long read_len;
++	unsigned long long read_len_tot;
++	unsigned long read_num;
++	unsigned long read_num_tot;
++	unsigned long long write_len;
++	unsigned long long write_len_tot;
++	unsigned long write_num;
++	unsigned long write_num_tot;
++	int i;
++	int maxi;
++
++	if (seq == NULL || seq->private == NULL) {
++		printk(KERN_ERR "sd_iostats_seq_show: NULL disk\n");
++		BUG();
++	}
++
++	disk = seq->private;
++
++	if (scsi_disk(disk) == NULL || (disk->flags & GENHD_FL_UP) == 0) {
++		seq_printf(seq, "sd_iostats_seq_show: Device %s "
++			   "does not exist\n", disk->disk_name);
++		return 0;
++	}
++
++	if (sd_iostats == NULL) {
++		printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
++		BUG();
++	}
++
++	stats = sd_iostats[scsi_disk(disk)->index];
++	if (stats == NULL) {
++		seq_printf(seq, "sd_iostats_seq_show: sd_iostats "
++			   "entry %d does not exist\n",
++			   scsi_disk(disk)->index);
++		return 0;
++	}
++
++	do_gettimeofday(&now);
++	now.tv_sec -= stats->iostat_timeval.tv_sec;
++	now.tv_usec -= stats->iostat_timeval.tv_usec;
++	if (now.tv_usec < 0) {
++		now.tv_usec += 1000000;
++		now.tv_sec--;
++	}
++
++	/* this sampling races with updates */
++	seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
++		   (unsigned long)scsi_disk(disk)->index, now.tv_sec, now.tv_usec);
++
++	for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
++		if (stats->iostat_read_histogram[i].iostat_count != 0 ||
++		    stats->iostat_write_histogram[i].iostat_count != 0)
++			break;
++	maxi = i;
++
++	seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
++		   "reads", "total", "writes", "total");
++
++	read_len_tot = write_len_tot = 0;
++	read_num_tot = write_num_tot = 0;
++	for (i = 0; i <= maxi; i++) {
++		read_len = stats->iostat_read_histogram[i].iostat_size;
++		read_len_tot += read_len;
++		read_num = stats->iostat_read_histogram[i].iostat_count;
++		read_num_tot += read_num;
++
++		write_len = stats->iostat_write_histogram[i].iostat_size;
++		write_len_tot += write_len;
++		write_num = stats->iostat_write_histogram[i].iostat_count;
++		write_num_tot += write_num;
++
++		seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
++			    512<<i, read_num, read_len, write_num, write_len);
++	}
++
++	seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n", "total",
++		   read_num_tot, read_len_tot,
++		   write_num_tot, write_len_tot);
++	return 0;
++}
++
++static void *
++sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
++{
++	return (*pos == 0) ? (void *)1 : NULL;
++}
++
++static void *
++sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
++{
++	++*pos;
++	return NULL;
++}
++
++static void
++sd_iostats_seq_stop(struct seq_file *p, void *v)
++{
++}
++
++static struct seq_operations sd_iostats_seqops = {
++	.start = sd_iostats_seq_start,
++	.stop  = sd_iostats_seq_stop,
++	.next  = sd_iostats_seq_next,
++	.show  = sd_iostats_seq_show,
++};
++
++static int
++sd_iostats_seq_open (struct inode *inode, struct file *file)
++{
++	int rc;
++
++	rc = seq_open(file, &sd_iostats_seqops);
++	if (rc != 0)
++		return rc;
++
++	((struct seq_file *)file->private_data)->private = PDE(inode)->data;
++	return 0;
++}
++
++static ssize_t
++sd_iostats_seq_write(struct file *file, const char *buffer,
++		     size_t len, loff_t *off)
++{
++	struct gendisk *disk = ((struct seq_file *)file->private_data)->private;
++	iostat_stats_t *stats;
++	unsigned long flags;
++
++	spin_lock_irqsave (&sd_iostats_lock, flags);
++	stats = sd_iostats[scsi_disk(disk)->index];
++	if (stats != NULL) {	/* slot is NULLed by sd_release on last close */
++		memset (stats, 0, sizeof(*stats));
++		do_gettimeofday(&stats->iostat_timeval);
++	}
++	spin_unlock_irqrestore (&sd_iostats_lock, flags);
++	return len;
++}
++
++static struct file_operations sd_iostats_proc_fops = {
++	.owner   = THIS_MODULE,
++	.open    = sd_iostats_seq_open,
++	.read    = seq_read,
++	.write   = sd_iostats_seq_write,
++	.llseek  = seq_lseek,
++	.release = seq_release,
++};
++
++extern struct proc_dir_entry *proc_scsi;
++
++void
++sd_iostats_init(void)
++{
++	int i;
++
++	spin_lock_init(&sd_iostats_lock);
++
++	sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
++	if (sd_iostats == NULL) {
++		printk(KERN_WARNING "Can't keep sd iostats: "
++		       "ENOMEM allocating stats array size %zu\n",
++		       SD_STATS * sizeof(iostat_stats_t *));
++		return;
++	}
++
++	for (i = 0; i < SD_STATS; i++)
++		sd_iostats[i] = NULL;
++
++	if (proc_scsi == NULL) {
++		printk(KERN_WARNING "No access to sd iostats: "
++		       "proc_scsi is NULL\n");
++		return;
++	}
++
++	sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
++					       S_IFDIR | S_IRUGO | S_IXUGO,
++					       proc_scsi);
++	if (sd_iostats_procdir == NULL) {
++		printk(KERN_WARNING "No access to sd iostats: "
++		       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
++		return;
++	}
++}
++
++void
++sd_iostats_init_disk(struct gendisk *disk)
++{
++	struct proc_dir_entry *pde;
++	unsigned long flags;
++	iostat_stats_t *stats;
++
++	if (sd_iostats == NULL ||
++	    sd_iostats_procdir == NULL)
++		return;
++
++	if (scsi_disk(disk)->index >= SD_STATS) {	/* valid slots are 0..SD_STATS-1 */
++		printk(KERN_ERR "sd_iostats_init_disk: "
++		       "unexpected disk index %d(%d)\n",
++		       scsi_disk(disk)->index, SD_STATS);
++		return;
++	}
++
++	if (sd_iostats[scsi_disk(disk)->index] != NULL)
++		return;
++
++	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
++	if (stats == NULL) {
++		printk(KERN_WARNING "Can't keep %s iostats: "
++		       "ENOMEM allocating stats size %zu\n",
++		       disk->disk_name, sizeof(*stats));
++		return;
++	}
++
++	memset (stats, 0, sizeof(*stats));
++	do_gettimeofday(&stats->iostat_timeval);
++
++	spin_lock_irqsave(&sd_iostats_lock, flags);
++
++	if (sd_iostats[scsi_disk(disk)->index] != NULL) {
++		spin_unlock_irqrestore(&sd_iostats_lock, flags);
++		kfree (stats);
++		return;
++	}
++
++	sd_iostats[scsi_disk(disk)->index] = stats;
++
++	spin_unlock_irqrestore(&sd_iostats_lock, flags);
++
++	pde = create_proc_entry(disk->disk_name, S_IRUGO | S_IWUSR,
++				sd_iostats_procdir);
++	if (pde == NULL) {
++		printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
++		       sd_iostats_procdir_name, disk->disk_name);
++	} else {
++		pde->proc_fops = &sd_iostats_proc_fops;
++		pde->data = disk;
++	}
++}
++
++static void sd_devname(unsigned int disknum, char *buffer)
++{
++	if (disknum < 26)
++		sprintf(buffer, "sd%c", 'a' + disknum);
++	else {
++		unsigned int min1;
++		unsigned int min2;
++		/*
++		 * For larger numbers of disks, we need to go to a new
++		 * naming scheme.
++		 */
++		min1 = disknum / 26;
++		min2 = disknum % 26;
++		sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
++	}
++}
++
++void
++sd_iostats_fini(void)
++{
++	char name[6];
++	int i;
++
++	if (sd_iostats_procdir != NULL) {
++		for (i = 0; i < SD_STATS; i++) {
++			sd_devname(i, name);
++			remove_proc_entry(name, sd_iostats_procdir);
++		}
++
++		if (proc_scsi == NULL) {
++			printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
++			BUG();
++		}
++		remove_proc_entry(sd_iostats_procdir_name,
++				  proc_scsi);
++
++		sd_iostats_procdir = NULL;
++	}
++
++	if (sd_iostats != NULL) {
++		for (i = 0; i < SD_STATS; i++) {
++			if (sd_iostats[i] != NULL)
++				kfree (sd_iostats[i]);
++		}
++
++		kfree(sd_iostats);
++		sd_iostats = NULL;
++	}
++}
++
++void
++sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
++{
++	iostat_stats_t *stats;
++	iostat_counter_t *counter;
++	int bucket;
++	int tmp;
++	unsigned long irqflags;
++
++	if (sd_iostats == NULL)
++		return;
++
++	/*
++	 * Disk index outside the stats table: skip accounting, don't BUG().
++	 */
++	if (disk < 0 || disk >= SD_STATS)
++		return;
++
++	for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
++		tmp /= 2;
++
++	if (bucket >= IOSTAT_NCOUNTERS) {
++		printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
++		BUG();
++	}
++
++	spin_lock_irqsave(&sd_iostats_lock, irqflags);
++
++	stats = sd_iostats[disk];
++	if (stats != NULL) {
++		counter = iswrite ?
++			&stats->iostat_write_histogram[bucket] :
++			&stats->iostat_read_histogram[bucket];
++
++		counter->iostat_size += nsect;
++		counter->iostat_count++;
++	}
++
++	spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
++}
++#endif
++
+ /**
+  *	sd_probe - called during driver initialization and whenever a
+  *	new scsi device is attached to the system. It is called once
+@@ -1854,6 +2239,7 @@ static int __init init_sd(void)
+ 	err = scsi_register_driver(&sd_template.gendrv);
+ 	if (err)
+ 		goto err_out_class;
++	sd_iostats_init();
+ 
+ 	return 0;
+ 
+@@ -1876,6 +2262,7 @@ static void __exit exit_sd(void)
+ 
+ 	SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
+ 
++	sd_iostats_fini();
+ 	scsi_unregister_driver(&sd_template.gendrv);
+ 	class_unregister(&sd_disk_class);
+ 
+Index: linux-2.6.22.5/drivers/scsi/scsi_proc.c
+===================================================================
+--- linux-2.6.22.5.orig/drivers/scsi/scsi_proc.c
++++ linux-2.6.22.5/drivers/scsi/scsi_proc.c
+@@ -40,7 +40,8 @@
+ /* 4K page size, but our output routines, use some slack for overruns */
+ #define PROC_BLOCK_SIZE (3*1024)
+ 
+-static struct proc_dir_entry *proc_scsi;
++struct proc_dir_entry *proc_scsi;
++EXPORT_SYMBOL(proc_scsi);
+ 
+ /* Protect sht->present and sht->proc_dir */
+ static DEFINE_MUTEX(global_host_template_mutex);
diff --git a/lustre/kernel_patches/patches/vfs_races-2.6.22-vanilla.patch b/lustre/kernel_patches/patches/vfs_races-2.6.22-vanilla.patch
new file mode 100644
index 0000000000..fdac939c10
--- /dev/null
+++ b/lustre/kernel_patches/patches/vfs_races-2.6.22-vanilla.patch
@@ -0,0 +1,58 @@
+Index: linux-2.6/fs/dcache.c
+===================================================================
+--- linux-2.6.orig/fs/dcache.c	2006-08-31 11:59:09.000000000 +0800
++++ linux-2.6/fs/dcache.c	2006-09-06 14:01:37.000000000 +0800
+@@ -226,6 +226,13 @@ int d_invalidate(struct dentry * dentry)
+ 		spin_unlock(&dcache_lock);
+ 		return 0;
+ 	}
++
++	/* network invalidation by Lustre */
++	if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++		spin_unlock(&dcache_lock);
++		return 0;
++	}
++
+ 	/*
+ 	 * Check whether to do a partial shrink_dcache
+ 	 * to get rid of unused child entries.
+@@ -1242,15 +1249,24 @@ static void __d_rehash(struct dentry * e
+  * Adds a dentry to the hash according to its name.
+  */
+ 
+-void d_rehash(struct dentry * entry)
++void d_rehash_cond(struct dentry * entry, int lock)
+ {
+-	spin_lock(&dcache_lock);
++	if (lock)	/* lock == 0: dcache_lock is not taken here; presumably the caller already holds it - confirm */
++		spin_lock(&dcache_lock);
+ 	spin_lock(&entry->d_lock);
+ 	_d_rehash(entry);
+ 	spin_unlock(&entry->d_lock);
+-	spin_unlock(&dcache_lock);
++	if (lock)
++		spin_unlock(&dcache_lock);
+ }
+ 
++EXPORT_SYMBOL(d_rehash_cond);
++
++void d_rehash(struct dentry * entry)
++{
++	d_rehash_cond(entry, 1);	/* plain d_rehash() still takes dcache_lock itself */
++}
++
+ #define do_switch(x,y) do { \
+ 	__typeof__ (x) __tmp = x; \
+ 	x = y; y = __tmp; } while (0)
+Index: linux-2.6/include/linux/dcache.h
+===================================================================
+--- linux-2.6.orig/include/linux/dcache.h	2006-08-31 12:00:23.000000000 +0800
++++ linux-2.6/include/linux/dcache.h	2006-09-06 14:02:36.000000000 +0800
+@@ -176,6 +176,7 @@ d_iput:		no		no		no       yes
+ 
+ #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
+ #define DCACHE_UNHASHED		0x0010
++#define DCACHE_LUSTRE_INVALID	0x0040  /* Lustre invalidated */
+ 
+ #define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
+ 
diff --git a/lustre/kernel_patches/series/2.6.22-vanilla.series b/lustre/kernel_patches/series/2.6.22-vanilla.series
new file mode 100644
index 0000000000..5e8bebc7ee
--- /dev/null
+++ b/lustre/kernel_patches/series/2.6.22-vanilla.series
@@ -0,0 +1,13 @@
+lustre_version.patch
+vfs_races-2.6.22-vanilla.patch
+i_filter_data.patch
+jbd-jcberr-2.6.18-vanilla.patch
+iopen-misc-2.6.22-vanilla.patch
+export-truncate-2.6.18-vanilla.patch
+export_symbols-2.6.22-vanilla.patch
+export-nr_free_buffer_pages.patch
+dev_read_only-2.6.22-vanilla.patch
+export-2.6.18-vanilla.patch
+8kstack-2.6.12.patch
+export-show_task-2.6.18-vanilla.patch
+sd_iostats-2.6.22-vanilla.patch
-- 
GitLab