From 1c98dbe32e317ee1b6b3403811eb07eeb383a594 Mon Sep 17 00:00:00 2001 From: johann <johann> Date: Thu, 31 Jul 2008 23:06:32 +0000 Subject: [PATCH] Branch b1_8_gate b=12755,16494,16404 i=bzzz i=adilger several fixes in the sd_iostat patch: - remove the limit of 256 scsi disks - unloading/reloading the scsi low level driver triggers a kernel bug when trying to access the sd iostat file. - REQ_BLOCK_PC requests are not handled properly causing memory corruption. --- .../patches/sd_iostats-2.6-rhel5.patch | 333 +++++++----------- 1 file changed, 132 insertions(+), 201 deletions(-) diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch index e38e22af27..d0cc6f62ec 100644 --- a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch +++ b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch @@ -1,12 +1,10 @@ -Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig -=================================================================== -Index: linux-2.6.9/drivers/scsi/Kconfig +Index: linux-2.6.18-53.1.21/drivers/scsi/Kconfig =================================================================== ---- linux-2.6.9.orig/drivers/scsi/Kconfig 2007-07-23 14:19:13.000000000 +0400 -+++ linux-2.6.9/drivers/scsi/Kconfig 2007-07-26 14:16:36.000000000 +0400 -@@ -61,6 +61,14 @@ config SCSI_DUMP - help - SCSI dump support +--- linux-2.6.18-53.1.21.orig/drivers/scsi/Kconfig ++++ linux-2.6.18-53.1.21/drivers/scsi/Kconfig +@@ -66,6 +66,14 @@ config BLK_DEV_SD + In this case, do not compile the driver for your SCSI host adapter + (below) as a module either. +config SD_IOSTATS + bool "Enable SCSI disk I/O stats" @@ -19,11 +17,11 @@ Index: linux-2.6.9/drivers/scsi/Kconfig config CHR_DEV_ST tristate "SCSI tape support" depends on SCSI -Index: linux-2.6.9/drivers/scsi/scsi_proc.c +Index: linux-2.6.18-53.1.21/drivers/scsi/scsi_proc.c =================================================================== ---- linux-2.6.9.orig/drivers/scsi/scsi_proc.c 2007-03-13 02:47:28.000000000 +0300 -+++ linux-2.6.9/drivers/scsi/scsi_proc.c 2007-07-26 14:16:36.000000000 +0400 -@@ -38,7 +38,8 @@ +--- linux-2.6.18-53.1.21.orig/drivers/scsi/scsi_proc.c ++++ linux-2.6.18-53.1.21/drivers/scsi/scsi_proc.c +@@ -40,7 +40,8 @@ /* 4K page size, but our output routines, use some slack for overruns */ #define PROC_BLOCK_SIZE (3*1024) @@ -32,12 +30,12 @@ Index: linux-2.6.9/drivers/scsi/scsi_proc.c +EXPORT_SYMBOL(proc_scsi); /* Protect sht->present and sht->proc_dir */ - static DECLARE_MUTEX(global_host_template_sem); -Index: linux-2.6.9/drivers/scsi/sd.c + static DEFINE_MUTEX(global_host_template_mutex); +Index: linux-2.6.18-53.1.21/drivers/scsi/sd.c =================================================================== ---- linux-2.6.9.orig/drivers/scsi/sd.c 2007-03-13 02:47:27.000000000 +0300 -+++ linux-2.6.9/drivers/scsi/sd.c 2007-07-28 14:55:56.000000000 +0400 -@@ -63,6 +63,67 @@ +--- linux-2.6.18-53.1.21.orig/drivers/scsi/sd.c ++++ linux-2.6.18-53.1.21/drivers/scsi/sd.c +@@ -62,6 +62,63 @@ #include "scsi_logging.h" @@ -46,15 +44,15 @@ Index: linux-2.6.9/drivers/scsi/sd.c +# include <linux/seq_file.h> + +typedef struct { -+ unsigned long long iostat_size; -+ unsigned long long iostat_count; ++ unsigned long long iostat_size; ++ unsigned long long iostat_count; +} iostat_counter_t; + +#define IOSTAT_NCOUNTERS 16 +typedef struct { -+ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; -+ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; -+ struct timeval iostat_timeval; ++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; ++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; ++ struct timeval iostat_timeval; + + /* queue depth: how well the pipe is filled up */ + unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS]; @@ -79,24 +77,20 @@ Index: linux-2.6.9/drivers/scsi/sd.c + unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS]; + unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS]; + -+ char iostat_name[32]; -+ + /* must be the last field, as it's used to know size to be memset'ed */ -+ spinlock_t iostat_lock; -+} ____cacheline_aligned_in_smp iostat_stats_t; ++ spinlock_t iostat_lock; ++} ____cacheline_aligned_in_smp iostat_stats_t; + -+iostat_stats_t **sd_iostats; -+struct proc_dir_entry *sd_iostats_procdir; -+char sd_iostats_procdir_name[] = "sd_iostats"; ++struct proc_dir_entry *sd_iostats_procdir = NULL; ++char sd_iostats_procdir_name[] = "sd_iostats"; ++static struct file_operations sd_iostats_proc_fops; + +extern void sd_iostats_init(void); -+extern void sd_iostats_init_disk(struct gendisk *); +extern void sd_iostats_fini(void); +void sd_iostats_start_req(struct scsi_cmnd *SCpnt); +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt); +#else +static inline void sd_iostats_init(void) {} -+static inline void sd_iostats_init_disk(struct gendisk *disk) {} +static inline void sd_iostats_fini(void) {} +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {} +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {} @@ -105,42 +99,73 @@ Index: linux-2.6.9/drivers/scsi/sd.c /* * More than enough for everybody ;) The huge number of majors * is a leftover from 16bit dev_t days, we don't really need that -@@ -76,6 +137,7 @@ - */ - #define SD_MAX_DISKS (((26 * 26) + 26 + 1) * 26) +@@ -126,6 +183,9 @@ struct scsi_disk { + unsigned WCE : 1; /* state of disk WCE bit */ + unsigned RCD : 1; /* state of disk RCD bit, unused */ + unsigned DPOFUA : 1; /* state of disk DPOFUA bit */ ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ iostat_stats_t *stats; /* scsi disk statistics */ ++#endif + }; + #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev) -+#define SD_STATS 256 - /* - * Time out in seconds for disks and Magneto-opticals (which are slower). - */ -@@ -278,6 +340,8 @@ static int sd_init_command(struct scsi_c - SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n", - disk->disk_name, (unsigned long long)block)); +@@ -557,6 +617,8 @@ static int sd_init_command(struct scsi_c + */ + SCpnt->done = sd_rw_intr; + sd_iostats_start_req(SCpnt); + /* - * If we have a 1K hardware sectorsize, prevent access to single - * 512 byte sectors. In theory we could handle this - in fact -@@ -474,6 +538,7 @@ static int sd_open(struct inode *inode, - scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); - } - -+ sd_iostats_init_disk(disk); - return 0; - - error_out: -@@ -849,6 +914,7 @@ static void sd_rw_intr(struct scsi_cmnd + * This indicates that the command is ready from our end to be + * queued. +@@ -1040,6 +1102,7 @@ static void sd_rw_intr(struct scsi_cmnd break; } out: + sd_iostats_finish_req(SCpnt); scsi_io_completion(SCpnt, good_bytes); } - -@@ -1575,6 +1643,481 @@ static void sd_shutdown(struct device *d - sd_sync_cache(sdp); - } + +@@ -1735,6 +1798,36 @@ static int sd_probe(struct device *dev) + if (sdp->removable) + gd->flags |= GENHD_FL_REMOVABLE; + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL); ++ if (!sdkp->stats) { ++ printk(KERN_WARNING "cannot allocate iostat structure for" ++ "%s\n", gd->disk_name); ++ } else { ++ do_gettimeofday(&sdkp->stats->iostat_timeval); ++ sdkp->stats->iostat_queue_stamp = jiffies; ++ spin_lock_init(&sdkp->stats->iostat_lock); ++ if (sd_iostats_procdir) { ++ struct proc_dir_entry *pde; ++ pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR, ++ sd_iostats_procdir); ++ if (!pde) { ++ printk(KERN_WARNING "Can't create /proc/scsi/" ++ "%s/%s\n", ++ sd_iostats_procdir_name, ++ gd->disk_name); ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } else { ++ pde->proc_fops = &sd_iostats_proc_fops; ++ pde->data = gd; ++ } ++ } else { ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } ++ } ++#endif + dev_set_drvdata(dev, sdkp); + add_disk(gd); + +@@ -1778,6 +1871,366 @@ static int sd_remove(struct device *dev) + return 0; + } +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) +static int @@ -160,12 +185,7 @@ Index: linux-2.6.9/drivers/scsi/sd.c + int i; + int maxi; + -+ if (sd_iostats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n"); -+ BUG(); -+ } -+ -+ stats = sd_iostats[scsi_disk(disk)->index]; ++ stats = scsi_disk(disk)->stats; + if (stats == NULL) { + printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); + BUG(); @@ -312,7 +332,7 @@ Index: linux-2.6.9/drivers/scsi/sd.c +static int +sd_iostats_seq_open (struct inode *inode, struct file *file) +{ -+ int rc; ++ int rc; + + rc = seq_open(file, &sd_iostats_seqops); + if (rc != 0) @@ -324,11 +344,11 @@ Index: linux-2.6.9/drivers/scsi/sd.c + +static ssize_t +sd_iostats_seq_write(struct file *file, const char *buffer, -+ size_t len, loff_t *off) ++ size_t len, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct gendisk *disk = seq->private; -+ iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index]; ++ iostat_stats_t *stats = scsi_disk(disk)->stats; + unsigned long flags; + unsigned long qdepth; + @@ -358,19 +378,6 @@ Index: linux-2.6.9/drivers/scsi/sd.c +void +sd_iostats_init(void) +{ -+ int i; -+ -+ sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL); -+ if (sd_iostats == NULL) { -+ printk(KERN_WARNING "Can't keep sd iostats: " -+ "ENOMEM allocating stats array size %d\n", -+ SD_STATS * sizeof(iostat_stats_t *)); -+ return; -+ } -+ -+ for (i = 0; i < SD_STATS; i++) -+ sd_iostats[i] = NULL; -+ + if (proc_scsi == NULL) { + printk(KERN_WARNING "No access to sd iostats: " + "proc_scsi is NULL\n"); @@ -378,97 +385,21 @@ Index: linux-2.6.9/drivers/scsi/sd.c + } + + sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name, -+ S_IFDIR | S_IRUGO | S_IXUGO, -+ proc_scsi); ++ S_IFDIR | S_IRUGO | S_IXUGO, ++ proc_scsi); + if (sd_iostats_procdir == NULL) { + printk(KERN_WARNING "No access to sd iostats: " + "can't create /proc/scsi/%s\n", sd_iostats_procdir_name); + return; -+ } -+} -+ -+void -+sd_iostats_init_disk(struct gendisk *disk) -+{ -+ struct proc_dir_entry *pde; -+ unsigned long flags; -+ iostat_stats_t *stats; -+ -+ if (sd_iostats == NULL || sd_iostats_procdir == NULL) -+ return; -+ -+ if (scsi_disk(disk)->index > SD_STATS) { -+ printk(KERN_ERR "sd_iostats_init_disk: " -+ "unexpected disk index %d(%d)\n", -+ scsi_disk(disk)->index, SD_STATS); -+ return; -+ } -+ -+ if (sd_iostats[scsi_disk(disk)->index] != NULL) -+ return; -+ -+ stats = kmalloc(sizeof(*stats), GFP_KERNEL); -+ if (stats == NULL) { -+ printk(KERN_WARNING "Can't keep %s iostats: " -+ "ENOMEM allocating stats size %d\n", -+ disk->disk_name, sizeof(*stats)); -+ return; -+ } -+ -+ memset (stats, 0, sizeof(*stats)); -+ do_gettimeofday(&stats->iostat_timeval); -+ stats->iostat_queue_stamp = jiffies; -+ spin_lock_init(&stats->iostat_lock); -+ -+ -+ spin_lock_irqsave(&stats->iostat_lock, flags); -+ -+ if (sd_iostats[scsi_disk(disk)->index] != NULL) { -+ spin_unlock_irqrestore(&stats->iostat_lock, flags); -+ kfree (stats); -+ return; -+ } -+ -+ sd_iostats[scsi_disk(disk)->index] = stats; -+ -+ spin_unlock_irqrestore(&stats->iostat_lock, flags); -+ -+ strncpy(stats->iostat_name, disk->disk_name, -+ sizeof(stats->iostat_name)-1); -+ -+ pde = create_proc_entry(stats->iostat_name, S_IRUGO | S_IWUSR, -+ sd_iostats_procdir); -+ if (pde == NULL) { -+ printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n", -+ sd_iostats_procdir_name, disk->disk_name); -+ } else { -+ pde->proc_fops = &sd_iostats_proc_fops; -+ pde->data = disk; + } +} + +void sd_iostats_fini(void) +{ -+ int i; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ for (i = 0; i < SD_STATS; i++) { -+ if (sd_iostats[i] == NULL) -+ continue; -+ if (sd_iostats_procdir != NULL) -+ remove_proc_entry(sd_iostats[i]->iostat_name, -+ sd_iostats_procdir); -+ kfree(sd_iostats[i]); -+ } -+ + if (proc_scsi != NULL && sd_iostats_procdir != NULL) + remove_proc_entry(sd_iostats_procdir_name, proc_scsi); + + sd_iostats_procdir = NULL; -+ kfree(sd_iostats); -+ sd_iostats = NULL; +} + +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) @@ -479,31 +410,20 @@ Index: linux-2.6.9/drivers/scsi/sd.c + int tbucket; + int tmp; + unsigned long irqflags; -+ int disk, i; -+ -+ disk = scsi_disk(rq->rq_disk)->index; -+ -+ if (sd_iostats == NULL) -+ return; ++ unsigned long i; + -+ if (disk < 0 || disk >= SD_STATS) { -+ printk(KERN_ERR "sd_iostats_bump: unexpected disk index " -+ "%d([0-%d])\n", disk, SD_STATS); -+ BUG(); -+ } -+ -+ stats = sd_iostats[disk]; ++ stats = scsi_disk(rq->rq_disk)->stats; + if (stats == NULL) + return; + -+ tmp = jiffies - rq->start_time; ++ tmp = jiffies - rq->start_time; + for (tbucket = 0; tmp > 1; tbucket++) + tmp >>= 1; + if (tbucket >= IOSTAT_NCOUNTERS) + tbucket = IOSTAT_NCOUNTERS - 1; + //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket); + -+ tcounter = rq_data_dir(rq) == WRITE ? ++ tcounter = rq_data_dir(rq) == WRITE ? + &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket]; + + spin_lock_irqsave(&stats->iostat_lock, irqflags); @@ -517,13 +437,14 @@ Index: linux-2.6.9/drivers/scsi/sd.c + i = IOSTAT_NCOUNTERS - 1; + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp; + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp; ++ BUG_ON(stats->iostat_queue_depth == 0); + stats->iostat_queue_depth--; + + /* update seek stats. XXX: not sure about nr_sectors */ + stats->iostat_sectors += rq->nr_sectors; + stats->iostat_reqs++; + if (rq->sector != stats->iostat_next_sector) { -+ stats->iostat_seek_sectors += ++ stats->iostat_seek_sectors += + rq->sector > stats->iostat_next_sector ? + rq->sector - stats->iostat_next_sector : + stats->iostat_next_sector - rq->sector; @@ -545,21 +466,10 @@ Index: linux-2.6.9/drivers/scsi/sd.c + int tbucket; + int tmp; + unsigned long irqflags; -+ int disk, i; ++ unsigned long i; + int nsect; + -+ disk = scsi_disk(rq->rq_disk)->index; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ if (disk < 0 || disk >= SD_STATS) { -+ printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n", -+ disk, SD_STATS); -+ BUG(); -+ } -+ -+ stats = sd_iostats[disk]; ++ stats = scsi_disk(rq->rq_disk)->stats; + if (stats == NULL) + return; + @@ -572,7 +482,7 @@ Index: linux-2.6.9/drivers/scsi/sd.c + BUG(); + } + -+ counter = rq_data_dir(rq) == WRITE ? ++ counter = rq_data_dir(rq) == WRITE ? + &stats->iostat_write_histogram[bucket] : + &stats->iostat_read_histogram[bucket]; + @@ -618,33 +528,54 @@ Index: linux-2.6.9/drivers/scsi/sd.c +#endif + /** - * init_sd - entry point for this driver (both when built in or when - * a module). -@@ -1584,6 +2127,7 @@ static void sd_shutdown(struct device *d + * scsi_disk_release - Called to free the scsi_disk structure + * @cdev: pointer to embedded class device +@@ -1796,10 +2249,16 @@ static void scsi_disk_release(struct cla + idr_remove(&sd_index_idr, sdkp->index); + spin_unlock(&sd_index_lock); + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ if (sdkp->stats) { ++ remove_proc_entry(disk->disk_name, sd_iostats_procdir); ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } ++#endif + disk->private_data = NULL; + put_disk(disk); + put_device(&sdkp->device->sdev_gendev); +- + kfree(sdkp); + } + +@@ -1907,6 +2366,7 @@ done: static int __init init_sd(void) { int majors = 0, i; -+ int rc = 0; ++ int rc = 0; SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); -@@ -1594,7 +2138,10 @@ static int __init init_sd(void) +@@ -1917,9 +2377,13 @@ static int __init init_sd(void) if (!majors) return -ENODEV; ++ sd_iostats_init(); + class_register(&sd_disk_class); + - return scsi_register_driver(&sd_template.gendrv); -+ rc = scsi_register_driver(&sd_template.gendrv); -+ if (rc == 0) -+ sd_iostats_init(); -+ return rc; ++ rc = scsi_register_driver(&sd_template.gendrv); ++ if (rc) ++ sd_iostats_fini(); ++ return rc; } /** -@@ -1608,6 +2155,7 @@ static void __exit exit_sd(void) +@@ -1938,6 +2402,7 @@ static void __exit exit_sd(void) + unregister_blkdev(sd_major(i), "sd"); - SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); + class_unregister(&sd_disk_class); ++ sd_iostats_fini(); + } -+ sd_iostats_fini(); - scsi_unregister_driver(&sd_template.gendrv); - for (i = 0; i < SD_MAJORS; i++) - unregister_blkdev(sd_major(i), "sd"); + module_init(init_sd); -- GitLab