From d81eb8753bfae1c8d2df8a216a625ac98a82156e Mon Sep 17 00:00:00 2001 From: johann <johann> Date: Thu, 31 Jul 2008 23:06:31 +0000 Subject: [PATCH] Branch b1_8_gate b=12755,16494,16404 i=bzzz i=adilger several fixes in the sd_iostat patch: - remove the limit of 256 scsi disks - unloading/reloading the scsi low level driver triggers a kernel bug when trying to access the sd iostat file. - REQ_BLOCK_PC requests are not handled properly causing memory corruption. --- .../patches/sd_iostats-2.6-rhel4.patch | 295 +++++++----------- 1 file changed, 111 insertions(+), 184 deletions(-) diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch index 33160d9915..954c445d72 100644 --- a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch +++ b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch @@ -1,9 +1,7 @@ -Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig -=================================================================== -Index: linux-2.6.9/drivers/scsi/Kconfig +Index: linux-2.6.9-67.0.20/drivers/scsi/Kconfig =================================================================== ---- linux-2.6.9.orig/drivers/scsi/Kconfig 2007-07-23 14:19:13.000000000 +0400 -+++ linux-2.6.9/drivers/scsi/Kconfig 2007-07-26 14:16:36.000000000 +0400 +--- linux-2.6.9-67.0.20.orig/drivers/scsi/Kconfig ++++ linux-2.6.9-67.0.20/drivers/scsi/Kconfig @@ -61,6 +61,14 @@ config SCSI_DUMP help SCSI dump support @@ -19,10 +17,10 @@ Index: linux-2.6.9/drivers/scsi/Kconfig config CHR_DEV_ST tristate "SCSI tape support" depends on SCSI -Index: linux-2.6.9/drivers/scsi/scsi_proc.c +Index: linux-2.6.9-67.0.20/drivers/scsi/scsi_proc.c =================================================================== ---- linux-2.6.9.orig/drivers/scsi/scsi_proc.c 2007-03-13 02:47:28.000000000 +0300 -+++ linux-2.6.9/drivers/scsi/scsi_proc.c 2007-07-26 14:16:36.000000000 +0400 +--- linux-2.6.9-67.0.20.orig/drivers/scsi/scsi_proc.c ++++ 
linux-2.6.9-67.0.20/drivers/scsi/scsi_proc.c @@ -38,7 +38,8 @@ /* 4K page size, but our output routines, use some slack for overruns */ #define PROC_BLOCK_SIZE (3*1024) @@ -33,11 +31,11 @@ Index: linux-2.6.9/drivers/scsi/scsi_proc.c /* Protect sht->present and sht->proc_dir */ static DECLARE_MUTEX(global_host_template_sem); -Index: linux-2.6.9/drivers/scsi/sd.c +Index: linux-2.6.9-67.0.20/drivers/scsi/sd.c =================================================================== ---- linux-2.6.9.orig/drivers/scsi/sd.c 2007-03-13 02:47:27.000000000 +0300 -+++ linux-2.6.9/drivers/scsi/sd.c 2007-07-28 14:55:56.000000000 +0400 -@@ -63,6 +63,67 @@ +--- linux-2.6.9-67.0.20.orig/drivers/scsi/sd.c ++++ linux-2.6.9-67.0.20/drivers/scsi/sd.c +@@ -63,6 +63,63 @@ #include "scsi_logging.h" @@ -46,15 +44,15 @@ Index: linux-2.6.9/drivers/scsi/sd.c +# include <linux/seq_file.h> + +typedef struct { -+ unsigned long long iostat_size; -+ unsigned long long iostat_count; ++ unsigned long long iostat_size; ++ unsigned long long iostat_count; +} iostat_counter_t; + +#define IOSTAT_NCOUNTERS 16 +typedef struct { -+ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; -+ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; -+ struct timeval iostat_timeval; ++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; ++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; ++ struct timeval iostat_timeval; + + /* queue depth: how well the pipe is filled up */ + unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS]; @@ -79,24 +77,20 @@ Index: linux-2.6.9/drivers/scsi/sd.c + unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS]; + unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS]; + -+ char iostat_name[32]; -+ + /* must be the last field, as it's used to know size to be memset'ed */ -+ spinlock_t iostat_lock; -+} ____cacheline_aligned_in_smp iostat_stats_t; ++ spinlock_t iostat_lock; ++} ____cacheline_aligned_in_smp iostat_stats_t; + -+iostat_stats_t **sd_iostats; -+struct 
proc_dir_entry *sd_iostats_procdir; -+char sd_iostats_procdir_name[] = "sd_iostats"; ++struct proc_dir_entry *sd_iostats_procdir = NULL; ++char sd_iostats_procdir_name[] = "sd_iostats"; ++static struct file_operations sd_iostats_proc_fops; + +extern void sd_iostats_init(void); -+extern void sd_iostats_init_disk(struct gendisk *); +extern void sd_iostats_fini(void); +void sd_iostats_start_req(struct scsi_cmnd *SCpnt); +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt); +#else +static inline void sd_iostats_init(void) {} -+static inline void sd_iostats_init_disk(struct gendisk *disk) {} +static inline void sd_iostats_fini(void) {} +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {} +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {} @@ -105,32 +99,26 @@ Index: linux-2.6.9/drivers/scsi/sd.c /* * More than enough for everybody ;) The huge number of majors * is a leftover from 16bit dev_t days, we don't really need that -@@ -76,6 +137,7 @@ - */ - #define SD_MAX_DISKS (((26 * 26) + 26 + 1) * 26) +@@ -101,6 +158,9 @@ struct scsi_disk { + u8 write_prot; + unsigned WCE : 1; /* state of disk WCE bit */ + unsigned RCD : 1; /* state of disk RCD bit, unused */ ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ iostat_stats_t *stats; /* scsi disk statistics */ ++#endif + }; -+#define SD_STATS 256 - /* - * Time out in seconds for disks and Magneto-opticals (which are slower). - */ -@@ -278,6 +340,8 @@ static int sd_init_command(struct scsi_c - SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n", - disk->disk_name, (unsigned long long)block)); + static DEFINE_IDR(sd_index_idr); +@@ -391,6 +451,8 @@ queue: + SCpnt->allowed = SD_MAX_RETRIES; + SCpnt->timeout_per_command = timeout; + sd_iostats_start_req(SCpnt); + /* - * If we have a 1K hardware sectorsize, prevent access to single - * 512 byte sectors. 
In theory we could handle this - in fact -@@ -474,6 +538,7 @@ static int sd_open(struct inode *inode, - scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); - } - -+ sd_iostats_init_disk(disk); - return 0; - - error_out: -@@ -849,6 +914,9 @@ static void sd_rw_intr(struct scsi_cmnd + * This is the completion routine we use. This is matched in terms + * of capability to this function. +@@ -849,6 +911,9 @@ static void sd_rw_intr(struct scsi_cmnd break; } } @@ -140,7 +128,60 @@ Index: linux-2.6.9/drivers/scsi/sd.c /* * This calls the generic completion function, now that we know * how many actual sectors finished, and how many sectors we need -@@ -1575,6 +1643,481 @@ static void sd_shutdown(struct device *d +@@ -1487,6 +1552,36 @@ static int sd_probe(struct device *dev) + gd->flags |= GENHD_FL_REMOVABLE; + gd->queue = sdkp->device->request_queue; + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL); ++ if (!sdkp->stats) { ++ printk(KERN_WARNING "cannot allocate iostat structure for " ++ "%s\n", gd->disk_name); ++ } else { ++ do_gettimeofday(&sdkp->stats->iostat_timeval); ++ sdkp->stats->iostat_queue_stamp = jiffies; ++ spin_lock_init(&sdkp->stats->iostat_lock); ++ if (sd_iostats_procdir) { ++ struct proc_dir_entry *pde; ++ pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR, ++ sd_iostats_procdir); ++ if (!pde) { ++ printk(KERN_WARNING "Can't create /proc/scsi/" ++ "%s/%s\n", ++ sd_iostats_procdir_name, ++ gd->disk_name); ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } else { ++ pde->proc_fops = &sd_iostats_proc_fops; ++ pde->data = gd; ++ } ++ } else { ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } ++ } ++#endif + dev_set_drvdata(dev, sdkp); + add_disk(gd); + +@@ -1549,8 +1644,14 @@ static void scsi_disk_release(struct kre + + disk->private_data = NULL; + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ if (sdkp->stats) { ++ remove_proc_entry(disk->disk_name, 
sd_iostats_procdir); ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } ++#endif + put_disk(disk); +- + kfree(sdkp); + } + +@@ -1575,6 +1676,366 @@ static void sd_shutdown(struct device *d sd_sync_cache(sdp); } @@ -162,12 +203,7 @@ Index: linux-2.6.9/drivers/scsi/sd.c + int i; + int maxi; + -+ if (sd_iostats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n"); -+ BUG(); -+ } -+ -+ stats = sd_iostats[scsi_disk(disk)->index]; ++ stats = scsi_disk(disk)->stats; + if (stats == NULL) { + printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); + BUG(); @@ -314,7 +350,7 @@ Index: linux-2.6.9/drivers/scsi/sd.c +static int +sd_iostats_seq_open (struct inode *inode, struct file *file) +{ -+ int rc; ++ int rc; + + rc = seq_open(file, &sd_iostats_seqops); + if (rc != 0) @@ -326,11 +362,11 @@ Index: linux-2.6.9/drivers/scsi/sd.c + +static ssize_t +sd_iostats_seq_write(struct file *file, const char *buffer, -+ size_t len, loff_t *off) ++ size_t len, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct gendisk *disk = seq->private; -+ iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index]; ++ iostat_stats_t *stats = scsi_disk(disk)->stats; + unsigned long flags; + unsigned long qdepth; + @@ -360,19 +396,6 @@ Index: linux-2.6.9/drivers/scsi/sd.c +void +sd_iostats_init(void) +{ -+ int i; -+ -+ sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL); -+ if (sd_iostats == NULL) { -+ printk(KERN_WARNING "Can't keep sd iostats: " -+ "ENOMEM allocating stats array size %d\n", -+ SD_STATS * sizeof(iostat_stats_t *)); -+ return; -+ } -+ -+ for (i = 0; i < SD_STATS; i++) -+ sd_iostats[i] = NULL; -+ + if (proc_scsi == NULL) { + printk(KERN_WARNING "No access to sd iostats: " + "proc_scsi is NULL\n"); @@ -386,91 +409,15 @@ Index: linux-2.6.9/drivers/scsi/sd.c + printk(KERN_WARNING "No access to sd iostats: " + "can't create /proc/scsi/%s\n", sd_iostats_procdir_name); + return; -+ } -+} -+ -+void -+sd_iostats_init_disk(struct gendisk 
*disk) -+{ -+ struct proc_dir_entry *pde; -+ unsigned long flags; -+ iostat_stats_t *stats; -+ -+ if (sd_iostats == NULL || sd_iostats_procdir == NULL) -+ return; -+ -+ if (scsi_disk(disk)->index > SD_STATS) { -+ printk(KERN_ERR "sd_iostats_init_disk: " -+ "unexpected disk index %d(%d)\n", -+ scsi_disk(disk)->index, SD_STATS); -+ return; -+ } -+ -+ if (sd_iostats[scsi_disk(disk)->index] != NULL) -+ return; -+ -+ stats = kmalloc(sizeof(*stats), GFP_KERNEL); -+ if (stats == NULL) { -+ printk(KERN_WARNING "Can't keep %s iostats: " -+ "ENOMEM allocating stats size %d\n", -+ disk->disk_name, sizeof(*stats)); -+ return; -+ } -+ -+ memset (stats, 0, sizeof(*stats)); -+ do_gettimeofday(&stats->iostat_timeval); -+ stats->iostat_queue_stamp = jiffies; -+ spin_lock_init(&stats->iostat_lock); -+ -+ -+ spin_lock_irqsave(&stats->iostat_lock, flags); -+ -+ if (sd_iostats[scsi_disk(disk)->index] != NULL) { -+ spin_unlock_irqrestore(&stats->iostat_lock, flags); -+ kfree (stats); -+ return; -+ } -+ -+ sd_iostats[scsi_disk(disk)->index] = stats; -+ -+ spin_unlock_irqrestore(&stats->iostat_lock, flags); -+ -+ strncpy(stats->iostat_name, disk->disk_name, -+ sizeof(stats->iostat_name)-1); -+ -+ pde = create_proc_entry(stats->iostat_name, S_IRUGO | S_IWUSR, -+ sd_iostats_procdir); -+ if (pde == NULL) { -+ printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n", -+ sd_iostats_procdir_name, disk->disk_name); -+ } else { -+ pde->proc_fops = &sd_iostats_proc_fops; -+ pde->data = disk; + } +} + +void sd_iostats_fini(void) +{ -+ int i; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ for (i = 0; i < SD_STATS; i++) { -+ if (sd_iostats[i] == NULL) -+ continue; -+ if (sd_iostats_procdir != NULL) -+ remove_proc_entry(sd_iostats[i]->iostat_name, -+ sd_iostats_procdir); -+ kfree(sd_iostats[i]); -+ } -+ + if (proc_scsi != NULL && sd_iostats_procdir != NULL) + remove_proc_entry(sd_iostats_procdir_name, proc_scsi); + + sd_iostats_procdir = NULL; -+ kfree(sd_iostats); -+ sd_iostats = NULL; +} + +void 
sd_iostats_finish_req(struct scsi_cmnd *SCpnt) @@ -481,20 +428,9 @@ Index: linux-2.6.9/drivers/scsi/sd.c + int tbucket; + int tmp; + unsigned long irqflags; -+ int disk, i; -+ -+ disk = scsi_disk(rq->rq_disk)->index; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ if (disk < 0 || disk >= SD_STATS) { -+ printk(KERN_ERR "sd_iostats_bump: unexpected disk index " -+ "%d([0-%d])\n", disk, SD_STATS); -+ BUG(); -+ } ++ unsigned long i; + -+ stats = sd_iostats[disk]; ++ stats = scsi_disk(rq->rq_disk)->stats; + if (stats == NULL) + return; + @@ -519,6 +455,7 @@ Index: linux-2.6.9/drivers/scsi/sd.c + i = IOSTAT_NCOUNTERS - 1; + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp; + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp; ++ BUG_ON(stats->iostat_queue_depth == 0); + stats->iostat_queue_depth--; + + /* update seek stats. XXX: not sure about nr_sectors */ @@ -547,21 +484,10 @@ Index: linux-2.6.9/drivers/scsi/sd.c + int tbucket; + int tmp; + unsigned long irqflags; -+ int disk, i; ++ unsigned long i; + int nsect; + -+ disk = scsi_disk(rq->rq_disk)->index; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ if (disk < 0 || disk >= SD_STATS) { -+ printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n", -+ disk, SD_STATS); -+ BUG(); -+ } -+ -+ stats = sd_iostats[disk]; ++ stats = scsi_disk(rq->rq_disk)->stats; + if (stats == NULL) + return; + @@ -622,31 +548,32 @@ Index: linux-2.6.9/drivers/scsi/sd.c /** * init_sd - entry point for this driver (both when built in or when * a module). 
-@@ -1584,6 +2127,7 @@ static void sd_shutdown(struct device *d +@@ -1584,6 +2045,7 @@ static void sd_shutdown(struct device *d static int __init init_sd(void) { int majors = 0, i; -+ int rc = 0; ++ int rc = 0; SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); -@@ -1594,7 +2138,10 @@ static int __init init_sd(void) +@@ -1594,7 +2056,11 @@ static int __init init_sd(void) if (!majors) return -ENODEV; - return scsi_register_driver(&sd_template.gendrv); -+ rc = scsi_register_driver(&sd_template.gendrv); -+ if (rc == 0) -+ sd_iostats_init(); -+ return rc; ++ sd_iostats_init(); ++ rc = scsi_register_driver(&sd_template.gendrv); ++ if (rc) ++ sd_iostats_fini(); ++ return rc; } /** -@@ -1608,6 +2155,7 @@ static void __exit exit_sd(void) - - SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); - -+ sd_iostats_fini(); +@@ -1611,6 +2077,7 @@ static void __exit exit_sd(void) scsi_unregister_driver(&sd_template.gendrv); for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); ++ sd_iostats_fini(); + } + + MODULE_LICENSE("GPL"); -- GitLab