sd_iostats-2.6-rhel4.patch: per-disk SCSI disk I/O size statistics under /proc/scsi/sd_iostats (RHEL4 2.6.9 kernel)
Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig
===================================================================
Index: linux+rhel4+chaos/drivers/scsi/Kconfig
===================================================================
--- linux+rhel4+chaos.orig/drivers/scsi/Kconfig
+++ linux+rhel4+chaos/drivers/scsi/Kconfig
@@ -61,6 +61,14 @@ config SCSI_DUMP
 	help
 	   SCSI dump support
 
+config SD_IOSTATS
+   bool "Enable SCSI disk I/O stats"
+   depends on BLK_DEV_SD
+   default y
+   ---help---
+     Collect per-disk read/write request-size histograms for SCSI disks,
+     shown under /proc/scsi/sd_iostats/.  Requires /proc file system support.
+
 config CHR_DEV_ST
 	tristate "SCSI tape support"
 	depends on SCSI
Index: linux+rhel4+chaos/drivers/scsi/sd.c
===================================================================
--- linux+rhel4+chaos.orig/drivers/scsi/sd.c
+++ linux+rhel4+chaos/drivers/scsi/sd.c
@@ -63,6 +63,38 @@
 
 #include "scsi_logging.h"
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))    /* stats are built only with both options */
+# include <linux/proc_fs.h>
+# include <linux/seq_file.h>
+
+typedef struct {                                /* one histogram bucket */
+        unsigned long long iostat_size;         /* sum of nsect over the requests counted here */
+        unsigned long long iostat_count;        /* number of requests counted here */
+} iostat_counter_t;
+
+#define IOSTAT_NCOUNTERS 16                     /* buckets per histogram; bucket i is shown as size 512<<i */
+typedef struct {                                /* statistics kept for one disk */
+        iostat_counter_t        iostat_read_histogram[IOSTAT_NCOUNTERS];
+        iostat_counter_t        iostat_write_histogram[IOSTAT_NCOUNTERS];
+        struct timeval          iostat_timeval; /* time the stats were created or last zeroed */
+} iostat_stats_t;
+
+iostat_stats_t       **sd_iostats;              /* SD_STATS pointers, indexed by scsi_disk index */
+spinlock_t             sd_iostats_lock;         /* held while a stats block is attached, zeroed or bumped */
+struct proc_dir_entry *sd_iostats_procdir;      /* the sd_iostats directory under proc_scsi, or NULL */
+char                   sd_iostats_procdir_name[] = "sd_iostats";
+
+extern void sd_iostats_init(void);
+extern void sd_iostats_init_disk(struct gendisk *);
+extern void sd_iostats_fini(void);
+extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
+#else   /* stats not configured: the hooks become empty inlines */
+static inline void sd_iostats_init(void) {}
+static inline void sd_iostats_init_disk(struct gendisk *disk) {}
+static inline void sd_iostats_fini(void) {}
+static inline void sd_iostats_bump(int disk, unsigned int nsect, int iswrite) {}
+#endif  /* CONFIG_SD_IOSTATS && CONFIG_PROC_FS */
+
 /*
  * More than enough for everybody ;)  The huge number of majors
  * is a leftover from 16bit dev_t days, we don't really need that
@@ -76,6 +108,7 @@
  */
 #define SD_MAX_DISKS	(((26 * 26) + 26 + 1) * 26)
 
+#define SD_STATS 256	/* number of slots allocated for the sd_iostats[] pointer table */
 /*
  * Time out in seconds for disks and Magneto-opticals (which are slower).
  */
@@ -278,6 +311,9 @@ static int sd_init_command(struct scsi_c
 	SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n",
 				   disk->disk_name, (unsigned long long)block));
 
+   sd_iostats_bump(scsi_disk(disk)->index, this_count,
+                   rq_data_dir(SCpnt->request) == WRITE);
+
 	/*
 	 * If we have a 1K hardware sectorsize, prevent access to single
 	 * 512 byte sectors.  In theory we could handle this - in fact
@@ -474,6 +510,7 @@ static int sd_open(struct inode *inode, 
 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
 	}
 
+   sd_iostats_init_disk(disk);
 	return 0;
 
 error_out:
@@ -500,8 +537,20 @@ static int sd_release(struct inode *inod
 
 	SCSI_LOG_HLQUEUE(3, printk("sd_release: disk=%s\n", disk->disk_name));
 
-	if (!--sdkp->openers && sdev->removable) {
-		if (scsi_block_when_processing_errors(sdev))
+	if (!--sdkp->openers) {
+		/*
+		 * Remove sd_iostats information about this disk
+		 */
+		if (sd_iostats_procdir != NULL) {
+			remove_proc_entry(disk->disk_name, sd_iostats_procdir);
+		}
+		if (sd_iostats != NULL) {
+			if (sd_iostats[sdkp->index] != NULL) {
+				kfree (sd_iostats[sdkp->index]);
+				sd_iostats[sdkp->index] = NULL;
+			}
+		}
+		if (sdev->removable && scsi_block_when_processing_errors(sdev))
 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
 	}
 
@@ -1575,6 +1624,342 @@ static void sd_shutdown(struct device *d
 	sd_sync_cache(sdp);
 }	
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+/*
+ * seq_file show handler: print one disk's read/write request-size histogram.
+ * seq->private is the gendisk that was attached to the proc entry.  One row
+ * is printed per bucket up to the highest non-empty one, then a totals row.
+ * Always returns 0; a missing disk or stats entry is reported in the output.
+ */
+static int sd_iostats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval     now;
+        struct gendisk    *disk;
+        iostat_stats_t    *stats;
+        /* all 64-bit: the histogram counters are unsigned long long */
+        unsigned long long read_len, read_len_tot, read_num, read_num_tot;
+        unsigned long long write_len, write_len_tot, write_num, write_num_tot;
+        int                i;
+        int                maxi;
+
+        if (seq == NULL || seq->private == NULL) {
+                printk(KERN_ERR "sd_iostats_seq_show: NULL disk\n");
+                BUG();
+        }
+
+        disk = seq->private;
+
+        if (scsi_disk(disk) == NULL || (disk->flags & GENHD_FL_UP) == 0) {
+                seq_printf(seq, "sd_iostats_seq_show: Device %s "
+                                "does not exist\n", disk->disk_name);
+                return 0;
+        }
+
+        if (sd_iostats == NULL) {
+                printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
+                BUG();
+        }
+
+        stats = sd_iostats[scsi_disk(disk)->index];
+        if (stats == NULL) {
+                seq_printf(seq, "sd_iostats_seq_show: sd_iostats "
+                                "entry %d does not exist\n",
+                                scsi_disk(disk)->index);
+                return 0;
+        }
+
+        do_gettimeofday(&now);
+        now.tv_sec -= stats->iostat_timeval.tv_sec;
+        now.tv_usec -= stats->iostat_timeval.tv_usec;
+        if (now.tv_usec < 0) {
+                now.tv_usec += 1000000;
+                now.tv_sec--;
+        }
+
+        /* index is cast to match %lu; this sampling races with updates */
+        seq_printf(seq, "index:        %lu   snapshot_time:         %lu.%06lu\n",
+                   (unsigned long)scsi_disk(disk)->index, now.tv_sec, now.tv_usec);
+
+        for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
+                if (stats->iostat_read_histogram[i].iostat_count != 0 ||
+                    stats->iostat_write_histogram[i].iostat_count != 0)
+                        break;
+        maxi = i;
+
+        seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
+                   "reads", "total", "writes", "total");
+
+        read_len_tot = write_len_tot = 0;
+        read_num_tot = write_num_tot = 0;
+        for (i = 0; i <= maxi; i++) {
+                read_len = stats->iostat_read_histogram[i].iostat_size;
+                read_len_tot += read_len;
+                read_num = stats->iostat_read_histogram[i].iostat_count;
+                read_num_tot += read_num;
+
+                write_len = stats->iostat_write_histogram[i].iostat_size;
+                write_len_tot += write_len;
+                write_num = stats->iostat_write_histogram[i].iostat_count;
+                write_num_tot += write_num;
+
+                seq_printf (seq, "%8d %8llu %12llu %8llu %12llu\n",
+                            512<<i, read_num, read_len, write_num, write_len);
+        }
+
+        seq_printf(seq, "%8s %8llu %12llu %8llu %12llu\n", "total",
+                   read_num_tot, read_len_tot,
+                   write_num_tot, write_len_tot);
+        return 0;
+}
+
+/* Single-record sequence: yield a dummy non-NULL token only at position 0. */
+static void *sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
+{
+        return (*pos != 0) ? NULL : (void *)1;
+}
+
+/* Advance the position and end the sequence; there is only one record. */
+static void *sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+        *pos += 1;
+        return NULL;
+}
+
+/* Nothing to release: seq_start takes no locks and allocates nothing. */
+static void sd_iostats_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static struct seq_operations sd_iostats_seqops = {     /* one-record iterator */
+        .start = sd_iostats_seq_start,
+        .next  = sd_iostats_seq_next,
+        .stop  = sd_iostats_seq_stop,
+        .show  = sd_iostats_seq_show,
+};
+
+/* Open a stats file: start the seq_file and give it the proc entry's data. */
+static int sd_iostats_seq_open(struct inode *inode, struct file *file)
+{
+        struct seq_file *sf;
+        int              err = seq_open(file, &sd_iostats_seqops);
+
+        if (err != 0)
+                return err;
+        sf = file->private_data;
+        sf->private = PDE(inode)->data;
+        return 0;
+}
+
+/* Any write zeroes this disk's stats; the slot may already be freed (NULL). */
+static ssize_t sd_iostats_seq_write(struct file *file, const char *buffer,
+                                    size_t len, loff_t *off)
+{
+        struct gendisk *disk = ((struct seq_file *)file->private_data)->private;
+        iostat_stats_t *stats;
+        unsigned long   flags;
+
+        spin_lock_irqsave (&sd_iostats_lock, flags);
+        stats = sd_iostats[scsi_disk(disk)->index];
+        if (stats != NULL) {
+                memset (stats, 0, sizeof(*stats));
+                do_gettimeofday(&stats->iostat_timeval);
+        }
+        spin_unlock_irqrestore (&sd_iostats_lock, flags);
+        return len;
+}
+
+static struct file_operations sd_iostats_proc_fops = {  /* per-disk stats file */
+        .owner   = THIS_MODULE,
+        .open    = sd_iostats_seq_open,
+        .release = seq_release,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .write   = sd_iostats_seq_write,        /* any write resets the stats */
+};
+
+extern struct proc_dir_entry *proc_scsi;
+
+/*
+ * Allocate the SD_STATS-slot table of per-disk stats pointers and create
+ * /proc/scsi/sd_iostats.  Failures are only logged, leaving stats disabled.
+ */
+void sd_iostats_init(void)
+{
+        int    i;
+
+        spin_lock_init(&sd_iostats_lock);
+
+        sd_iostats = kmalloc(SD_STATS * sizeof(*sd_iostats), GFP_KERNEL);
+        if (sd_iostats == NULL) {
+                printk(KERN_WARNING "Can't keep sd iostats: "
+                       "ENOMEM allocating stats array size %ld\n",
+                       (long)(SD_STATS * sizeof(*sd_iostats)));
+                return;
+        }
+
+        for (i = 0; i < SD_STATS; i++)
+                sd_iostats[i] = NULL;
+
+        if (proc_scsi == NULL) {
+                printk(KERN_WARNING "No access to sd iostats: "
+                       "proc_scsi is NULL\n");
+                return;
+        }
+
+        sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
+                                S_IFDIR | S_IRUGO | S_IXUGO, proc_scsi);
+        if (sd_iostats_procdir == NULL)
+                printk(KERN_WARNING "No access to sd iostats: "
+                       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
+}
+
+/*
+ * Called from sd_open(): on first use of a disk, allocate its zeroed stats
+ * block, publish it in sd_iostats[] and create its file in the stats dir.
+ */
+void sd_iostats_init_disk(struct gendisk *disk)
+{
+        struct proc_dir_entry *pde;
+        unsigned long          flags;
+        iostat_stats_t        *stats;
+
+        if (sd_iostats == NULL || sd_iostats_procdir == NULL)
+                return;
+
+        /* valid slots are 0 .. SD_STATS-1, so SD_STATS itself is rejected */
+        if (scsi_disk(disk)->index >= SD_STATS) {
+                printk(KERN_ERR "sd_iostats_init_disk: "
+                       "unexpected disk index %d(%d)\n",
+                       scsi_disk(disk)->index, SD_STATS);
+                return;
+        }
+
+        if (sd_iostats[scsi_disk(disk)->index] != NULL)
+                return;
+
+        stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+        if (stats == NULL) {
+                printk(KERN_WARNING "Can't keep %s iostats: "
+                       "ENOMEM allocating stats size %ld\n",
+                       disk->disk_name, (long)sizeof(*stats));
+                return;
+        }
+
+        memset (stats, 0, sizeof(*stats));
+        do_gettimeofday(&stats->iostat_timeval);
+
+        spin_lock_irqsave(&sd_iostats_lock, flags);
+        if (sd_iostats[scsi_disk(disk)->index] != NULL) {
+                spin_unlock_irqrestore(&sd_iostats_lock, flags);
+                kfree (stats);
+                return;
+        }
+        sd_iostats[scsi_disk(disk)->index] = stats;
+        spin_unlock_irqrestore(&sd_iostats_lock, flags);
+
+        pde = create_proc_entry(disk->disk_name, S_IRUGO | S_IWUSR,
+                                sd_iostats_procdir);
+        if (pde == NULL) {
+                printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
+                       sd_iostats_procdir_name, disk->disk_name);
+        } else {
+                pde->proc_fops = &sd_iostats_proc_fops;
+                pde->data = disk;
+        }
+}
+
+/*
+ * Format the device name for disk number disknum into buffer: "sda".."sdz"
+ * for the first 26 disks, then two-letter suffixes starting at "sdaa".
+ */
+static void sd_devname(unsigned int disknum, char *buffer)
+{
+        unsigned int hi = disknum / 26;
+        unsigned int lo = disknum % 26;
+
+        if (disknum < 26) {
+                sprintf(buffer, "sd%c", 'a' + disknum);
+                return;
+        }
+        /* beyond 26 disks the name grows a second letter */
+        sprintf(buffer, "sd%c%c", 'a' + hi - 1, 'a' + lo);
+}
+
+/*
+ * Module-wide teardown: remove every possible per-disk proc file and the
+ * sd_iostats directory, then free all per-disk stats and the table itself.
+ */
+void sd_iostats_fini(void)
+{
+        char devname[6];
+        int  slot;
+
+        if (sd_iostats_procdir != NULL) {
+                /* entries are removed by name for all SD_STATS slots */
+                for (slot = 0; slot < SD_STATS; slot++) {
+                        sd_devname(slot, devname);
+                        remove_proc_entry(devname, sd_iostats_procdir);
+                }
+
+                if (proc_scsi == NULL) {
+                        printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
+                        BUG();
+                }
+                remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
+                sd_iostats_procdir = NULL;
+        }
+
+        if (sd_iostats != NULL) {
+                for (slot = 0; slot < SD_STATS; slot++)
+                        if (sd_iostats[slot] != NULL)
+                                kfree (sd_iostats[slot]);
+                kfree(sd_iostats);
+                sd_iostats = NULL;
+        }
+}
+
+/*
+ * Called from sd_init_command() for each request: add nsect to the read or
+ * write histogram of disk.  Bucket b holds requests with 2^b <= nsect <
+ * 2^(b+1); 0 and 1 share bucket 0.  Bad input is skipped or clamped.
+ */
+void sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
+{
+        iostat_stats_t    *stats;
+        iostat_counter_t  *counter;
+        int                bucket;
+        unsigned int       tmp;
+        unsigned long      irqflags;
+
+        if (sd_iostats == NULL)
+                return;
+
+        /* no slot for this disk: skip it rather than BUG() on the I/O path */
+        if (disk < 0 || disk >= SD_STATS)
+                return;
+
+        for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
+                tmp /= 2;
+
+        /* requests too large for the histogram go into the last bucket */
+        if (bucket >= IOSTAT_NCOUNTERS)
+                bucket = IOSTAT_NCOUNTERS - 1;
+
+        spin_lock_irqsave(&sd_iostats_lock, irqflags);
+
+        stats = sd_iostats[disk];
+        if (stats != NULL) {
+                counter = iswrite ?
+                          &stats->iostat_write_histogram[bucket] :
+                          &stats->iostat_read_histogram[bucket];
+                counter->iostat_size += nsect;
+                counter->iostat_count++;
+        }
+
+        spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
+}
+#endif
+
 /**
  *	init_sd - entry point for this driver (both when built in or when
  *	a module).
@@ -1584,6 +1969,7 @@ static void sd_shutdown(struct device *d
 static int __init init_sd(void)
 {
 	int majors = 0, i;
+   int rc = 0;
 
 	SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
 
@@ -1594,7 +1980,10 @@ static int __init init_sd(void)
 	if (!majors)
 		return -ENODEV;
 
-	return scsi_register_driver(&sd_template.gendrv);
+   rc = scsi_register_driver(&sd_template.gendrv);
+   if (rc == 0)
+      sd_iostats_init();
+   return rc;
 }
 
 /**
@@ -1608,6 +1997,7 @@ static void __exit exit_sd(void)
 
 	SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
 
+   sd_iostats_fini();
 	scsi_unregister_driver(&sd_template.gendrv);
 	for (i = 0; i < SD_MAJORS; i++)
 		unregister_blkdev(sd_major(i), "sd");
Index: linux+rhel4+chaos/drivers/scsi/scsi_proc.c
===================================================================
--- linux+rhel4+chaos.orig/drivers/scsi/scsi_proc.c
+++ linux+rhel4+chaos/drivers/scsi/scsi_proc.c
@@ -38,7 +38,8 @@
 /* 4K page size, but our output routines, use some slack for overruns */
 #define PROC_BLOCK_SIZE (3*1024)
 
-static struct proc_dir_entry *proc_scsi;
+struct proc_dir_entry *proc_scsi;	/* no longer static: sd.c creates its sd_iostats directory under it */
+EXPORT_SYMBOL(proc_scsi);
 
 /* Protect sht->present and sht->proc_dir */
 static DECLARE_MUTEX(global_host_template_sem);