From 006d37aec7ec9ad6394eb11b92528815447f7f9a Mon Sep 17 00:00:00 2001
From: alex <alex>
Date: Thu, 18 Oct 2007 06:53:00 +0000
Subject: [PATCH] b=12797 i=adilger i=eeb

 - improved sd iostats:
  * time requests spend in elevator's queue
  * time requests are served by disk
  * seek stats
  * queue depth stats
---
 .../patches/sd_iostats-2.6-rhel4.patch        | 766 +++++++++++-------
 1 file changed, 460 insertions(+), 306 deletions(-)

diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch
index 4e06c09ff7..33160d9915 100644
--- a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch
+++ b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch
@@ -1,9 +1,9 @@
 Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig
 ===================================================================
-Index: linux+rhel4+chaos/drivers/scsi/Kconfig
+Index: linux-2.6.9/drivers/scsi/Kconfig
 ===================================================================
---- linux+rhel4+chaos.orig/drivers/scsi/Kconfig
-+++ linux+rhel4+chaos/drivers/scsi/Kconfig
+--- linux-2.6.9.orig/drivers/scsi/Kconfig	2007-07-23 14:19:13.000000000 +0400
++++ linux-2.6.9/drivers/scsi/Kconfig	2007-07-26 14:16:36.000000000 +0400
 @@ -61,6 +61,14 @@ config SCSI_DUMP
  	help
  	   SCSI dump support
@@ -19,11 +19,25 @@ Index: linux+rhel4+chaos/drivers/scsi/Kconfig
  config CHR_DEV_ST
  	tristate "SCSI tape support"
  	depends on SCSI
-Index: linux+rhel4+chaos/drivers/scsi/sd.c
+Index: linux-2.6.9/drivers/scsi/scsi_proc.c
 ===================================================================
---- linux+rhel4+chaos.orig/drivers/scsi/sd.c
-+++ linux+rhel4+chaos/drivers/scsi/sd.c
-@@ -63,6 +63,38 @@
+--- linux-2.6.9.orig/drivers/scsi/scsi_proc.c	2007-03-13 02:47:28.000000000 +0300
++++ linux-2.6.9/drivers/scsi/scsi_proc.c	2007-07-26 14:16:36.000000000 +0400
+@@ -38,7 +38,8 @@
+ /* 4K page size, but our output routines, use some slack for overruns */
+ #define PROC_BLOCK_SIZE (3*1024)
+ 
+-static struct proc_dir_entry *proc_scsi;
++struct proc_dir_entry *proc_scsi;
++EXPORT_SYMBOL(proc_scsi);
+ 
+ /* Protect sht->present and sht->proc_dir */
+ static DECLARE_MUTEX(global_host_template_sem);
+Index: linux-2.6.9/drivers/scsi/sd.c
+===================================================================
+--- linux-2.6.9.orig/drivers/scsi/sd.c	2007-03-13 02:47:27.000000000 +0300
++++ linux-2.6.9/drivers/scsi/sd.c	2007-07-28 14:55:56.000000000 +0400
+@@ -63,6 +63,67 @@
  
  #include "scsi_logging.h"
  
@@ -41,28 +55,57 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
 +        iostat_counter_t        iostat_read_histogram[IOSTAT_NCOUNTERS];
 +        iostat_counter_t        iostat_write_histogram[IOSTAT_NCOUNTERS];
 +        struct timeval          iostat_timeval;
-+} iostat_stats_t;
++
++	/* queue depth: how well the pipe is filled up */
++	unsigned long long	iostat_queue_ticks[IOSTAT_NCOUNTERS];
++	unsigned long long	iostat_queue_ticks_sum;
++	unsigned long		iostat_queue_depth;
++	unsigned long		iostat_queue_stamp;
++
++	/* seeks: how linear the traffic is */
++	unsigned long long	iostat_next_sector;
++	unsigned long long	iostat_seek_sectors;
++	unsigned long long	iostat_seeks;
++	unsigned long long	iostat_sectors;
++	unsigned long long	iostat_reqs;
++	unsigned long		iostat_read_reqs;
++	unsigned long		iostat_write_reqs;
++
++	/* process time: how long it takes to process requests */
++	unsigned long		iostat_rtime[IOSTAT_NCOUNTERS];
++	unsigned long		iostat_wtime[IOSTAT_NCOUNTERS];
++
++	/* queue time: how long requests spent in elevator's queue */
++	unsigned long		iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
++	unsigned long		iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
++
++	char			iostat_name[32];
++
++	/* must be the last field: sd_iostats_seq_write() zeroes everything before it */
++	spinlock_t              iostat_lock;
++}  ____cacheline_aligned_in_smp iostat_stats_t;
 +
 +iostat_stats_t       **sd_iostats;
-+spinlock_t             sd_iostats_lock;
 +struct proc_dir_entry *sd_iostats_procdir;
 +char                   sd_iostats_procdir_name[] = "sd_iostats";
 +
 +extern void sd_iostats_init(void);
 +extern void sd_iostats_init_disk(struct gendisk *);
 +extern void sd_iostats_fini(void);
-+extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
++void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
 +#else
 +static inline void sd_iostats_init(void) {}
 +static inline void sd_iostats_init_disk(struct gendisk *disk) {}
 +static inline void sd_iostats_fini(void) {}
-+static inline void sd_iostats_bump(int disk, unsigned int nsect, int iswrite) {}
++static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
++static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
 +#endif
 +
  /*
   * More than enough for everybody ;)  The huge number of majors
   * is a leftover from 16bit dev_t days, we don't really need that
-@@ -76,6 +108,7 @@
+@@ -76,6 +137,7 @@
   */
  #define SD_MAX_DISKS	(((26 * 26) + 26 + 1) * 26)
  
@@ -70,17 +113,16 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
  /*
   * Time out in seconds for disks and Magneto-opticals (which are slower).
   */
-@@ -278,6 +311,9 @@ static int sd_init_command(struct scsi_c
+@@ -278,6 +340,8 @@ static int sd_init_command(struct scsi_c
  	SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n",
  				   disk->disk_name, (unsigned long long)block));
  
-+   sd_iostats_bump(scsi_disk(disk)->index, this_count,
-+                   rq_data_dir(SCpnt->request) == WRITE);
++	sd_iostats_start_req(SCpnt);
 +
  	/*
  	 * If we have a 1K hardware sectorsize, prevent access to single
  	 * 512 byte sectors.  In theory we could handle this - in fact
-@@ -474,6 +510,7 @@ static int sd_open(struct inode *inode, 
+@@ -474,6 +538,7 @@ static int sd_open(struct inode *inode, 
  			scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
  	}
  
@@ -88,30 +130,17 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
  	return 0;
  
  error_out:
-@@ -500,8 +537,20 @@ static int sd_release(struct inode *inod
- 
- 	SCSI_LOG_HLQUEUE(3, printk("sd_release: disk=%s\n", disk->disk_name));
- 
--	if (!--sdkp->openers && sdev->removable) {
--		if (scsi_block_when_processing_errors(sdev))
-+	if (!--sdkp->openers) {
-+		/*
-+		 * Remove sd_iostats information about this disk
-+		 */
-+		if (sd_iostats_procdir != NULL) {
-+			remove_proc_entry(disk->disk_name, sd_iostats_procdir);
-+		}
-+		if (sd_iostats != NULL) {
-+			if (sd_iostats[sdkp->index] != NULL) {
-+				kfree (sd_iostats[sdkp->index]);
-+				sd_iostats[sdkp->index] = NULL;
-+			}
-+		}
-+		if (sdev->removable && scsi_block_when_processing_errors(sdev))
- 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
+@@ -849,6 +914,9 @@ static void sd_rw_intr(struct scsi_cmnd 
+ 			break;
+ 		}
  	}
- 
-@@ -1575,6 +1624,342 @@ static void sd_shutdown(struct device *d
++
++	sd_iostats_finish_req(SCpnt);
++
+ 	/*
+ 	 * This calls the generic completion function, now that we know
+ 	 * how many actual sectors finished, and how many sectors we need
+@@ -1575,6 +1643,481 @@ static void sd_shutdown(struct device *d
  	sd_sync_cache(sdp);
  }	
  
@@ -119,101 +148,155 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
 +static int
 +sd_iostats_seq_show(struct seq_file *seq, void *v)
 +{
-+        struct timeval     now;
-+        struct gendisk *disk;
-+        iostat_stats_t    *stats;
-+        unsigned long long read_len;
-+        unsigned long long read_len_tot;
-+        unsigned long      read_num;
-+        unsigned long      read_num_tot;
-+        unsigned long long write_len;
-+        unsigned long long write_len_tot;
-+        unsigned long      write_num;
-+        unsigned long      write_num_tot;
-+        int                i;
-+        int                maxi;
-+
-+	if (seq == NULL || seq->private == NULL) {
-+		printk(KERN_ERR "sd_iostats_seq_show: NULL disk\n");
++	struct timeval     now;
++	struct gendisk *disk = seq->private;
++	iostat_stats_t    *stats;
++	unsigned long long read_len;
++	unsigned long long read_len_tot;
++	unsigned long      read_num;
++	unsigned long      read_num_tot;
++	unsigned long long write_len;
++	unsigned long long write_len_tot;
++	unsigned long      write_num;
++	unsigned long      write_num_tot;
++	int                i;
++	int                maxi;
++
++	if (sd_iostats == NULL) {
++		printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
 +		BUG();
 +	}
 +
-+	disk = seq->private;
++	stats = sd_iostats[scsi_disk(disk)->index];
++	if (stats == NULL) {
++		printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
++		BUG();
++	}
 +
-+	if (scsi_disk(disk) == NULL || (disk->flags & GENHD_FL_UP) == 0) {
-+		seq_printf(seq, "sd_iostats_seq_show: Device %s "
-+				"does not exist\n", disk->disk_name);
-+		return 0;
++	do_gettimeofday(&now);
++	now.tv_sec -= stats->iostat_timeval.tv_sec;
++	now.tv_usec -= stats->iostat_timeval.tv_usec;
++	if (now.tv_usec < 0) {
++		now.tv_usec += 1000000;
++		now.tv_sec--;
 +	}
 +
-+        if (sd_iostats == NULL) {
-+                printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
-+                BUG();
-+        }
++	/* this sampling races with updates */
++	seq_printf(seq, "index:        %lu   snapshot_time:         %lu.%06lu\n",
++			(unsigned long) scsi_disk(disk)->index,
++			now.tv_sec, now.tv_usec);
++
++	for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
++		if (stats->iostat_read_histogram[i].iostat_count != 0 ||
++				stats->iostat_write_histogram[i].iostat_count != 0)
++			break;
++	maxi = i;
++
++	seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", 
++			"reads", "total", "writes", "total");
++
++	read_len_tot = write_len_tot = 0;
++	read_num_tot = write_num_tot = 0;
++	for (i = 0; i <= maxi; i++) {
++		read_len = stats->iostat_read_histogram[i].iostat_size;
++		read_len_tot += read_len;
++		read_num = stats->iostat_read_histogram[i].iostat_count;
++		read_num_tot += read_num;
++
++		write_len = stats->iostat_write_histogram[i].iostat_size;
++		write_len_tot += write_len;
++		write_num = stats->iostat_write_histogram[i].iostat_count;
++		write_num_tot += write_num;
++
++		seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", 
++				512<<i, read_num, read_len, write_num, write_len);
++	}
 +
-+        stats = sd_iostats[scsi_disk(disk)->index];
-+        if (stats == NULL) {
-+                seq_printf(seq, "sd_iostats_seq_show: sd_iostats "
-+				"entry %d does not exist\n",
-+				scsi_disk(disk)->index);
-+		return 0;
-+        }
++	seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
++			read_num_tot, read_len_tot, 
++			write_num_tot, write_len_tot);
++
++	seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
++	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++		unsigned long long ticks, percent;
++		ticks = stats->iostat_queue_ticks[i];
++		if (ticks == 0)
++			continue;
++		percent = stats->iostat_queue_ticks[i] * 100;
++		do_div(percent, stats->iostat_queue_ticks_sum);
++		seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
++	}
 +
-+        do_gettimeofday(&now);
-+        now.tv_sec -= stats->iostat_timeval.tv_sec;
-+        now.tv_usec -= stats->iostat_timeval.tv_usec;
-+        if (now.tv_usec < 0) {
-+                now.tv_usec += 1000000;
-+                now.tv_sec--;
-+        }
++	if (stats->iostat_reqs != 0) {
++		unsigned long long aveseek = 0, percent = 0;
 +
-+        /* this sampling races with updates */
-+        seq_printf(seq, "index:        %lu   snapshot_time:         %lu.%06lu\n",
-+                   scsi_disk(disk)->index, now.tv_sec, now.tv_usec);
-+
-+        for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
-+                if (stats->iostat_read_histogram[i].iostat_count != 0 ||
-+                    stats->iostat_write_histogram[i].iostat_count != 0)
-+                        break;
-+        maxi = i;
-+
-+        seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", 
-+                   "reads", "total", "writes", "total");
-+
-+        read_len_tot = write_len_tot = 0;
-+        read_num_tot = write_num_tot = 0;
-+        for (i = 0; i <= maxi; i++) {
-+                read_len = stats->iostat_read_histogram[i].iostat_size;
-+                read_len_tot += read_len;
-+                read_num = stats->iostat_read_histogram[i].iostat_count;
-+                read_num_tot += read_num;
-+
-+                write_len = stats->iostat_write_histogram[i].iostat_size;
-+                write_len_tot += write_len;
-+                write_num = stats->iostat_write_histogram[i].iostat_count;
-+                write_num_tot += write_num;
-+
-+                seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", 
-+                            512<<i, read_num, read_len, write_num, write_len);
-+        }
-+        
-+        seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n", "total",
-+                   read_num_tot, read_len_tot, 
-+                   write_num_tot, write_len_tot);
-+        return 0;
++		if (stats->iostat_seeks) {
++			aveseek = stats->iostat_seek_sectors;
++			do_div(aveseek, stats->iostat_seeks);
++			percent = stats->iostat_seeks * 100;
++			do_div(percent, stats->iostat_reqs);
++		}
++
++		seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
++				"%llu sectors in ave, %llu%% of all reqs\n",
++				stats->iostat_sectors, stats->iostat_reqs,
++				stats->iostat_seeks, aveseek, percent);
++	}
++
++	seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
++			"%", "writes", "%");
++	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++		unsigned long read_percent = 0, write_percent = 0;
++		if (stats->iostat_wtime[i] == 0 &&
++				stats->iostat_rtime[i] == 0)
++			continue;
++		if (stats->iostat_read_reqs)
++			read_percent = stats->iostat_rtime[i] * 100 / 
++				stats->iostat_read_reqs;
++		if (stats->iostat_write_reqs)
++			write_percent = stats->iostat_wtime[i] * 100 / 
++				stats->iostat_write_reqs;
++		seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
++				jiffies_to_msecs(((1UL << i) >> 1) << 1),
++				stats->iostat_rtime[i], read_percent,
++				stats->iostat_wtime[i], write_percent);
++	}
++
++	seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
++			"%", "writes", "%");
++	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++		unsigned long read_percent = 0, write_percent = 0;
++		if (stats->iostat_wtime_in_queue[i] == 0 &&
++				stats->iostat_rtime_in_queue[i] == 0)
++			continue;
++		if (stats->iostat_read_reqs)
++			read_percent = stats->iostat_rtime_in_queue[i] * 100 / 
++				stats->iostat_read_reqs;
++		if (stats->iostat_write_reqs)
++			write_percent = stats->iostat_wtime_in_queue[i] * 100 / 
++				stats->iostat_write_reqs;
++		seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
++				jiffies_to_msecs(((1UL << i) >> 1) << 1),
++				stats->iostat_rtime_in_queue[i],
++				read_percent,
++				stats->iostat_wtime_in_queue[i],
++				write_percent);
++	}
++
++	return 0;
 +}
 +
 +static void *
 +sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
 +{
-+        return (*pos == 0) ? (void *)1 : NULL;
++	return (*pos == 0) ? (void *)1 : NULL;
 +}
 +
 +static void *
 +sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
 +{
-+        ++*pos;
-+        return NULL;
++	++*pos;
++	return NULL;
 +}
 +
 +static void
@@ -222,50 +305,54 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
 +}
 +
 +static struct seq_operations sd_iostats_seqops = {
-+        .start = sd_iostats_seq_start,
-+        .stop  = sd_iostats_seq_stop,
-+        .next  = sd_iostats_seq_next,
-+        .show  = sd_iostats_seq_show,
++	.start = sd_iostats_seq_start,
++	.stop  = sd_iostats_seq_stop,
++	.next  = sd_iostats_seq_next,
++	.show  = sd_iostats_seq_show,
 +};
 +
 +static int
 +sd_iostats_seq_open (struct inode *inode, struct file *file)
 +{
-+        int                    rc;
++	int                    rc;
 +
-+        rc = seq_open(file, &sd_iostats_seqops);
-+        if (rc != 0)
-+                return rc;
++	rc = seq_open(file, &sd_iostats_seqops);
++	if (rc != 0)
++		return rc;
 +
-+        ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
-+        return 0;
++	((struct seq_file *)file->private_data)->private = PDE(inode)->data;
++	return 0;
 +}
 +
 +static ssize_t
 +sd_iostats_seq_write(struct file *file, const char *buffer,
 +                     size_t len, loff_t *off)
 +{
-+        struct seq_file   *seq = file->private_data;
-+        struct gendisk *disk = seq->private;
-+        iostat_stats_t    *stats = sd_iostats[scsi_disk(disk)->index];
-+        unsigned long      flags;
-+        
-+        
-+        spin_lock_irqsave (&sd_iostats_lock, flags);
-+        memset (stats, 0, sizeof(*stats));
-+        do_gettimeofday(&stats->iostat_timeval);
-+        spin_unlock_irqrestore (&sd_iostats_lock, flags);
-+
-+        return len;
++	struct seq_file   *seq = file->private_data;
++	struct gendisk *disk = seq->private;
++	iostat_stats_t    *stats = sd_iostats[scsi_disk(disk)->index];
++	unsigned long      flags;
++	unsigned long      qdepth;
++
++
++	spin_lock_irqsave (&stats->iostat_lock, flags);
++	qdepth = stats->iostat_queue_depth;
++	memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
++	do_gettimeofday(&stats->iostat_timeval);
++	stats->iostat_queue_stamp = jiffies;
++	stats->iostat_queue_depth = qdepth;
++	spin_unlock_irqrestore (&stats->iostat_lock, flags);
++
++	return len;
 +}
 +
 +static struct file_operations sd_iostats_proc_fops = {
-+        .owner   = THIS_MODULE,
-+        .open    = sd_iostats_seq_open,
-+        .read    = seq_read,
-+        .write   = sd_iostats_seq_write,
-+        .llseek  = seq_lseek,
-+        .release = seq_release,
++	.owner   = THIS_MODULE,
++	.open    = sd_iostats_seq_open,
++	.read    = seq_read,
++	.write   = sd_iostats_seq_write,
++	.llseek  = seq_lseek,
++	.release = seq_release,
 +};
 +
 +extern struct proc_dir_entry *proc_scsi;
@@ -273,188 +360,269 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
 +void
 +sd_iostats_init(void)
 +{
-+        int    i;
-+
-+        spin_lock_init(&sd_iostats_lock);
-+
-+        sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
-+        if (sd_iostats == NULL) {
-+                printk(KERN_WARNING "Can't keep sd iostats: "
-+                       "ENOMEM allocating stats array size %ld\n",
-+                       SD_STATS * sizeof(iostat_stats_t *));
-+                return;
-+        }
++	int    i;
++
++	sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
++	if (sd_iostats == NULL) {
++		printk(KERN_WARNING "Can't keep sd iostats: "
++			"ENOMEM allocating stats array size %zu\n",
++			SD_STATS * sizeof(iostat_stats_t *));
++		return;
++	}
 +
-+        for (i = 0; i < SD_STATS; i++)
-+                sd_iostats[i] = NULL;
++	for (i = 0; i < SD_STATS; i++)
++		sd_iostats[i] = NULL;
 +
-+        if (proc_scsi == NULL) {
-+                printk(KERN_WARNING "No access to sd iostats: "
-+                       "proc_scsi is NULL\n");
-+                return;
-+        }
++	if (proc_scsi == NULL) {
++		printk(KERN_WARNING "No access to sd iostats: "
++			"proc_scsi is NULL\n");
++		return;
++	}
 +
-+        sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
-+                                               S_IFDIR | S_IRUGO | S_IXUGO,
-+                                               proc_scsi);
-+        if (sd_iostats_procdir == NULL) {
-+                printk(KERN_WARNING "No access to sd iostats: "
-+                       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
-+                return;
++	sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
++			S_IFDIR | S_IRUGO | S_IXUGO,
++			proc_scsi);
++	if (sd_iostats_procdir == NULL) {
++		printk(KERN_WARNING "No access to sd iostats: "
++			"can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
++		return;
 +        }
 +}
 +
 +void
 +sd_iostats_init_disk(struct gendisk *disk)
 +{
-+        struct proc_dir_entry *pde;
-+        unsigned long          flags;
-+        iostat_stats_t        *stats;
-+
-+        if (sd_iostats == NULL ||
-+            sd_iostats_procdir == NULL)
-+                return;
-+
-+        if (scsi_disk(disk)->index > SD_STATS) {
-+                printk(KERN_ERR "sd_iostats_init_disk: "
-+                       "unexpected disk index %d(%d)\n",
-+                       scsi_disk(disk)->index, SD_STATS);
-+				    return;
-+        }
++	struct proc_dir_entry *pde;
++	unsigned long          flags;
++	iostat_stats_t        *stats;
++
++	if (sd_iostats == NULL || sd_iostats_procdir == NULL)
++		return;
++
++	if (scsi_disk(disk)->index >= SD_STATS) {
++		printk(KERN_ERR "sd_iostats_init_disk: "
++			"unexpected disk index %d(%d)\n",
++			scsi_disk(disk)->index, SD_STATS);
++		return;
++	}
 +
-+        if (sd_iostats[scsi_disk(disk)->index] != NULL)
-+                return;
++	if (sd_iostats[scsi_disk(disk)->index] != NULL)
++		return;
 +
-+        stats = kmalloc(sizeof(*stats), GFP_KERNEL);
-+        if (stats == NULL) {
-+                printk(KERN_WARNING "Can't keep %s iostats: "
-+                       "ENOMEM allocating stats size %ld\n", 
-+                       disk->disk_name, sizeof(*stats));
-+                return;
-+        }
++	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
++	if (stats == NULL) {
++		printk(KERN_WARNING "Can't keep %s iostats: "
++			"ENOMEM allocating stats size %zu\n",
++			disk->disk_name, sizeof(*stats));
++		return;
++	}
 +
-+        memset (stats, 0, sizeof(*stats));
-+        do_gettimeofday(&stats->iostat_timeval);
++	memset (stats, 0, sizeof(*stats));
++	do_gettimeofday(&stats->iostat_timeval);
++	stats->iostat_queue_stamp = jiffies;
++	spin_lock_init(&stats->iostat_lock);
 +
-+        spin_lock_irqsave(&sd_iostats_lock, flags);
 +
-+        if (sd_iostats[scsi_disk(disk)->index] != NULL) {
-+                spin_unlock_irqrestore(&sd_iostats_lock, flags);
-+                kfree (stats);
-+                return;
-+        }
++	spin_lock_irqsave(&stats->iostat_lock, flags);
 +
-+        sd_iostats[scsi_disk(disk)->index] = stats;
-+        
-+        spin_unlock_irqrestore(&sd_iostats_lock, flags);
-+        
-+        pde = create_proc_entry(disk->disk_name, S_IRUGO | S_IWUSR, 
-+                                sd_iostats_procdir);
-+        if (pde == NULL) {
-+                printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
-+                       sd_iostats_procdir_name, disk->disk_name);
-+        } else {
-+                pde->proc_fops = &sd_iostats_proc_fops;
-+                pde->data = disk;
-+        }
++	if (sd_iostats[scsi_disk(disk)->index] != NULL) {
++		spin_unlock_irqrestore(&stats->iostat_lock, flags);
++		kfree (stats);
++		return;
++	}
++
++	sd_iostats[scsi_disk(disk)->index] = stats;
++
++	spin_unlock_irqrestore(&stats->iostat_lock, flags);
++
++	strncpy(stats->iostat_name, disk->disk_name,
++		sizeof(stats->iostat_name)-1);
++
++	pde = create_proc_entry(stats->iostat_name, S_IRUGO | S_IWUSR,
++				sd_iostats_procdir);
++	if (pde == NULL) {
++		printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
++			sd_iostats_procdir_name, disk->disk_name);
++	} else {
++		pde->proc_fops = &sd_iostats_proc_fops;
++		pde->data = disk;
++	}
 +}
 +
-+static void sd_devname(unsigned int disknum, char *buffer)
++void sd_iostats_fini(void)
 +{
-+        if (disknum < 26)
-+                sprintf(buffer, "sd%c", 'a' + disknum);
-+        else {
-+                unsigned int min1;
-+                unsigned int min2;
-+                /*
-+                 * For larger numbers of disks, we need to go to a new
-+                 * naming scheme.
-+                 */
-+                min1 = disknum / 26;
-+                min2 = disknum % 26;
-+                sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
-+        }
++	int  i;
++
++	if (sd_iostats == NULL)
++		return;
++
++	for (i = 0; i < SD_STATS; i++) {
++		if (sd_iostats[i] == NULL)
++			continue;
++		if (sd_iostats_procdir != NULL)
++			remove_proc_entry(sd_iostats[i]->iostat_name,
++						sd_iostats_procdir);
++		kfree(sd_iostats[i]);
++	}
++
++	if (proc_scsi != NULL && sd_iostats_procdir != NULL)
++		remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
++
++	sd_iostats_procdir = NULL;
++	kfree(sd_iostats);
++	sd_iostats = NULL;
 +}
 +
-+void
-+sd_iostats_fini(void)
++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
 +{
-+        char name[6];
-+        int  i;
-+        
-+        if (sd_iostats_procdir != NULL) {
-+                for (i = 0; i < SD_STATS; i++) {
-+                        sd_devname(i, name);
-+                        remove_proc_entry(name, sd_iostats_procdir);
-+                }
-+
-+                if (proc_scsi == NULL) {
-+                        printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
-+                        BUG();
-+                }
-+                remove_proc_entry(sd_iostats_procdir_name,
-+                                  proc_scsi);
-+
-+                sd_iostats_procdir = NULL;
-+        }
-+        
-+        if (sd_iostats != NULL) {
-+                for (i = 0; i < SD_STATS; i++) {
-+                        if (sd_iostats[i] != NULL)
-+                                kfree (sd_iostats[i]);
-+                }
-+                
-+                kfree(sd_iostats);
-+                sd_iostats = NULL;
-+        }
++	struct request		*rq = SCpnt->request;
++	iostat_stats_t		*stats;
++	unsigned long		*tcounter;
++	int			tbucket;
++	int			tmp;
++	unsigned long		irqflags;
++	int			disk, i;
++
++	disk = scsi_disk(rq->rq_disk)->index;
++
++	if (sd_iostats == NULL)
++		return;
++
++	if (disk < 0 || disk >= SD_STATS) {
++		printk(KERN_ERR "sd_iostats_finish_req: unexpected disk index "
++		        "%d([0-%d])\n", disk, SD_STATS);
++		BUG();
++	}
++
++	stats = sd_iostats[disk];
++	if (stats == NULL)
++		return;
++
++	tmp = jiffies -  rq->start_time;
++	for (tbucket = 0; tmp > 1; tbucket++)
++		tmp >>= 1;
++	if (tbucket >= IOSTAT_NCOUNTERS)
++		tbucket = IOSTAT_NCOUNTERS - 1;
++	//printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
++
++	tcounter = rq_data_dir(rq) == WRITE ? 
++		&stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
++
++	spin_lock_irqsave(&stats->iostat_lock, irqflags);
++
++	/* update delay stats */
++	(*tcounter)++;
++
++	/* update queue depth stats */
++	i = stats->iostat_queue_depth;
++	if (i >= IOSTAT_NCOUNTERS)
++		i = IOSTAT_NCOUNTERS - 1;
++	stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
++	stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
++	stats->iostat_queue_depth--;
++
++	/* update seek stats. XXX: not sure about nr_sectors */
++	stats->iostat_sectors += rq->nr_sectors;
++	stats->iostat_reqs++;
++	if (rq->sector != stats->iostat_next_sector) {
++		stats->iostat_seek_sectors += 
++			rq->sector > stats->iostat_next_sector ?
++			rq->sector - stats->iostat_next_sector :
++			stats->iostat_next_sector - rq->sector;
++		stats->iostat_seeks++;
++	}
++	stats->iostat_next_sector = rq->sector + rq->nr_sectors;
++
++	stats->iostat_queue_stamp = jiffies;
++
++	spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
 +}
 +
-+void
-+sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
++void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
 +{
-+        iostat_stats_t    *stats;
-+        iostat_counter_t  *counter;
-+        int                bucket;
-+        int                tmp;
-+        unsigned long      irqflags;
-+
-+        if (sd_iostats == NULL)
-+                return;
-+
-+        if (disk < 0 || disk >= SD_STATS) {
-+                printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n",
-+                       disk, SD_STATS);
-+                BUG();
-+        }
++	struct request		*rq = SCpnt->request;
++	iostat_stats_t		*stats;
++	iostat_counter_t	*counter;
++	int			bucket;
++	int			tbucket;
++	int			tmp;
++	unsigned long		irqflags;
++	int			disk, i;
++	int			nsect;
++
++	disk = scsi_disk(rq->rq_disk)->index;
++
++	if (sd_iostats == NULL)
++		return;
++
++	if (disk < 0 || disk >= SD_STATS) {
++		printk(KERN_ERR "sd_iostats_start_req: unexpected disk index %d([0-%d])\n",
++			disk, SD_STATS);
++		BUG();
++	}
 +
-+        for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
-+                tmp /= 2;
++	stats = sd_iostats[disk];
++	if (stats == NULL)
++		return;
 +
-+        if (bucket >= IOSTAT_NCOUNTERS) {
-+                printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
-+                BUG();
-+        }
++	nsect = SCpnt->request_bufflen >> 9;
++	for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
++		tmp >>= 1;
 +
-+        spin_lock_irqsave(&sd_iostats_lock, irqflags);
-+        
-+        stats = sd_iostats[disk];
-+        if (stats != NULL) {
-+                counter = iswrite ? 
-+                          &stats->iostat_write_histogram[bucket] :
-+                          &stats->iostat_read_histogram[bucket];
++	if (bucket >= IOSTAT_NCOUNTERS) {
++		printk (KERN_ERR "sd_iostats_start_req: nsect %d too big\n", nsect);
++		BUG();
++	}
 +
-+                counter->iostat_size += nsect;
-+                counter->iostat_count++;
-+        }
++	counter = rq_data_dir(rq) == WRITE ? 
++		&stats->iostat_write_histogram[bucket] :
++		&stats->iostat_read_histogram[bucket];
++
++	tmp = jiffies - rq->start_time;
++	for (tbucket = 0; tmp > 1; tbucket++)
++		tmp >>= 1;
++	if (tbucket >= IOSTAT_NCOUNTERS)
++		tbucket = IOSTAT_NCOUNTERS - 1;
++	//printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
++
++	/* an ugly hack to know exact processing time. the right
++	 * solution is to add one more field to struct request
++	 * hopefully it will break nothing ... */
++	rq->start_time = jiffies;
++
++	spin_lock_irqsave(&stats->iostat_lock, irqflags);
++
++	/* update queue depth stats */
++	i = stats->iostat_queue_depth;
++	if (i >= IOSTAT_NCOUNTERS)
++		i = IOSTAT_NCOUNTERS - 1;
++	stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
++	stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
++	stats->iostat_queue_depth++;
++
++	/* update delay stats */
++	if (rq_data_dir(rq) == WRITE) {
++		stats->iostat_wtime_in_queue[tbucket]++;
++		stats->iostat_write_reqs++;
++	} else {
++		stats->iostat_rtime_in_queue[tbucket]++;
++		stats->iostat_read_reqs++;
++	}
++
++	/* update size stats */
++	counter->iostat_size += nsect;
++	counter->iostat_count++;
++
++	stats->iostat_queue_stamp = jiffies;
 +
-+        spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
++	spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
 +}
 +#endif
 +
  /**
   *	init_sd - entry point for this driver (both when built in or when
   *	a module).
-@@ -1584,6 +1969,7 @@ static void sd_shutdown(struct device *d
+@@ -1584,6 +2127,7 @@ static void sd_shutdown(struct device *d
  static int __init init_sd(void)
  {
  	int majors = 0, i;
@@ -462,7 +630,7 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
  
  	SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
  
-@@ -1594,7 +1980,10 @@ static int __init init_sd(void)
+@@ -1594,7 +2138,10 @@ static int __init init_sd(void)
  	if (!majors)
  		return -ENODEV;
  
@@ -474,7 +642,7 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
  }
  
  /**
-@@ -1608,6 +1997,7 @@ static void __exit exit_sd(void)
+@@ -1608,6 +2155,7 @@ static void __exit exit_sd(void)
  
  	SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
  
@@ -482,17 +650,3 @@ Index: linux+rhel4+chaos/drivers/scsi/sd.c
  	scsi_unregister_driver(&sd_template.gendrv);
  	for (i = 0; i < SD_MAJORS; i++)
  		unregister_blkdev(sd_major(i), "sd");
-Index: linux+rhel4+chaos/drivers/scsi/scsi_proc.c
-===================================================================
---- linux+rhel4+chaos.orig/drivers/scsi/scsi_proc.c
-+++ linux+rhel4+chaos/drivers/scsi/scsi_proc.c
-@@ -38,7 +38,8 @@
- /* 4K page size, but our output routines, use some slack for overruns */
- #define PROC_BLOCK_SIZE (3*1024)
- 
--static struct proc_dir_entry *proc_scsi;
-+struct proc_dir_entry *proc_scsi;
-+EXPORT_SYMBOL(proc_scsi);
- 
- /* Protect sht->present and sht->proc_dir */
- static DECLARE_MUTEX(global_host_template_sem);
-- 
GitLab