diff --git a/lustre/ChangeLog b/lustre/ChangeLog index ae83eb2a9f8434192e60da1a4bc0cdc45b850a43..040c6e3612324cfb9c836a719cd7e2bbcc2b362e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -270,7 +270,7 @@ Details : dev_clear_rdonly(bdev) must be called in kill_bdev() instead of Severity : minor Bugzilla : 11706 -Description: service threads may hog cpus when ther eis a lot of requests +Description: service threads may hog cpus when there are a lot of requests coming Details : Insert cond_resched to give other threads a chance to use some of the cpu @@ -285,6 +285,14 @@ Details : When mds(mgs) do recovery, the tgt_count might be zero, so the initialization will be done asynchronausly, so there will be race between add unlink log and unlink log initialization. +Severity : normal +Bugzilla : 12597 +Description: brw_stats were being printed incorrectly +Details : brw_stats were being printed as log2 but not all of them were + recorded as log2. Also remove some code duplication arising from + filter_tally_{read,write}. + + -------------------------------------------------------------------------------- 2007-05-03 Cluster File Systems, Inc. 
<info@clusterfs.com> diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 093976febb7a8373aaf6c5b95a3e07e838e01730..9fb48ad4fa9c62ac8d532d0c82b51fdc25ed48c4 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -174,23 +174,13 @@ int filter_recov_log_mds_ost_cb(struct llog_handle *llh, struct llog_rec_hdr *rec, void *data); #ifdef LPROCFS -void filter_tally_write(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks, - int blocks_per_page); -void filter_tally_read(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks, - int blocks_per_page); +void filter_tally(struct obd_export *exp, struct page **pages, int nr_pages, + unsigned long *blocks, int blocks_per_page, int wr); int lproc_filter_attach_seqstat(struct obd_device *dev); #else -static inline void filter_tally_write(struct obd_export *exp, - struct page **pages, int nr_pages, - unsigned long *blocks, int blocks_per_page) {} -static inline void filter_tally_read(struct obd_export *exp, - struct page **pages, int nr_pages, - unsigned long *blocks, int blocks_per_page) {} -static inline void filter_tally_read(struct filter_obd *filter, - struct page **pages, int nr_pages, - unsigned long *blocks, int blocks_per_page) {} +static inline void filter_tally(struct obd_export *exp, struct page **pages, + int nr_pages, unsigned long *blocks, + int blocks_per_page, int wr) {} static inline int lproc_filter_attach_seqstat(struct obd_device *dev) {} #endif diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 19645c83d91db99a250e0c3676544fb9e1f80341..c126aa5a3c28d6bd8a63cb20fa7dc6e2247dc40a 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -240,9 +240,9 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf, if (rw == OBD_BRW_WRITE) { if (rc == 0) { - filter_tally_write(exp, iobuf->maplist, 
iobuf->nr_pages, - KIOBUF_GET_BLOCKS(iobuf), - blocks_per_page); + filter_tally(exp, iobuf->maplist, iobuf->nr_pages, + KIOBUF_GET_BLOCKS(iobuf), blocks_per_page, + 1); if (attr->ia_size > inode->i_size) attr->ia_valid |= ATTR_SIZE; @@ -264,8 +264,8 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf, if (rc) GOTO(cleanup, rc); } else { - filter_tally_read(exp, iobuf->maplist, iobuf->nr_pages, - KIOBUF_GET_BLOCKS(iobuf), blocks_per_page); + filter_tally(exp, iobuf->maplist, iobuf->nr_pages, + KIOBUF_GET_BLOCKS(iobuf), blocks_per_page, 0); } rc = filter_clear_page_cache(inode, iobuf); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index eeae4bdf6f2d64c23ed5ad526b8ed998f4bcf6f0..64881e5bc6dc231cfc30975e583ea3ed9c021652 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -523,9 +523,9 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, if (rw == OBD_BRW_WRITE) { if (rc == 0) { - filter_tally_write(exp, iobuf->dr_pages, - iobuf->dr_npages, iobuf->dr_blocks, - blocks_per_page); + filter_tally(exp, iobuf->dr_pages, + iobuf->dr_npages, iobuf->dr_blocks, + blocks_per_page, 1); if (attr->ia_size > inode->i_size) attr->ia_valid |= ATTR_SIZE; rc = fsfilt_setattr(obd, dchild, @@ -547,9 +547,8 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, if (rc != 0) RETURN(rc); } else if (rc == 0) { - filter_tally_read(exp, iobuf->dr_pages, - iobuf->dr_npages, iobuf->dr_blocks, - blocks_per_page); + filter_tally(exp, iobuf->dr_pages, iobuf->dr_npages, + iobuf->dr_blocks, blocks_per_page, 0); } rc = filter_clear_page_cache(inode, iobuf); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 9d3b8e125f0effab564a8a264c6026198e6781ed..c40ba57495b37196da5347581b101cdc9e107eb2 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -216,8 +216,8 @@ static struct 
lprocfs_vars lprocfs_module_vars[] = { { 0 } }; -void filter_tally_write(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks,int blocks_per_page) +void filter_tally(struct obd_export *exp, struct page **pages, int nr_pages, + unsigned long *blocks, int blocks_per_page, int wr) { struct filter_obd *filter = &exp->exp_obd->u.filter; struct filter_export_data *fed = &exp->exp_filter_data; @@ -230,50 +230,10 @@ void filter_tally_write(struct obd_export *exp, struct page **pages, if (nr_pages == 0) return; - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_W_PAGES], + lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_PAGES + wr], nr_pages); - lprocfs_oh_tally_log2(&fed->fed_brw_stats.hist[BRW_W_PAGES], nr_pages); - - while (nr_pages-- > 0) { - if (last_page && (*pages)->index != (last_page->index + 1)) - discont_pages++; - last_page = *pages; - pages++; - for (i = 0; i < blocks_per_page; i++) { - if (last_block && *blocks != (*last_block + 1)) - discont_blocks++; - last_block = blocks++; - } - } - - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_W_DISCONT_PAGES], - discont_pages); - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_W_DISCONT_BLOCKS], - discont_blocks); - - lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_W_DISCONT_PAGES], - discont_pages); - lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_W_DISCONT_BLOCKS], - discont_blocks); -} - -void filter_tally_read(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks, int blocks_per_page) -{ - struct filter_obd *filter = &exp->exp_obd->u.filter; - struct filter_export_data *fed = &exp->exp_filter_data; - struct page *last_page = NULL; - unsigned long *last_block = NULL; - unsigned long discont_pages = 0; - unsigned long discont_blocks = 0; - int i; - - if (nr_pages == 0) - return; - - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_PAGES], + lprocfs_oh_tally_log2(&fed->fed_brw_stats.hist[BRW_R_PAGES + wr], nr_pages); - 
lprocfs_oh_tally_log2(&fed->fed_brw_stats.hist[BRW_R_PAGES], nr_pages); while (nr_pages-- > 0) { if (last_page && (*pages)->index != (last_page->index + 1)) @@ -287,40 +247,51 @@ void filter_tally_read(struct obd_export *exp, struct page **pages, } } - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_PAGES], + lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_PAGES +wr], discont_pages); - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_BLOCKS], - discont_blocks); - - lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_PAGES], + lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_PAGES + wr], discont_pages); - lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_BLOCKS], + lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_BLOCKS+wr], + discont_blocks); + lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_BLOCKS + wr], discont_blocks); } #define pct(a,b) (b ? a * 100 / b : 0) -static void display_brw_stats(struct seq_file *seq, struct obd_histogram *read, - struct obd_histogram *write) +static void display_brw_stats(struct seq_file *seq, char *name, char *units, + struct obd_histogram *read, struct obd_histogram *write, int log2) { - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; + unsigned long read_tot, write_tot, r, w, read_cum = 0, write_cum = 0; int i; + seq_printf(seq, "\n%26s read | write\n", " "); + seq_printf(seq, "%-22s %-5s %% cum %% | %-5s %% cum %%\n", + name, units, units); + read_tot = lprocfs_oh_sum(read); write_tot = lprocfs_oh_sum(write); - - read_cum = 0; - write_cum = 0; for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = read->oh_buckets[i]; - unsigned long w = write->oh_buckets[i]; + r = read->oh_buckets[i]; + w = write->oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%u:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - 1 << i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, - pct(w, write_tot), - pct(write_cum, write_tot)); + if (read_cum == 0 && 
write_cum == 0) + continue; + + if (!log2) + seq_printf(seq, "%u", i); + else if (i < 10) + seq_printf(seq, "%u", 1<<i); + else if (i < 20) + seq_printf(seq, "%uK", 1<<(i-10)); + else + seq_printf(seq, "%uM", 1<<(i-20)); + + seq_printf(seq, ":\t\t%10lu %3lu %3lu | %4lu %3lu %3lu\n", + r, pct(r, read_tot), pct(read_cum, read_tot), + w, pct(w, write_tot), pct(write_cum, write_tot)); + if (read_cum == read_tot && write_cum == write_tot) break; } @@ -329,91 +300,44 @@ static void display_brw_stats(struct seq_file *seq, struct obd_histogram *read, static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats) { struct timeval now; -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; - int i; -#endif - - do_gettimeofday(&now); /* this sampling races with updates */ - + do_gettimeofday(&now); seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "pages per brw brws %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_PAGES], &brw_stats->hist[BRW_W_PAGES]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "discont pages rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); + display_brw_stats(seq, "pages per bulk r/w", "rpcs", + &brw_stats->hist[BRW_R_PAGES], + &brw_stats->hist[BRW_W_PAGES], 1); - display_brw_stats(seq, &brw_stats->hist[BRW_R_DISCONT_PAGES], - &brw_stats->hist[BRW_W_DISCONT_PAGES]); + display_brw_stats(seq, "discontiguous pages", "rpcs", + &brw_stats->hist[BRW_R_DISCONT_PAGES], + &brw_stats->hist[BRW_W_DISCONT_PAGES], 0); - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "discont blocks rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); + display_brw_stats(seq, "discontiguous blocks", "rpcs", + &brw_stats->hist[BRW_R_DISCONT_BLOCKS], + &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0); - 
display_brw_stats(seq, &brw_stats->hist[BRW_R_DISCONT_BLOCKS], - &brw_stats->hist[BRW_W_DISCONT_BLOCKS]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "dio frags rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_DIO_FRAGS], - &brw_stats->hist[BRW_W_DIO_FRAGS]); + display_brw_stats(seq, "disk fragmented I/Os", "ios", + &brw_stats->hist[BRW_R_DIO_FRAGS], + &brw_stats->hist[BRW_W_DIO_FRAGS], 0); #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "disk ios in flight ios %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_RPC_HIST], - &brw_stats->hist[BRW_W_RPC_HIST]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "io time (1/%ds) rpcs %% cum %% |", HZ); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_IO_TIME], - &brw_stats->hist[BRW_W_IO_TIME]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "disk I/O size count %% cum %% |"); - seq_printf(seq, " count %% cum %%\n"); - - read_tot = lprocfs_oh_sum(&brw_stats->hist[BRW_R_DISK_IOSIZE]); - write_tot = lprocfs_oh_sum(&brw_stats->hist[BRW_W_DISK_IOSIZE]); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = brw_stats->hist[BRW_R_DISK_IOSIZE].oh_buckets[i]; - unsigned long w = brw_stats->hist[BRW_W_DISK_IOSIZE].oh_buckets[i]; - - read_cum += r; - write_cum += w; - if (read_cum == 0 && write_cum == 0) - continue; - - if (i < 10) - seq_printf(seq, "%u", 1<<i); - else if (i < 20) - seq_printf(seq, "%uK", 1<<(i-10)); - else - seq_printf(seq, "%uM", 1<<(i-20)); - - seq_printf(seq, ":\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; + display_brw_stats(seq, 
"disk I/Os in flight", "ios", + &brw_stats->hist[BRW_R_RPC_HIST], + &brw_stats->hist[BRW_W_RPC_HIST], 0); + + { + char title[24]; + sprintf(title, "I/O time (1/%ds)", HZ); + display_brw_stats(seq, title, "ios", + &brw_stats->hist[BRW_R_IO_TIME], + &brw_stats->hist[BRW_W_IO_TIME], 1); } + + display_brw_stats(seq, "disk I/O size", "ios", + &brw_stats->hist[BRW_R_DISK_IOSIZE], + &brw_stats->hist[BRW_W_DISK_IOSIZE], 1); #endif }