From 9c68b2a4edd7635a5ce336b6d0dcbc8bc6468bae Mon Sep 17 00:00:00 2001 From: bwzhou <bwzhou> Date: Tue, 30 Oct 2007 05:26:00 +0000 Subject: [PATCH] Branch b1_6 b=11622 i=adilger i=johann 1. add page allocation statistics for lustre; 2. use percpu counters for both memory and page accounting; 3. update leak-finder to understand the new format of debug info. --- lustre/include/lprocfs_status.h | 69 +++++++++-- lustre/include/obd_support.h | 171 +++++++++++++++++++++------ lustre/liblustre/dir.c | 12 +- lustre/llite/lproc_llite.c | 3 +- lustre/lvfs/lvfs_lib.c | 123 ++++++++++++++++++- lustre/lvfs/lvfs_linux.c | 34 +----- lustre/mds/handler.c | 4 +- lustre/obdclass/class_obd.c | 37 ++++-- lustre/obdclass/linux/linux-sysctl.c | 137 +++++++++++++++++++-- lustre/obdclass/obd_mount.c | 8 +- lustre/obdecho/echo.c | 12 +- lustre/obdecho/echo_client.c | 11 +- lustre/obdfilter/filter.c | 13 +- lustre/ost/ost_handler.c | 4 +- lustre/ptlrpc/pinger.c | 2 + lustre/tests/leak_finder.pl | 7 +- 16 files changed, 514 insertions(+), 133 deletions(-) diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 90e599cd35..b31ed27d49 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -123,11 +123,11 @@ struct lprocfs_atomic { struct lprocfs_counter { struct lprocfs_atomic lc_cntl; /* may need to move to per set */ unsigned int lc_config; - __u64 lc_count; - __u64 lc_sum; - __u64 lc_min; - __u64 lc_max; - __u64 lc_sumsquare; + __s64 lc_count; + __s64 lc_sum; + __s64 lc_min; + __s64 lc_max; + __s64 lc_sumsquare; const char *lc_name; /* must be static */ const char *lc_units; /* must be static */ }; @@ -140,10 +140,21 @@ struct lprocfs_percpu { #define LPROCFS_GET_SMP_ID 0x0002 enum lprocfs_stats_flags { + LPROCFS_STATS_FLAG_PERCPU = 0x0000, /* per cpu counter */ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu * area and need locking */ }; +enum lprocfs_fields_flags { + LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001, + 
LPROCFS_FIELDS_FLAGS_SUM = 0x0002, + LPROCFS_FIELDS_FLAGS_MIN = 0x0003, + LPROCFS_FIELDS_FLAGS_MAX = 0x0004, + LPROCFS_FIELDS_FLAGS_AVG = 0x0005, + LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006, + LPROCFS_FIELDS_FLAGS_COUNT = 0x0007, +}; + struct lprocfs_stats { unsigned int ls_num; /* # of counters */ unsigned int ls_percpu_size; @@ -242,7 +253,11 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, lprocfs_stats_unlock(stats); } -static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) +#define lprocfs_counter_incr(stats, idx) \ + lprocfs_counter_add(stats, idx, 1) + +static inline void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, + long amount) { struct lprocfs_counter *percpu_cntr; int smp_id; @@ -250,17 +265,38 @@ static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) if (!stats) return; + /* With per-client stats, statistics are allocated only for + * single CPU area, so the smp_id should be 0 always. */ smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID); - + percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]); atomic_inc(&percpu_cntr->lc_cntl.la_entry); - percpu_cntr->lc_count++; + if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) + percpu_cntr->lc_sum -= amount; atomic_inc(&percpu_cntr->lc_cntl.la_exit); - lprocfs_stats_unlock(stats); } +#define lprocfs_counter_decr(stats, idx) \ + lprocfs_counter_sub(stats, idx, 1) -extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, +extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field); + +static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats, + int idx, + enum lprocfs_fields_flags field) +{ + __u64 ret = 0; + int i; + + LASSERT(stats != NULL); + for (i = 0; i < num_possible_cpus(); i++) + ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]), + field); + return ret; +} + +extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, enum 
lprocfs_stats_flags flags); extern void lprocfs_clear_stats(struct lprocfs_stats *stats); extern void lprocfs_free_stats(struct lprocfs_stats **stats); @@ -483,13 +519,19 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int index, long amount) { return; } static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int index) { return; } +static inline void lprocfs_counter_sub(struct lprocfs_stats *stats, + int index, long amount) { return; } static inline void lprocfs_counter_init(struct lprocfs_stats *stats, int index, unsigned conf, const char *name, const char *units) { return; } +static inline __u64 lc_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field) +{ return 0; } + static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num, - int client_stat) + enum lprocfs_stats_flags flags) { return NULL; } static inline void lprocfs_clear_stats(struct lprocfs_stats *stats) { return; } @@ -615,6 +657,11 @@ static inline int lprocfs_counter_write(struct file *file, const char *buffer, unsigned long count, void *data) { return 0; } +static inline +__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, + enum lprocfs_fields_flags field) +{ return (__u64)0; } + #define LPROCFS_ENTRY() #define LPROCFS_EXIT() #define LPROCFS_ENTRY_AND_CHECK(dp) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 2189aef369..daf027ae89 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -25,10 +25,16 @@ #include <libcfs/kp30.h> #include <lvfs.h> +#include <lprocfs_status.h> /* global variables */ -extern atomic_t obd_memory; -extern int obd_memmax; +extern struct lprocfs_stats *obd_memory; +enum { + OBD_MEMORY_STAT = 0, + OBD_MEMORY_PAGES_STAT = 1, + OBD_STATS_NUM, +}; + extern unsigned int obd_fail_loc; extern unsigned int obd_fail_val; extern unsigned int obd_debug_peer_on_timeout; @@ -395,13 +401,75 @@ extern atomic_t libcfs_kmemory; #define 
OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1) #define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100) +#ifdef LPROCFS +#define obd_memory_add(size) \ + lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size)) +#define obd_memory_sub(size) \ + lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size)) +#define obd_memory_sum() \ + lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT, \ + LPROCFS_FIELDS_FLAGS_SUM) +#define obd_pages_add(order) \ + lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT, \ + (long)(1 << (order))) +#define obd_pages_sub(order) \ + lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT, \ + (long)(1 << (order))) +#define obd_pages_sum() \ + lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT, \ + LPROCFS_FIELDS_FLAGS_SUM) + +extern void obd_update_maxusage(void); +extern __u64 obd_memory_max(void); +extern __u64 obd_pages_max(void); + +#else + +extern __u64 obd_alloc; +extern __u64 obd_pages; + +extern __u64 obd_max_alloc; +extern __u64 obd_max_pages; + +static inline void obd_memory_add(long size) +{ + obd_alloc += size; + if (obd_alloc > obd_max_alloc) + obd_max_alloc = obd_alloc; +} + +static inline void obd_memory_sub(long size) +{ + obd_alloc -= size; +} + +static inline void obd_pages_add(int order) +{ + obd_pages += 1<< order; + if (obd_pages > obd_max_pages) + obd_max_pages = obd_pages; +} + +static inline void obd_pages_sub(int order) +{ + obd_pages -= 1<< order; +} + +#define obd_memory_sum() (obd_alloc) +#define obd_pages_sum() (obd_pages) + +#define obd_memory_max() (obd_max_alloc) +#define obd_pages_max() (obd_max_pages) + +#endif + #if defined(LUSTRE_UTILS) /* this version is for utils only */ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ - if ((ptr) == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed\n", \ + 
(int)(size)); \ } else { \ memset(ptr, 0, size); \ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n", \ @@ -424,11 +492,9 @@ do { \ __FILE__, __LINE__) || \ OBD_FREE_RTN0(ptr)))){ \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + obd_memory_add(size); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ } \ } while (0) #endif @@ -448,18 +514,16 @@ do { \ # define OBD_VMALLOC(ptr, size) \ do { \ (ptr) = cfs_alloc_large(size); \ - if ((ptr) == NULL) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + if (unlikely((ptr) == NULL)) { \ + CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n", \ + (int)(size)); \ + CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ + obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ } else { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + obd_memory_add(size); \ + CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ } \ } while (0) #endif @@ -481,9 +545,9 @@ do { \ #define OBD_FREE(ptr, size) \ do { \ LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + obd_memory_sub(size); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ POISON(ptr, 0x5a, size); \ cfs_free(ptr); \ (ptr) = (void *)0xdeadbeef; \ @@ -522,9 +586,9 @@ do { 
\ # define OBD_VFREE(ptr, size) \ do { \ LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + obd_memory_sub(size); \ + CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ POISON(ptr, 0x5a, size); \ cfs_free_large(ptr); \ (ptr) = (void *)0xdeadbeef; \ @@ -550,11 +614,9 @@ do { \ __FILE__, __LINE__) || \ OBD_SLAB_FREE_RTN0(ptr, slab)))) { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ - (int)(size), ptr, atomic_read(&obd_memory)); \ + obd_memory_add(size); \ + CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p.\n", \ + (int)(size), ptr); \ } \ } while (0) @@ -563,9 +625,9 @@ do { \ #define OBD_SLAB_FREE(ptr, slab, size) \ do { \ LASSERT(ptr); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ + obd_memory_sub(size); \ POISON(ptr, 0x5a, size); \ cfs_mem_cache_free(slab, ptr); \ (ptr) = (void *)0xdeadbeef; \ @@ -578,6 +640,47 @@ do { \ #define KEY_IS(str) (keylen >= strlen(str) && strcmp(key, str) == 0) +/* Wrapper for contiguous page frame allocation */ +#define OBD_PAGES_ALLOC(ptr, order, gfp_mask) \ +do { \ + (ptr) = cfs_alloc_pages(gfp_mask, order); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\ + "failed\n", (int)(1 << (order)), \ + (__u64)((1 << (order)) << CFS_PAGE_SHIFT)); \ + CERROR(LPU64" total bytes and "LPU64" total pages " \ + "("LPU64" bytes) allocated by Lustre, " \ + "%d total bytes by LNET\n", \ + obd_memory_sum(), \ + obd_pages_sum() << CFS_PAGE_SHIFT, \ + obd_pages_sum(), \ + atomic_read(&libcfs_kmemory)); \ + } 
else { \ + obd_pages_add(order); \ + CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / " \ + LPU64" bytes at %p.\n", \ + (int)(1 << (order)), \ + (__u64)((1 << (order)) << CFS_PAGE_SHIFT), ptr); \ + } \ +} while (0) + +#define OBD_PAGE_ALLOC(ptr, gfp_mask) \ + OBD_PAGES_ALLOC(ptr, 0, gfp_mask) + +#define OBD_PAGES_FREE(ptr, order) \ +do { \ + LASSERT(ptr); \ + obd_pages_sub(order); \ + CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \ + "at %p.\n", \ + (int)(1 << (order)), (__u64)((1 << (order)) << CFS_PAGE_SHIFT),\ + ptr); \ + __cfs_free_pages(ptr, order); \ + (ptr) = (void *)0xdeadbeef; \ +} while (0) + +#define OBD_PAGE_FREE(ptr) OBD_PAGES_FREE(ptr, 0) + #if defined(__linux__) #include <linux/obd_support.h> #elif defined(__APPLE__) diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index f571556642..e6788bfe60 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -131,16 +131,14 @@ static struct page *llu_dir_read_page(struct inode *ino, unsigned long pgidx) int rc; ENTRY; - page = cfs_alloc_page(0); - if (!page) { - CERROR("alloc page failed\n"); + OBD_PAGE_ALLOC(page, 0); + if (!page) RETURN(ERR_PTR(-ENOMEM)); - } page->index = pgidx; rc = llu_dir_do_readpage(ino, page); if (rc) { - free_page(page); + OBD_PAGE_FREE(page); RETURN(ERR_PTR(rc)); } @@ -253,7 +251,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, + le16_to_cpu(de->rec_len), le32_to_cpu(de->inode), d_type, &filled); if (over) { - free_page(page); + OBD_PAGE_FREE(page); /* * if buffer overflow with no data * returned yet, then report error @@ -267,7 +265,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, } } - free_page(page); + OBD_PAGE_FREE(page); } done: lli->lli_dir_pos = (__u64)pgidx << CFS_PAGE_SHIFT | offset; diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index d40c9528a1..15759e8d07 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -687,7 +687,8 
@@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, entry->data = sbi; /* File operations stats */ - sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, 0); + sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, + LPROCFS_STATS_FLAG_PERCPU); if (sbi->ll_stats == NULL) GOTO(out, err = -ENOMEM); /* do counter init */ diff --git a/lustre/lvfs/lvfs_lib.c b/lustre/lvfs/lvfs_lib.c index 219d83ba4b..ea0664dfa7 100644 --- a/lustre/lvfs/lvfs_lib.c +++ b/lustre/lvfs/lvfs_lib.c @@ -30,6 +30,22 @@ #endif #include <lustre_lib.h> +__u64 obd_max_pages = 0; +__u64 obd_max_alloc = 0; + +#ifdef __KERNEL__ +struct lprocfs_stats *obd_memory = NULL; +spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED; +/* refine later and change to seqlock or similar from libcfs */ +#else +__u64 obd_alloc; +__u64 obd_pages; +#endif + +unsigned int obd_fail_val = 0; +unsigned int obd_fail_loc = 0; +unsigned int obd_alloc_fail_rate = 0; + int obd_alloc_fail(const void *ptr, const char *name, const char *type, size_t size, const char *file, int line) { @@ -38,10 +54,111 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n", ptr ? 
"force " :"", type, name, (__u64)size, file, line); - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory)); - return 1; + CERROR(LPU64" total bytes and "LPU64" total pages " + "("LPU64" bytes) allocated by Lustre, " + "%d total bytes by LNET\n", + obd_memory_sum(), + obd_pages_sum() << CFS_PAGE_SHIFT, + obd_pages_sum(), + atomic_read(&libcfs_kmemory)); + return 1; } return 0; } + +#ifdef __KERNEL__ +void obd_update_maxusage() +{ + __u64 max1, max2; + + max1 = obd_pages_sum(); + max2 = obd_memory_sum(); + + spin_lock(&obd_updatemax_lock); + if (max1 > obd_max_pages) + obd_max_pages = max1; + if (max2 > obd_max_alloc) + obd_max_alloc = max2; + spin_unlock(&obd_updatemax_lock); + +} + +__u64 obd_memory_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_alloc; + spin_unlock(&obd_updatemax_lock); + + return ret; +} + +__u64 obd_pages_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_pages; + spin_unlock(&obd_updatemax_lock); + + return ret; +} + +EXPORT_SYMBOL(obd_update_maxusage); +EXPORT_SYMBOL(obd_pages_max); +EXPORT_SYMBOL(obd_memory_max); +EXPORT_SYMBOL(obd_memory); + +#endif + +#ifdef LPROCFS +__s64 lprocfs_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field) +{ + __u64 ret = 0; + int centry; + + if (!lc) + RETURN(0); + do { + centry = atomic_read(&lc->lc_cntl.la_entry); + + switch (field) { + case LPROCFS_FIELDS_FLAGS_CONFIG: + ret = lc->lc_config; + break; + case LPROCFS_FIELDS_FLAGS_SUM: + ret = lc->lc_sum; + break; + case LPROCFS_FIELDS_FLAGS_MIN: + ret = lc->lc_min; + break; + case LPROCFS_FIELDS_FLAGS_MAX: + ret = lc->lc_max; + break; + case LPROCFS_FIELDS_FLAGS_AVG: + ret = (lc->lc_max - lc->lc_min)/2; + break; + case LPROCFS_FIELDS_FLAGS_SUMSQUARE: + ret = lc->lc_sumsquare; + break; + case LPROCFS_FIELDS_FLAGS_COUNT: + ret = lc->lc_count; + break; + default: + break; + }; + } while (centry != 
atomic_read(&lc->lc_cntl.la_entry) && + centry != atomic_read(&lc->lc_cntl.la_exit)); + + RETURN(ret); +} +EXPORT_SYMBOL(lprocfs_read_helper); +#endif + EXPORT_SYMBOL(obd_alloc_fail); + +EXPORT_SYMBOL(obd_fail_loc); +EXPORT_SYMBOL(obd_alloc_fail_rate); +EXPORT_SYMBOL(obd_fail_val); diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index f7cfe1355f..beea8baa91 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -49,12 +49,6 @@ #include <lustre_lib.h> #include <lustre_quota.h> -atomic_t obd_memory; -int obd_memmax; -unsigned int obd_fail_val; -unsigned int obd_fail_loc; -unsigned int obd_alloc_fail_rate = 0; - /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) @@ -460,8 +454,7 @@ long l_readdir(struct file *file, struct list_head *dentry_list) return 0; } EXPORT_SYMBOL(l_readdir); -EXPORT_SYMBOL(obd_memory); -EXPORT_SYMBOL(obd_memmax); + #ifdef LUSTRE_KERNEL_VERSION #ifndef HAVE_CLEAR_RDONLY_ON_PUT @@ -512,31 +505,6 @@ int lvfs_check_io_health(struct obd_device *obd, struct file *file) } EXPORT_SYMBOL(lvfs_check_io_health); -static int __init lvfs_linux_init(void) -{ - RETURN(0); -} - -static void __exit lvfs_linux_exit(void) -{ - int leaked; - ENTRY; - - leaked = atomic_read(&obd_memory); - CDEBUG(leaked ? D_ERROR : D_INFO, - "obd mem max: %d leaked: %d\n", obd_memmax, leaked); - - EXIT; - return; -} - -EXPORT_SYMBOL(obd_fail_loc); -EXPORT_SYMBOL(obd_alloc_fail_rate); -EXPORT_SYMBOL(obd_fail_val); - MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1"); MODULE_LICENSE("GPL"); - -module_init(lvfs_linux_init); -module_exit(lvfs_linux_exit); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index b313b0f503..8cdf88756b 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -97,7 +97,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, for (i = 0, tmpcount = count; i < npages; i++, tmpcount -= tmpsize) { tmpsize = tmpcount > CFS_PAGE_SIZE ? CFS_PAGE_SIZE : tmpcount; - pages[i] = cfs_alloc_page(CFS_ALLOC_STD); + OBD_PAGE_ALLOC(pages[i], CFS_ALLOC_STD); if (pages[i] == NULL) GOTO(cleanup_buf, rc = -ENOMEM); @@ -161,7 +161,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, cleanup_buf: for (i = 0; i < npages; i++) if (pages[i]) - __cfs_free_page(pages[i]); + OBD_PAGE_FREE(pages[i]); ptlrpc_free_bulk(desc); out_free: diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 56ed0f6ca4..b6d778079a 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -51,13 +51,6 @@ atomic_t libcfs_kmemory = {0}; struct obd_device *obd_devs[MAX_OBD_DEVICES]; struct list_head obd_types; spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED; -#ifndef __KERNEL__ -atomic_t obd_memory; -int obd_memmax; -unsigned int obd_fail_val; -unsigned int obd_fail_loc; -unsigned int obd_alloc_fail_rate; -#endif /* The following are visible and mutable through /proc/sys/lustre/. 
*/ unsigned int obd_debug_peer_on_timeout; @@ -533,7 +526,21 @@ int init_obdclass(void) spin_lock_init(&obd_types_lock); cfs_waitq_init(&obd_race_waitq); obd_zombie_impexp_init(); +#ifdef LPROCFS + obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM, + LPROCFS_STATS_FLAG_PERCPU); + if (obd_memory == NULL) { + CERROR("kmalloc of 'obd_memory' failed\n"); + RETURN(-ENOMEM); + } + lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT, + LPROCFS_CNTR_AVGMINMAX, + "memused", "bytes"); + lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT, + LPROCFS_CNTR_AVGMINMAX, + "pagesused", "pages"); +#endif err = obd_init_checks(); if (err == -EOVERFLOW) return err; @@ -578,6 +585,8 @@ int init_obdclass(void) static void cleanup_obdclass(void) { int i; + __u64 memory_leaked, pages_leaked; + __u64 memory_max, pages_max; int lustre_unregister_fs(void); ENTRY; @@ -601,6 +610,20 @@ static void cleanup_obdclass(void) class_handle_cleanup(); class_exit_uuidlist(); + + memory_leaked = obd_memory_sum(); + pages_leaked = obd_pages_sum(); + + memory_max = obd_memory_max(); + pages_max = obd_pages_max(); + + lprocfs_free_stats(&obd_memory); + CDEBUG((memory_leaked | pages_leaked) ? 
D_ERROR : D_INFO, + "obd_memory max: "LPU64", leaked: "LPU64" " + "obd_memory_pages max: "LPU64", leaked: "LPU64"\n", + memory_max, memory_leaked, + pages_max, pages_leaked); + EXIT; } diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index e9ed5abd59..ea1fa46d82 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -57,6 +57,9 @@ enum { OBD_TIMEOUT, /* RPC timeout before recovery/intr */ OBD_DUMP_ON_TIMEOUT, /* dump kernel debug log upon eviction */ OBD_MEMUSED, /* bytes currently OBD_ALLOCated */ + OBD_PAGESUSED, /* pages currently OBD_PAGE_ALLOCated */ + OBD_MAXMEMUSED, /* maximum bytes OBD_ALLOCated concurrently */ + OBD_MAXPAGESUSED, /* maximum pages OBD_PAGE_ALLOCated concurrently */ OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */ OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */ OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */ @@ -88,9 +91,7 @@ int LL_PROC_PROTO(proc_set_timeout) #ifdef RANDOM_FAIL_ALLOC int LL_PROC_PROTO(proc_alloc_fail_rate) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) - loff_t *ppos = &filp->f_pos; -#endif + DECLARE_LL_PROC_PPOS_DECL int rc = 0; if (!table->data || !table->maxlen || !*lenp || (*ppos && !write)) { @@ -105,7 +106,7 @@ int LL_PROC_PROTO(proc_alloc_fail_rate) char buf[21]; int len; - len = lprocfs_read_frac_helper(buf, 21, + len = lprocfs_read_frac_helper(buf, sizeof(buf), *(unsigned int*)table->data, OBD_ALLOC_FAIL_MULT); if (len > *lenp) @@ -120,6 +121,102 @@ int LL_PROC_PROTO(proc_alloc_fail_rate) } #endif +int LL_PROC_PROTO(proc_memory_alloc) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_sum()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + 
*ppos += *lenp; + return 0; +} + +int LL_PROC_PROTO(proc_pages_alloc) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_sum()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + +int LL_PROC_PROTO(proc_mem_max) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_max()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + +int LL_PROC_PROTO(proc_pages_max) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + static cfs_sysctl_table_t obd_table[] = { { .ctl_name = OBD_FAIL_LOC, @@ -172,10 +269,34 @@ static cfs_sysctl_table_t obd_table[] = { { .ctl_name = OBD_MEMUSED, .procname = "memused", - .data = (int *)&obd_memory.counter, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_memory_alloc + }, + { + .ctl_name = OBD_PAGESUSED, + .procname = "pagesused", + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_pages_alloc + }, + { + .ctl_name = OBD_MAXMEMUSED, + .procname = "memused_max", + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_mem_max + }, + { + .ctl_name = 
OBD_MAXPAGESUSED, + .procname = "pagesused_max", + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_pages_max }, { .ctl_name = OBD_LDLM_TIMEOUT, diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index b7b5c71d1c..39d3fe2e8a 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1211,6 +1211,7 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) struct vfsmount *mnt; char *options = NULL; unsigned long page, s_flags; + struct page *__page; int rc; ENTRY; @@ -1264,9 +1265,10 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) /* Done with our pre-mount, now do the real mount. */ /* Glom up mount options */ - page = __get_free_page(GFP_KERNEL); - if (!page) + OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD); + if (!__page) GOTO(out_free, rc = -ENOMEM); + page = (unsigned long)cfs_page_address(__page); options = (char *)page; memset(options, 0, CFS_PAGE_SIZE); @@ -1288,7 +1290,7 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) MT_STR(ldd), lmd->lmd_dev, options); mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev, (void *)options); - free_page(page); + OBD_PAGE_FREE(__page); if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); CERROR("ll_kern_mount failed: rc = %d\n", rc); diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index e2f0a5ea84..d4c8fbc426 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -311,7 +311,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, /* Take extra ref so __free_pages() can be called OK */ cfs_get_page (r->page); } else { - r->page = cfs_alloc_page(gfp_mask); + OBD_PAGE_ALLOC(r->page, gfp_mask); if (r->page == NULL) { CERROR("can't get page %u/%u for id " LPU64"\n", @@ -362,7 +362,7 @@ preprw_cleanup: cfs_kunmap(r->page); /* NB if this is a persistent page, __free_pages will just * lose the extra ref gained above */ - cfs_free_page(r->page); + OBD_PAGE_FREE(r->page); 
atomic_dec(&obd->u.echo.eo_prep); } memset(res, 0, sizeof(*res) * niocount); @@ -433,7 +433,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, cfs_kunmap(page); /* NB see comment above regarding persistent pages */ - cfs_free_page(page); + OBD_PAGE_FREE(page); atomic_dec(&obd->u.echo.eo_prep); } } @@ -448,7 +448,7 @@ commitrw_cleanup: cfs_page_t *page = r->page; /* NB see comment above regarding persistent pages */ - cfs_free_page(page); + OBD_PAGE_FREE(page); atomic_dec(&obd->u.echo.eo_prep); } return rc; @@ -544,7 +544,7 @@ echo_persistent_pages_fini (void) for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) if (echo_persistent_pages[i] != NULL) { - cfs_free_page (echo_persistent_pages[i]); + OBD_PAGE_FREE(echo_persistent_pages[i]); echo_persistent_pages[i] = NULL; } } @@ -559,7 +559,7 @@ echo_persistent_pages_init (void) int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ? CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER; - pg = cfs_alloc_page (gfp_mask); + OBD_PAGE_ALLOC(pg, gfp_mask); if (pg == NULL) { echo_persistent_pages_fini (); return (-ENOMEM); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index f0f4edea1a..51099bb2ec 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -544,7 +544,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, LASSERT (pgp->pg == NULL); /* for cleanup */ rc = -ENOMEM; - pgp->pg = cfs_alloc_page (gfp_mask); + OBD_PAGE_ALLOC(pgp->pg, gfp_mask); if (pgp->pg == NULL) goto out; @@ -576,7 +576,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, if (vrc != 0 && rc == 0) rc = vrc; } - cfs_free_page(pgp->pg); + OBD_PAGE_FREE(pgp->pg); } OBD_FREE(pga, npages * sizeof(*pga)); return (rc); @@ -807,13 +807,14 @@ static int echo_client_async_page(struct obd_export *exp, int rw, /* prepare the group of pages that we're going to be keeping * in flight */ for (i = 0; i < npages; i++) { - cfs_page_t *page = cfs_alloc_page(CFS_ALLOC_STD); + 
cfs_page_t *page; + OBD_PAGE_ALLOC(page, CFS_ALLOC_STD); if (page == NULL) GOTO(out, rc = -ENOMEM); OBD_ALLOC(eap, sizeof(*eap)); if (eap == NULL) { - cfs_free_page(page); + OBD_PAGE_FREE(page); GOTO(out, rc = -ENOMEM); } @@ -908,7 +909,7 @@ out: obd_teardown_async_page(exp, lsm, NULL, eap->eap_cookie); OBD_FREE(eap, sizeof(*eap)); - cfs_free_page(page); + OBD_PAGE_FREE(page); } OBD_FREE(aps, npages * sizeof aps[0]); } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index ad639a392c..07defed586 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1833,7 +1833,8 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) { struct lprocfs_static_vars lvars; struct lustre_cfg* lcfg = buf; - unsigned long page; + unsigned long addr; + struct page *page; int rc; CLASSERT(offsetof(struct obd_device, u.obt) == @@ -1843,9 +1844,11 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(-EINVAL); /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */ - page = get_zeroed_page(GFP_KERNEL); + OBD_PAGE_ALLOC(page, CFS_ALLOC_STD); if (!page) RETURN(-ENOMEM); + addr = (unsigned long)cfs_page_address(page); + clear_page((void *)addr); /* lprocfs must be setup before the filter so state can be safely added * to /proc incrementally as the filter is setup */ @@ -1868,10 +1871,10 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) filter_nid_stats_clear_read, filter_nid_stats_clear_write, obd); - memcpy((void *)page, lustre_cfg_buf(lcfg, 4), + memcpy((void *)addr, lustre_cfg_buf(lcfg, 4), LUSTRE_CFG_BUFLEN(lcfg, 4)); - rc = filter_common_setup(obd, len, buf, (void *)page); - free_page(page); + rc = filter_common_setup(obd, len, buf, (void *)addr); + OBD_PAGE_FREE(page); if (rc) { lprocfs_obd_cleanup(obd); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 2495bf6d94..056ed0d15f 100644 --- a/lustre/ost/ost_handler.c +++ 
b/lustre/ost/ost_handler.c @@ -1681,7 +1681,7 @@ static void ost_thread_done(struct ptlrpc_thread *thread) if (tls != NULL) { for (i = 0; i < OST_THREAD_POOL_SIZE; ++ i) { if (tls->page[i] != NULL) - __cfs_free_page(tls->page[i]); + OBD_PAGE_FREE(tls->page[i]); } OBD_FREE_PTR(tls); thread->t_data = NULL; @@ -1712,7 +1712,7 @@ static int ost_thread_init(struct ptlrpc_thread *thread) * populate pool */ for (i = 0; i < OST_THREAD_POOL_SIZE; ++ i) { - tls->page[i] = cfs_alloc_page(OST_THREAD_POOL_GFP); + OBD_PAGE_ALLOC(tls->page[i], OST_THREAD_POOL_GFP); if (tls->page[i] == NULL) { ost_thread_done(thread); result = -ENOMEM; diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index af74dc7842..68cc9f209e 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -161,6 +161,8 @@ static int ptlrpc_pinger_main(void *arg) ptlrpc_update_next_ping(imp); } mutex_up(&pinger_sem); + /* update memory usage info */ + obd_update_maxusage(); /* Wait until the next ping time, or until we're stopped. */ time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, diff --git a/lustre/tests/leak_finder.pl b/lustre/tests/leak_finder.pl index 5219996d25..aa783bb546 100644 --- a/lustre/tests/leak_finder.pl +++ b/lustre/tests/leak_finder.pl @@ -14,7 +14,7 @@ my $max = 0; while ($line = <>) { $debug_line++; my ($file, $func, $lno, $name, $size, $addr, $type); - if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/){ + if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at (.*)\..*$/){ $file = $2; $lno = $3; $func = $4; @@ -22,7 +22,6 @@ while ($line = <>) { $name = $7; $size = $8; $addr = $9; - $tot = $10; # we can't dump the log after portals has exited, so skip "leaks" # from memory freed in the portals module unloading. 
@@ -69,10 +68,6 @@ while ($line = <>) { delete $memory->{$addr}; $total -= $size; } - if ($total != int($tot)) { - print "kernel total $tot != my total $total\n"; - $total = $tot; - } } # Sort leak output by allocation time -- GitLab