From ca6dd9a48e7b3ebf5df8a78c8ed65f7676cebf4d Mon Sep 17 00:00:00 2001
From: bobijam <bobijam>
Date: Thu, 13 Dec 2007 04:18:02 +0000
Subject: [PATCH] Branch HEAD b=12211 i=green, adilger

Description: make lustre randomly fail allocating memory
Details    : Make Lustre randomly fail memory allocations for testing purposes.
---
 lustre/ChangeLog                     |  4 ++
 lustre/include/linux/lvfs.h          |  4 ++
 lustre/include/obd_support.h         | 48 ++++++++++++++++------
 lustre/lvfs/Makefile.in              |  2 +-
 lustre/lvfs/autoMakefile.am          |  2 +-
 lustre/lvfs/lvfs_lib.c               | 60 ++++++++++++++++++++++++++++
 lustre/lvfs/lvfs_linux.c             |  1 -
 lustre/{obdclass => lvfs}/prng.c     |  0
 lustre/obdclass/Makefile.in          |  2 +-
 lustre/obdclass/autoMakefile.am      |  4 +-
 lustre/obdclass/class_obd.c          |  4 --
 lustre/obdclass/linux/linux-sysctl.c | 47 ++++++++++++++++++++++
 12 files changed, 156 insertions(+), 22 deletions(-)
 create mode 100644 lustre/lvfs/lvfs_lib.c
 rename lustre/{obdclass => lvfs}/prng.c (100%)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 1683f347f7..35739543a9 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -12,6 +12,10 @@
        * Recommended e2fsprogs version: 1.40.2-cfs4
        * Note that reiserfs quotas are disabled on SLES 10 in this kernel.
 
+Severity   : enhancement
+Bugzilla   : 12211
+Description: make lustre randomly fail allocating memory
+Details    : Make Lustre randomly fail memory allocations for testing purposes.
 
 Severity   : enhancement
 Bugzilla   : 12702
diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h
index 7c2f6aee4d..1cc27c4fd9 100644
--- a/lustre/include/linux/lvfs.h
+++ b/lustre/include/linux/lvfs.h
@@ -38,6 +38,10 @@ struct group_info { /* unused */ };
 
 #define LLOG_LVFS
 
+/* lvfs_lib.c */
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+                   size_t size, const char *file, int line);
+
 /* simple.c */
 
 struct lvfs_ucred {
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 469ded98ea..befcde88c8 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -24,6 +24,7 @@
 #define _OBD_SUPPORT
 
 #include <libcfs/kp30.h>
+#include <lvfs.h>
 #include <lprocfs_status.h>
 
 /* global variables */
@@ -49,6 +50,7 @@ extern unsigned int obd_max_dirty_pages;
 extern atomic_t obd_dirty_pages;
 extern cfs_waitq_t obd_race_waitq;
 extern int obd_race_state;
+extern unsigned int obd_alloc_fail_rate;
 
 /* Timeout definitions */
 #define LDLM_TIMEOUT_DEFAULT 20
@@ -222,6 +224,8 @@ extern int obd_race_state;
 
 #define OBD_FAIL_LPROC_REMOVE            0xb00
 
+#define OBD_FAIL_GENERAL_ALLOC           0xc00
+
 #define OBD_FAIL_SEQ                     0x1000
 #define OBD_FAIL_SEQ_QUERY_NET           0x1001
 
@@ -564,6 +568,16 @@ __put_mem_track(void *ptr, int size,
 
 #endif /* !OBD_DEBUG_MEMUSAGE */
 
+#ifdef RANDOM_FAIL_ALLOC /* build-time switch for allocation fault injection */
+#define HAS_FAIL_ALLOC_FLAG OBD_FAIL_CHECK(OBD_FAIL_GENERAL_ALLOC)
+#else
+#define HAS_FAIL_ALLOC_FLAG 0 /* allocation macros skip obd_alloc_fail() */
+#endif
+
+#define OBD_ALLOC_FAIL_BITS 24 /* bits of ll_rand() compared to the rate */
+#define OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1)
+#define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100) /* frac helper mult */
+
 #if defined(LUSTRE_UTILS) /* this version is for utils only */
 #define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
@@ -578,15 +592,20 @@ do {                                                                          \
         }                                                                     \
 } while (0)
 #else /* this version is for the kernel and liblustre */
+#define OBD_FREE_RTN0(ptr) /* free ptr, NULL it, evaluate to 0 */             \
+({                                                                            \
+        cfs_free(ptr);                                                        \
+        (ptr) = NULL;                                                         \
+        0; /* false operand for the caller's || chain */                      \
+})
 #define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
         (ptr) = cfs_alloc(size, (gfp_mask));                                  \
-        if (unlikely((ptr) == NULL)) {                                        \
-                CERROR("kmalloc of '" #ptr "' (%d bytes) failed\n",           \
-                       (int)(size));                                          \
-                CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
-                       obd_memory_sum(), atomic_read(&libcfs_kmemory));       \
-        } else {                                                              \
+        if (likely((ptr) != NULL &&                                           \
+                   (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
+                    !obd_alloc_fail(ptr, #ptr, "km", size,                    \
+                                    __FILE__, __LINE__) ||                    \
+                    OBD_FREE_RTN0(ptr)))){                                    \
                 memset(ptr, 0, size);                                         \
                 OBD_ALLOC_POST(ptr, size, "kmalloced");                       \
         }                                                                     \
@@ -686,16 +705,21 @@ do {                                                                          \
 /* we memset() the slab object to 0 when allocation succeeds, so DO NOT
  * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
  * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
+#define OBD_SLAB_FREE_RTN0(ptr, slab) /* free to slab, NULL ptr, yield 0 */   \
+({                                                                            \
+        cfs_mem_cache_free((slab), (ptr));                                    \
+        (ptr) = NULL;                                                         \
+        0; /* false operand for the caller's || chain */                      \
+})
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
         LASSERT(!in_interrupt());                                             \
         (ptr) = cfs_mem_cache_alloc(slab, (type));                            \
-        if (unlikely((ptr) == NULL)) {                                        \
-                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed\n",          \
-                       (int)(size));                                          \
-                CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
-                       obd_memory_sum(), atomic_read(&libcfs_kmemory));       \
-        } else {                                                              \
+        if (likely((ptr) != NULL &&                                           \
+                   (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
+                    !obd_alloc_fail(ptr, #ptr, "slab-", size,                 \
+                                    __FILE__, __LINE__) ||                    \
+                    OBD_SLAB_FREE_RTN0(ptr, slab)))) {                        \
                 memset(ptr, 0, size);                                         \
                 OBD_ALLOC_POST(ptr, size, "slab-alloced");                    \
         }                                                                     \
diff --git a/lustre/lvfs/Makefile.in b/lustre/lvfs/Makefile.in
index afa2511799..f103b623db 100644
--- a/lustre/lvfs/Makefile.in
+++ b/lustre/lvfs/Makefile.in
@@ -2,7 +2,7 @@ MODULES := lvfs
 @SERVER_TRUE@MODULES += fsfilt_@BACKINGFS@
 @QUOTA_TRUE@MODULES += quotafmt_test
 
-lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o upcall_cache.o
+lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o upcall_cache.o prng.o lvfs_lib.o
 @QUOTA_TRUE@lvfs-objs += lustre_quota_fmt.o
 
 @QUOTA_TRUE@quotafmt-objs := quotafmt_test.o
diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am
index e9234522de..658e540aef 100644
--- a/lustre/lvfs/autoMakefile.am
+++ b/lustre/lvfs/autoMakefile.am
@@ -4,7 +4,7 @@
 # See the file COPYING in this distribution
 if LIBLUSTRE
 noinst_LIBRARIES = liblvfs.a
-liblvfs_a_SOURCES = lvfs_userfs.c
+liblvfs_a_SOURCES = lvfs_userfs.c prng.c lvfs_lib.c
 liblvfs_a_CFLAGS = $(LLCFLAGS)
 liblvfs_a_CPPFLAGS = $(LLCPPFLAGS)
 
diff --git a/lustre/lvfs/lvfs_lib.c b/lustre/lvfs/lvfs_lib.c
new file mode 100644
index 0000000000..8ea2133375
--- /dev/null
+++ b/lustre/lvfs/lvfs_lib.c
@@ -0,0 +1,60 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/lvfs/lvfs_lib.c
+ *  Lustre filesystem abstraction routines
+ *
+ *  Copyright (C) 2007 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifdef __KERNEL__
+#include <linux/module.h>
+#include <linux/random.h>
+#else
+#include <liblustre.h>
+#endif
+#include <lustre_lib.h>
+
+unsigned int obd_fail_val = 0;        /* moved from obdclass/class_obd.c */
+unsigned int obd_fail_loc = 0;        /* moved from obdclass/class_obd.c */
+unsigned int obd_alloc_fail_rate = 0; /* 0: never force allocation failures */
+
+/* Log and return 1 if ptr is NULL or the masked ll_rand() value is below
+ * obd_alloc_fail_rate (forced failure; the caller frees ptr); otherwise 0. */
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+                   size_t size, const char *file, int line)
+{
+        if (ptr == NULL ||
+            (ll_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) {
+                CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n",
+                       ptr ? "force " :"", type, name, (__u64)size, file,
+                       line);
+                CERROR(LPU64" total bytes and "LPU64" total pages "
+                       "("LPU64" bytes) allocated by Lustre, "
+                       "%d total bytes by LNET\n",
+                       obd_memory_sum(), obd_pages_sum(),
+                       obd_pages_sum() << CFS_PAGE_SHIFT,
+                       atomic_read(&libcfs_kmemory));
+                return 1;
+        }
+        return 0;
+}
+EXPORT_SYMBOL(obd_alloc_fail);
+
+EXPORT_SYMBOL(obd_fail_loc);
+EXPORT_SYMBOL(obd_alloc_fail_rate);
+EXPORT_SYMBOL(obd_fail_val);
diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c
index 9bdcf354cd..e342eebae9 100644
--- a/lustre/lvfs/lvfs_linux.c
+++ b/lustre/lvfs/lvfs_linux.c
@@ -39,7 +39,6 @@
 #include <libcfs/kp30.h>
 #include <lustre_fsfilt.h>
 #include <obd.h>
-#include <obd_class.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/lustre_compat25.h>
diff --git a/lustre/obdclass/prng.c b/lustre/lvfs/prng.c
similarity index 100%
rename from lustre/obdclass/prng.c
rename to lustre/lvfs/prng.c
diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in
index 30b7361f54..11f93a06d2 100644
--- a/lustre/obdclass/Makefile.in
+++ b/lustre/obdclass/Makefile.in
@@ -23,7 +23,7 @@ obdclass-all-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o
 obdclass-all-objs += class_obd.o class_hash.o
 obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o
 obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o
-obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o
+obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o mea.o
 obdclass-all-objs += lu_object.o dt_object.o hash.o capa.o lu_time.o
 
 obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am
index 7a6fdce89b..21886e60c2 100644
--- a/lustre/obdclass/autoMakefile.am
+++ b/lustre/obdclass/autoMakefile.am
@@ -11,7 +11,7 @@ liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid
 liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c class_hash.c
 liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c 
 liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c capa.c
-liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c
+liblustreclass_a_SOURCES += #llog_ioctl.c rbtree.c
 liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS)
 liblustreclass_a_CFLAGS = $(LLCFLAGS)
 
@@ -32,7 +32,7 @@ obdclass_SOURCES := \
         darwin/darwin-module.c darwin/darwin-sysctl.c 		\
         class_obd.c genops.c lprocfs_status.c           	\
         lustre_handles.c lustre_peer.c obd_config.c     	\
-        obdo.c debug.c llog_ioctl.c uuid.c prng.c               \
+        obdo.c debug.c llog_ioctl.c uuid.c                      \
         llog_swab.c llog_obd.c llog.c llog_cat.c llog_lvfs.c    \
         mea.c lu_object.c dt_object.c hash.c
 
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c
index 2e87394230..243063d7f4 100644
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -60,8 +60,6 @@ __u64 obd_pages;
 #endif
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
-unsigned int obd_fail_loc;
-unsigned int obd_fail_val;
 unsigned int obd_debug_peer_on_timeout;
 unsigned int obd_dump_on_timeout;
 unsigned int obd_dump_on_eviction;
@@ -379,8 +377,6 @@ void *obd_psdev = NULL;
 #endif
 
 EXPORT_SYMBOL(obd_devs);
-EXPORT_SYMBOL(obd_fail_loc);
-EXPORT_SYMBOL(obd_fail_val);
 EXPORT_SYMBOL(obd_print_fail_loc);
 EXPORT_SYMBOL(obd_race_waitq);
 EXPORT_SYMBOL(obd_race_state);
diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c
index 05b4bafa9a..b5d950e9f5 100644
--- a/lustre/obdclass/linux/linux-sysctl.c
+++ b/lustre/obdclass/linux/linux-sysctl.c
@@ -42,6 +42,7 @@
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <obd_support.h>
+#include <lprocfs_status.h>
 
 cfs_sysctl_table_header_t *obd_table_header = NULL;
 
@@ -60,6 +61,7 @@ enum {
         OBD_LDLM_TIMEOUT,       /* LDLM timeout for ASTs before client eviction */
         OBD_DUMP_ON_EVICTION,   /* dump kernel debug log upon eviction */
         OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */
+        OBD_ALLOC_FAIL_RATE,    /* memory allocation random failure rate */
 };
 
 int LL_PROC_PROTO(proc_fail_loc)
@@ -179,6 +181,41 @@ int LL_PROC_PROTO(proc_pages_max)
         return 0;
 }
 
+#ifdef RANDOM_FAIL_ALLOC
+/* sysctl handler: table->data is read/written via the lprocfs frac helpers */
+int LL_PROC_PROTO(proc_alloc_fail_rate)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
+        loff_t *ppos = &filp->f_pos;
+#endif
+        int rc = 0;
+
+        if (!table->data || !table->maxlen || !*lenp || (*ppos && !write)) {
+                *lenp = 0;
+                return 0;
+        }
+        if (write) {
+                rc = lprocfs_write_frac_helper(buffer, *lenp,
+                                               (unsigned int*)table->data,
+                                               OBD_ALLOC_FAIL_MULT);
+        } else {
+                char buf[21];
+                int  len = lprocfs_read_frac_helper(buf, sizeof(buf),
+                                               *(unsigned int*)table->data,
+                                               OBD_ALLOC_FAIL_MULT);
+                if (len < 0) /* an error code must not be used as a length */
+                        return len;
+                if (len > *lenp)
+                        len = *lenp;
+                if (copy_to_user(buffer, buf, len))
+                        return -EFAULT;
+                *lenp = len;
+        }
+        *ppos += *lenp;
+        return rc;
+}
+#endif
+
 static cfs_sysctl_table_t obd_table[] = {
         {
                 .ctl_name = OBD_FAIL_LOC,
@@ -268,6 +305,16 @@ static cfs_sysctl_table_t obd_table[] = {
                 .mode     = 0644,
                 .proc_handler = &proc_set_timeout
         },
+#ifdef RANDOM_FAIL_ALLOC /* same guard as proc_alloc_fail_rate() above */
+        {
+                .ctl_name = OBD_ALLOC_FAIL_RATE,
+                .procname = "alloc_fail_rate",
+                .data     = &obd_alloc_fail_rate,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_alloc_fail_rate
+        },
+#endif
         { 0 }
 };
 
-- 
GitLab