From be031767b14b53b4e0a577c3a45d0c471c5f89a9 Mon Sep 17 00:00:00 2001
From: bobijam <bobijam>
Date: Tue, 19 Aug 2008 01:59:07 +0000
Subject: [PATCH] Branch HEAD b=16566 o=Jonathan Li(jli@cray.com) i=shadow,
 bobijam

Description: Upcall after a Lustre log has been dumped
Details    : Allow a user-mode script to be called once a Lustre log has
             been dumped. The filename of the dumped log is passed to the
             script; the location of the script can be specified via
             /proc/sys/lnet/debug_log_upcall.
---
 libcfs/libcfs/debug.c             |  4 ++++
 libcfs/libcfs/linux/linux-debug.c | 36 +++++++++++++++++++++++++++++++
 libcfs/libcfs/linux/linux-proc.c  | 35 ++++++++++++++++++++----------
 libcfs/libcfs/tracefile.h         |  8 ++++---
 lustre/ChangeLog                  | 14 +++++++++---
 lustre/include/obd_support.h      |  2 ++
 lustre/ptlrpc/service.c           |  9 +++++---
 7 files changed, 88 insertions(+), 20 deletions(-)

diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c
index 3ae687d577..41848c2bd1 100644
--- a/libcfs/libcfs/debug.c
+++ b/libcfs/libcfs/debug.c
@@ -418,6 +418,9 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
         return 0;
 }
 
+/**
+ * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
+ */
 void libcfs_debug_dumplog_internal(void *arg)
 {
         CFS_DECL_JOURNAL_DATA;
@@ -431,6 +434,7 @@ void libcfs_debug_dumplog_internal(void *arg)
                 printk(KERN_ALERT "LustreError: dumping log to %s\n",
                        debug_file_name);
                 tracefile_dump_all_pages(debug_file_name);
+                libcfs_run_debug_log_upcall(debug_file_name);
         }
         CFS_POP_JOURNAL;
 }
diff --git a/libcfs/libcfs/linux/linux-debug.c b/libcfs/libcfs/linux/linux-debug.c
index c25e6599b7..e405ceb24c 100644
--- a/libcfs/libcfs/linux/linux-debug.c
+++ b/libcfs/libcfs/linux/linux-debug.c
@@ -78,6 +78,42 @@
 #endif
 
 char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
+char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
+
+/**
+ * Invoke the lnet_debug_log_upcall program via USERMODEHELPER after a log
+ * dump; a negative result other than -ENOENT is reported through CERROR.
+ * \param file  path of the dumped log (must not be NULL), passed as argv[1]
+ */
+void libcfs_run_debug_log_upcall(char *file)
+{
+        char *argv[3];
+        int   rc;
+        char *envp[] = {
+                "HOME=/",
+                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                NULL};
+        ENTRY;
+
+        argv[0] = lnet_debug_log_upcall;
+
+        LASSERTF(file != NULL, "called on a null filename\n");
+        argv[1] = file; /* the dumped log's path is the only argument */
+
+        argv[2] = NULL;
+
+        rc = USERMODEHELPER(argv[0], argv, envp);
+        if (rc < 0 && rc != -ENOENT) {
+                CERROR("Error %d invoking LNET debug log upcall %s %s; "
+                       "check /proc/sys/lnet/debug_log_upcall\n",
+                       rc, argv[0], argv[1]);
+        } else {
+                CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
+                       argv[0], argv[1]);
+        }
+
+        EXIT;
+}
 
 void libcfs_run_upcall(char **argv)
 {
diff --git a/libcfs/libcfs/linux/linux-proc.c b/libcfs/libcfs/linux/linux-proc.c
index e4580ff14f..a6b10f0c93 100644
--- a/libcfs/libcfs/linux/linux-proc.c
+++ b/libcfs/libcfs/linux/linux-proc.c
@@ -78,6 +78,10 @@
 
 static cfs_sysctl_table_header_t *lnet_table_header = NULL;
 extern char lnet_upcall[1024];
+/**
+ * Path of the debug log dump upcall script.
+ */
+extern char lnet_debug_log_upcall[1024];
 
 #define PSDEV_LNET  (0x100)
 enum {
@@ -97,11 +101,12 @@ enum {
         PSDEV_LNET_DUMP_KERNEL,   /* snapshot kernel debug buffer to file */
         PSDEV_LNET_DAEMON_FILE,   /* spool kernel debug buffer to file */
         PSDEV_LNET_DEBUG_MB,      /* size of debug buffer */
+        PSDEV_LNET_DEBUG_LOG_UPCALL, /* debug log upcall script */
 };
 
-static int 
-proc_call_handler(void *data, int write, 
-                  loff_t *ppos, void *buffer, size_t *lenp, 
+static int
+proc_call_handler(void *data, int write,
+                  loff_t *ppos, void *buffer, size_t *lenp,
                   int (*handler)(void *data, int write,
                                  loff_t pos, void *buffer, int len))
 {
@@ -130,7 +135,7 @@ LL_PROC_PROTO(name)                                     \
                                  __##name);             \
 }
 
-static int __proc_dobitmasks(void *data, int write, 
+static int __proc_dobitmasks(void *data, int write,
                              loff_t pos, void *buffer, int nob)
 {
         const int     tmpstrlen = 512;
@@ -176,7 +181,7 @@ static int __proc_dump_kernel(void *data, int write,
 {
         if (!write)
                 return 0;
-        
+
         return trace_dump_debug_buffer_usrstr(buffer, nob);
 }
 
@@ -187,14 +192,14 @@ static int __proc_daemon_file(void *data, int write,
 {
         if (!write) {
                 int len = strlen(tracefile);
-                
+
                 if (pos >= len)
                         return 0;
-                
-                return trace_copyout_string(buffer, nob, 
+
+                return trace_copyout_string(buffer, nob,
                                             tracefile + pos, "\n");
         }
-        
+
         return trace_daemon_command_usrstr(buffer, nob);
 }
 
@@ -210,10 +215,10 @@ static int __proc_debug_mb(void *data, int write,
 
                 if (pos >= len)
                         return 0;
-                
+
                 return trace_copyout_string(buffer, nob, tmpstr + pos, "\n");
         }
-        
+
         return trace_set_debug_mb_usrstr(buffer, nob);
 }
 
@@ -384,6 +389,14 @@ static cfs_sysctl_table_t lnet_table[] = {
                 .mode     = 0644,
                 .proc_handler = &proc_dostring,
         },
+        {
+                .ctl_name = PSDEV_LNET_DEBUG_LOG_UPCALL,
+                .procname = "debug_log_upcall",
+                .data     = lnet_debug_log_upcall,
+                .maxlen   = sizeof(lnet_debug_log_upcall),
+                .mode     = 0644,
+                .proc_handler = &proc_dostring,
+        },
         {
                 .ctl_name = PSDEV_LNET_MEMUSED,
                 .procname = "memused",
diff --git a/libcfs/libcfs/tracefile.h b/libcfs/libcfs/tracefile.h
index 0493063928..dd25327c21 100644
--- a/libcfs/libcfs/tracefile.h
+++ b/libcfs/libcfs/tracefile.h
@@ -45,6 +45,8 @@
 extern char      tracefile[TRACEFILE_NAME_SIZE];
 extern long long tracefile_size;
 
+extern void libcfs_run_debug_log_upcall(char *file);
+
 int  tracefile_init_arch(void);
 void tracefile_fini_arch(void);
 
@@ -96,7 +98,7 @@ extern int  trace_max_debug_mb(void);
 
 #define TRACEFILE_SIZE (500 << 20)
 
-/* Size of a buffer for sprinting console messages if we can't get a page 
+/* Size of a buffer for sprinting console messages if we can't get a page
  * from system */
 #define TRACE_CONSOLE_BUFFER_SIZE   1024
 
@@ -125,7 +127,7 @@ union trace_data_union {
 
 		/*
 		 * Maximal number of pages allowed on ->tcd_pages and
-		 * ->tcd_daemon_pages each. 
+		 * ->tcd_daemon_pages each.
 		 * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
 		 * implementation.
 		 */
@@ -233,7 +235,7 @@ struct trace_page {
 	 */
 	unsigned short   cpu;
 	/*
-	 * type(context) of this page 
+	 * type(context) of this page
 	 */
 	unsigned short   type;
 };
diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index f286dd9689..a3ce983d1f 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -12,12 +12,20 @@ tbd  Sun Microsystems, Inc.
        * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a
         removed cwd "./" (refer to Bugzilla 14399).
 
+Severity   : enhancement
+Bugzilla   : 16566
+Description: Upcall after a Lustre log has been dumped
+Details    : Allow a user-mode script to be called once a Lustre log has
+             been dumped. The filename of the dumped log is passed to the
+             script; the location of the script can be specified via
+             /proc/sys/lnet/debug_log_upcall.
+
 Severity   : minor
 Bugzilla   : 16583
 Frequency  : rare
-Description: avoid messages about idr_remove called for id  which is not allocated. 
-Details    : Move assigment s_dev for clustered nfs to end of initialization, for avoid
-             problem with error handling.
+Description: avoid messages about idr_remove called for id which is not allocated.
+Details    : Move assignment of s_dev for clustered nfs to end of initialization,
+             to avoid problems with error handling.
 
 Severity   : minor
 Bugzilla   : 16109
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index cda024f602..4a9a82f894 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -280,6 +280,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_PTLRPC_PAUSE_REQ        0x50a
 #define OBD_FAIL_PTLRPC_PAUSE_REP        0x50c
 
+#define OBD_FAIL_PTLRPC_DUMP_LOG         0x50e
+
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
 #define OBD_FAIL_OBD_LOGD_NET            0x602
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index 2f6c814849..c362aad0e3 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -644,7 +644,7 @@ static void ptlrpc_at_set_timer(struct ptlrpc_service *svc)
         /* Set timer for closest deadline */
         rq = list_entry(svc->srv_at_list.next, struct ptlrpc_request,
                         rq_timed_list);
-        next = (__s32)(rq->rq_deadline - cfs_time_current_sec() - 
+        next = (__s32)(rq->rq_deadline - cfs_time_current_sec() -
                        at_early_margin);
         if (next <= 0)
                 ptlrpc_at_timer((unsigned long)svc);
@@ -1071,6 +1071,9 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
 
         spin_unlock(&svc->srv_lock);
 
+        if(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG))
+                libcfs_debug_dumplog();
+
         do_gettimeofday(&work_start);
         timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,NULL);
         if (likely(svc->srv_stats != NULL)) {
@@ -1619,7 +1622,7 @@ int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc)
                 if (rc == -EMFILE)
                         break;
                 if (rc) {
-                        CERROR("cannot start %s thread #%d: rc %d\n", 
+                        CERROR("cannot start %s thread #%d: rc %d\n",
                                svc->srv_thread_name, i, rc);
                         ptlrpc_stop_all_threads(svc);
                 }
@@ -1667,7 +1670,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc)
         d.thread = thread;
 
         CDEBUG(D_RPCTRACE, "starting thread '%s'\n", name);
-        
+
           /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
          * just drop the VM and FILES in ptlrpc_daemonize() right away.
          */
-- 
GitLab