diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c index 3ae687d5771fc21cd3d642f1a95682addf02b7fd..41848c2bd1f110cc6311d6fc148572b4a79631a6 100644 --- a/libcfs/libcfs/debug.c +++ b/libcfs/libcfs/debug.c @@ -418,6 +418,9 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) return 0; } +/** + * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() + */ void libcfs_debug_dumplog_internal(void *arg) { CFS_DECL_JOURNAL_DATA; @@ -431,6 +434,7 @@ void libcfs_debug_dumplog_internal(void *arg) printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); tracefile_dump_all_pages(debug_file_name); + libcfs_run_debug_log_upcall(debug_file_name); } CFS_POP_JOURNAL; } diff --git a/libcfs/libcfs/linux/linux-debug.c b/libcfs/libcfs/linux/linux-debug.c index c25e6599b78766ee112dff030edea86a25400428..e405ceb24c2196629f5025b69020ada7282e6b97 100644 --- a/libcfs/libcfs/linux/linux-debug.c +++ b/libcfs/libcfs/linux/linux-debug.c @@ -78,6 +78,42 @@ #endif char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; +char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; + +/** + * Upcall function once a Lustre log has been dumped. 
+ * + * \param file path of the dumped log + */ +void libcfs_run_debug_log_upcall(char *file) +{ + char *argv[3]; + int rc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + ENTRY; + + argv[0] = lnet_debug_log_upcall; + + LASSERTF(file != NULL, "called on a null filename\n"); + argv[1] = file; //only need to pass the path of the file + + argv[2] = NULL; + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET debug log upcall %s %s; " + "check /proc/sys/lnet/debug_log_upcall\n", + rc, argv[0], argv[1]); + } else { + CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", + argv[0], argv[1]); + } + + EXIT; +} void libcfs_run_upcall(char **argv) { diff --git a/libcfs/libcfs/linux/linux-proc.c b/libcfs/libcfs/linux/linux-proc.c index e4580ff14fa181c7fe39018eae8962b813bdd80c..a6b10f0c9373c49d1499cd60bd9eaaf700416d23 100644 --- a/libcfs/libcfs/linux/linux-proc.c +++ b/libcfs/libcfs/linux/linux-proc.c @@ -78,6 +78,10 @@ static cfs_sysctl_table_header_t *lnet_table_header = NULL; extern char lnet_upcall[1024]; +/** + * The path of debug log dump upcall script. 
+ */ +extern char lnet_debug_log_upcall[1024]; #define PSDEV_LNET (0x100) enum { @@ -97,11 +101,12 @@ enum { PSDEV_LNET_DUMP_KERNEL, /* snapshot kernel debug buffer to file */ PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */ PSDEV_LNET_DEBUG_MB, /* size of debug buffer */ + PSDEV_LNET_DEBUG_LOG_UPCALL, /* debug log upcall script */ }; -static int -proc_call_handler(void *data, int write, - loff_t *ppos, void *buffer, size_t *lenp, +static int +proc_call_handler(void *data, int write, + loff_t *ppos, void *buffer, size_t *lenp, int (*handler)(void *data, int write, loff_t pos, void *buffer, int len)) { @@ -130,7 +135,7 @@ LL_PROC_PROTO(name) \ __##name); \ } -static int __proc_dobitmasks(void *data, int write, +static int __proc_dobitmasks(void *data, int write, loff_t pos, void *buffer, int nob) { const int tmpstrlen = 512; @@ -176,7 +181,7 @@ static int __proc_dump_kernel(void *data, int write, { if (!write) return 0; - + return trace_dump_debug_buffer_usrstr(buffer, nob); } @@ -187,14 +192,14 @@ static int __proc_daemon_file(void *data, int write, { if (!write) { int len = strlen(tracefile); - + if (pos >= len) return 0; - - return trace_copyout_string(buffer, nob, + + return trace_copyout_string(buffer, nob, tracefile + pos, "\n"); } - + return trace_daemon_command_usrstr(buffer, nob); } @@ -210,10 +215,10 @@ static int __proc_debug_mb(void *data, int write, if (pos >= len) return 0; - + return trace_copyout_string(buffer, nob, tmpstr + pos, "\n"); } - + return trace_set_debug_mb_usrstr(buffer, nob); } @@ -384,6 +389,14 @@ static cfs_sysctl_table_t lnet_table[] = { .mode = 0644, .proc_handler = &proc_dostring, }, + { + .ctl_name = PSDEV_LNET_DEBUG_LOG_UPCALL, + .procname = "debug_log_upcall", + .data = lnet_debug_log_upcall, + .maxlen = sizeof(lnet_debug_log_upcall), + .mode = 0644, + .proc_handler = &proc_dostring, + }, { .ctl_name = PSDEV_LNET_MEMUSED, .procname = "memused", diff --git a/libcfs/libcfs/tracefile.h b/libcfs/libcfs/tracefile.h 
index 0493063928cbb2e607a988976fd0a5247a6e9306..dd25327c21e3bb44828d551a961a11bb154665e4 100644 --- a/libcfs/libcfs/tracefile.h +++ b/libcfs/libcfs/tracefile.h @@ -45,6 +45,8 @@ extern char tracefile[TRACEFILE_NAME_SIZE]; extern long long tracefile_size; +extern void libcfs_run_debug_log_upcall(char *file); + int tracefile_init_arch(void); void tracefile_fini_arch(void); @@ -96,7 +98,7 @@ extern int trace_max_debug_mb(void); #define TRACEFILE_SIZE (500 << 20) -/* Size of a buffer for sprinting console messages if we can't get a page +/* Size of a buffer for sprinting console messages if we can't get a page * from system */ #define TRACE_CONSOLE_BUFFER_SIZE 1024 @@ -125,7 +127,7 @@ union trace_data_union { /* * Maximal number of pages allowed on ->tcd_pages and - * ->tcd_daemon_pages each. + * ->tcd_daemon_pages each. * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current * implementation. */ @@ -233,7 +235,7 @@ struct trace_page { */ unsigned short cpu; /* - * type(context) of this page + * type(context) of this page */ unsigned short type; }; diff --git a/lustre/ChangeLog b/lustre/ChangeLog index f286dd96894858078b34e7bc8453759ee06669e6..a3ce983d1fa7ef73fbc8f442352356eeba2d09b0 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -12,12 +12,20 @@ tbd Sun Microsystems, Inc. * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a removed cwd "./" (refer to Bugzilla 14399). +Severity : enhancement +Bugzilla : 16566 +Description: Run an upcall once a Lustre log has been dumped +Details : Allow a user-mode script to be called once a Lustre log has + been dumped. The filename of the dumped log is passed to the + script; the location of the script can be specified via + /proc/sys/lnet/debug_log_upcall. + Severity : minor Bugzilla : 16583 Frequency : rare -Description: avoid messages about idr_remove called for id which is not allocated. -Details : Move assigment s_dev for clustered nfs to end of initialization, for avoid - problem with error handling. 
+Description: avoid messages about idr_remove called for id which is not allocated. +Details : Move assignment of s_dev for clustered nfs to end of initialization, to + avoid problems with error handling. Severity : minor Bugzilla : 16109 diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index cda024f60294c3e4386b0025429b848edf1e0fa3..4a9a82f8943a810c55adc7152ef1261b80b6fa71 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -280,6 +280,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_PTLRPC_PAUSE_REQ 0x50a #define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c +#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e + #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 #define OBD_FAIL_OBD_LOGD_NET 0x602 diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 2f6c8148495d253d2b1a7a91b276b22e66f23280..c362aad0e3300ea2311ee26fe450e94cbf8780e7 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -644,7 +644,7 @@ static void ptlrpc_at_set_timer(struct ptlrpc_service *svc) /* Set timer for closest deadline */ rq = list_entry(svc->srv_at_list.next, struct ptlrpc_request, rq_timed_list); - next = (__s32)(rq->rq_deadline - cfs_time_current_sec() - + next = (__s32)(rq->rq_deadline - cfs_time_current_sec() - at_early_margin); if (next <= 0) ptlrpc_at_timer((unsigned long)svc); @@ -1071,6 +1071,9 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, spin_unlock(&svc->srv_lock); + if(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG)) + libcfs_debug_dumplog(); + do_gettimeofday(&work_start); timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,NULL); if (likely(svc->srv_stats != NULL)) { @@ -1619,7 +1622,7 @@ int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc) if (rc == -EMFILE) break; if (rc) { - CERROR("cannot start %s thread #%d: rc %d\n", + CERROR("cannot start %s thread #%d: rc %d\n", svc->srv_thread_name, i, rc); 
ptlrpc_stop_all_threads(svc); } @@ -1667,7 +1670,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc) d.thread = thread; CDEBUG(D_RPCTRACE, "starting thread '%s'\n", name); - + /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we * just drop the VM and FILES in ptlrpc_daemonize() right away. */