From 8d6f5ad916263883824ad1cbdf0b483b939882a1 Mon Sep 17 00:00:00 2001 From: eeb <eeb> Date: Thu, 9 Dec 2004 01:03:34 +0000 Subject: [PATCH] * removed a diff that crept in somehow --- lnet/archdep.m4 | 106 ++++- lnet/include/linux/kp30.h | 7 +- lnet/include/linux/libcfs.h | 56 ++- lnet/include/linux/portals_compat25.h | 23 +- lnet/klnds/lolnd/lolnd.h | 2 +- lnet/klnds/qswlnd/qswlnd.h | 2 +- lnet/libcfs/Makefile.in | 2 +- lnet/libcfs/debug.c | 111 ++--- lnet/libcfs/module.c | 15 +- lnet/libcfs/proc.c | 10 +- lnet/libcfs/tracefile.c | 104 +++-- lnet/libcfs/tracefile.h | 8 +- lnet/libcfs/watchdog.c | 402 ++++++++++++++++++ lnet/lnet/api-ni.c | 6 +- lnet/lnet/module.c | 2 +- lnet/router/proc.c | 38 +- lnet/router/router.c | 44 +- lnet/tests/ping_cli.c | 4 +- lnet/tests/ping_srv.c | 2 +- lnet/tests/sping_cli.c | 4 +- lnet/tests/sping_srv.c | 2 +- lnet/utils/debug.c | 122 ++++-- lnet/utils/portals.c | 16 + lustre/portals/archdep.m4 | 106 ++++- lustre/portals/include/linux/kp30.h | 7 +- lustre/portals/include/linux/libcfs.h | 56 ++- .../portals/include/linux/portals_compat25.h | 23 +- lustre/portals/knals/lonal/lonal.h | 2 +- lustre/portals/knals/qswnal/qswnal.h | 2 +- lustre/portals/libcfs/Makefile.in | 2 +- lustre/portals/libcfs/debug.c | 111 ++--- lustre/portals/libcfs/module.c | 15 +- lustre/portals/libcfs/proc.c | 10 +- lustre/portals/libcfs/tracefile.c | 104 +++-- lustre/portals/libcfs/tracefile.h | 8 +- lustre/portals/libcfs/watchdog.c | 402 ++++++++++++++++++ lustre/portals/portals/api-ni.c | 6 +- lustre/portals/portals/module.c | 2 +- lustre/portals/router/proc.c | 38 +- lustre/portals/router/router.c | 44 +- lustre/portals/tests/ping_cli.c | 4 +- lustre/portals/tests/ping_srv.c | 2 +- lustre/portals/tests/sping_cli.c | 4 +- lustre/portals/tests/sping_srv.c | 2 +- lustre/portals/utils/debug.c | 122 ++++-- lustre/portals/utils/portals.c | 16 + 46 files changed, 1636 insertions(+), 540 deletions(-) create mode 100644 lnet/libcfs/watchdog.c create mode 100644 
lustre/portals/libcfs/watchdog.c diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 021fa6815f..26e86ed3eb 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -141,12 +141,22 @@ if test x$enable_modules != xno ; then ln -s `pwd` $LINUX/fs/lustre fi + # -------- linux objects (for 2.6) -- + AC_MSG_CHECKING([for Linux objects dir]) + AC_ARG_WITH([linux-obj], + AC_HELP_STRING([--with-linux-obj=path], + [set path to Linux objects dir (default=\$LINUX)]), + [LINUX_OBJ=$with_linux_obj], + [LINUX_OBJ=$LINUX]) + AC_MSG_RESULT([$LINUX_OBJ]) + AC_SUBST(LINUX_OBJ) + # -------- check for .confg -------- AC_ARG_WITH([linux-config], [AC_HELP_STRING([--with-linux-config=path], - [set path to Linux .conf (default=\$LINUX/.config)])], + [set path to Linux .conf (default=\$LINUX_OBJ/.config)])], [LINUX_CONFIG=$with_linux_config], - [LINUX_CONFIG=$LINUX/.config]) + [LINUX_CONFIG=$LINUX_OBJ/.config]) AC_SUBST(LINUX_CONFIG) AC_CHECK_FILE([/boot/kernel.h], @@ -241,7 +251,7 @@ _ACEOF AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE], [m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko -AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])], +AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] CC="$CC" -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM $MODULE_TARGET=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])], [$4], [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])dnl])dnl @@ -266,8 +276,8 @@ if test x$enable_modules != xno ; then [AC_MSG_ERROR([Kernel config 
could not be found. If you are building from a kernel-source rpm consult README.kernel-source])]) # ----------- make dep run? ------------------ - AC_CHECK_FILES([$LINUX/include/linux/autoconf.h - $LINUX/include/linux/version.h + AC_CHECK_FILES([$LINUX_OBJ/include/linux/autoconf.h + $LINUX_OBJ/include/linux/version.h $LINUX/include/linux/config.h],[], [AC_MSG_ERROR([Run make config in $LINUX.])]) @@ -278,7 +288,7 @@ if test x$enable_modules != xno ; then # tarred up the tree and ran make dep etc. in it, then # version.h gets overwritten with a standard linux one. - if grep rhconfig $LINUX/include/linux/version.h >/dev/null ; then + if grep rhconfig $LINUX_OBJ/include/linux/version.h >/dev/null ; then # This is a clean kernel-source tree, we need to # enable extensive workarounds to get this to build # modules @@ -292,22 +302,9 @@ if test x$enable_modules != xno ; then EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS" fi - # --- check that we can build modules at all - AC_MSG_CHECKING([that modules can be built]) - LUSTRE_MODULE_TRY_COMPILE([],[], - [ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_WARN([Consult config.log for details.]) - AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source]) - AC_MSG_ERROR([Kernel modules could not be built.]) - ]) - - # ------------ LINUXRELEASE and moduledir ------------------ + # ------------ external module support --------------------- MODULE_TARGET="SUBDIRS" if test $linux25 = 'yes' ; then - # ------------ external module support --------------------- makerule="$PWD/kernel-tests" AC_MSG_CHECKING([for external module build support]) rm -f kernel-tests/conftest.i @@ -325,6 +322,20 @@ if test x$enable_modules != xno ; then makerule="_dir_$PWD/kernel-tests" fi AC_SUBST(MODULE_TARGET) + + # --- check that we can build modules at all + AC_MSG_CHECKING([that modules can be built]) + LUSTRE_MODULE_TRY_COMPILE([],[], + [ + AC_MSG_RESULT([yes]) + ],[ + 
AC_MSG_RESULT([no]) + AC_MSG_WARN([Consult config.log for details.]) + AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source]) + AC_MSG_ERROR([Kernel modules could not be built.]) + ]) + + # ------------ LINUXRELEASE and moduledir ------------------ LINUXRELEASE= rm -f kernel-tests/conftest.i AC_MSG_CHECKING([for Linux release]) @@ -466,6 +477,7 @@ if test x$enable_modules != xno ; then AC_SUBST(GMCPPFLAGS) AC_SUBST(GMNAL) + if test $linux25 = 'no' ; then #### OpenIB AC_MSG_CHECKING([if OpenIB kernel headers are present]) OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD" @@ -488,6 +500,7 @@ if test x$enable_modules != xno ; then EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" AC_SUBST(OPENIBCPPFLAGS) AC_SUBST(OPENIBNAL) + fi #### Infinicon IB AC_MSG_CHECKING([if Infinicon IB kernel headers are present]) @@ -660,8 +673,8 @@ if test x$enable_modules != xno ; then #include <linux/fs.h> #include <linux/version.h> ],[ - #if defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24)) - #error "x86_64 down_read_trylock broken before 2.4.24" + #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24)) + #error "down_read_trylock broken before 2.4.24" #endif struct inode i; return (char *)&i.i_alloc_sem - (char *)&i; @@ -720,6 +733,41 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([no]) ]) + # ------------ kallsyms (so software watchdogs produce useful stacks) + AC_MSG_CHECKING([if kallsyms is enabled]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include <linux/config.h> + ],[ + #ifndef CONFIG_KALLSYMS + #error CONFIG_KALLSYMS is not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + if test "x$ARCH_UM" = "x" ; then + AC_MSG_ERROR([Lustre requires that CONFIG_KALLSYMS is enabled in your kernel.]) + fi + ]) + + # ------------ check for our show_task patch + AC_MSG_CHECKING([if kernel exports show_task]) + have_show_task=0 + for file in ksyms sched ; do + if grep -q "EXPORT_SYMBOL(show_task)" \ + 
"$LINUX/kernel/$file.c" 2>/dev/null ; then + have_show_task=1 + break + fi + done + if test x$have_show_task = x1 ; then + AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi + case $BACKINGFS in ext3) # --- Check that ext3 and ext3 xattr are enabled in the kernel @@ -808,3 +856,17 @@ echo "LLCPPFLAGS: $LLCPPFLAGS" echo "CFLAGS: $CFLAGS" echo "EXTRA_KCFLAGS: $EXTRA_KCFLAGS" echo "LLCFLAGS: $LLCFLAGS" + +ENABLE_INIT_SCRIPTS=0 +if test x$enable_utils = xyes ; then + AC_MSG_CHECKING([whether to install init scripts]) + # our scripts only work on red hat systems + if test -f /etc/init.d/functions -a -f /etc/sysconfig/network ; then + ENABLE_INIT_SCRIPTS=1 + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +fi +AM_CONDITIONAL(INIT_SCRIPTS, test x$ENABLE_INIT_SCRIPTS = "x1") +AC_SUBST(ENABLE_INIT_SCRIPTS) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 9006f13f93..d49baeb8aa 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -92,7 +92,7 @@ do { \ #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG\n"); \ - CERROR("STACK: %s\n", portals_debug_dumpstack()); \ + portals_debug_dumpstack(NULL); \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ set_task_state(current, TASK_UNINTERRUPTIBLE); \ @@ -243,9 +243,7 @@ extern struct prof_ent prof_ents[MAX_PROFS]; #endif /* PORTALS_PROFILING */ /* debug.c */ -extern spinlock_t stack_backtrace_lock; - -char *portals_debug_dumpstack(void); +void portals_debug_dumpstack(struct task_struct *tsk); void portals_run_upcall(char **argv); void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); @@ -747,4 +745,3 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); (unsigned long)(id)->li_stc.u.e3s.l3s_gen #endif - diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h index f6a0a60e51..8f8d6fe307 100644 --- 
a/lnet/include/linux/libcfs.h +++ b/lnet/include/linux/libcfs.h @@ -108,12 +108,14 @@ struct ptldebug_header { #define S_IBNAL 0x00400000 /* All IB NALs */ #define S_SM 0x00800000 #define S_ASOBD 0x01000000 -#define S_LMV 0x02000000 -#define S_CMOBD 0x04000000 -#define S_LONAL 0x08000000 - -/* If you change these values, please keep portals/utils/debug.c - * up to date! */ +#define S_CONFOBD 0x02000000 +#define S_LMV 0x04000000 +#define S_CMOBD 0x08000000 +#define S_LONAL 0x10000000 +/* If you change these values, please keep these files up to date... + * portals/utils/debug.c + * utils/lconf + */ /* Debugging masks (32 bits, non-overlapping) */ #define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ @@ -141,6 +143,11 @@ struct ptldebug_header { #define D_READA 0x00400000 /* read-ahead */ #define D_MMAP 0x00800000 #define D_CONFIG 0x01000000 +/* If you change these values, please keep these files up to date... + * portals/utils/debug.c + * utils/lconf + */ + #ifdef __KERNEL__ # include <linux/sched.h> /* THREAD_SIZE */ #else @@ -192,8 +199,8 @@ do { \ #define CDEBUG_MAX_LIMIT 600 #define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...) 
\ do { \ - static unsigned long cdebug_next; \ - static int cdebug_count, cdebug_delay = 1; \ + static unsigned long cdebug_next = 0; \ + static int cdebug_count = 0, cdebug_delay = 1; \ \ CHECK_STACK(CDEBUG_STACK); \ if (time_after(jiffies, cdebug_next)) { \ @@ -203,7 +210,8 @@ do { \ if (cdebug_count) { \ portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, \ __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK, cdebug_format, ## a); \ + 0, "skipped %d similar messages\n", \ + cdebug_count); \ cdebug_count = 0; \ } \ if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\ @@ -373,6 +381,34 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); #endif -#define _LIBCFS_H +#ifdef __KERNEL__ +/* libcfs watchdogs */ +struct lc_watchdog; + +/* Just use the default handler (dumplog) */ +#define LC_WATCHDOG_DEFAULT_CB NULL + +/* Add a watchdog which fires after "time" milliseconds of delay. You have to + * touch it once to enable it. */ +struct lc_watchdog *lc_watchdog_add(int time, + void (*cb)(struct lc_watchdog *, + struct task_struct *, + void *), + void *data); + +/* Enables a watchdog and resets its timer. */ +void lc_watchdog_touch(struct lc_watchdog *lcw); + +/* Disable a watchdog; touch it to restart it. 
*/ +void lc_watchdog_disable(struct lc_watchdog *lcw); + +/* Clean up the watchdog */ +void lc_watchdog_delete(struct lc_watchdog *lcw); + +/* Dump a debug log */ +void lc_watchdog_dumplog(struct lc_watchdog *lcw, + struct task_struct *tsk, + void *data); +#endif /* __KERNEL__ */ #endif /* _LIBCFS_H */ diff --git a/lnet/include/linux/portals_compat25.h b/lnet/include/linux/portals_compat25.h index 5a43a45686..fa2709e6ee 100644 --- a/lnet/include/linux/portals_compat25.h +++ b/lnet/include/linux/portals_compat25.h @@ -27,8 +27,10 @@ # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp, 1) # define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \ + TIF_SIGPENDING) # define CURRENT_SECONDS get_seconds() -# define smp_num_cpus NR_CPUS +# define smp_num_cpus num_online_cpus() #elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ @@ -40,11 +42,9 @@ # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING (current->sigpending = 0) # define CURRENT_SECONDS CURRENT_TIME -# define kernel_text_address(addr) is_kernel_text_address(addr) -extern int is_kernel_text_address(unsigned long addr); - #else /* 2.4.x */ # define SIGNAL_MASK_LOCK(task, flags) \ @@ -54,19 +54,22 @@ extern int is_kernel_text_address(unsigned long addr); # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending(current) +# define CLEAR_SIGPENDING (current->sigpending = 0) # define CURRENT_SECONDS CURRENT_TIME -# define kernel_text_address(addr) is_kernel_text_address(addr) -extern int is_kernel_text_address(unsigned long addr); - #endif #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) -# define THREAD_NAME(comm, len, fmt, a...)
\ - snprintf(comm, len, fmt "|%d", ## a, current->thread.extern_pid) +#define UML_PID(tsk) ((tsk)->thread.extern_pid) #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid) +#else +#define UML_PID(tsk) ((tsk)->pid) +#endif + +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) # define THREAD_NAME(comm, len, fmt, a...) \ - snprintf(comm, len,fmt"|%d", ## a,current->thread.mode.tt.extern_pid) + snprintf(comm, len,fmt"|%d", ## a, UML_PID(current)) #else # define THREAD_NAME(comm, len, fmt, a...) \ snprintf(comm, len, fmt, ## a) diff --git a/lnet/klnds/lolnd/lolnd.h b/lnet/klnds/lolnd/lolnd.h index 6571dab135..c6f2dda11a 100644 --- a/lnet/klnds/lolnd/lolnd.h +++ b/lnet/klnds/lolnd/lolnd.h @@ -33,6 +33,7 @@ #include <linux/stat.h> #include <linux/errno.h> #include <linux/uio.h> +#include <linux/init.h> #define DEBUG_SUBSYSTEM S_LONAL @@ -54,7 +55,6 @@ typedef struct struct iovec *iov; ptl_kiov_t *kiov; } klod_iov; - } klo_desc_t; typedef struct diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index b08d710812..0b0914e5de 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -99,7 +99,7 @@ typedef unsigned long kqsw_csum_t; #define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ #define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 512 /* # reserved transmit messages if can't block */ +#define KQSW_NNBLK_TXMSGS (PAGE_SIZE == 4096 ? 
512 : 256) /* # reserved transmit messages if can't block */ /* avoid qsnet crash b=5291 */ #define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ #define KQSW_EP_ENVELOPES_LARGE 256 /* # large ep envelopes */ diff --git a/lnet/libcfs/Makefile.in b/lnet/libcfs/Makefile.in index 0967123ce9..15fff121b4 100644 --- a/lnet/libcfs/Makefile.in +++ b/lnet/libcfs/Makefile.in @@ -1,4 +1,4 @@ MODULES = libcfs -libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o +libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o watchdog.o @INCLUDE_RULES@ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 53f15d2020..cd77438f0e 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -85,14 +85,9 @@ static char debug_file_name[1024]; static int handled_panic; /* to avoid recursive calls to notifiers */ char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; -int portals_do_debug_dumplog(void *arg) +void portals_debug_dumplog_internal(void *arg) { - void *journal_info; - - kportal_daemonize(""); - - reparent_to_init(); - journal_info = current->journal_info; + void *journal_info = current->journal_info; current->journal_info = NULL; snprintf(debug_file_name, sizeof(debug_file_path) - 1, @@ -101,6 +96,13 @@ int portals_do_debug_dumplog(void *arg) tracefile_dump_all_pages(debug_file_name); current->journal_info = journal_info; +} + +int portals_debug_dumplog_thread(void *arg) +{ + kportal_daemonize(""); + reparent_to_init(); + portals_debug_dumplog_internal(arg); wake_up(&debug_ctlwq); return 0; } @@ -117,7 +119,8 @@ void portals_debug_dumplog(void) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&debug_ctlwq, &wait); - rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, + rc = kernel_thread(portals_debug_dumplog_thread, + (void *)(long)current->pid, CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " @@ -288,98 +291,38 @@ char *portals_id2str(int nal, ptl_process_id_t id, 
char *str) portals_nid2str(nal, id.nid, str); len = strlen(str); - snprintf(str + len, PTL_NALFMT_SIZE - len, ",%u", id.pid); + snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); return str; } #ifdef __KERNEL__ -char stack_backtrace[LUSTRE_TRACE_SIZE]; -spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; - -#if defined(__arch_um__) -char *portals_debug_dumpstack(void) +void portals_debug_dumpstack(struct task_struct *tsk) { +#if defined(__arch_um__) + if (tsk != NULL) + CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", + tsk->pid, UML_PID(tsk)); asm("int $3"); - return "dump stack\n"; -} - -#elif defined(__i386__) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -extern int lookup_symbol(unsigned long address, char *buf, int buflen); -const char *kallsyms_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, char *namebuf) -{ - int rc = lookup_symbol(addr, namebuf, 128); - if (rc == -ENOSYS) - return NULL; - return namebuf; -} +#elif defined(HAVE_SHOW_TASK) + /* this is exported by lustre kernel version 42 */ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + CWARN("showing stack for process %d\n", tsk->pid); + show_task(tsk); +#else + CWARN("can't show stack: kernel doesn't export show_task\n"); #endif - -char *portals_debug_dumpstack(void) -{ - unsigned long esp = current->thread.esp, addr; - unsigned long *stack = (unsigned long *)&esp; - char *buf = stack_backtrace, *pbuf = buf; - int size; - - /* User space on another CPU? 
*/ - if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ - buf[0] = '\0'; - goto out; - } - - size = sprintf(pbuf, " Call Trace: "); - pbuf += size; - while (((long) stack & (THREAD_SIZE - 1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - const char *sym_name; - char *modname, buffer[128]; - unsigned long junk, offset; - - sym_name = kallsyms_lookup(addr, &junk, &offset, - &modname, buffer); - if (sym_name == NULL) { - if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) - break; - size = sprintf(pbuf, "[<%08lx>] ", addr); - } else { - if (buf + LUSTRE_TRACE_SIZE - /* fix length + sizeof('\0') */ - <= pbuf + strlen(buffer) + 28 + 1) - break; - size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack - 1); - } - pbuf += size; - } - } -out: - return buf; -} - -#else /* !__arch_um__ && !__i386__ */ - -char *portals_debug_dumpstack(void) -{ - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; } -#endif /* __arch_um__ */ struct task_struct *portals_current(void) { CWARN("current task struct is %p\n", current); return current; } -EXPORT_SYMBOL(stack_backtrace_lock); EXPORT_SYMBOL(portals_debug_dumpstack); EXPORT_SYMBOL(portals_current); #endif /* __KERNEL__ */ diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index a2422e300f..2a8e6f694c 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -266,7 +266,7 @@ libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) int i; int rc; - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); + CDEBUG(D_IOCTL, "Register NAL %x, handler: %p\n", nal, handler); down(&nal_cmd_sem); @@ -302,7 +302,7 @@ libcfs_nal_cmd_unregister(int nal) { struct nal_cmd_handler *cmd; - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); + CDEBUG(D_IOCTL, "Unregister NAL %x\n", nal); down(&nal_cmd_sem); cmd = libcfs_find_nal_cmd_handler(nal); @@ -316,6 +316,10 @@ EXPORT_SYMBOL(libcfs_nal_cmd_unregister); int libcfs_nal_cmd(struct portals_cfg *pcfg) { +#if CRAY_PORTALS + /* pretend 
success */ + RETURN(0); +#else struct nal_cmd_handler *cmd; __u32 nal = pcfg->pcfg_nal; int rc = -EINVAL; @@ -324,15 +328,16 @@ libcfs_nal_cmd(struct portals_cfg *pcfg) down(&nal_cmd_sem); cmd = libcfs_find_nal_cmd_handler(nal); if (cmd != NULL) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, + CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal, pcfg->pcfg_command); rc = cmd->nch_handler(pcfg, cmd->nch_private); } else { - CERROR("invalid nal: %d, cmd: %d\n", nal, pcfg->pcfg_command); + CERROR("invalid nal: %x, cmd: %d\n", nal, pcfg->pcfg_command); } up(&nal_cmd_sem); RETURN(rc); +#endif } EXPORT_SYMBOL(libcfs_nal_cmd); @@ -464,7 +469,7 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, break; } - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, + CDEBUG (D_IOCTL, "nal command nal %x cmd %d\n", pcfg.pcfg_nal, pcfg.pcfg_command); err = libcfs_nal_cmd(&pcfg); diff --git a/lnet/libcfs/proc.c b/lnet/libcfs/proc.c index aa9cfa85a2..08446a0ee8 100644 --- a/lnet/libcfs/proc.c +++ b/lnet/libcfs/proc.c @@ -280,13 +280,13 @@ int insert_proc(void) ent->write_proc = trace_write_daemon_file; ent->read_proc = trace_read_daemon_file; - ent = create_proc_entry("sys/portals/debug_size", 0, NULL); + ent = create_proc_entry("sys/portals/debug_mb", 0, NULL); if (ent == NULL) { - CERROR("couldn't register debug_size\n"); + CERROR("couldn't register debug_mb\n"); return -1; } - ent->write_proc = trace_write_debug_size; - ent->read_proc = trace_read_debug_size; + ent->write_proc = trace_write_debug_mb; + ent->read_proc = trace_read_debug_mb; return 0; } @@ -311,7 +311,7 @@ void remove_proc(void) remove_proc_entry("sys/portals/dump_kernel", NULL); remove_proc_entry("sys/portals/daemon_file", NULL); - remove_proc_entry("sys/portals/debug_size", NULL); + remove_proc_entry("sys/portals/debug_mb", NULL); #ifdef CONFIG_SYSCTL if (portals_table_header) diff --git a/lnet/libcfs/tracefile.c b/lnet/libcfs/tracefile.c index 57593166b2..aca4c41339 
100644 --- a/lnet/libcfs/tracefile.c +++ b/lnet/libcfs/tracefile.c @@ -40,7 +40,7 @@ #include <linux/portals_compat25.h> #include <linux/libcfs.h> -#define TCD_MAX_PAGES 1280 +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) /* XXX move things up to the top, comment */ @@ -72,8 +72,10 @@ struct tracefiled_ctl { atomic_t tctl_shutdown; }; +#define TRACEFILE_SIZE (500 << 20) static DECLARE_RWSEM(tracefile_sem); static char *tracefile = NULL; +static long long tracefile_size = TRACEFILE_SIZE; static struct tracefiled_ctl trace_tctl; static DECLARE_MUTEX(trace_thread_sem); static int thread_running = 0; @@ -123,7 +125,6 @@ static struct page *trace_get_page(struct trace_cpu_data *tcd, /* the kernel should print a message for us. fall back * to using the last page in the ring buffer. */ goto ring_buffer; - return NULL; } page->index = 0; page->mapping = (void *)(long)smp_processor_id(); @@ -200,8 +201,8 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, struct trace_cpu_data *tcd; struct ptldebug_header header; struct page *page; - char *debug_buf; - int known_size, needed, max_nob; + char *debug_buf = format; + int known_size, needed = 85 /* average message length */, max_nob; va_list ap; unsigned long flags; struct timeval tv; @@ -235,24 +236,26 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls - page = trace_get_page(tcd, known_size + 40); /* slop */ retry: - if (page == NULL) + page = trace_get_page(tcd, needed + known_size); + if (page == NULL) { + debug_buf = format; + if (needed + known_size > PAGE_SIZE) + mask |= D_ERROR; + needed = strlen(format); goto out; + } debug_buf = page_address(page) + page->index + known_size; - va_start(ap, format); max_nob = PAGE_SIZE - page->index - known_size; LASSERT(max_nob > 0); + va_start(ap, format); needed = vsnprintf(debug_buf, max_nob, format, ap); va_end(ap); - if (needed > max_nob) { - /* overflow. 
oh poop. */ - page = trace_get_page(tcd, needed + known_size); + if (needed > max_nob) /* overflow. oh poop. */ goto retry; - } header.ph_len = known_size + needed; debug_buf = page_address(page) + page->index; @@ -274,10 +277,10 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n", page->index); + out: if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk) print_to_console(&header, mask, debug_buf, needed, file, fn); - out: trace_put_tcd(tcd, flags); } EXPORT_SYMBOL(portals_debug_msg); @@ -450,7 +453,7 @@ int tracefile_dump_all_pages(char *filename) down_write(&tracefile_sem); - filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600); + filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600); if (IS_ERR(filp)) { rc = PTR_ERR(filp); printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", @@ -594,8 +597,8 @@ static int tracefiled(void *arg) filp = NULL; down_read(&tracefile_sem); if (tracefile != NULL) { - filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE, - 0600); + filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE, + 0600); if (IS_ERR(filp)) { printk("couldn't open %s: %ld\n", tracefile, PTR_ERR(filp)); @@ -621,12 +624,18 @@ static int tracefiled(void *arg) hdr->ph_flags |= PH_FLAG_FIRST_RECORD; list_for_each_safe(pos, tmp, &pc.pc_pages) { + static loff_t f_pos; page = list_entry(pos, struct page, PAGE_LIST_ENTRY); LASSERT(page->index <= PAGE_SIZE); LASSERT(page_count(page) > 0); + if (f_pos >= tracefile_size) + f_pos = 0; + else if (f_pos > filp->f_dentry->d_inode->i_size) + f_pos = filp->f_dentry->d_inode->i_size; + rc = filp->f_op->write(filp, page_address(page), - page->index, &filp->f_pos); + page->index, &f_pos); if (rc != page->index) { printk(KERN_WARNING "wanted to write %lu but " "wrote %d\n", page->index, rc); @@ -709,6 +718,13 @@ int trace_write_daemon_file(struct file *file, const char *buffer, tracefile = NULL; 
trace_stop_thread(); goto out_sem; + } else if (strncmp(name, "size=", 5) == 0) { + tracefile_size = simple_strtoul(name + 5, NULL, 0); + if (tracefile_size < 10 || tracefile_size > 20480) + tracefile_size = TRACEFILE_SIZE; + else + tracefile_size <<= 20; + goto out_sem; } if (name[0] != '/') { @@ -721,14 +737,17 @@ int trace_write_daemon_file(struct file *file, const char *buffer, tracefile = name; name = NULL; + + printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " + "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); + trace_start_thread(); out_sem: up_write(&tracefile_sem); out: - if (name) - kfree(name); + kfree(name); return count; } @@ -744,54 +763,53 @@ int trace_read_daemon_file(char *page, char **start, off_t off, int count, return rc; } -int trace_write_debug_size(struct file *file, const char *buffer, - unsigned long count, void *data) +int trace_write_debug_mb(struct file *file, const char *buffer, + unsigned long count, void *data) { - char *string; - int rc, i, max; - - string = kmalloc(count + 1, GFP_KERNEL); - if (string == NULL) - return -ENOMEM; + char string[32]; + int i; + unsigned max; - if (copy_from_user(string, buffer, count)) { - rc = -EFAULT; - goto out; + if (count >= sizeof(string)) { + printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n", + count); + return -EOVERFLOW; } + if (copy_from_user(string, buffer, count)) + return -EFAULT; + max = simple_strtoul(string, NULL, 0); - if (max == 0) { - rc = -EINVAL; - goto out; - } + if (max == 0) + return -EINVAL; max /= smp_num_cpus; - if (max > num_physpages / 5 * 4) { + if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) { printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%d pages, which is more than 80%% of physical pages " - "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4); - return count; + "%d MB, which is more than 80%% of physical RAM " + "(%lu).\n", max * smp_num_cpus, + (num_physpages >> (20 - 2 - 
PAGE_SHIFT)) / 5); + return -EINVAL; } for (i = 0; i < NR_CPUS; i++) { struct trace_cpu_data *tcd; tcd = &trace_data[i].tcd; - tcd->tcd_max_pages = max; + tcd->tcd_max_pages = max << (20 - PAGE_SHIFT); } - out: - kfree(string); return count; } -int trace_read_debug_size(char *page, char **start, off_t off, int count, - int *eof, void *data) +int trace_read_debug_mb(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct trace_cpu_data *tcd; unsigned long flags; int rc; tcd = trace_get_tcd(flags); - rc = snprintf(page, count, "%lu", tcd->tcd_max_pages); + rc = snprintf(page, count, "%lu\n", + (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus); trace_put_tcd(tcd, flags); return rc; diff --git a/lnet/libcfs/tracefile.h b/lnet/libcfs/tracefile.h index 1b6e7a05e1..f581257647 100644 --- a/lnet/libcfs/tracefile.h +++ b/lnet/libcfs/tracefile.h @@ -12,10 +12,10 @@ int trace_write_daemon_file(struct file *file, const char *buffer, unsigned long count, void *data); int trace_read_daemon_file(char *page, char **start, off_t off, int count, int *eof, void *data); -int trace_write_debug_size(struct file *file, const char *buffer, - unsigned long count, void *data); -int trace_read_debug_size(char *page, char **start, off_t off, int count, - int *eof, void *data); +int trace_write_debug_mb(struct file *file, const char *buffer, + unsigned long count, void *data); +int trace_read_debug_mb(char *page, char **start, off_t off, int count, + int *eof, void *data); int trace_dk(struct file *file, const char *buffer, unsigned long count, void *data); diff --git a/lnet/libcfs/watchdog.c b/lnet/libcfs/watchdog.c new file mode 100644 index 0000000000..844845a35f --- /dev/null +++ b/lnet/libcfs/watchdog.c @@ -0,0 +1,402 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. 
+ * Author: Jacob Berkman <jacob@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include <linux/kp30.h> +#include <linux/libcfs.h> +#include <linux/portals_compat25.h> + + + +struct lc_watchdog { + struct timer_list lcw_timer; /* kernel timer */ + struct list_head lcw_list; + struct timeval lcw_last_touched; + struct task_struct *lcw_task; + + void (*lcw_callback)(struct lc_watchdog *, + struct task_struct *, + void *data); + void *lcw_data; + + int lcw_pid; + int lcw_time; /* time until watchdog fires, in ms */ + + enum { + LC_WATCHDOG_DISABLED, + LC_WATCHDOG_ENABLED, + LC_WATCHDOG_EXPIRED + } lcw_state; +}; + +/* + * The dispatcher will complete lcw_start_completion when it starts, + * and lcw_stop_completion when it exits. + * Wake lcw_event_waitq to signal timer callback dispatches. + */ +static struct completion lcw_start_completion; +static struct completion lcw_stop_completion; +static wait_queue_head_t lcw_event_waitq; + +/* + * Set this and wake lcw_event_waitq to stop the dispatcher. + */ +enum { + LCW_FLAG_STOP = 0 +}; +static unsigned long lcw_flags = 0; + +/* + * Number of outstanding watchdogs. + * When it hits 1, we start the dispatcher. + * When it hits 0, we stop the dispatcher.
+ */ +static __u32 lcw_refcount = 0; +static DECLARE_MUTEX(lcw_refcount_sem); + +/* + * List of timers that have fired that need their callbacks run by the + * dispatcher. + */ +static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; +static struct list_head lcw_pending_timers = \ + LIST_HEAD_INIT(lcw_pending_timers); + +static struct task_struct *lcw_lookup_task(struct lc_watchdog *lcw) +{ + struct task_struct *tsk; + unsigned long flags; + ENTRY; + + read_lock_irqsave(&tasklist_lock, flags); + tsk = find_task_by_pid(lcw->lcw_pid); + read_unlock_irqrestore(&tasklist_lock, flags); + if (!tsk) { + CWARN("Process %d was not found in the task list; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); + } else if (tsk != lcw->lcw_task) { + tsk = NULL; + CWARN("The current process %d did not set the watchdog; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); + } + + RETURN(tsk); +} + +static void lcw_cb(unsigned long data) +{ + struct lc_watchdog *lcw = (struct lc_watchdog *)data; + struct task_struct *tsk; + unsigned long flags; + + ENTRY; + + if (lcw->lcw_state != LC_WATCHDOG_ENABLED) { + EXIT; + return; + } + + lcw->lcw_state = LC_WATCHDOG_EXPIRED; + + CWARN("Watchdog triggered for pid %d: it was inactive for %dus\n", + lcw->lcw_pid, (lcw->lcw_time * 1000) / HZ); + + tsk = lcw_lookup_task(lcw); + if (tsk != NULL) + portals_debug_dumpstack(tsk); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (list_empty(&lcw->lcw_list)) { + list_add(&lcw->lcw_list, &lcw_pending_timers); + wake_up(&lcw_event_waitq); + } + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + EXIT; +} + +static int is_watchdog_fired(void) +{ + unsigned long flags; + int rc; + + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) + return 1; + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + return rc; +} + +static int lcw_dispatch_main(void *data) +{ + int rc = 0; 
+ unsigned long flags; + struct lc_watchdog *lcw; + struct task_struct *tsk; + + ENTRY; + + kportal_daemonize("lc_watchdogd"); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + complete(&lcw_start_completion); + + while (1) { + wait_event_interruptible(lcw_event_waitq, is_watchdog_fired()); + CDEBUG(D_INFO, "Watchdog got woken up...\n"); + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { + CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + if (rc) { + CERROR("pending timers list was not empty at " + "time of watchdog dispatch shutdown\n"); + } + break; + } + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + while (!list_empty(&lcw_pending_timers)) { + + lcw = list_entry(lcw_pending_timers.next, + struct lc_watchdog, + lcw_list); + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid); + + if (lcw->lcw_state != LC_WATCHDOG_DISABLED) { + /* + * sanity check the task against our + * watchdog + */ + tsk = lcw_lookup_task(lcw); + lcw->lcw_callback(lcw, tsk, lcw->lcw_data); + } + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + } + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + } + + complete(&lcw_stop_completion); + + RETURN(rc); +} + +static void lcw_dispatch_start(void) +{ + int rc; + + ENTRY; + LASSERT(lcw_refcount == 1); + + init_completion(&lcw_stop_completion); + init_completion(&lcw_start_completion); + init_waitqueue_head(&lcw_event_waitq); + + CDEBUG(D_INFO, "starting dispatch thread\n"); + rc = kernel_thread(lcw_dispatch_main, NULL, 0); + if (rc < 0) { + CERROR("error spawning watchdog dispatch thread: %d\n", rc); + EXIT; + return; + } + wait_for_completion(&lcw_start_completion); + CDEBUG(D_INFO, "watchdog
dispatcher initialization complete.\n"); + + EXIT; +} + +static void lcw_dispatch_stop(void) +{ + ENTRY; + LASSERT(lcw_refcount == 0); + + CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); + + set_bit(LCW_FLAG_STOP, &lcw_flags); + wake_up(&lcw_event_waitq); + + wait_for_completion(&lcw_stop_completion); + + CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); + + EXIT; +} + +struct lc_watchdog *lc_watchdog_add(int time, + void (*callback)(struct lc_watchdog *, + struct task_struct *, + void *), + void *data) +{ + struct lc_watchdog *lcw = NULL; + ENTRY; + + PORTAL_ALLOC(lcw, sizeof(*lcw)); + if (!lcw) { + CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n"); + RETURN(ERR_PTR(-ENOMEM)); + } + + lcw->lcw_task = current; + lcw->lcw_pid = current->pid; + lcw->lcw_time = (time * HZ) / 1000; + lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog; + lcw->lcw_data = data; + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + INIT_LIST_HEAD(&lcw->lcw_list); + + lcw->lcw_timer.function = lcw_cb; + lcw->lcw_timer.data = (unsigned long)lcw; + lcw->lcw_timer.expires = jiffies + lcw->lcw_time; + init_timer(&lcw->lcw_timer); + + down(&lcw_refcount_sem); + if (++lcw_refcount == 1) + lcw_dispatch_start(); + up(&lcw_refcount_sem); + + /* Keep this working in case we enable them by default */ + if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { + do_gettimeofday(&lcw->lcw_last_touched); + add_timer(&lcw->lcw_timer); + } + + RETURN(lcw); +} +EXPORT_SYMBOL(lc_watchdog_add); + +static long +timeval_sub(struct timeval *large, struct timeval *small) +{ + return (large->tv_sec - small->tv_sec) * 1000000 + + (large->tv_usec - small->tv_usec); +} + +static void lcw_update_time(struct lc_watchdog *lcw, const char *message) +{ + struct timeval newtime; + unsigned long timediff; + + do_gettimeofday(&newtime); + if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { + timediff = timeval_sub(&newtime, &lcw->lcw_last_touched); + CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", + 
lcw->lcw_pid, + message, + timediff / 1000000, + (timediff % 1000000) / 100); + } + lcw->lcw_last_touched = newtime; +} + +void lc_watchdog_touch(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + lcw_update_time(lcw, "touched"); + lcw->lcw_state = LC_WATCHDOG_ENABLED; + + mod_timer(&lcw->lcw_timer, jiffies + lcw->lcw_time); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_touch); + +void lc_watchdog_disable(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + lcw_update_time(lcw, "disabled"); + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_disable); + +void lc_watchdog_delete(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + del_timer(&lcw->lcw_timer); + + lcw_update_time(lcw, "deleted"); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + down(&lcw_refcount_sem); + if (--lcw_refcount == 0) + lcw_dispatch_stop(); + up(&lcw_refcount_sem); + + PORTAL_FREE(lcw, sizeof(*lcw)); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_delete); + +/* + * Provided watchdog handlers + */ + +extern void portals_debug_dumplog_internal(void *arg); + +void lc_watchdog_dumplog(struct lc_watchdog *lcw, + struct task_struct *tsk, + void *data) +{ + tsk = tsk ? 
tsk : current; + portals_debug_dumplog_internal((void *)(long)tsk->pid); +} +EXPORT_SYMBOL(lc_watchdog_dumplog); diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 72d3b4188e..7e92256fe6 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -165,7 +165,7 @@ void PtlFini(void) continue; if (nal->nal_refct != 0) { - CWARN("NAL %d has outstanding refcount %d\n", + CWARN("NAL %x has outstanding refcount %d\n", i, nal->nal_refct); nal->nal_ni_fini(nal); } @@ -212,11 +212,11 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; nal->nal_handle.cookie = 0; - CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct); + CDEBUG(D_OTHER, "Starting up NAL (%x) refs %d\n", interface, nal->nal_refct); rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits); if (rc != PTL_OK) { - CERROR("Error %d starting up NAL %d, refs %d\n", rc, + CERROR("Error %d starting up NAL %x, refs %d\n", rc, interface, nal->nal_refct); GOTO(out, rc); } diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 61ef37250d..c1303b7863 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -81,7 +81,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, ptl_handle_ni_t nih; ptl_process_id_t pid; - CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); + CDEBUG (D_IOCTL, "Getting nid for nal [%x]\n", data->ioc_nal); err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); diff --git a/lnet/router/proc.c b/lnet/router/proc.c index a1397d28d2..61b6880c6f 100644 --- a/lnet/router/proc.c +++ b/lnet/router/proc.c @@ -31,6 +31,7 @@ struct proc_route_data { struct list_head *curr; unsigned int generation; off_t skip; + rwlock_t proc_route_rwlock; } kpr_read_routes_data; /* nal2name support re-used from utils/portals.c */ @@ -43,6 +44,8 @@ struct name2num { { "tcp", SOCKNAL}, { "gm", GMNAL}, { "ib", OPENIBNAL}, + { "iib", IIBNAL}, + { "lo", 
LONAL}, { NULL, -1} }; @@ -96,19 +99,22 @@ static int kpr_proc_router_write(struct file *file, const char *ubuffer, static int kpr_proc_routes_read(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct proc_route_data *prd = data; - kpr_route_entry_t *re; - kpr_gateway_entry_t *ge; - int chunk_len = 0; - int line_len = 0; - int user_len = 0; + struct proc_route_data *prd = data; + kpr_route_entry_t *re; + kpr_gateway_entry_t *ge; + int chunk_len = 0; + int line_len = 0; + int user_len = 0; + int rc = 0; *eof = 1; *start = page; + write_lock(&(prd->proc_route_rwlock)); + if (prd->curr == NULL) { if (off != 0) - return 0; + goto routes_read_exit; /* First pass, initialize our private data */ prd->curr = kpr_routes.next; @@ -118,13 +124,14 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, /* Abort route list generation change */ if (prd->generation != kpr_routes_generation) { prd->curr = NULL; - return sprintf(page, "\nError: Routes Changed\n"); + rc = sprintf(page, "\nError: Routes Changed\n"); + goto routes_read_exit; } /* All the routes have been walked */ if (prd->curr == &kpr_routes) { prd->curr = NULL; - return 0; + goto routes_read_exit; } } @@ -148,7 +155,8 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, if (prd->curr->next == NULL) { prd->curr = NULL; read_unlock(&kpr_rwlock); - return sprintf(page, "\nError: Routes Changed\n"); + rc = sprintf(page, "\nError: Routes Changed\n"); + goto routes_read_exit; } prd->curr = prd->curr->next; @@ -169,13 +177,18 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, prd->curr = prd->curr->prev; prd->skip = line_len - (user_len - count); read_unlock(&kpr_rwlock); - return count; + rc = count; + goto routes_read_exit; } /* Not enough data to entirely satify callers request */ prd->skip = 0; read_unlock(&kpr_rwlock); - return user_len; + rc = user_len; + +routes_read_exit: + write_unlock(&(prd->proc_route_rwlock)); + return rc; } 
static int kpr_proc_routes_write(struct file *file, const char *ubuffer, @@ -215,6 +228,7 @@ void kpr_proc_init(void) kpr_read_routes_data.curr = NULL; kpr_read_routes_data.generation = 0; kpr_read_routes_data.skip = 0; + kpr_read_routes_data.proc_route_rwlock = RW_LOCK_UNLOCKED; routes_entry->data = &kpr_read_routes_data; routes_entry->read_proc = kpr_proc_routes_read; diff --git a/lnet/router/router.c b/lnet/router/router.c index 448ab1f588..7edc5f6993 100644 --- a/lnet/router/router.c +++ b/lnet/router/router.c @@ -56,7 +56,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) struct list_head *e; kpr_nal_entry_t *ne; - CDEBUG (D_NET, "Registering NAL %d\n", nalif->kprni_nalid); + CDEBUG (D_NET, "Registering NAL %x\n", nalif->kprni_nalid); PORTAL_ALLOC (ne, sizeof (*ne)); if (ne == NULL) @@ -76,7 +76,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { write_unlock_irqrestore (&kpr_rwlock, flags); - CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid); + CERROR ("Attempt to register same NAL %x twice\n", ne->kpne_interface.kprni_nalid); PORTAL_FREE (ne, sizeof (*ne)); return (-EEXIST); @@ -126,7 +126,7 @@ kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when) kpr_upcall_t *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC); if (u == NULL) { - CERROR ("Upcall out of memory: nal %d nid "LPX64" (%s) %s\n", + CERROR ("Upcall out of memory: nal %x nid "LPX64" (%s) %s\n", gw_nalid, gw_nid, portals_nid2str(gw_nalid, gw_nid, str), alive ? "up" : "down"); @@ -155,14 +155,14 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, struct list_head *n; char str[PTL_NALFMT_SIZE]; - CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n", + CDEBUG (D_NET, "%s notifying [%x] "LPX64": %s\n", byNal ? "NAL" : "userspace", gateway_nalid, gateway_nid, alive ? "up" : "down"); /* can't do predictions... 
*/ do_gettimeofday (&now); if (when > now.tv_sec) { - CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s " + CWARN ("Ignoring prediction from %s of [%x] "LPX64" %s " "%ld seconds in the future\n", byNal ? "NAL" : "userspace", gateway_nalid, gateway_nid, @@ -252,7 +252,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, if (byNal) { /* It wasn't userland that notified me... */ - CWARN ("Upcall: NAL %d NID "LPX64" (%s) is %s\n", + CWARN ("Upcall: NAL %x NID "LPX64" (%s) is %s\n", gateway_nalid, gateway_nid, portals_nid2str(gateway_nalid, gateway_nid, str), alive ? "alive" : "dead"); @@ -278,7 +278,7 @@ kpr_shutdown_nal (void *arg) unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - CDEBUG (D_NET, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); + CDEBUG (D_NET, "Shutting down NAL %x\n", ne->kpne_interface.kprni_nalid); LASSERT (!ne->kpne_shutdown); LASSERT (!in_interrupt()); @@ -294,7 +294,7 @@ kpr_deregister_nal (void *arg) unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); + CDEBUG (D_NET, "Deregister NAL %x\n", ne->kpne_interface.kprni_nalid); LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */ LASSERT (!in_interrupt()); @@ -306,7 +306,7 @@ kpr_deregister_nal (void *arg) /* Wait until all outstanding messages/notifications have completed */ while (atomic_read (&ne->kpne_refcount) != 0) { - CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", + CDEBUG (D_NET, "Waiting for refcount on NAL %x to reach zero (%d)\n", ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); set_current_state (TASK_UNINTERRUPTIBLE); @@ -367,7 +367,7 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, /* Caller wants to know if 'target_nid' can be reached via a gateway * ON HER OWN NETWORK */ - CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid, + CDEBUG (D_NET, "lookup "LPX64" 
from NAL %x\n", target_nid, ne->kpne_interface.kprni_nalid); LASSERT (!in_interrupt()); @@ -411,7 +411,7 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, /* NB can't deref 're' now; it might have been removed! */ - CDEBUG (D_NET, "lookup "LPX64" from NAL %d: %d ("LPX64")\n", + CDEBUG (D_NET, "lookup "LPX64" from NAL %x: %d ("LPX64")\n", target_nid, ne->kpne_interface.kprni_nalid, rc, (rc == 0) ? *gateway_nidp : (ptl_nid_t)0); return (rc); @@ -449,7 +449,7 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) kpr_nal_entry_t *tmp_ne; int rc; - CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd, + CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid); LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov)); @@ -509,8 +509,8 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) read_unlock (&kpr_rwlock); - CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d: " - "to "LPX64" on NAL %d\n", + CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x: " + "to "LPX64" on NAL %x\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid, fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid); @@ -522,7 +522,7 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) out: kpr_fwd_errors++; - CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d: %d\n", + CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %x: %d\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc); (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc); @@ -536,14 +536,14 @@ kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error) kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg; kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg; - CDEBUG (D_NET, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd, + CDEBUG (D_NET, "complete(1) [%p] from NAL %x to NAL %x: %d\n", fwd, src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error); atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR 
dst_ne can disappear now!!! */ (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error); - CDEBUG (D_NET, "complete(2) [%p] from NAL %d: %d\n", fwd, + CDEBUG (D_NET, "complete(2) [%p] from NAL %x: %d\n", fwd, src_ne->kpne_interface.kprni_nalid, error); atomic_dec (&kpr_queue_depth); @@ -560,7 +560,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, kpr_gateway_entry_t *ge; int dup = 0; - CDEBUG(D_NET, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n", + CDEBUG(D_NET, "Add route: %x "LPX64" : "LPX64" - "LPX64"\n", gateway_nalid, gateway_nid, lo_nid, hi_nid); if (gateway_nalid == PTL_NID_ANY || @@ -645,7 +645,7 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, struct list_head *e; struct list_head *n; - CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", + CDEBUG(D_NET, "Del route [%x] "LPX64" : "LPX64" - "LPX64"\n", gw_nalid, gw_nid, lo, hi); LASSERT(!in_interrupt()); @@ -731,7 +731,7 @@ kpr_nal_cmd(struct portals_cfg *pcfg, void * private) break; case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", + CDEBUG(D_IOCTL, "Adding route: [%x] "LPU64" : "LPU64" - "LPU64"\n", pcfg->pcfg_nal, pcfg->pcfg_nid, pcfg->pcfg_nid2, pcfg->pcfg_nid3); err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, @@ -739,7 +739,7 @@ kpr_nal_cmd(struct portals_cfg *pcfg, void * private) break; case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", + CDEBUG (D_IOCTL, "Removing routes via [%x] "LPU64" : "LPU64" - "LPU64"\n", pcfg->pcfg_gw_nal, pcfg->pcfg_nid, pcfg->pcfg_nid2, pcfg->pcfg_nid3); err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, @@ -747,7 +747,7 @@ kpr_nal_cmd(struct portals_cfg *pcfg, void * private) break; case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", + CDEBUG (D_IOCTL, "Notifying peer [%x] "LPU64" %s @ %ld\n", pcfg->pcfg_gw_nal, pcfg->pcfg_nid, pcfg->pcfg_flags ? 
"Enabling" : "Disabling", (time_t)pcfg->pcfg_nid3); diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c index 7a3f8a0809..e9a84813c6 100644 --- a/lnet/tests/ping_cli.c +++ b/lnet/tests/ping_cli.c @@ -114,7 +114,7 @@ pingcli_start(struct portal_ioctl_data *args) client->tsk = current; client->args = args; CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ - nal %d, size %u, count: %u, timeout: %u\n", + nal %x, size %u, count: %u, timeout: %u\n", args->ioc_nid, portals_nid2str(args->ioc_nal, args->ioc_nid, str), args->ioc_nal, args->ioc_size, @@ -142,7 +142,7 @@ pingcli_start(struct portal_ioctl_data *args) rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); if (rc != PTL_OK || rc != PTL_IFACE_DUP) { - CERROR ("NAL %d not loaded\n", args->ioc_nal); + CERROR ("NAL %x not loaded\n", args->ioc_nal); pingcli_shutdown (nih, 4); return (NULL); } diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c index dec806ac1e..49e82af35c 100644 --- a/lnet/tests/ping_srv.c +++ b/lnet/tests/ping_srv.c @@ -206,7 +206,7 @@ static struct pingsrv_data *pingsrv_setup(void) /* Aquire and initialize the proper nal for portals. 
*/ rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { - CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); + CDEBUG (D_OTHER, "NAL %x not loaded\n", nal); return pingsrv_shutdown (4); } diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c index 730ba00392..d9970e7ee7 100644 --- a/lnet/tests/sping_cli.c +++ b/lnet/tests/sping_cli.c @@ -110,7 +110,7 @@ pingcli_start(struct portal_ioctl_data *args) client->args = args; CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ - nal %d, size %u, count: %u, timeout: %u\n", + nal %x, size %u, count: %u, timeout: %u\n", args->ioc_nid, portals_nid2str(args->ioc_nid, args->ioc_nal, str), args->ioc_nal, args->ioc_size, @@ -138,7 +138,7 @@ pingcli_start(struct portal_ioctl_data *args) rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR ("NAL %d not loaded.\n", args->ioc_nal); + CERROR ("NAL %x not loaded.\n", args->ioc_nal); pingcli_shutdown (nih, 4); return (NULL); } diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c index f2382d1066..069423d3e6 100644 --- a/lnet/tests/sping_srv.c +++ b/lnet/tests/sping_srv.c @@ -195,7 +195,7 @@ static struct pingsrv_data *pingsrv_setup(void) rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); + CDEBUG (D_OTHER, "Nal %x not loaded.\n", nal); return pingsrv_shutdown (4); } diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index d5ff09a390..16af0b5ec4 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -75,15 +75,22 @@ static int debug_mask = ~0; #define MAX_MARK_SIZE 100 static const char *portal_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite", - "rpc", "mgmt", "portals", "libcfs", "socknal", "qswnal", "pinger", - "filter", "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", - "ibnal", "lmv", "cmobd", "smfs", NULL}; + {"undefined", "mdc", 
"mds", "osc", + "ost", "class", "log", "llite", + "rpc", "mgmt", "portals", "socknal", + "qswnal", "pinger", "filter", "ptlbd", + "echo", "ldlm", "lov", "gmnal", + "router", "cobd", "ibnal", "sm", + "asobd", "confobd", "lmv", "cmobd", + "lonal", NULL}; static const char *portal_debug_masks[] = - {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", - "blocks", "net", "warning", "buffs", "other", "dentry", "portals", - "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", - "reada", "mmap", NULL}; + {"trace", "inode", "super", "ext2", + "malloc", "cache", "info", "ioctl", + "blocks", "net", "warning", "buffs", + "other", "dentry", "portals", "page", + "dlmtrace", "error", "emerg", "ha", + "rpctrace", "vfstrace", "reada", "mmap", + "config", NULL}; struct debug_daemon_cmd { char *cmd; @@ -488,51 +495,96 @@ int jt_dbg_debug_file(int argc, char **argv) return parse_buffer(in, out); } -const char debug_daemon_usage[]="usage: debug_daemon {start file [MB]|stop}\n"; +static int +dbg_write_cmd(int fd, char *str) +{ + int len = strlen(str); + int rc = write(fd, str, len); + + return (rc == len ? 
0 : 1); +} + +const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n"; +#define DAEMON_FILE "/proc/sys/portals/daemon_file" int jt_dbg_debug_daemon(int argc, char **argv) { - int rc, fd; + int rc; + int fd; if (argc <= 1) { - fprintf(stderr, debug_daemon_usage); - return 0; + fprintf(stderr, debug_daemon_usage, argv[0]); + return 1; } - fd = open("/proc/sys/portals/daemon_file", O_WRONLY); + fd = open(DAEMON_FILE, O_WRONLY); if (fd < 0) { - fprintf(stderr, "open(daemon_file) failed: %s\n", + fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE, strerror(errno)); - return 1; + return -1; } - + + rc = -1; if (strcasecmp(argv[1], "start") == 0) { - if (argc != 3) { - fprintf(stderr, debug_daemon_usage); - return 1; + if (argc < 3 || argc > 4 || + (argc == 4 && strlen(argv[3]) > 5)) { + fprintf(stderr, debug_daemon_usage, argv[0]); + goto out; } - rc = write(fd, argv[2], strlen(argv[2])); - if (rc != strlen(argv[2])) { - fprintf(stderr, "write(%s) failed: %s\n", argv[2], - strerror(errno)); - close(fd); - return 1; + if (argc == 4) { + char buf[12]; + const long min_size = 10; + const long max_size = 20480; + long size; + char *end; + + size = strtoul(argv[3], &end, 0); + if (size < min_size || + size > max_size || + *end != 0) { + fprintf(stderr, "size %s invalid, must be in " + "the range %ld-%ld MB\n", argv[3], + min_size, max_size); + goto out; + } + + snprintf(buf, sizeof(buf), "size=%ld", size); + rc = dbg_write_cmd(fd, buf); + if (rc != 0) { + fprintf(stderr, "set %s failed: %s\n", + buf, strerror(errno)); + goto out; + } } - } else if (strcasecmp(argv[1], "stop") == 0) { - rc = write(fd, "stop", 4); - if (rc != 4) { - fprintf(stderr, "write(stop) failed: %s\n", + + rc = dbg_write_cmd(fd, "start"); + if (rc != 0) { + fprintf(stderr, "start debug_daemon on %s failed: %s\n", + argv[2], strerror(errno)); + goto out; + } + + rc = 0; + goto out; + } + + if (strcasecmp(argv[1], "stop") == 0) { + rc = dbg_write_cmd(fd, "stop"); + if (rc != 0) { + 
fprintf(stderr, "stopping debug_daemon failed: %s\n", strerror(errno)); - close(fd); - return 1; + goto out; } - } else { - fprintf(stderr, debug_daemon_usage); - return 1; + + rc = 0; + goto out; } + fprintf(stderr, debug_daemon_usage, argv[0]); + rc = -1; +out: close(fd); - return 0; + return rc; } int jt_dbg_clear_debug_buf(int argc, char **argv) diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 07fada2634..abe3f987dc 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -39,6 +39,12 @@ #include <time.h> #include <stdarg.h> #include <endian.h> +#if CRAY_PORTALS +#ifdef REDSTORM +#define __QK__ +#endif +#include <portals/ipmap.h> +#endif #ifdef __CYGWIN__ @@ -66,12 +72,18 @@ typedef struct static name2num_t nalnames[] = { {"any", 0}, +#if !CRAY_PORTALS {"tcp", SOCKNAL}, {"elan", QSWNAL}, {"gm", GMNAL}, {"openib", OPENIBNAL}, {"iib", IIBNAL}, {"lo", LONAL}, +#else + {"cray_kern_nal", CRAY_KERN_NAL}, + {"cray_user_nal", CRAY_USER_NAL}, + {"cray_qk_nal", CRAY_QK_NAL}, +#endif {NULL, -1} }; @@ -363,7 +375,11 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) } if (ptl_parse_ipaddr (&ipaddr, str) == 0) { +#if !CRAY_PORTALS *nidp = (ptl_nid_t)ipaddr; +#else + *nidp = (((ptl_nid_t)ipaddr & PNAL_HOSTID_MASK) << PNAL_VNODE_SHIFT); +#endif return (0); } diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 021fa6815f..26e86ed3eb 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -141,12 +141,22 @@ if test x$enable_modules != xno ; then ln -s `pwd` $LINUX/fs/lustre fi + # -------- linux objects (for 2.6) -- + AC_MSG_CHECKING([for Linux objects dir]) + AC_ARG_WITH([linux-obj], + AC_HELP_STRING([--with-linux-obj=path], + [set path to Linux objects dir (default=\$LINUX)]), + [LINUX_OBJ=$with_linux_obj], + [LINUX_OBJ=$LINUX]) + AC_MSG_RESULT([$LINUX_OBJ]) + AC_SUBST(LINUX_OBJ) + # -------- check for .confg -------- AC_ARG_WITH([linux-config], [AC_HELP_STRING([--with-linux-config=path], - [set path to Linux .conf 
(default=\$LINUX/.config)])], + [set path to Linux .conf (default=\$LINUX_OBJ/.config)])], [LINUX_CONFIG=$with_linux_config], - [LINUX_CONFIG=$LINUX/.config]) + [LINUX_CONFIG=$LINUX_OBJ/.config]) AC_SUBST(LINUX_CONFIG) AC_CHECK_FILE([/boot/kernel.h], @@ -241,7 +251,7 @@ _ACEOF AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE], [m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko -AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])], +AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] CC="$CC" -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM $MODULE_TARGET=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])], [$4], [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])dnl])dnl @@ -266,8 +276,8 @@ if test x$enable_modules != xno ; then [AC_MSG_ERROR([Kernel config could not be found. If you are building from a kernel-source rpm consult README.kernel-source])]) # ----------- make dep run? ------------------ - AC_CHECK_FILES([$LINUX/include/linux/autoconf.h - $LINUX/include/linux/version.h + AC_CHECK_FILES([$LINUX_OBJ/include/linux/autoconf.h + $LINUX_OBJ/include/linux/version.h $LINUX/include/linux/config.h],[], [AC_MSG_ERROR([Run make config in $LINUX.])]) @@ -278,7 +288,7 @@ if test x$enable_modules != xno ; then # tarred up the tree and ran make dep etc. in it, then # version.h gets overwritten with a standard linux one. 
- if grep rhconfig $LINUX/include/linux/version.h >/dev/null ; then + if grep rhconfig $LINUX_OBJ/include/linux/version.h >/dev/null ; then # This is a clean kernel-source tree, we need to # enable extensive workarounds to get this to build # modules @@ -292,22 +302,9 @@ if test x$enable_modules != xno ; then EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS" fi - # --- check that we can build modules at all - AC_MSG_CHECKING([that modules can be built]) - LUSTRE_MODULE_TRY_COMPILE([],[], - [ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_WARN([Consult config.log for details.]) - AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source]) - AC_MSG_ERROR([Kernel modules could not be built.]) - ]) - - # ------------ LINUXRELEASE and moduledir ------------------ + # ------------ external module support --------------------- MODULE_TARGET="SUBDIRS" if test $linux25 = 'yes' ; then - # ------------ external module support --------------------- makerule="$PWD/kernel-tests" AC_MSG_CHECKING([for external module build support]) rm -f kernel-tests/conftest.i @@ -325,6 +322,20 @@ if test x$enable_modules != xno ; then makerule="_dir_$PWD/kernel-tests" fi AC_SUBST(MODULE_TARGET) + + # --- check that we can build modules at all + AC_MSG_CHECKING([that modules can be built]) + LUSTRE_MODULE_TRY_COMPILE([],[], + [ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_WARN([Consult config.log for details.]) + AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source]) + AC_MSG_ERROR([Kernel modules could not be built.]) + ]) + + # ------------ LINUXRELEASE and moduledir ------------------ LINUXRELEASE= rm -f kernel-tests/conftest.i AC_MSG_CHECKING([for Linux release]) @@ -466,6 +477,7 @@ if test x$enable_modules != xno ; then AC_SUBST(GMCPPFLAGS) AC_SUBST(GMNAL) + if test $linux25 = 'no' ; then #### OpenIB AC_MSG_CHECKING([if OpenIB kernel headers are present]) 
OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD" @@ -488,6 +500,7 @@ if test x$enable_modules != xno ; then EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" AC_SUBST(OPENIBCPPFLAGS) AC_SUBST(OPENIBNAL) + fi #### Infinicon IB AC_MSG_CHECKING([if Infinicon IB kernel headers are present]) @@ -660,8 +673,8 @@ if test x$enable_modules != xno ; then #include <linux/fs.h> #include <linux/version.h> ],[ - #if defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24)) - #error "x86_64 down_read_trylock broken before 2.4.24" + #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24)) + #error "down_read_trylock broken before 2.4.24" #endif struct inode i; return (char *)&i.i_alloc_sem - (char *)&i; @@ -720,6 +733,41 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([no]) ]) + # ------------ kallsyms (so software watchdogs produce useful stacks) + AC_MSG_CHECKING([if kallsyms is enabled]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include <linux/config.h> + ],[ + #ifndef CONFIG_KALLSYMS + #error CONFIG_KALLSYMS is not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + if test "x$ARCH_UM" = "x" ; then + AC_MSG_ERROR([Lustre requires that CONFIG_KALLSYMS is enabled in your kernel.]) + fi + ]) + + # ------------ check for our show_task patch + AC_MSG_CHECKING([if kernel exports show_task]) + have_show_task=0 + for file in ksyms sched ; do + if grep -q "EXPORT_SYMBOL(show_task)" \ + "$LINUX/kernel/$file.c" 2>/dev/null ; then + have_show_task=1 + break + fi + done + if test x$have_show_task = x1 ; then + AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi + case $BACKINGFS in ext3) # --- Check that ext3 and ext3 xattr are enabled in the kernel @@ -808,3 +856,17 @@ echo "LLCPPFLAGS: $LLCPPFLAGS" echo "CFLAGS: $CFLAGS" echo "EXTRA_KCFLAGS: $EXTRA_KCFLAGS" echo "LLCFLAGS: $LLCFLAGS" + +ENABLE_INIT_SCRIPTS=0 +if test x$enable_utils = xyes ; then + AC_MSG_CHECKING([whether to install init 
scripts]) + # our scripts only work on red hat systems + if test -f /etc/init.d/functions -a -f /etc/sysconfig/network ; then + ENABLE_INIT_SCRIPTS=1 + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +fi +AM_CONDITIONAL(INIT_SCRIPTS, test x$ENABLE_INIT_SCRIPTS = "x1") +AC_SUBST(ENABLE_INIT_SCRIPTS) diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 9006f13f93..d49baeb8aa 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -92,7 +92,7 @@ do { \ #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG\n"); \ - CERROR("STACK: %s\n", portals_debug_dumpstack()); \ + portals_debug_dumpstack(NULL); \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ set_task_state(current, TASK_UNINTERRUPTIBLE); \ @@ -243,9 +243,7 @@ extern struct prof_ent prof_ents[MAX_PROFS]; #endif /* PORTALS_PROFILING */ /* debug.c */ -extern spinlock_t stack_backtrace_lock; - -char *portals_debug_dumpstack(void); +void portals_debug_dumpstack(struct task_struct *tsk); void portals_run_upcall(char **argv); void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); @@ -747,4 +745,3 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); (unsigned long)(id)->li_stc.u.e3s.l3s_gen #endif - diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h index f6a0a60e51..8f8d6fe307 100644 --- a/lustre/portals/include/linux/libcfs.h +++ b/lustre/portals/include/linux/libcfs.h @@ -108,12 +108,14 @@ struct ptldebug_header { #define S_IBNAL 0x00400000 /* All IB NALs */ #define S_SM 0x00800000 #define S_ASOBD 0x01000000 -#define S_LMV 0x02000000 -#define S_CMOBD 0x04000000 -#define S_LONAL 0x08000000 - -/* If you change these values, please keep portals/utils/debug.c - * up to date! 
*/ +#define S_CONFOBD 0x02000000 +#define S_LMV 0x04000000 +#define S_CMOBD 0x08000000 +#define S_LONAL 0x10000000 +/* If you change these values, please keep these files up to date... + * portals/utils/debug.c + * utils/lconf + */ /* Debugging masks (32 bits, non-overlapping) */ #define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ @@ -141,6 +143,11 @@ struct ptldebug_header { #define D_READA 0x00400000 /* read-ahead */ #define D_MMAP 0x00800000 #define D_CONFIG 0x01000000 +/* If you change these values, please keep these files up to date... + * portals/utils/debug.c + * utils/lconf + */ + #ifdef __KERNEL__ # include <linux/sched.h> /* THREAD_SIZE */ #else @@ -192,8 +199,8 @@ do { \ #define CDEBUG_MAX_LIMIT 600 #define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...) \ do { \ - static unsigned long cdebug_next; \ - static int cdebug_count, cdebug_delay = 1; \ + static unsigned long cdebug_next = 0; \ + static int cdebug_count = 0, cdebug_delay = 1; \ \ CHECK_STACK(CDEBUG_STACK); \ if (time_after(jiffies, cdebug_next)) { \ @@ -203,7 +210,8 @@ do { \ if (cdebug_count) { \ portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, \ __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK, cdebug_format, ## a); \ + 0, "skipped %d similar messages\n", \ + cdebug_count); \ cdebug_count = 0; \ } \ if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\ @@ -373,6 +381,34 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); #endif -#define _LIBCFS_H +#ifdef __KERNEL__ +/* libcfs watchdogs */ +struct lc_watchdog; + +/* Just use the default handler (dumplog) */ +#define LC_WATCHDOG_DEFAULT_CB NULL + +/* Add a watchdog which fires after "time" milliseconds of delay. You have to + * touch it once to enable it. */ +struct lc_watchdog *lc_watchdog_add(int time, + void (*cb)(struct lc_watchdog *, + struct task_struct *, + void *), + void *data); + +/* Enables a watchdog and resets its timer. 
*/ +void lc_watchdog_touch(struct lc_watchdog *lcw); + +/* Disable a watchdog; touch it to restart it. */ +void lc_watchdog_disable(struct lc_watchdog *lcw); + +/* Clean up the watchdog */ +void lc_watchdog_delete(struct lc_watchdog *lcw); + +/* Dump a debug log */ +void lc_watchdog_dumplog(struct lc_watchdog *lcw, + struct task_struct *tsk, + void *data); +#endif /* !__KERNEL__ */ #endif /* _LIBCFS_H */ diff --git a/lustre/portals/include/linux/portals_compat25.h b/lustre/portals/include/linux/portals_compat25.h index 5a43a45686..fa2709e6ee 100644 --- a/lustre/portals/include/linux/portals_compat25.h +++ b/lustre/portals/include/linux/portals_compat25.h @@ -27,8 +27,10 @@ # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp, 1) # define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \ + TIF_SIGPENDING) # define CURRENT_SECONDS get_seconds() -# define smp_num_cpus NR_CPUS +# define smp_num_cpus num_online_cpus() #elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ @@ -40,11 +42,9 @@ # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING (current->sigpending = 0) # define CURRENT_SECONDS CURRENT_TIME -# define kernel_text_address(addr) is_kernel_text_address(addr) -extern int is_kernel_text_address(unsigned long addr); - #else /* 2.4.x */ # define SIGNAL_MASK_LOCK(task, flags) \ @@ -54,19 +54,22 @@ extern int is_kernel_text_address(unsigned long addr); # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending(current) +# define CLEAR_SIGPENDING (current->sigpending = 0) # define CURRENT_SECONDS CURRENT_TIME -# define kernel_text_address(addr) is_kernel_text_address(addr) -extern int is_kernel_text_address(unsigned long addr); - #endif #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) -# define 
THREAD_NAME(comm, len, fmt, a...) \ - snprintf(comm, len, fmt "|%d", ## a, current->thread.extern_pid) +#define UML_PID(tsk) ((tsk)->thread.extern_pid) #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid) +#else +#define UML_PID(tsk) ((tsk)->pid) +#endif + +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) # define THREAD_NAME(comm, len, fmt, a...) \ - snprintf(comm, len,fmt"|%d", ## a,current->thread.mode.tt.extern_pid) + snprintf(comm, len,fmt"|%d", ## a, UML_PID(current)) #else # define THREAD_NAME(comm, len, fmt, a...) \ snprintf(comm, len, fmt, ## a) diff --git a/lustre/portals/knals/lonal/lonal.h b/lustre/portals/knals/lonal/lonal.h index 6571dab135..c6f2dda11a 100644 --- a/lustre/portals/knals/lonal/lonal.h +++ b/lustre/portals/knals/lonal/lonal.h @@ -33,6 +33,7 @@ #include <linux/stat.h> #include <linux/errno.h> #include <linux/uio.h> +#include <linux/init.h> #define DEBUG_SUBSYSTEM S_LONAL @@ -54,7 +55,6 @@ typedef struct struct iovec *iov; ptl_kiov_t *kiov; } klod_iov; - } klo_desc_t; typedef struct diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h index b08d710812..0b0914e5de 100644 --- a/lustre/portals/knals/qswnal/qswnal.h +++ b/lustre/portals/knals/qswnal/qswnal.h @@ -99,7 +99,7 @@ typedef unsigned long kqsw_csum_t; #define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ #define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 512 /* # reserved transmit messages if can't block */ +#define KQSW_NNBLK_TXMSGS (PAGE_SIZE == 4096 ? 
512 : 256) /* # reserved transmit messages if can't block */ /* avoid qsnet crash b=5291 */ #define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ #define KQSW_EP_ENVELOPES_LARGE 256 /* # large ep envelopes */ diff --git a/lustre/portals/libcfs/Makefile.in b/lustre/portals/libcfs/Makefile.in index 0967123ce9..15fff121b4 100644 --- a/lustre/portals/libcfs/Makefile.in +++ b/lustre/portals/libcfs/Makefile.in @@ -1,4 +1,4 @@ MODULES = libcfs -libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o +libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o watchdog.o @INCLUDE_RULES@ diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 53f15d2020..cd77438f0e 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -85,14 +85,9 @@ static char debug_file_name[1024]; static int handled_panic; /* to avoid recursive calls to notifiers */ char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; -int portals_do_debug_dumplog(void *arg) +void portals_debug_dumplog_internal(void *arg) { - void *journal_info; - - kportal_daemonize(""); - - reparent_to_init(); - journal_info = current->journal_info; + void *journal_info = current->journal_info; current->journal_info = NULL; snprintf(debug_file_name, sizeof(debug_file_path) - 1, @@ -101,6 +96,13 @@ int portals_do_debug_dumplog(void *arg) tracefile_dump_all_pages(debug_file_name); current->journal_info = journal_info; +} + +int portals_debug_dumplog_thread(void *arg) +{ + kportal_daemonize(""); + reparent_to_init(); + portals_debug_dumplog_internal(arg); wake_up(&debug_ctlwq); return 0; } @@ -117,7 +119,8 @@ void portals_debug_dumplog(void) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&debug_ctlwq, &wait); - rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, + rc = kernel_thread(portals_debug_dumplog_thread, + (void *)(long)current->pid, CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump 
thread: " @@ -288,98 +291,38 @@ char *portals_id2str(int nal, ptl_process_id_t id, char *str) portals_nid2str(nal, id.nid, str); len = strlen(str); - snprintf(str + len, PTL_NALFMT_SIZE - len, ",%u", id.pid); + snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); return str; } #ifdef __KERNEL__ -char stack_backtrace[LUSTRE_TRACE_SIZE]; -spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; - -#if defined(__arch_um__) -char *portals_debug_dumpstack(void) +void portals_debug_dumpstack(struct task_struct *tsk) { +#if defined(__arch_um__) + if (tsk != NULL) + CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", + tsk->pid, UML_PID(tsk)); asm("int $3"); - return "dump stack\n"; -} - -#elif defined(__i386__) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -extern int lookup_symbol(unsigned long address, char *buf, int buflen); -const char *kallsyms_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, char *namebuf) -{ - int rc = lookup_symbol(addr, namebuf, 128); - if (rc == -ENOSYS) - return NULL; - return namebuf; -} +#elif defined(HAVE_SHOW_TASK) + /* this is exported by lustre kernel version 42 */ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + CWARN("showing stack for process %d\n", tsk->pid); + show_task(tsk); +#else + CWARN("can't show stack: kernel doesn't export show_task\n"); #endif - -char *portals_debug_dumpstack(void) -{ - unsigned long esp = current->thread.esp, addr; - unsigned long *stack = (unsigned long *)&esp; - char *buf = stack_backtrace, *pbuf = buf; - int size; - - /* User space on another CPU? 
*/ - if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ - buf[0] = '\0'; - goto out; - } - - size = sprintf(pbuf, " Call Trace: "); - pbuf += size; - while (((long) stack & (THREAD_SIZE - 1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - const char *sym_name; - char *modname, buffer[128]; - unsigned long junk, offset; - - sym_name = kallsyms_lookup(addr, &junk, &offset, - &modname, buffer); - if (sym_name == NULL) { - if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) - break; - size = sprintf(pbuf, "[<%08lx>] ", addr); - } else { - if (buf + LUSTRE_TRACE_SIZE - /* fix length + sizeof('\0') */ - <= pbuf + strlen(buffer) + 28 + 1) - break; - size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack - 1); - } - pbuf += size; - } - } -out: - return buf; -} - -#else /* !__arch_um__ && !__i386__ */ - -char *portals_debug_dumpstack(void) -{ - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; } -#endif /* __arch_um__ */ struct task_struct *portals_current(void) { CWARN("current task struct is %p\n", current); return current; } -EXPORT_SYMBOL(stack_backtrace_lock); EXPORT_SYMBOL(portals_debug_dumpstack); EXPORT_SYMBOL(portals_current); #endif /* __KERNEL__ */ diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c index a2422e300f..2a8e6f694c 100644 --- a/lustre/portals/libcfs/module.c +++ b/lustre/portals/libcfs/module.c @@ -266,7 +266,7 @@ libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) int i; int rc; - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); + CDEBUG(D_IOCTL, "Register NAL %x, handler: %p\n", nal, handler); down(&nal_cmd_sem); @@ -302,7 +302,7 @@ libcfs_nal_cmd_unregister(int nal) { struct nal_cmd_handler *cmd; - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); + CDEBUG(D_IOCTL, "Unregister NAL %x\n", nal); down(&nal_cmd_sem); cmd = libcfs_find_nal_cmd_handler(nal); @@ -316,6 +316,10 @@ EXPORT_SYMBOL(libcfs_nal_cmd_unregister); int libcfs_nal_cmd(struct portals_cfg 
*pcfg) { +#if CRAY_PORTALS + /* pretend success */ + RETURN(0); +#else struct nal_cmd_handler *cmd; __u32 nal = pcfg->pcfg_nal; int rc = -EINVAL; @@ -324,15 +328,16 @@ libcfs_nal_cmd(struct portals_cfg *pcfg) down(&nal_cmd_sem); cmd = libcfs_find_nal_cmd_handler(nal); if (cmd != NULL) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, + CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal, pcfg->pcfg_command); rc = cmd->nch_handler(pcfg, cmd->nch_private); } else { - CERROR("invalid nal: %d, cmd: %d\n", nal, pcfg->pcfg_command); + CERROR("invalid nal: %x, cmd: %d\n", nal, pcfg->pcfg_command); } up(&nal_cmd_sem); RETURN(rc); +#endif } EXPORT_SYMBOL(libcfs_nal_cmd); @@ -464,7 +469,7 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, break; } - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, + CDEBUG (D_IOCTL, "nal command nal %x cmd %d\n", pcfg.pcfg_nal, pcfg.pcfg_command); err = libcfs_nal_cmd(&pcfg); diff --git a/lustre/portals/libcfs/proc.c b/lustre/portals/libcfs/proc.c index aa9cfa85a2..08446a0ee8 100644 --- a/lustre/portals/libcfs/proc.c +++ b/lustre/portals/libcfs/proc.c @@ -280,13 +280,13 @@ int insert_proc(void) ent->write_proc = trace_write_daemon_file; ent->read_proc = trace_read_daemon_file; - ent = create_proc_entry("sys/portals/debug_size", 0, NULL); + ent = create_proc_entry("sys/portals/debug_mb", 0, NULL); if (ent == NULL) { - CERROR("couldn't register debug_size\n"); + CERROR("couldn't register debug_mb\n"); return -1; } - ent->write_proc = trace_write_debug_size; - ent->read_proc = trace_read_debug_size; + ent->write_proc = trace_write_debug_mb; + ent->read_proc = trace_read_debug_mb; return 0; } @@ -311,7 +311,7 @@ void remove_proc(void) remove_proc_entry("sys/portals/dump_kernel", NULL); remove_proc_entry("sys/portals/daemon_file", NULL); - remove_proc_entry("sys/portals/debug_size", NULL); + remove_proc_entry("sys/portals/debug_mb", NULL); #ifdef CONFIG_SYSCTL if (portals_table_header) diff --git 
a/lustre/portals/libcfs/tracefile.c b/lustre/portals/libcfs/tracefile.c index 57593166b2..aca4c41339 100644 --- a/lustre/portals/libcfs/tracefile.c +++ b/lustre/portals/libcfs/tracefile.c @@ -40,7 +40,7 @@ #include <linux/portals_compat25.h> #include <linux/libcfs.h> -#define TCD_MAX_PAGES 1280 +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) /* XXX move things up to the top, comment */ @@ -72,8 +72,10 @@ struct tracefiled_ctl { atomic_t tctl_shutdown; }; +#define TRACEFILE_SIZE (500 << 20) static DECLARE_RWSEM(tracefile_sem); static char *tracefile = NULL; +static long long tracefile_size = TRACEFILE_SIZE; static struct tracefiled_ctl trace_tctl; static DECLARE_MUTEX(trace_thread_sem); static int thread_running = 0; @@ -123,7 +125,6 @@ static struct page *trace_get_page(struct trace_cpu_data *tcd, /* the kernel should print a message for us. fall back * to using the last page in the ring buffer. */ goto ring_buffer; - return NULL; } page->index = 0; page->mapping = (void *)(long)smp_processor_id(); @@ -200,8 +201,8 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, struct trace_cpu_data *tcd; struct ptldebug_header header; struct page *page; - char *debug_buf; - int known_size, needed, max_nob; + char *debug_buf = format; + int known_size, needed = 85 /* average message length */, max_nob; va_list ap; unsigned long flags; struct timeval tv; @@ -235,24 +236,26 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls - page = trace_get_page(tcd, known_size + 40); /* slop */ retry: - if (page == NULL) + page = trace_get_page(tcd, needed + known_size); + if (page == NULL) { + debug_buf = format; + if (needed + known_size > PAGE_SIZE) + mask |= D_ERROR; + needed = strlen(format); goto out; + } debug_buf = page_address(page) + page->index + known_size; - va_start(ap, format); max_nob = PAGE_SIZE - page->index - known_size; LASSERT(max_nob > 0); + va_start(ap, 
format); needed = vsnprintf(debug_buf, max_nob, format, ap); va_end(ap); - if (needed > max_nob) { - /* overflow. oh poop. */ - page = trace_get_page(tcd, needed + known_size); + if (needed > max_nob) /* overflow. oh poop. */ goto retry; - } header.ph_len = known_size + needed; debug_buf = page_address(page) + page->index; @@ -274,10 +277,10 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n", page->index); + out: if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk) print_to_console(&header, mask, debug_buf, needed, file, fn); - out: trace_put_tcd(tcd, flags); } EXPORT_SYMBOL(portals_debug_msg); @@ -450,7 +453,7 @@ int tracefile_dump_all_pages(char *filename) down_write(&tracefile_sem); - filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600); + filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600); if (IS_ERR(filp)) { rc = PTR_ERR(filp); printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", @@ -594,8 +597,8 @@ static int tracefiled(void *arg) filp = NULL; down_read(&tracefile_sem); if (tracefile != NULL) { - filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE, - 0600); + filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE, + 0600); if (IS_ERR(filp)) { printk("couldn't open %s: %ld\n", tracefile, PTR_ERR(filp)); @@ -621,12 +624,18 @@ static int tracefiled(void *arg) hdr->ph_flags |= PH_FLAG_FIRST_RECORD; list_for_each_safe(pos, tmp, &pc.pc_pages) { + static loff_t f_pos; page = list_entry(pos, struct page, PAGE_LIST_ENTRY); LASSERT(page->index <= PAGE_SIZE); LASSERT(page_count(page) > 0); + if (f_pos >= tracefile_size) + f_pos = 0; + else if (f_pos > filp->f_dentry->d_inode->i_size) + f_pos = filp->f_dentry->d_inode->i_size; + rc = filp->f_op->write(filp, page_address(page), - page->index, &filp->f_pos); + page->index, &f_pos); if (rc != page->index) { printk(KERN_WARNING "wanted to write %lu but " "wrote %d\n", page->index, 
rc); @@ -709,6 +718,13 @@ int trace_write_daemon_file(struct file *file, const char *buffer, tracefile = NULL; trace_stop_thread(); goto out_sem; + } else if (strncmp(name, "size=", 5) == 0) { + tracefile_size = simple_strtoul(name + 5, NULL, 0); + if (tracefile_size < 10 || tracefile_size > 20480) + tracefile_size = TRACEFILE_SIZE; + else + tracefile_size <<= 20; + goto out_sem; } if (name[0] != '/') { @@ -721,14 +737,17 @@ int trace_write_daemon_file(struct file *file, const char *buffer, tracefile = name; name = NULL; + + printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " + "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); + trace_start_thread(); out_sem: up_write(&tracefile_sem); out: - if (name) - kfree(name); + kfree(name); return count; } @@ -744,54 +763,53 @@ int trace_read_daemon_file(char *page, char **start, off_t off, int count, return rc; } -int trace_write_debug_size(struct file *file, const char *buffer, - unsigned long count, void *data) +int trace_write_debug_mb(struct file *file, const char *buffer, + unsigned long count, void *data) { - char *string; - int rc, i, max; - - string = kmalloc(count + 1, GFP_KERNEL); - if (string == NULL) - return -ENOMEM; + char string[32]; + int i; + unsigned max; - if (copy_from_user(string, buffer, count)) { - rc = -EFAULT; - goto out; + if (count >= sizeof(string)) { + printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n", + count); + return -EOVERFLOW; } + if (copy_from_user(string, buffer, count)) + return -EFAULT; + max = simple_strtoul(string, NULL, 0); - if (max == 0) { - rc = -EINVAL; - goto out; - } + if (max == 0) + return -EINVAL; max /= smp_num_cpus; - if (max > num_physpages / 5 * 4) { + if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) { printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%d pages, which is more than 80%% of physical pages " - "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4); - return count; + "%d 
MB, which is more than 80%% of physical RAM " + "(%lu).\n", max * smp_num_cpus, + (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5); + return -EINVAL; } for (i = 0; i < NR_CPUS; i++) { struct trace_cpu_data *tcd; tcd = &trace_data[i].tcd; - tcd->tcd_max_pages = max; + tcd->tcd_max_pages = max << (20 - PAGE_SHIFT); } - out: - kfree(string); return count; } -int trace_read_debug_size(char *page, char **start, off_t off, int count, - int *eof, void *data) +int trace_read_debug_mb(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct trace_cpu_data *tcd; unsigned long flags; int rc; tcd = trace_get_tcd(flags); - rc = snprintf(page, count, "%lu", tcd->tcd_max_pages); + rc = snprintf(page, count, "%lu\n", + (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus); trace_put_tcd(tcd, flags); return rc; diff --git a/lustre/portals/libcfs/tracefile.h b/lustre/portals/libcfs/tracefile.h index 1b6e7a05e1..f581257647 100644 --- a/lustre/portals/libcfs/tracefile.h +++ b/lustre/portals/libcfs/tracefile.h @@ -12,10 +12,10 @@ int trace_write_daemon_file(struct file *file, const char *buffer, unsigned long count, void *data); int trace_read_daemon_file(char *page, char **start, off_t off, int count, int *eof, void *data); -int trace_write_debug_size(struct file *file, const char *buffer, - unsigned long count, void *data); -int trace_read_debug_size(char *page, char **start, off_t off, int count, - int *eof, void *data); +int trace_write_debug_mb(struct file *file, const char *buffer, + unsigned long count, void *data); +int trace_read_debug_mb(char *page, char **start, off_t off, int count, + int *eof, void *data); int trace_dk(struct file *file, const char *buffer, unsigned long count, void *data); diff --git a/lustre/portals/libcfs/watchdog.c b/lustre/portals/libcfs/watchdog.c new file mode 100644 index 0000000000..844845a35f --- /dev/null +++ b/lustre/portals/libcfs/watchdog.c @@ -0,0 +1,402 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; 
-*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Jacob Berkman <jacob@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include <linux/kp30.h> +#include <linux/libcfs.h> +#include <linux/portals_compat25.h> + + + +struct lc_watchdog { + struct timer_list lcw_timer; /* kernel timer */ + struct list_head lcw_list; + struct timeval lcw_last_touched; + struct task_struct *lcw_task; + + void (*lcw_callback)(struct lc_watchdog *, + struct task_struct *, + void *data); + void *lcw_data; + + int lcw_pid; + int lcw_time; /* time until watchdog fires, in ms */ + + enum { + LC_WATCHDOG_DISABLED, + LC_WATCHDOG_ENABLED, + LC_WATCHDOG_EXPIRED + } lcw_state; +}; + +/* + * The dispatcher will complete lcw_start_completion when it starts, + * and lcw_stop_completion when it exits. + * Wake lcw_event_waitq to signal timer callback dispatches. + */ +static struct completion lcw_start_completion; +static struct completion lcw_stop_completion; +static wait_queue_head_t lcw_event_waitq; + +/* + * Set this and wake lcw_event_waitq to stop the dispatcher. + */ +enum { + LCW_FLAG_STOP = 0 +}; +static unsigned long lcw_flags = 0; + +/* + * Number of outstanding watchdogs. + * When it hits 1, we start the dispatcher. 
+ * When it hits 0, we stop the distpatcher. + */ +static __u32 lcw_refcount = 0; +static DECLARE_MUTEX(lcw_refcount_sem); + +/* + * List of timers that have fired that need their callbacks run by the + * dispatcher. + */ +static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; +static struct list_head lcw_pending_timers = \ + LIST_HEAD_INIT(lcw_pending_timers); + +static struct task_struct *lcw_lookup_task(struct lc_watchdog *lcw) +{ + struct task_struct *tsk; + unsigned long flags; + ENTRY; + + read_lock_irqsave(&tasklist_lock, flags); + tsk = find_task_by_pid(lcw->lcw_pid); + read_unlock_irqrestore(&tasklist_lock, flags); + if (!tsk) { + CWARN("Process %d was not found in the task list; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); + } else if (tsk != lcw->lcw_task) { + tsk = NULL; + CWARN("The current process %d did not set the watchdog; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); + } + + RETURN(tsk); +} + +static void lcw_cb(unsigned long data) +{ + struct lc_watchdog *lcw = (struct lc_watchdog *)data; + struct task_struct *tsk; + unsigned long flags; + + ENTRY; + + if (lcw->lcw_state != LC_WATCHDOG_ENABLED) { + EXIT; + return; + } + + lcw->lcw_state = LC_WATCHDOG_EXPIRED; + + CWARN("Watchdog triggered for pid %d: it was inactive for %dus\n", + lcw->lcw_pid, (lcw->lcw_time * 1000) / HZ); + + tsk = lcw_lookup_task(lcw); + if (tsk != NULL) + portals_debug_dumpstack(tsk); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (list_empty(&lcw->lcw_list)) { + list_add(&lcw->lcw_list, &lcw_pending_timers); + wake_up(&lcw_event_waitq); + } + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + EXIT; +} + +static int is_watchdog_fired(void) +{ + unsigned long flags; + int rc; + + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) + return 1; + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + return rc; +} + +static int 
lcw_dispatch_main(void *data) +{ + int rc = 0; + unsigned long flags; + struct lc_watchdog *lcw; + struct task_struct *tsk; + + ENTRY; + + kportal_daemonize("lc_watchdogd"); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + complete(&lcw_start_completion); + + while (1) { + wait_event_interruptible(lcw_event_waitq, is_watchdog_fired()); + CDEBUG(D_INFO, "Watchdog got woken up...\n"); + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { + CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + if (rc) { + CERROR("pending timers list was not empty at " + "time of watchdog dispatch shutdown\n"); + } + break; + } + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + while (!list_empty(&lcw_pending_timers)) { + + lcw = list_entry(lcw_pending_timers.next, + struct lc_watchdog, + lcw_list); + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid); + + if (lcw->lcw_state != LC_WATCHDOG_DISABLED) { + /* + * sanity check the task against our + * watchdog + */ + tsk = lcw_lookup_task(lcw); + lcw->lcw_callback(lcw, tsk, lcw->lcw_data); + } + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + } + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + } + + complete(&lcw_stop_completion); + + RETURN(rc); +} + +static void lcw_dispatch_start(void) +{ + int rc; + + ENTRY; + LASSERT(lcw_refcount == 1); + + init_completion(&lcw_stop_completion); + init_completion(&lcw_start_completion); + init_waitqueue_head(&lcw_event_waitq); + + CDEBUG(D_INFO, "starting dispatch thread\n"); + rc = kernel_thread(lcw_dispatch_main, NULL, 0); + if (rc < 0) { + CERROR("error spawning watchdog dispatch thread: %d\n", rc); + EXIT; + return; + } +
wait_for_completion(&lcw_start_completion); + CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n"); + + EXIT; +} + +static void lcw_dispatch_stop(void) +{ + ENTRY; + LASSERT(lcw_refcount == 0); + + CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); + + set_bit(LCW_FLAG_STOP, &lcw_flags); + wake_up(&lcw_event_waitq); + + wait_for_completion(&lcw_stop_completion); + + CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); + + EXIT; +} + +struct lc_watchdog *lc_watchdog_add(int time, + void (*callback)(struct lc_watchdog *, + struct task_struct *, + void *), + void *data) +{ + struct lc_watchdog *lcw = NULL; + ENTRY; + + PORTAL_ALLOC(lcw, sizeof(*lcw)); + if (!lcw) { + CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n"); + RETURN(ERR_PTR(-ENOMEM)); + } + + lcw->lcw_task = current; + lcw->lcw_pid = current->pid; + lcw->lcw_time = (time * HZ) / 1000; + lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog; + lcw->lcw_data = data; + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + INIT_LIST_HEAD(&lcw->lcw_list); + + lcw->lcw_timer.function = lcw_cb; + lcw->lcw_timer.data = (unsigned long)lcw; + lcw->lcw_timer.expires = jiffies + lcw->lcw_time; + init_timer(&lcw->lcw_timer); + + down(&lcw_refcount_sem); + if (++lcw_refcount == 1) + lcw_dispatch_start(); + up(&lcw_refcount_sem); + + /* Keep this working in case we enable them by default */ + if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { + do_gettimeofday(&lcw->lcw_last_touched); + add_timer(&lcw->lcw_timer); + } + + RETURN(lcw); +} +EXPORT_SYMBOL(lc_watchdog_add); + +static long +timeval_sub(struct timeval *large, struct timeval *small) +{ + return (large->tv_sec - small->tv_sec) * 1000000 + + (large->tv_usec - small->tv_usec); +} + +static void lcw_update_time(struct lc_watchdog *lcw, const char *message) +{ + struct timeval newtime; + unsigned long timediff; + + do_gettimeofday(&newtime); + if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { + timediff = timeval_sub(&newtime, &lcw->lcw_last_touched); + 
CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", + lcw->lcw_pid, + message, + timediff / 1000000, + (timediff % 1000000) / 100); + } + lcw->lcw_last_touched = newtime; +} + +void lc_watchdog_touch(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + lcw_update_time(lcw, "touched"); + lcw->lcw_state = LC_WATCHDOG_ENABLED; + + mod_timer(&lcw->lcw_timer, jiffies + lcw->lcw_time); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_touch); + +void lc_watchdog_disable(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + lcw_update_time(lcw, "disabled"); + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_disable); + +void lc_watchdog_delete(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + del_timer(&lcw->lcw_timer); + + lcw_update_time(lcw, "deleted"); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + down(&lcw_refcount_sem); + if (--lcw_refcount == 0) + lcw_dispatch_stop(); + up(&lcw_refcount_sem); + + PORTAL_FREE(lcw, sizeof(*lcw)); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_delete); + +/* + * Provided watchdog handlers + */ + +extern void portals_debug_dumplog_internal(void *arg); + +void lc_watchdog_dumplog(struct lc_watchdog *lcw, + struct task_struct *tsk, + void *data) +{ + tsk = tsk ? 
tsk : current; + portals_debug_dumplog_internal((void *)(long)tsk->pid); +} +EXPORT_SYMBOL(lc_watchdog_dumplog); diff --git a/lustre/portals/portals/api-ni.c b/lustre/portals/portals/api-ni.c index 72d3b4188e..7e92256fe6 100644 --- a/lustre/portals/portals/api-ni.c +++ b/lustre/portals/portals/api-ni.c @@ -165,7 +165,7 @@ void PtlFini(void) continue; if (nal->nal_refct != 0) { - CWARN("NAL %d has outstanding refcount %d\n", + CWARN("NAL %x has outstanding refcount %d\n", i, nal->nal_refct); nal->nal_ni_fini(nal); } @@ -212,11 +212,11 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; nal->nal_handle.cookie = 0; - CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct); + CDEBUG(D_OTHER, "Starting up NAL (%x) refs %d\n", interface, nal->nal_refct); rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits); if (rc != PTL_OK) { - CERROR("Error %d starting up NAL %d, refs %d\n", rc, + CERROR("Error %d starting up NAL %x, refs %d\n", rc, interface, nal->nal_refct); GOTO(out, rc); } diff --git a/lustre/portals/portals/module.c b/lustre/portals/portals/module.c index 61ef37250d..c1303b7863 100644 --- a/lustre/portals/portals/module.c +++ b/lustre/portals/portals/module.c @@ -81,7 +81,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, ptl_handle_ni_t nih; ptl_process_id_t pid; - CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); + CDEBUG (D_IOCTL, "Getting nid for nal [%x]\n", data->ioc_nal); err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); diff --git a/lustre/portals/router/proc.c b/lustre/portals/router/proc.c index a1397d28d2..61b6880c6f 100644 --- a/lustre/portals/router/proc.c +++ b/lustre/portals/router/proc.c @@ -31,6 +31,7 @@ struct proc_route_data { struct list_head *curr; unsigned int generation; off_t skip; + rwlock_t proc_route_rwlock; } kpr_read_routes_data; /* nal2name support re-used from 
utils/portals.c */ @@ -43,6 +44,8 @@ struct name2num { { "tcp", SOCKNAL}, { "gm", GMNAL}, { "ib", OPENIBNAL}, + { "iib", IIBNAL}, + { "lo", LONAL}, { NULL, -1} }; @@ -96,19 +99,22 @@ static int kpr_proc_router_write(struct file *file, const char *ubuffer, static int kpr_proc_routes_read(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct proc_route_data *prd = data; - kpr_route_entry_t *re; - kpr_gateway_entry_t *ge; - int chunk_len = 0; - int line_len = 0; - int user_len = 0; + struct proc_route_data *prd = data; + kpr_route_entry_t *re; + kpr_gateway_entry_t *ge; + int chunk_len = 0; + int line_len = 0; + int user_len = 0; + int rc = 0; *eof = 1; *start = page; + write_lock(&(prd->proc_route_rwlock)); + if (prd->curr == NULL) { if (off != 0) - return 0; + goto routes_read_exit; /* First pass, initialize our private data */ prd->curr = kpr_routes.next; @@ -118,13 +124,14 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, /* Abort route list generation change */ if (prd->generation != kpr_routes_generation) { prd->curr = NULL; - return sprintf(page, "\nError: Routes Changed\n"); + rc = sprintf(page, "\nError: Routes Changed\n"); + goto routes_read_exit; } /* All the routes have been walked */ if (prd->curr == &kpr_routes) { prd->curr = NULL; - return 0; + goto routes_read_exit; } } @@ -148,7 +155,8 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, if (prd->curr->next == NULL) { prd->curr = NULL; read_unlock(&kpr_rwlock); - return sprintf(page, "\nError: Routes Changed\n"); + rc = sprintf(page, "\nError: Routes Changed\n"); + goto routes_read_exit; } prd->curr = prd->curr->next; @@ -169,13 +177,18 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, prd->curr = prd->curr->prev; prd->skip = line_len - (user_len - count); read_unlock(&kpr_rwlock); - return count; + rc = count; + goto routes_read_exit; } /* Not enough data to entirely satify callers request */ prd->skip = 0; 
read_unlock(&kpr_rwlock); - return user_len; + rc = user_len; + +routes_read_exit: + write_unlock(&(prd->proc_route_rwlock)); + return rc; } static int kpr_proc_routes_write(struct file *file, const char *ubuffer, @@ -215,6 +228,7 @@ void kpr_proc_init(void) kpr_read_routes_data.curr = NULL; kpr_read_routes_data.generation = 0; kpr_read_routes_data.skip = 0; + kpr_read_routes_data.proc_route_rwlock = RW_LOCK_UNLOCKED; routes_entry->data = &kpr_read_routes_data; routes_entry->read_proc = kpr_proc_routes_read; diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c index 448ab1f588..7edc5f6993 100644 --- a/lustre/portals/router/router.c +++ b/lustre/portals/router/router.c @@ -56,7 +56,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) struct list_head *e; kpr_nal_entry_t *ne; - CDEBUG (D_NET, "Registering NAL %d\n", nalif->kprni_nalid); + CDEBUG (D_NET, "Registering NAL %x\n", nalif->kprni_nalid); PORTAL_ALLOC (ne, sizeof (*ne)); if (ne == NULL) @@ -76,7 +76,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { write_unlock_irqrestore (&kpr_rwlock, flags); - CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid); + CERROR ("Attempt to register same NAL %x twice\n", ne->kpne_interface.kprni_nalid); PORTAL_FREE (ne, sizeof (*ne)); return (-EEXIST); @@ -126,7 +126,7 @@ kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when) kpr_upcall_t *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC); if (u == NULL) { - CERROR ("Upcall out of memory: nal %d nid "LPX64" (%s) %s\n", + CERROR ("Upcall out of memory: nal %x nid "LPX64" (%s) %s\n", gw_nalid, gw_nid, portals_nid2str(gw_nalid, gw_nid, str), alive ? "up" : "down"); @@ -155,14 +155,14 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, struct list_head *n; char str[PTL_NALFMT_SIZE]; - CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n", + CDEBUG (D_NET, "%s notifying [%x] "LPX64": %s\n", byNal ? 
"NAL" : "userspace", gateway_nalid, gateway_nid, alive ? "up" : "down"); /* can't do predictions... */ do_gettimeofday (&now); if (when > now.tv_sec) { - CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s " + CWARN ("Ignoring prediction from %s of [%x] "LPX64" %s " "%ld seconds in the future\n", byNal ? "NAL" : "userspace", gateway_nalid, gateway_nid, @@ -252,7 +252,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, if (byNal) { /* It wasn't userland that notified me... */ - CWARN ("Upcall: NAL %d NID "LPX64" (%s) is %s\n", + CWARN ("Upcall: NAL %x NID "LPX64" (%s) is %s\n", gateway_nalid, gateway_nid, portals_nid2str(gateway_nalid, gateway_nid, str), alive ? "alive" : "dead"); @@ -278,7 +278,7 @@ kpr_shutdown_nal (void *arg) unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - CDEBUG (D_NET, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); + CDEBUG (D_NET, "Shutting down NAL %x\n", ne->kpne_interface.kprni_nalid); LASSERT (!ne->kpne_shutdown); LASSERT (!in_interrupt()); @@ -294,7 +294,7 @@ kpr_deregister_nal (void *arg) unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); + CDEBUG (D_NET, "Deregister NAL %x\n", ne->kpne_interface.kprni_nalid); LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */ LASSERT (!in_interrupt()); @@ -306,7 +306,7 @@ kpr_deregister_nal (void *arg) /* Wait until all outstanding messages/notifications have completed */ while (atomic_read (&ne->kpne_refcount) != 0) { - CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", + CDEBUG (D_NET, "Waiting for refcount on NAL %x to reach zero (%d)\n", ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); set_current_state (TASK_UNINTERRUPTIBLE); @@ -367,7 +367,7 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, /* Caller wants to know if 'target_nid' can be reached via a gateway * ON HER OWN NETWORK 
*/ - CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid, + CDEBUG (D_NET, "lookup "LPX64" from NAL %x\n", target_nid, ne->kpne_interface.kprni_nalid); LASSERT (!in_interrupt()); @@ -411,7 +411,7 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, /* NB can't deref 're' now; it might have been removed! */ - CDEBUG (D_NET, "lookup "LPX64" from NAL %d: %d ("LPX64")\n", + CDEBUG (D_NET, "lookup "LPX64" from NAL %x: %d ("LPX64")\n", target_nid, ne->kpne_interface.kprni_nalid, rc, (rc == 0) ? *gateway_nidp : (ptl_nid_t)0); return (rc); @@ -449,7 +449,7 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) kpr_nal_entry_t *tmp_ne; int rc; - CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd, + CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid); LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov)); @@ -509,8 +509,8 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) read_unlock (&kpr_rwlock); - CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d: " - "to "LPX64" on NAL %d\n", + CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x: " + "to "LPX64" on NAL %x\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid, fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid); @@ -522,7 +522,7 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) out: kpr_fwd_errors++; - CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d: %d\n", + CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %x: %d\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc); (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc); @@ -536,14 +536,14 @@ kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error) kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg; kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg; - CDEBUG (D_NET, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd, + CDEBUG (D_NET, "complete(1) [%p] from NAL %x to NAL %x: %d\n", fwd, src_ne->kpne_interface.kprni_nalid, 
dst_ne->kpne_interface.kprni_nalid, error); atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR dst_ne can disappear now!!! */ (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error); - CDEBUG (D_NET, "complete(2) [%p] from NAL %d: %d\n", fwd, + CDEBUG (D_NET, "complete(2) [%p] from NAL %x: %d\n", fwd, src_ne->kpne_interface.kprni_nalid, error); atomic_dec (&kpr_queue_depth); @@ -560,7 +560,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, kpr_gateway_entry_t *ge; int dup = 0; - CDEBUG(D_NET, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n", + CDEBUG(D_NET, "Add route: %x "LPX64" : "LPX64" - "LPX64"\n", gateway_nalid, gateway_nid, lo_nid, hi_nid); if (gateway_nalid == PTL_NID_ANY || @@ -645,7 +645,7 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, struct list_head *e; struct list_head *n; - CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", + CDEBUG(D_NET, "Del route [%x] "LPX64" : "LPX64" - "LPX64"\n", gw_nalid, gw_nid, lo, hi); LASSERT(!in_interrupt()); @@ -731,7 +731,7 @@ kpr_nal_cmd(struct portals_cfg *pcfg, void * private) break; case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", + CDEBUG(D_IOCTL, "Adding route: [%x] "LPU64" : "LPU64" - "LPU64"\n", pcfg->pcfg_nal, pcfg->pcfg_nid, pcfg->pcfg_nid2, pcfg->pcfg_nid3); err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, @@ -739,7 +739,7 @@ kpr_nal_cmd(struct portals_cfg *pcfg, void * private) break; case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", + CDEBUG (D_IOCTL, "Removing routes via [%x] "LPU64" : "LPU64" - "LPU64"\n", pcfg->pcfg_gw_nal, pcfg->pcfg_nid, pcfg->pcfg_nid2, pcfg->pcfg_nid3); err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, @@ -747,7 +747,7 @@ kpr_nal_cmd(struct portals_cfg *pcfg, void * private) break; case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", + CDEBUG (D_IOCTL, "Notifying peer [%x] "LPU64" %s @ %ld\n", pcfg->pcfg_gw_nal, 
pcfg->pcfg_nid, pcfg->pcfg_flags ? "Enabling" : "Disabling", (time_t)pcfg->pcfg_nid3); diff --git a/lustre/portals/tests/ping_cli.c b/lustre/portals/tests/ping_cli.c index 7a3f8a0809..e9a84813c6 100644 --- a/lustre/portals/tests/ping_cli.c +++ b/lustre/portals/tests/ping_cli.c @@ -114,7 +114,7 @@ pingcli_start(struct portal_ioctl_data *args) client->tsk = current; client->args = args; CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ - nal %d, size %u, count: %u, timeout: %u\n", + nal %x, size %u, count: %u, timeout: %u\n", args->ioc_nid, portals_nid2str(args->ioc_nal, args->ioc_nid, str), args->ioc_nal, args->ioc_size, @@ -142,7 +142,7 @@ pingcli_start(struct portal_ioctl_data *args) rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); if (rc != PTL_OK || rc != PTL_IFACE_DUP) { - CERROR ("NAL %d not loaded\n", args->ioc_nal); + CERROR ("NAL %x not loaded\n", args->ioc_nal); pingcli_shutdown (nih, 4); return (NULL); } diff --git a/lustre/portals/tests/ping_srv.c b/lustre/portals/tests/ping_srv.c index dec806ac1e..49e82af35c 100644 --- a/lustre/portals/tests/ping_srv.c +++ b/lustre/portals/tests/ping_srv.c @@ -206,7 +206,7 @@ static struct pingsrv_data *pingsrv_setup(void) /* Aquire and initialize the proper nal for portals. 
*/ rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { - CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); + CDEBUG (D_OTHER, "NAL %x not loaded\n", nal); return pingsrv_shutdown (4); } diff --git a/lustre/portals/tests/sping_cli.c b/lustre/portals/tests/sping_cli.c index 730ba00392..d9970e7ee7 100644 --- a/lustre/portals/tests/sping_cli.c +++ b/lustre/portals/tests/sping_cli.c @@ -110,7 +110,7 @@ pingcli_start(struct portal_ioctl_data *args) client->args = args; CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ - nal %d, size %u, count: %u, timeout: %u\n", + nal %x, size %u, count: %u, timeout: %u\n", args->ioc_nid, portals_nid2str(args->ioc_nid, args->ioc_nal, str), args->ioc_nal, args->ioc_size, @@ -138,7 +138,7 @@ pingcli_start(struct portal_ioctl_data *args) rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR ("NAL %d not loaded.\n", args->ioc_nal); + CERROR ("NAL %x not loaded.\n", args->ioc_nal); pingcli_shutdown (nih, 4); return (NULL); } diff --git a/lustre/portals/tests/sping_srv.c b/lustre/portals/tests/sping_srv.c index f2382d1066..069423d3e6 100644 --- a/lustre/portals/tests/sping_srv.c +++ b/lustre/portals/tests/sping_srv.c @@ -195,7 +195,7 @@ static struct pingsrv_data *pingsrv_setup(void) rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); + CDEBUG (D_OTHER, "Nal %x not loaded.\n", nal); return pingsrv_shutdown (4); } diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index d5ff09a390..16af0b5ec4 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -75,15 +75,22 @@ static int debug_mask = ~0; #define MAX_MARK_SIZE 100 static const char *portal_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite", - "rpc", "mgmt", "portals", "libcfs", "socknal", "qswnal", "pinger", - "filter", 
"ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", - "ibnal", "lmv", "cmobd", "smfs", NULL}; + {"undefined", "mdc", "mds", "osc", + "ost", "class", "log", "llite", + "rpc", "mgmt", "portals", "socknal", + "qswnal", "pinger", "filter", "ptlbd", + "echo", "ldlm", "lov", "gmnal", + "router", "cobd", "ibnal", "sm", + "asobd", "confobd", "lmv", "cmobd", + "lonal", NULL}; static const char *portal_debug_masks[] = - {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", - "blocks", "net", "warning", "buffs", "other", "dentry", "portals", - "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", - "reada", "mmap", NULL}; + {"trace", "inode", "super", "ext2", + "malloc", "cache", "info", "ioctl", + "blocks", "net", "warning", "buffs", + "other", "dentry", "portals", "page", + "dlmtrace", "error", "emerg", "ha", + "rpctrace", "vfstrace", "reada", "mmap", + "config", NULL}; struct debug_daemon_cmd { char *cmd; @@ -488,51 +495,96 @@ int jt_dbg_debug_file(int argc, char **argv) return parse_buffer(in, out); } -const char debug_daemon_usage[]="usage: debug_daemon {start file [MB]|stop}\n"; +static int +dbg_write_cmd(int fd, char *str) +{ + int len = strlen(str); + int rc = write(fd, str, len); + + return (rc == len ? 
0 : 1); +} + +const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n"; +#define DAEMON_FILE "/proc/sys/portals/daemon_file" int jt_dbg_debug_daemon(int argc, char **argv) { - int rc, fd; + int rc; + int fd; if (argc <= 1) { - fprintf(stderr, debug_daemon_usage); - return 0; + fprintf(stderr, debug_daemon_usage, argv[0]); + return 1; } - fd = open("/proc/sys/portals/daemon_file", O_WRONLY); + fd = open(DAEMON_FILE, O_WRONLY); if (fd < 0) { - fprintf(stderr, "open(daemon_file) failed: %s\n", + fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE, strerror(errno)); - return 1; + return -1; } - + + rc = -1; if (strcasecmp(argv[1], "start") == 0) { - if (argc != 3) { - fprintf(stderr, debug_daemon_usage); - return 1; + if (argc < 3 || argc > 4 || + (argc == 4 && strlen(argv[3]) > 5)) { + fprintf(stderr, debug_daemon_usage, argv[0]); + goto out; } - rc = write(fd, argv[2], strlen(argv[2])); - if (rc != strlen(argv[2])) { - fprintf(stderr, "write(%s) failed: %s\n", argv[2], - strerror(errno)); - close(fd); - return 1; + if (argc == 4) { + char buf[12]; + const long min_size = 10; + const long max_size = 20480; + long size; + char *end; + + size = strtoul(argv[3], &end, 0); + if (size < min_size || + size > max_size || + *end != 0) { + fprintf(stderr, "size %s invalid, must be in " + "the range %ld-%ld MB\n", argv[3], + min_size, max_size); + goto out; + } + + snprintf(buf, sizeof(buf), "size=%ld", size); + rc = dbg_write_cmd(fd, buf); + if (rc != 0) { + fprintf(stderr, "set %s failed: %s\n", + buf, strerror(errno)); + goto out; + } } - } else if (strcasecmp(argv[1], "stop") == 0) { - rc = write(fd, "stop", 4); - if (rc != 4) { - fprintf(stderr, "write(stop) failed: %s\n", + + rc = dbg_write_cmd(fd, "start"); + if (rc != 0) { + fprintf(stderr, "start debug_daemon on %s failed: %s\n", + argv[2], strerror(errno)); + goto out; + } + + rc = 0; + goto out; + } + + if (strcasecmp(argv[1], "stop") == 0) { + rc = dbg_write_cmd(fd, "stop"); + if (rc != 0) { + 
fprintf(stderr, "stopping debug_daemon failed: %s\n", strerror(errno)); - close(fd); - return 1; + goto out; } - } else { - fprintf(stderr, debug_daemon_usage); - return 1; + + rc = 0; + goto out; } + fprintf(stderr, debug_daemon_usage, argv[0]); + rc = -1; +out: close(fd); - return 0; + return rc; } int jt_dbg_clear_debug_buf(int argc, char **argv) diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index 07fada2634..abe3f987dc 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -39,6 +39,12 @@ #include <time.h> #include <stdarg.h> #include <endian.h> +#if CRAY_PORTALS +#ifdef REDSTORM +#define __QK__ +#endif +#include <portals/ipmap.h> +#endif #ifdef __CYGWIN__ @@ -66,12 +72,18 @@ typedef struct static name2num_t nalnames[] = { {"any", 0}, +#if !CRAY_PORTALS {"tcp", SOCKNAL}, {"elan", QSWNAL}, {"gm", GMNAL}, {"openib", OPENIBNAL}, {"iib", IIBNAL}, {"lo", LONAL}, +#else + {"cray_kern_nal", CRAY_KERN_NAL}, + {"cray_user_nal", CRAY_USER_NAL}, + {"cray_qk_nal", CRAY_QK_NAL}, +#endif {NULL, -1} }; @@ -363,7 +375,11 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) } if (ptl_parse_ipaddr (&ipaddr, str) == 0) { +#if !CRAY_PORTALS *nidp = (ptl_nid_t)ipaddr; +#else + *nidp = (((ptl_nid_t)ipaddr & PNAL_HOSTID_MASK) << PNAL_VNODE_SHIFT); +#endif return (0); } -- GitLab