diff --git a/lnet/libcfs/watchdog.c b/lnet/libcfs/watchdog.c index 3000e8f5e73e9b5c121ab1750085006e3f6b1fcf..22754af525b15ddb6c20a66764a54a00f756fb8b 100644 --- a/lnet/libcfs/watchdog.c +++ b/lnet/libcfs/watchdog.c @@ -36,7 +36,7 @@ struct lc_watchdog { void *lcw_data; pid_t lcw_pid; - int lcw_time; /* time until watchdog fires, in ms */ + cfs_duration_t lcw_time; /* time until watchdog fires, jiffies */ enum { LC_WATCHDOG_DISABLED, @@ -126,8 +126,8 @@ static void lcw_cb(unsigned long data) /* NB this warning should appear on the console, but may not get into * the logs since we're running in a softirq handler */ - CWARN("Watchdog triggered for pid %d: it was inactive for %ldms\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time) * 1000); + CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n", + (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); lcw_dump(lcw); spin_lock_bh(&lcw_pending_timers_lock); @@ -197,8 +197,9 @@ static int lcw_dispatch_main(void *data) list_del_init(&lcw->lcw_list); spin_unlock_bh(&lcw_pending_timers_lock); - CDEBUG(D_INFO, "found lcw for pid %d: inactive for %ldms\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time) * 1000); + CDEBUG(D_INFO, "found lcw for pid %d: inactive for " + "%lds\n", (int)lcw->lcw_pid, + cfs_duration_sec(lcw->lcw_time)); if (lcw->lcw_state != LC_WATCHDOG_DISABLED) lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data); diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 583544218d1b57592ac13ee197c8325fdc176ea8..e1fa8e814fbd112e3000ba153c2c466f70357ebb 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -120,7 +120,7 @@ #define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56) #define MGS_THREADS_AUTO_MIN 2 -#define MGS_THREADS_AUTO_MAX 128 +#define MGS_THREADS_AUTO_MAX 32 #define MGS_NBUFS (64 * smp_num_cpus) #define MGS_BUFSIZE (8 * 1024) #define MGS_MAXREQSIZE (8 * 1024) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 6cdcc9a59d18a50f30ab39fc1b9ae954043b31bb..1b31dd4d2d11de56cad109f42428fa83c76a1729 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -35,6 +35,7 @@ extern unsigned int obd_dump_on_eviction; extern unsigned int obd_timeout; /* seconds */ #define PING_INTERVAL max(obd_timeout / 4, 1U) #define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) +#define LDLM_TIMEOUT_DEFAULT 20 extern unsigned int ldlm_timeout; extern unsigned int obd_health_check_timeout; extern unsigned int obd_sync_filter; @@ -188,6 +189,7 @@ extern int obd_race_state; #define OBD_FAIL_MGS_ALL_REPLY_NET 0x902 #define OBD_FAIL_MGC_PROCESS_LOG 0x903 #define OBD_FAIL_MGS_SLOW_REQUEST_NET 0x904 +#define OBD_FAIL_MGS_SLOW_TARGET_REG 0x905 #define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 5f59ab31eedfd324f446715cc836d17c3c2b33fc..8cc1aba77eaa49f040eb3c4f2b3251338915034a 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -2053,7 +2053,8 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) obd->obd_replayable ? "enabled" : "disabled"); } - ldlm_timeout = 6; + if (ldlm_timeout == LDLM_TIMEOUT_DEFAULT) + ldlm_timeout = 6; RETURN(0); diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index e161ad30da381681013409fd9f6c358d99810b8b..e6ba9bea694e47119c2e6039458e166e3c2da7eb 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -382,6 +382,8 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) obd->obd_name, lockrc); } + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10); + /* Log writing contention is handled by the fsdb_sem */ if (mti->mti_flags & LDD_F_WRITECONF) { diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 21732e6e189185636dd4a7a9ca178fe68b270bbb..c2af448229cd95ea4318fddc0a48ecede7c37d27 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -64,7 +64,7 @@ unsigned int obd_debug_peer_on_timeout; unsigned int obd_dump_on_timeout; unsigned int obd_dump_on_eviction; unsigned int obd_timeout = 100; /* seconds */ -unsigned int ldlm_timeout = 20; /* seconds */ +unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ unsigned int obd_health_check_timeout = 120; /* seconds */ unsigned int obd_max_dirty_pages = 256; atomic_t obd_dirty_pages;