From ac391ce3aa27602c81890e8b7e74bff74eb36e50 Mon Sep 17 00:00:00 2001 From: deen <deen> Date: Fri, 12 Sep 2008 15:46:21 +0000 Subject: [PATCH] Initialize RPC XID from clock at startup (randomly if clock is bad). b=2066 i=adilger i=robert.read --- lustre/ChangeLog | 6 +++++ lustre/ptlrpc/client.c | 39 +++++++++++++++++++++++++++++++-- lustre/ptlrpc/ptlrpc_internal.h | 7 ++++++ lustre/ptlrpc/ptlrpc_module.c | 4 +--- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index f9ebc36882..4141ff900c 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1401,6 +1401,12 @@ Bugzilla : 16813 Description: X2 build failures Details : fix build failures on Cray X2. +Severity : normal +Bugzilla : 2066 +Description: xid & resent requests +Details : Initialize RPC XID from clock at startup (randomly if clock is + bad). + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. <info@clusterfs.com> diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 7ce2ef5583..cbe80c696e 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -2350,8 +2350,37 @@ void ptlrpc_abort_set(struct ptlrpc_request_set *set) } } -static __u64 ptlrpc_last_xid = 0; -spinlock_t ptlrpc_last_xid_lock; +static __u64 ptlrpc_last_xid; +static spinlock_t ptlrpc_last_xid_lock; + +/* Initialize the XID for the node. This is common among all requests on + * this node, and only requires the property that it is monotonically + * increasing. It does not need to be sequential. Since this is also used + * as the RDMA match bits, it is important that a single client NOT have + * the same match bits for two different in-flight requests, hence we do + * NOT want to have an XID per target or similar. 
+ * + * To avoid an unlikely collision between match bits after a client reboot + * (which would deliver old data into the wrong buffer) we initialize + * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s. + * If the time is clearly incorrect, we instead use a 62-bit random number. + * In the worst case the random number will overflow at 1M RPCs per second in + * 9133 years, or permutations thereof. + */ +#define YEAR_2004 (1ULL << 30) +void ptlrpc_init_xid(void) +{ + time_t now = cfs_time_current_sec(); + + spin_lock_init(&ptlrpc_last_xid_lock); + if (now < YEAR_2004) { + ll_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid)); + ptlrpc_last_xid >>= 2; + ptlrpc_last_xid |= (1ULL << 61); + } else { + ptlrpc_last_xid = (now << 20); + } +} __u64 ptlrpc_next_xid(void) { @@ -2364,10 +2393,16 @@ __u64 ptlrpc_next_xid(void) __u64 ptlrpc_sample_next_xid(void) { +#if BITS_PER_LONG == 32 + /* need to avoid possible word tearing on 32-bit systems */ __u64 tmp; spin_lock(&ptlrpc_last_xid_lock); tmp = ptlrpc_last_xid + 1; spin_unlock(&ptlrpc_last_xid_lock); return tmp; +#else + /* No need to lock, since returned value is racy anyways */ + return ptlrpc_last_xid + 1; +#endif } EXPORT_SYMBOL(ptlrpc_sample_next_xid); diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 4ebb4482ac..bdea8bd672 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -47,6 +47,13 @@ struct ldlm_res_id; struct ptlrpc_request_set; extern int test_req_buffer_pressure; +/* client.c */ +void ptlrpc_init_xid(void); + +/* events.c */ +int ptlrpc_init_portals(void); +void ptlrpc_exit_portals(void); + void ptlrpc_request_handle_notconn(struct ptlrpc_request *); void lustre_assert_wire_constants(void); int ptlrpc_import_in_recovery(struct obd_import *imp); diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 6bb04f9579..5d7d35eb5f 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ 
b/lustre/ptlrpc/ptlrpc_module.c @@ -55,8 +55,6 @@ extern spinlock_t ptlrpc_rs_debug_lock; extern spinlock_t ptlrpc_all_services_lock; extern struct semaphore pinger_sem; extern struct semaphore ptlrpcd_sem; -extern int ptlrpc_init_portals(void); -extern void ptlrpc_exit_portals(void); __init int ptlrpc_init(void) { @@ -64,11 +62,11 @@ __init int ptlrpc_init(void) ENTRY; lustre_assert_wire_constants(); - spin_lock_init(&ptlrpc_last_xid_lock); spin_lock_init(&ptlrpc_rs_debug_lock); spin_lock_init(&ptlrpc_all_services_lock); init_mutex(&pinger_sem); init_mutex(&ptlrpcd_sem); + ptlrpc_init_xid(); rc = req_layout_init(); if (rc) -- GitLab