From 289e9caabec18e07df40f206b1000d91d9e305d7 Mon Sep 17 00:00:00 2001 From: zhaoqiang <zhaoqiang> Date: Thu, 26 Aug 2004 07:34:53 +0000 Subject: [PATCH] Land from b_hd_pid to HEAD b=bug4165(pid) r=Peter - Assign a constant number to the kernel portals pid; this constant is LUSTRE_SRV_PTL_PID (999999), defined in lustre_idl.h. In user mode (liblustre), use getpid() instead. - Replace struct ptlrpc_peer's field peer_nid (type ptl_nid_t) with peer_id (type ptl_process_id_t), which includes the pid, and update the code that used peer_nid. - Change the snprintf of remote_uuid.uuid in target_handle_connect to include pid info; - In ksocknal_api_startup, pass LUSTRE_SRV_PTL_PID to PtlNIInit. - In tcpnal_init, the line b->lib_nal->libnal_ni.ni_pid.pid=0; should be deleted (it is commented out here). - In ptlrpc_get_connection, also compare the pid when iterating over conn_list and conn_unused_list. - Change the assignment of req->rq_peer.peer_id in request_in_callback. - In ptlrpc_uuid_to_peer, give peer->peer_id.pid the default value LUSTRE_SRV_PTL_PID. - Change each call site of PtlNIInit/PtlPut. - Change the call sites of PtlMEAttach to use the c_peer value instead of PTL_PID_ANY; this is the most important change! 
--- lnet/include/linux/kp30.h | 3 +- lnet/klnds/gmlnd/gmlnd_api.c | 2 +- lnet/klnds/iblnd/ibnal.c | 2 +- lnet/klnds/qswlnd/qswlnd.c | 4 +- lnet/klnds/scimaclnd/scimacnal.c | 2 +- lnet/klnds/socklnd/socklnd.c | 4 +- lnet/klnds/socklnd/socklnd.h | 1 + lnet/libcfs/debug.c | 25 +++++++++++++ lnet/lnet/lib-move.c | 2 + lnet/lnet/module.c | 5 ++- lnet/ulnds/socklnd/tcplnd.c | 2 +- lnet/ulnds/tcplnd.c | 2 +- lustre/include/linux/lustre_idl.h | 4 ++ lustre/include/linux/lustre_net.h | 15 +++++++- lustre/ldlm/ldlm_lib.c | 4 +- lustre/ldlm/ldlm_lockd.c | 2 +- lustre/liblustre/llite_lib.c | 24 +++++++++++- lustre/liblustre/tests/echo_test.c | 23 ++++++++++++ lustre/obdfilter/filter.c | 16 ++++---- lustre/osc/osc_request.c | 6 +-- lustre/ost/ost_handler.c | 4 +- lustre/portals/include/linux/kp30.h | 3 +- lustre/portals/knals/gmnal/gmnal_api.c | 2 +- lustre/portals/knals/ibnal/ibnal.c | 2 +- lustre/portals/knals/qswnal/qswnal.c | 4 +- lustre/portals/knals/scimacnal/scimacnal.c | 2 +- lustre/portals/knals/socknal/socknal.c | 4 +- lustre/portals/knals/socknal/socknal.h | 1 + lustre/portals/libcfs/debug.c | 25 +++++++++++++ lustre/portals/portals/lib-move.c | 2 + lustre/portals/portals/module.c | 5 ++- lustre/portals/unals/tcpnal.c | 2 +- lustre/ptlrpc/connection.c | 6 +-- lustre/ptlrpc/events.c | 30 ++++++++++++--- lustre/ptlrpc/niobuf.c | 43 +++++++--------------- 35 files changed, 202 insertions(+), 81 deletions(-) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 6ef28a8535..b13f161638 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -326,6 +326,7 @@ void portals_debug_dumplog(void); /* support decl needed both by kernel and liblustre */ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); +char *portals_id2str(int nal, ptl_process_id_t nid, char *str); #ifndef CURRENT_TIME # define CURRENT_TIME time(0) @@ -644,7 +645,7 @@ enum { NAL_ENUM_END_MARKER }; -#define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */ +#define 
PTL_NALFMT_SIZE 30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */ #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c index 002587d1b5..bdf5cfe6cf 100644 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ b/lnet/klnds/gmlnd/gmlnd_api.c @@ -318,7 +318,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, /* pid = gm_getpid(); */ - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = global_nid; CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); diff --git a/lnet/klnds/iblnd/ibnal.c b/lnet/klnds/iblnd/ibnal.c index 86c2a63552..c6bd7ed044 100644 --- a/lnet/klnds/iblnd/ibnal.c +++ b/lnet/klnds/iblnd/ibnal.c @@ -246,7 +246,7 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, - 0, // process id is set as 0 + requested_pid , // process id is set as requested_pid instead of 0 ptl_size, ac_size); diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index c595450684..38d1636166 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -696,7 +696,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, /* Network interface ready to initialise */ my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); - my_process_id.pid = 0; + my_process_id.pid = requested_pid; rc = lib_init(&kqswnal_lib, nal, my_process_id, requested_limits, actual_limits); @@ -802,7 +802,7 @@ kqswnal_initialise (void) /* Pure gateways, and the workaround for 'EKC blocks forever until * the service is active' want the NAL started up at module load * time... 
*/ - rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(QSWNAL); return (-ENODEV); diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c index e77bd8ee27..75188e97dc 100644 --- a/lnet/klnds/scimaclnd/scimacnal.c +++ b/lnet/klnds/scimaclnd/scimacnal.c @@ -205,7 +205,7 @@ static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, } kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = kscimacnal_data.ksci_nid; CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 9d39cb1bd5..3a3629b873 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1589,7 +1589,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } /* NB we have to wait to be told our true NID... */ - process_id.pid = 0; + process_id.pid = requested_pid; //LUSTRE_SRV_PTL_PID; process_id.nid = 0; rc = lib_init(&ksocknal_lib, nal, process_id, @@ -1739,7 +1739,7 @@ ksocknal_module_init (void) } /* Pure gateways want the NAL started up at module load time... 
*/ - rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(SOCKNAL); return (-ENODEV); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index ff73f71ce5..e14205951e 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -66,6 +66,7 @@ #include <portals/lib-p30.h> #include <portals/nal.h> #include <portals/socknal.h> +#include <linux/lustre_idl.h> #if CONFIG_SMP # define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 2b2ce3b992..eb75e60442 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -985,6 +985,30 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) } return str; } +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? 
%llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} + #ifdef __KERNEL__ char stack_backtrace[LUSTRE_TRACE_SIZE]; @@ -1077,3 +1101,4 @@ EXPORT_SYMBOL(portals_debug_set_level); EXPORT_SYMBOL(portals_run_upcall); EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); +EXPORT_SYMBOL(portals_id2str); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 9dcc06e343..854a452e55 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -82,6 +82,8 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, if (me->match_id.nid != PTL_NID_ANY && me->match_id.nid != src_nid) continue; + + CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid); if (me->match_id.pid != PTL_PID_ANY && me->match_id.pid != src_pid) diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 5615a72443..d292a501a0 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -50,6 +50,7 @@ #include <linux/kp30.h> #include <linux/kpr.h> #include <linux/portals_compat25.h> +#include <linux/lustre_idl.h> extern void (kping_client)(struct portal_ioctl_data *); @@ -83,7 +84,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); @@ -104,7 +105,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c index 34a9c9d5e3..6e9cca99ca 100644 --- a/lnet/ulnds/socklnd/tcplnd.c +++ b/lnet/ulnds/socklnd/tcplnd.c @@ 
-252,7 +252,7 @@ int tcpnal_init(bridge b) return(PTL_NAL_FAILED); } /* XXX cfs hack */ - b->lib_nal->libnal_ni.ni_pid.pid=0; +// b->lib_nal->libnal_ni.ni_pid.pid=0; b->lower=m; return(PTL_OK); } diff --git a/lnet/ulnds/tcplnd.c b/lnet/ulnds/tcplnd.c index 34a9c9d5e3..6e9cca99ca 100644 --- a/lnet/ulnds/tcplnd.c +++ b/lnet/ulnds/tcplnd.c @@ -252,7 +252,7 @@ int tcpnal_init(bridge b) return(PTL_NAL_FAILED); } /* XXX cfs hack */ - b->lib_nal->libnal_ni.ni_pid.pid=0; +// b->lib_nal->libnal_ni.ni_pid.pid=0; b->lower=m; return(PTL_OK); } diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 8f5bd37d53..b69e1e195c 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -133,6 +133,10 @@ #define LUSTRE_OST_VERSION (0x00040000|PTLRPC_MSG_VERSION) #define LUSTRE_DLM_VERSION (0x00040000|PTLRPC_MSG_VERSION) +/* initial pid */ +#define LUSTRE_PTL_PID 999999 +#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID + struct lustre_handle { __u64 cookie; }; diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 03d9ab3a77..aa8309d53c 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -153,7 +153,9 @@ #define PTLBD_MAXREQSIZE 1024 struct ptlrpc_peer { - ptl_nid_t peer_nid; +/* bugfix #4615 + */ + ptl_process_id_t peer_id; struct ptlrpc_ni *peer_ni; }; @@ -515,9 +517,17 @@ struct ptlrpc_service { static inline char *ptlrpc_peernid2str(struct ptlrpc_peer *p, char *str) { LASSERT(p->peer_ni != NULL); - return (portals_nid2str(p->peer_ni->pni_number, p->peer_nid, str)); + return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str)); +} + +/* For bug #4615 */ +static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str) +{ + LASSERT(p->peer_ni != NULL); + return (portals_id2str(p->peer_ni->pni_number, p->peer_id, str)); } + /* ptlrpc/events.c */ extern struct ptlrpc_ni ptlrpc_interfaces[]; extern int ptlrpc_ninterfaces; @@ -539,6 +549,7 @@ int 
ptlrpc_put_connection(struct ptlrpc_connection *c); struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); void ptlrpc_init_connection(void); void ptlrpc_cleanup_connection(void); +extern ptl_pid_t ptl_get_pid(void); /* ptlrpc/niobuf.c */ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index fb70b653d9..8d41587132 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -416,11 +416,11 @@ int target_handle_connect(struct ptlrpc_request *req) /* NB the casts only avoid compiler warnings */ case 8: snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_nid); + "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_id.nid); break; case 4: snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_%x_UUID", (__u32)req->rq_peer.peer_nid); + "NET_%x_UUID", (__u32)req->rq_peer.peer_id.nid); break; default: LBUG(); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index b4acd619e4..d1a60feb14 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -311,7 +311,7 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,const char *ast_type) LDLM_ERROR(lock, "%s AST failed (%d): evicting client %s@%s NID "LPX64 " (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid, - conn->c_remote_uuid.uuid, conn->c_peer.peer_nid, + conn->c_remote_uuid.uuid, conn->c_peer.peer_id.nid, ptlrpc_peernid2str(&conn->c_peer, str)); ptlrpc_fail_export(lock->l_export); } diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index 16437bcc99..73f97da7e4 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -101,7 +101,29 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) } return str; } - +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, 
"%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} /* * random number generator stuff */ diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 19fd83a9d1..13157cdb3b 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -82,6 +82,29 @@ struct pingcli_args { int count; int size; }; +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? 
%llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} struct task_struct *current; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 649e6fe53b..4faa1eff86 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1626,9 +1626,9 @@ static int filter_connect_post(struct obd_export *exp) rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse); portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CDEBUG(D_OTHER, "%s: init llog ctxt for export "LPX64"/%s, group %d\n", - obd->obd_name, exp->exp_connection->c_peer.peer_nid, + obd->obd_name, exp->exp_connection->c_peer.peer_id.nid, str, fed->fed_group); RETURN(rc); @@ -1685,11 +1685,11 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, if (fed->fed_group != 0 && fed->fed_group != group) { char str[PTL_NALFMT_SIZE]; portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CERROR("!!! This export (nid "LPX64"/%s) used object group %d " "earlier; now it's trying to use group %d! This could " "be a bug in the MDS. Tell CFS.\n", - exp->exp_connection->c_peer.peer_nid, str, + exp->exp_connection->c_peer.peer_id.nid, str, fed->fed_group, group); GOTO(cleanup, rc = -EPROTO); } @@ -2392,9 +2392,9 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if (!(oa->o_valid & OBD_MD_FLGROUP) || group == 0) { portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CERROR("!!! 
nid "LPX64"/%s sent invalid object group %d\n", - exp->exp_connection->c_peer.peer_nid, str, group); + exp->exp_connection->c_peer.peer_id.nid, str, group); RETURN(-EINVAL); } @@ -2410,11 +2410,11 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if (fed->fed_group != group && !recreate_objs && !(oa->o_valid & OBD_MD_REINT)) { portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CERROR("!!! This export (nid "LPX64"/%s) used object group %d " "earlier; now it's trying to use group %d! This could " "be a bug in the MDS. Tell CFS.\n", - exp->exp_connection->c_peer.peer_nid, str, + exp->exp_connection->c_peer.peer_id.nid, str, fed->fed_group, group); RETURN(-ENOTUNIQ); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index ed75cfd7da..1f84b4b52c 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -870,12 +870,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, if (server_cksum != cksum) { CERROR("Bad checksum: server %x, client %x, server NID " LPX64" (%s)\n", server_cksum, cksum, - peer->peer_nid, str); + peer->peer_id.nid, str); cksum_counter = 0; oa->o_cksum = cksum; } else if ((cksum_counter & (-cksum_counter)) == cksum_counter){ CWARN("Checksum %u from "LPX64" (%s) OK: %x\n", - cksum_counter, peer->peer_nid, str, cksum); + cksum_counter, peer->peer_id.nid, str, cksum); } } else { static int cksum_missed; @@ -884,7 +884,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, if ((cksum_missed & (-cksum_missed)) == cksum_missed) CERROR("Request checksum %u from "LPX64", no reply\n", cksum_missed, - req->rq_import->imp_connection->c_peer.peer_nid); + req->rq_import->imp_connection->c_peer.peer_id.nid); } #endif RETURN(0); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 2271c6c408..d96936e397 100644 --- 
a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -669,7 +669,7 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (client_cksum != cksum) { CERROR("Bad checksum: client %x, server %x, client NID " LPX64" (%s)\n", client_cksum, cksum, - req->rq_connection->c_peer.peer_nid, str); + req->rq_connection->c_peer.peer_id.nid, str); cksum_counter = 1; repbody->oa.o_cksum = cksum; } else { @@ -677,7 +677,7 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if ((cksum_counter & (-cksum_counter)) == cksum_counter) CWARN("Checksum %u from "LPX64": %x OK\n", cksum_counter, - req->rq_connection->c_peer.peer_nid, + req->rq_connection->c_peer.peer_id.nid, cksum); } } diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 6ef28a8535..b13f161638 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -326,6 +326,7 @@ void portals_debug_dumplog(void); /* support decl needed both by kernel and liblustre */ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); +char *portals_id2str(int nal, ptl_process_id_t nid, char *str); #ifndef CURRENT_TIME # define CURRENT_TIME time(0) @@ -644,7 +645,7 @@ enum { NAL_ENUM_END_MARKER }; -#define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */ +#define PTL_NALFMT_SIZE 30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */ #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) diff --git a/lustre/portals/knals/gmnal/gmnal_api.c b/lustre/portals/knals/gmnal/gmnal_api.c index 002587d1b5..bdf5cfe6cf 100644 --- a/lustre/portals/knals/gmnal/gmnal_api.c +++ b/lustre/portals/knals/gmnal/gmnal_api.c @@ -318,7 +318,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, /* pid = gm_getpid(); */ - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = global_nid; CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); diff --git a/lustre/portals/knals/ibnal/ibnal.c b/lustre/portals/knals/ibnal/ibnal.c index 
86c2a63552..c6bd7ed044 100644 --- a/lustre/portals/knals/ibnal/ibnal.c +++ b/lustre/portals/knals/ibnal/ibnal.c @@ -246,7 +246,7 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, - 0, // process id is set as 0 + requested_pid , // process id is set as requested_pid instead of 0 ptl_size, ac_size); diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index c595450684..38d1636166 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ b/lustre/portals/knals/qswnal/qswnal.c @@ -696,7 +696,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, /* Network interface ready to initialise */ my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); - my_process_id.pid = 0; + my_process_id.pid = requested_pid; rc = lib_init(&kqswnal_lib, nal, my_process_id, requested_limits, actual_limits); @@ -802,7 +802,7 @@ kqswnal_initialise (void) /* Pure gateways, and the workaround for 'EKC blocks forever until * the service is active' want the NAL started up at module load * time... 
*/ - rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(QSWNAL); return (-ENODEV); diff --git a/lustre/portals/knals/scimacnal/scimacnal.c b/lustre/portals/knals/scimacnal/scimacnal.c index e77bd8ee27..75188e97dc 100644 --- a/lustre/portals/knals/scimacnal/scimacnal.c +++ b/lustre/portals/knals/scimacnal/scimacnal.c @@ -205,7 +205,7 @@ static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, } kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = kscimacnal_data.ksci_nid; CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 9d39cb1bd5..3a3629b873 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -1589,7 +1589,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } /* NB we have to wait to be told our true NID... */ - process_id.pid = 0; + process_id.pid = requested_pid; //LUSTRE_SRV_PTL_PID; process_id.nid = 0; rc = lib_init(&ksocknal_lib, nal, process_id, @@ -1739,7 +1739,7 @@ ksocknal_module_init (void) } /* Pure gateways want the NAL started up at module load time... 
*/ - rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(SOCKNAL); return (-ENODEV); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index ff73f71ce5..e14205951e 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -66,6 +66,7 @@ #include <portals/lib-p30.h> #include <portals/nal.h> #include <portals/socknal.h> +#include <linux/lustre_idl.h> #if CONFIG_SMP # define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 2b2ce3b992..eb75e60442 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -985,6 +985,30 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) } return str; } +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? 
%llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} + #ifdef __KERNEL__ char stack_backtrace[LUSTRE_TRACE_SIZE]; @@ -1077,3 +1101,4 @@ EXPORT_SYMBOL(portals_debug_set_level); EXPORT_SYMBOL(portals_run_upcall); EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); +EXPORT_SYMBOL(portals_id2str); diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c index 9dcc06e343..854a452e55 100644 --- a/lustre/portals/portals/lib-move.c +++ b/lustre/portals/portals/lib-move.c @@ -82,6 +82,8 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, if (me->match_id.nid != PTL_NID_ANY && me->match_id.nid != src_nid) continue; + + CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid); if (me->match_id.pid != PTL_PID_ANY && me->match_id.pid != src_pid) diff --git a/lustre/portals/portals/module.c b/lustre/portals/portals/module.c index 5615a72443..d292a501a0 100644 --- a/lustre/portals/portals/module.c +++ b/lustre/portals/portals/module.c @@ -50,6 +50,7 @@ #include <linux/kp30.h> #include <linux/kpr.h> #include <linux/portals_compat25.h> +#include <linux/lustre_idl.h> extern void (kping_client)(struct portal_ioctl_data *); @@ -83,7 +84,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); @@ -104,7 +105,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); diff --git a/lustre/portals/unals/tcpnal.c b/lustre/portals/unals/tcpnal.c 
index 34a9c9d5e3..6e9cca99ca 100644 --- a/lustre/portals/unals/tcpnal.c +++ b/lustre/portals/unals/tcpnal.c @@ -252,7 +252,7 @@ int tcpnal_init(bridge b) return(PTL_NAL_FAILED); } /* XXX cfs hack */ - b->lib_nal->libnal_ni.ni_pid.pid=0; +// b->lib_nal->libnal_ni.ni_pid.pid=0; b->lower=m; return(PTL_OK); } diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 2e0d889da6..646cb0721b 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -62,12 +62,12 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, CDEBUG(D_INFO, "peer is %s on %s\n", - ptlrpc_peernid2str(peer, str), peer->peer_ni->pni_name); + ptlrpc_id2str(peer, str), peer->peer_ni->pni_name); spin_lock(&conn_lock); list_for_each(tmp, &conn_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (peer->peer_nid == c->c_peer.peer_nid && + if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) && peer->peer_ni == c->c_peer.peer_ni) { ptlrpc_connection_addref(c); GOTO(out, c); @@ -76,7 +76,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, list_for_each_safe(tmp, pos, &conn_unused_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (peer->peer_nid == c->c_peer.peer_nid && + if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) && peer->peer_ni == c->c_peer.peer_ni) { ptlrpc_connection_addref(c); list_del(&c->c_link); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 254ae3093b..8ea1813f70 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -197,8 +197,8 @@ void request_in_callback(ptl_event_t *ev) CERROR("Can't allocate incoming request descriptor: " "Dropping %s RPC from %s\n", service->srv_name, - portals_nid2str(srv_ni->sni_ni->pni_number, - ev->initiator.nid, str)); + portals_id2str(srv_ni->sni_ni->pni_number, + ev->initiator, str)); return; } } @@ -212,7 +212,7 @@ void request_in_callback(ptl_event_t *ev) ev->ni_fail_type == PTL_NI_OK) req->rq_reqlen 
= ev->mlength; do_gettimeofday(&req->rq_arrival_time); - req->rq_peer.peer_nid = ev->initiator.nid; + req->rq_peer.peer_id = ev->initiator; req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni; req->rq_rqbd = rqbd; @@ -361,7 +361,8 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) pni = &ptlrpc_interfaces[i]; if (pni->pni_number == peer_nal) { - peer->peer_nid = peer_nid; + peer->peer_id.nid = peer_nid; + peer->peer_id.pid = LUSTRE_SRV_PTL_PID; //#4165:only client will call this func. peer->peer_ni = pni; return (0); } @@ -409,20 +410,37 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni) /* notreached */ } +ptl_pid_t ptl_get_pid(void) +{ + ptl_pid_t pid; + +#ifndef __KERNEL__ + pid = getpid(); +#else + pid = LUSTRE_SRV_PTL_PID; +#endif + return pid; +} + int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) { int rc; char str[20]; ptl_handle_ni_t nih; - + ptl_pid_t pid; + + pid = ptl_get_pid(); + /* We're not passing any limits yet... */ - rc = PtlNIInit(number, 0, NULL, NULL, &nih); + rc = PtlNIInit(number, pid, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CDEBUG (D_NET, "Can't init network interface %s: %d\n", name, rc); return (-ENOENT); } + CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid()); + PtlSnprintHandle(str, sizeof(str), nih); CDEBUG (D_NET, "init %d %s: %s\n", number, name, str); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index b6282164d2..e8e46607ae 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -34,7 +34,6 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, ptl_ack_req_t ack, struct ptlrpc_cb_id *cbid, struct ptlrpc_connection *conn, int portal, __u64 xid) { - ptl_process_id_t remote_id; int rc; int rc2; ptl_md_t md; @@ -43,14 +42,10 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, LASSERT (portal != 0); LASSERT (conn != NULL); - CDEBUG (D_INFO, "conn=%p ni %s nid %s on %s\n", + CDEBUG (D_INFO, "conn=%p ni %s id %s on %s\n", 
conn, conn->c_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&conn->c_peer, str), + ptlrpc_id2str(&conn->c_peer, str), conn->c_peer.peer_ni->pni_name); - - remote_id.nid = conn->c_peer.peer_nid, - remote_id.pid = 0; - md.start = base; md.length = len; md.threshold = (ack == PTL_ACK_REQ) ? 2 : 1; @@ -76,13 +71,13 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n", len, portal, xid); - rc2 = PtlPut (*mdh, ack, remote_id, portal, 0, xid, 0, 0); + rc = PtlPut (*mdh, ack, conn->c_peer.peer_id, portal, 0, xid, 0, 0); if (rc != PTL_OK) { /* We're going to get an UNLINK event when I unlink below, * which will complete just like any other failed send, so * I fall through and return success here! */ CERROR("PtlPut(%s, %d, "LPD64") failed: %d\n", - ptlrpc_peernid2str(&conn->c_peer, str), + ptlrpc_id2str(&conn->c_peer, str), portal, xid, rc); rc2 = PtlMDUnlink(*mdh); LASSERT (rc2 == PTL_OK); @@ -96,7 +91,6 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) int rc; int rc2; struct ptlrpc_peer *peer; - ptl_process_id_t remote_id; ptl_md_t md; __u64 xid; char str[PTL_NALFMT_SIZE]; @@ -134,22 +128,19 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) /* Client's bulk and reply matchbits are the same */ xid = desc->bd_req->rq_xid; - remote_id.nid = peer->peer_nid; - remote_id.pid = 0; - CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d on %s " "nid %s pid %d xid "LPX64"\n", desc->bd_iov_count, desc->bd_nob, desc->bd_portal, peer->peer_ni->pni_name, - ptlrpc_peernid2str(peer, str), remote_id.pid, xid); + ptlrpc_id2str(peer, str), peer->peer_id.pid, xid); /* Network is about to get at the memory */ desc->bd_network_rw = 1; if (desc->bd_type == BULK_PUT_SOURCE) - rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, remote_id, + rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, peer->peer_id, desc->bd_portal, 0, xid, 0, 0); else - rc = PtlGet (desc->bd_md_h, remote_id, + rc = PtlGet 
(desc->bd_md_h, peer->peer_id, desc->bd_portal, 0, xid, 0); if (rc != PTL_OK) { @@ -157,7 +148,7 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) * event this creates will signal completion with failure, * so we return SUCCESS here! */ CERROR("Transfer(%s, %d, "LPX64") failed: %d\n", - ptlrpc_peernid2str(peer, str), + ptlrpc_id2str(peer, str), desc->bd_portal, xid, rc); rc2 = PtlMDUnlink(desc->bd_md_h); LASSERT (rc2 == PTL_OK); @@ -205,7 +196,6 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) struct ptlrpc_peer *peer; int rc; int rc2; - ptl_process_id_t source_id; ptl_handle_me_t me_h; ptl_md_t md; ENTRY; @@ -242,13 +232,10 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) LASSERT (!desc->bd_registered || req->rq_xid != desc->bd_last_xid); desc->bd_registered = 1; desc->bd_last_xid = req->rq_xid; - - source_id.nid = desc->bd_import->imp_connection->c_peer.peer_nid; - source_id.pid = PTL_PID_ANY; - + rc = PtlMEAttach(peer->peer_ni->pni_ni_h, - desc->bd_portal, source_id, req->rq_xid, 0, - PTL_UNLINK, PTL_INS_AFTER, &me_h); + desc->bd_portal, desc->bd_import->imp_connection->c_peer.peer_id, + req->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc); LASSERT (rc == PTL_NO_SPACE); @@ -397,7 +384,6 @@ int ptl_send_rpc(struct ptlrpc_request *request) int rc2; struct ptlrpc_connection *connection; unsigned long flags; - ptl_process_id_t source_id; ptl_handle_me_t reply_me_h; ptl_md_t reply_md; ENTRY; @@ -419,10 +405,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_reqmsg->handle = request->rq_import->imp_remote_handle; request->rq_reqmsg->type = PTL_RPC_MSG_REQUEST; request->rq_reqmsg->conn_cnt = request->rq_import->imp_conn_cnt; - - source_id.nid = connection->c_peer.peer_nid; - source_id.pid = PTL_PID_ANY; - + LASSERT (request->rq_replen != 0); if (request->rq_repmsg == NULL) OBD_ALLOC(request->rq_repmsg, request->rq_replen); @@ -431,7 +414,7 @@ int ptl_send_rpc(struct 
ptlrpc_request *request) rc = PtlMEAttach(connection->c_peer.peer_ni->pni_ni_h, request->rq_reply_portal, /* XXX FIXME bug 249 */ - source_id, request->rq_xid, 0, PTL_UNLINK, + connection->c_peer.peer_id, request->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &reply_me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc); -- GitLab