diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 6ef28a8535e985f668dcbf675399140c6c071516..b13f161638e66d3772c585a06aa9afa5b3da76b2 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -326,6 +326,7 @@ void portals_debug_dumplog(void); /* support decl needed both by kernel and liblustre */ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); +char *portals_id2str(int nal, ptl_process_id_t nid, char *str); #ifndef CURRENT_TIME # define CURRENT_TIME time(0) @@ -644,7 +645,7 @@ enum { NAL_ENUM_END_MARKER }; -#define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */ +#define PTL_NALFMT_SIZE 30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */ #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c index 002587d1b5962f3b73acbcb9fc29dde9d0be25ae..bdf5cfe6cfb547f8ed8d3af0b05857456f1c9773 100644 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ b/lnet/klnds/gmlnd/gmlnd_api.c @@ -318,7 +318,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, /* pid = gm_getpid(); */ - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = global_nid; CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); diff --git a/lnet/klnds/iblnd/ibnal.c b/lnet/klnds/iblnd/ibnal.c index 86c2a6355206923d845a8321c343316a9ba7eb7b..c6bd7ed044c1c7f0390168566df82e97da05bf91 100644 --- a/lnet/klnds/iblnd/ibnal.c +++ b/lnet/klnds/iblnd/ibnal.c @@ -246,7 +246,7 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, - 0, // process id is set as 0 + requested_pid , // process id is set as requested_pid instead of 0 ptl_size, ac_size); diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index c595450684aadb55fb7bb7baf11f7cc1dc7319ac..38d1636166d57950555ae99a3d4e977220ccee7d 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -696,7 +696,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, /* Network interface ready to initialise */ my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); - my_process_id.pid = 0; + my_process_id.pid = requested_pid; rc = lib_init(&kqswnal_lib, nal, my_process_id, requested_limits, actual_limits); @@ -802,7 +802,7 @@ kqswnal_initialise (void) /* Pure gateways, and the workaround for 'EKC blocks forever until * the service is active' want the NAL started up at module load * time... */ - rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(QSWNAL); return (-ENODEV); diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c index e77bd8ee278dc69428c1e5d1525825e6907dfab5..75188e97dcb1b35fd4cc90929b1e8e6665db763f 100644 --- a/lnet/klnds/scimaclnd/scimacnal.c +++ b/lnet/klnds/scimaclnd/scimacnal.c @@ -205,7 +205,7 @@ static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, } kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = kscimacnal_data.ksci_nid; CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 9d39cb1bd522b4ec6e115e5b604adc658f45d98f..3a3629b873d45b64e163b0d8b4123cf72b6ec2b1 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1589,7 +1589,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } /* NB we have to wait to be told our true NID... */ - process_id.pid = 0; + process_id.pid = requested_pid; //LUSTRE_SRV_PTL_PID; process_id.nid = 0; rc = lib_init(&ksocknal_lib, nal, process_id, @@ -1739,7 +1739,7 @@ ksocknal_module_init (void) } /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(SOCKNAL); return (-ENODEV); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index ff73f71ce51cacad07e14ac0e76001f340d4e4a9..e14205951eb4d76ce34272dab93c5e14e74300eb 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -66,6 +66,7 @@ #include <portals/lib-p30.h> #include <portals/nal.h> #include <portals/socknal.h> +#include <linux/lustre_idl.h> #if CONFIG_SMP # define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 2b2ce3b99247aa72c1b99ad6659024177f866c22..eb75e60442e470d08c4b96dcd265d376fb500445 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -985,6 +985,30 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) } return str; } +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} + #ifdef __KERNEL__ char stack_backtrace[LUSTRE_TRACE_SIZE]; @@ -1077,3 +1101,4 @@ EXPORT_SYMBOL(portals_debug_set_level); EXPORT_SYMBOL(portals_run_upcall); EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); +EXPORT_SYMBOL(portals_id2str); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 9dcc06e343baea58fbeb05faf2f3cfa75bd9c645..854a452e55a4081d8fc9b778972ff4305e81d424 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -82,6 +82,8 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, if (me->match_id.nid != PTL_NID_ANY && me->match_id.nid != src_nid) continue; + + CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid); if (me->match_id.pid != PTL_PID_ANY && me->match_id.pid != src_pid) diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 5615a7244329138a1242d378fefc27ebcd12f8cf..d292a501a057de31388bd608e0c385be870287f5 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -50,6 +50,7 @@ #include <linux/kp30.h> #include <linux/kpr.h> #include <linux/portals_compat25.h> +#include <linux/lustre_idl.h> extern void (kping_client)(struct portal_ioctl_data *); @@ -83,7 +84,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); @@ -104,7 +105,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c index 34a9c9d5e3d43b6a57deccb423d7e116b0e2f70a..6e9cca99ca7647fb842cd3c3a592f4e1a46ee3ec 100644 --- a/lnet/ulnds/socklnd/tcplnd.c +++ b/lnet/ulnds/socklnd/tcplnd.c @@ -252,7 +252,7 @@ int tcpnal_init(bridge b) return(PTL_NAL_FAILED); } /* XXX cfs hack */ - b->lib_nal->libnal_ni.ni_pid.pid=0; +// b->lib_nal->libnal_ni.ni_pid.pid=0; b->lower=m; return(PTL_OK); } diff --git a/lnet/ulnds/tcplnd.c b/lnet/ulnds/tcplnd.c index 34a9c9d5e3d43b6a57deccb423d7e116b0e2f70a..6e9cca99ca7647fb842cd3c3a592f4e1a46ee3ec 100644 --- a/lnet/ulnds/tcplnd.c +++ b/lnet/ulnds/tcplnd.c @@ -252,7 +252,7 @@ int tcpnal_init(bridge b) return(PTL_NAL_FAILED); } /* XXX cfs hack */ - b->lib_nal->libnal_ni.ni_pid.pid=0; +// b->lib_nal->libnal_ni.ni_pid.pid=0; b->lower=m; return(PTL_OK); } diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 8f5bd37d5369319f6bfdc53003cfcfa470c134e2..b69e1e195c3febd4edc69f74a4b4e6077b707d70 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -133,6 +133,10 @@ #define LUSTRE_OST_VERSION (0x00040000|PTLRPC_MSG_VERSION) #define LUSTRE_DLM_VERSION (0x00040000|PTLRPC_MSG_VERSION) +/* initial pid */ +#define LUSTRE_PTL_PID 999999 +#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID + struct lustre_handle { __u64 cookie; }; diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 03d9ab3a7794d2d78ce4cc24223c731f22735f24..aa8309d53ca6b79d69ae564417a235c89bf89c3b 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -153,7 +153,9 @@ #define PTLBD_MAXREQSIZE 1024 struct ptlrpc_peer { - ptl_nid_t peer_nid; +/* bugfix #4615 + */ + ptl_process_id_t peer_id; struct ptlrpc_ni *peer_ni; }; @@ -515,9 +517,17 @@ struct ptlrpc_service { static inline char *ptlrpc_peernid2str(struct ptlrpc_peer *p, char *str) { LASSERT(p->peer_ni != NULL); - return (portals_nid2str(p->peer_ni->pni_number, p->peer_nid, str)); + return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str)); +} + +/* For bug #4615 */ +static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str) +{ + LASSERT(p->peer_ni != NULL); + return (portals_id2str(p->peer_ni->pni_number, p->peer_id, str)); } + /* ptlrpc/events.c */ extern struct ptlrpc_ni ptlrpc_interfaces[]; extern int ptlrpc_ninterfaces; @@ -539,6 +549,7 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c); struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); void ptlrpc_init_connection(void); void ptlrpc_cleanup_connection(void); +extern ptl_pid_t ptl_get_pid(void); /* ptlrpc/niobuf.c */ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index fb70b653d9b16d0fb7e0bd0f169fc07e76b11eb5..8d415871329844559b3d3335a9ae2f7d2d5c7a37 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -416,11 +416,11 @@ int target_handle_connect(struct ptlrpc_request *req) /* NB the casts only avoid compiler warnings */ case 8: snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_nid); + "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_id.nid); break; case 4: snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_%x_UUID", (__u32)req->rq_peer.peer_nid); + "NET_%x_UUID", (__u32)req->rq_peer.peer_id.nid); break; default: LBUG(); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index b4acd619e4a200d9ecade6b6d765de8c5b375be1..d1a60feb1450311fccd530209799a82ac9914f65 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -311,7 +311,7 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,const char *ast_type) LDLM_ERROR(lock, "%s AST failed (%d): evicting client %s@%s NID "LPX64 " (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid, - conn->c_remote_uuid.uuid, conn->c_peer.peer_nid, + conn->c_remote_uuid.uuid, conn->c_peer.peer_id.nid, ptlrpc_peernid2str(&conn->c_peer, str)); ptlrpc_fail_export(lock->l_export); } diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index 16437bcc99852b1149b4d4576777b56d85a11969..73f97da7e47ecc025c134555edacfbb2391d68d4 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -101,7 +101,29 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) } return str; } - +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} /* * random number generator stuff */ diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 19fd83a9d1dd1923077bae91a769e502ad1de294..13157cdb3b400c489dcbd0144f4491d53142803a 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -82,6 +82,29 @@ struct pingcli_args { int count; int size; }; +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} struct task_struct *current; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 649e6fe53bdb0dbf6713284740d596552539effe..4faa1eff860872ad14a224d668b18fd71dd46a3e 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1626,9 +1626,9 @@ static int filter_connect_post(struct obd_export *exp) rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse); portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CDEBUG(D_OTHER, "%s: init llog ctxt for export "LPX64"/%s, group %d\n", - obd->obd_name, exp->exp_connection->c_peer.peer_nid, + obd->obd_name, exp->exp_connection->c_peer.peer_id.nid, str, fed->fed_group); RETURN(rc); @@ -1685,11 +1685,11 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, if (fed->fed_group != 0 && fed->fed_group != group) { char str[PTL_NALFMT_SIZE]; portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CERROR("!!! This export (nid "LPX64"/%s) used object group %d " "earlier; now it's trying to use group %d! This could " "be a bug in the MDS. Tell CFS.\n", - exp->exp_connection->c_peer.peer_nid, str, + exp->exp_connection->c_peer.peer_id.nid, str, fed->fed_group, group); GOTO(cleanup, rc = -EPROTO); } @@ -2392,9 +2392,9 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if (!(oa->o_valid & OBD_MD_FLGROUP) || group == 0) { portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CERROR("!!! nid "LPX64"/%s sent invalid object group %d\n", - exp->exp_connection->c_peer.peer_nid, str, group); + exp->exp_connection->c_peer.peer_id.nid, str, group); RETURN(-EINVAL); } @@ -2410,11 +2410,11 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if (fed->fed_group != group && !recreate_objs && !(oa->o_valid & OBD_MD_REINT)) { portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number, - exp->exp_connection->c_peer.peer_nid, str); + exp->exp_connection->c_peer.peer_id.nid, str); CERROR("!!! This export (nid "LPX64"/%s) used object group %d " "earlier; now it's trying to use group %d! This could " "be a bug in the MDS. Tell CFS.\n", - exp->exp_connection->c_peer.peer_nid, str, + exp->exp_connection->c_peer.peer_id.nid, str, fed->fed_group, group); RETURN(-ENOTUNIQ); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index ed75cfd7dace2b2460ea40ff916d8a06e6d2b3cf..1f84b4b52c26c4544cc3d769f7267c5940c03bfe 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -870,12 +870,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, if (server_cksum != cksum) { CERROR("Bad checksum: server %x, client %x, server NID " LPX64" (%s)\n", server_cksum, cksum, - peer->peer_nid, str); + peer->peer_id.nid, str); cksum_counter = 0; oa->o_cksum = cksum; } else if ((cksum_counter & (-cksum_counter)) == cksum_counter){ CWARN("Checksum %u from "LPX64" (%s) OK: %x\n", - cksum_counter, peer->peer_nid, str, cksum); + cksum_counter, peer->peer_id.nid, str, cksum); } } else { static int cksum_missed; @@ -884,7 +884,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, if ((cksum_missed & (-cksum_missed)) == cksum_missed) CERROR("Request checksum %u from "LPX64", no reply\n", cksum_missed, - req->rq_import->imp_connection->c_peer.peer_nid); + req->rq_import->imp_connection->c_peer.peer_id.nid); } #endif RETURN(0); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 2271c6c4081dab41debf12978c5fcdd437d2971a..d96936e397a98bb16014ff647e5e3fb7142fdd91 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -669,7 +669,7 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (client_cksum != cksum) { CERROR("Bad checksum: client %x, server %x, client NID " LPX64" (%s)\n", client_cksum, cksum, - req->rq_connection->c_peer.peer_nid, str); + req->rq_connection->c_peer.peer_id.nid, str); cksum_counter = 1; repbody->oa.o_cksum = cksum; } else { @@ -677,7 +677,7 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if ((cksum_counter & (-cksum_counter)) == cksum_counter) CWARN("Checksum %u from "LPX64": %x OK\n", cksum_counter, - req->rq_connection->c_peer.peer_nid, + req->rq_connection->c_peer.peer_id.nid, cksum); } } diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 6ef28a8535e985f668dcbf675399140c6c071516..b13f161638e66d3772c585a06aa9afa5b3da76b2 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -326,6 +326,7 @@ void portals_debug_dumplog(void); /* support decl needed both by kernel and liblustre */ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); +char *portals_id2str(int nal, ptl_process_id_t nid, char *str); #ifndef CURRENT_TIME # define CURRENT_TIME time(0) @@ -644,7 +645,7 @@ enum { NAL_ENUM_END_MARKER }; -#define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */ +#define PTL_NALFMT_SIZE 30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */ #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) diff --git a/lustre/portals/knals/gmnal/gmnal_api.c b/lustre/portals/knals/gmnal/gmnal_api.c index 002587d1b5962f3b73acbcb9fc29dde9d0be25ae..bdf5cfe6cfb547f8ed8d3af0b05857456f1c9773 100644 --- a/lustre/portals/knals/gmnal/gmnal_api.c +++ b/lustre/portals/knals/gmnal/gmnal_api.c @@ -318,7 +318,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, /* pid = gm_getpid(); */ - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = global_nid; CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); diff --git a/lustre/portals/knals/ibnal/ibnal.c b/lustre/portals/knals/ibnal/ibnal.c index 86c2a6355206923d845a8321c343316a9ba7eb7b..c6bd7ed044c1c7f0390168566df82e97da05bf91 100644 --- a/lustre/portals/knals/ibnal/ibnal.c +++ b/lustre/portals/knals/ibnal/ibnal.c @@ -246,7 +246,7 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, - 0, // process id is set as 0 + requested_pid , // process id is set as requested_pid instead of 0 ptl_size, ac_size); diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index c595450684aadb55fb7bb7baf11f7cc1dc7319ac..38d1636166d57950555ae99a3d4e977220ccee7d 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ b/lustre/portals/knals/qswnal/qswnal.c @@ -696,7 +696,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, /* Network interface ready to initialise */ my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); - my_process_id.pid = 0; + my_process_id.pid = requested_pid; rc = lib_init(&kqswnal_lib, nal, my_process_id, requested_limits, actual_limits); @@ -802,7 +802,7 @@ kqswnal_initialise (void) /* Pure gateways, and the workaround for 'EKC blocks forever until * the service is active' want the NAL started up at module load * time... */ - rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(QSWNAL); return (-ENODEV); diff --git a/lustre/portals/knals/scimacnal/scimacnal.c b/lustre/portals/knals/scimacnal/scimacnal.c index e77bd8ee278dc69428c1e5d1525825e6907dfab5..75188e97dcb1b35fd4cc90929b1e8e6665db763f 100644 --- a/lustre/portals/knals/scimacnal/scimacnal.c +++ b/lustre/portals/knals/scimacnal/scimacnal.c @@ -205,7 +205,7 @@ static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, } kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); - process_id.pid = 0; + process_id.pid = requested_pid; process_id.nid = kscimacnal_data.ksci_nid; CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 9d39cb1bd522b4ec6e115e5b604adc658f45d98f..3a3629b873d45b64e163b0d8b4123cf72b6ec2b1 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -1589,7 +1589,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } /* NB we have to wait to be told our true NID... */ - process_id.pid = 0; + process_id.pid = requested_pid; //LUSTRE_SRV_PTL_PID; process_id.nid = 0; rc = lib_init(&ksocknal_lib, nal, process_id, @@ -1739,7 +1739,7 @@ ksocknal_module_init (void) } /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(SOCKNAL); return (-ENODEV); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index ff73f71ce51cacad07e14ac0e76001f340d4e4a9..e14205951eb4d76ce34272dab93c5e14e74300eb 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -66,6 +66,7 @@ #include <portals/lib-p30.h> #include <portals/nal.h> #include <portals/socknal.h> +#include <linux/lustre_idl.h> #if CONFIG_SMP # define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 2b2ce3b99247aa72c1b99ad6659024177f866c22..eb75e60442e470d08c4b96dcd265d376fb500445 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -985,6 +985,30 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) } return str; } +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ + case TCPNAL: + /* userspace NAL */ + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + case IBNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; + } + return str; +} + #ifdef __KERNEL__ char stack_backtrace[LUSTRE_TRACE_SIZE]; @@ -1077,3 +1101,4 @@ EXPORT_SYMBOL(portals_debug_set_level); EXPORT_SYMBOL(portals_run_upcall); EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); +EXPORT_SYMBOL(portals_id2str); diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c index 9dcc06e343baea58fbeb05faf2f3cfa75bd9c645..854a452e55a4081d8fc9b778972ff4305e81d424 100644 --- a/lustre/portals/portals/lib-move.c +++ b/lustre/portals/portals/lib-move.c @@ -82,6 +82,8 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, if (me->match_id.nid != PTL_NID_ANY && me->match_id.nid != src_nid) continue; + + CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid); if (me->match_id.pid != PTL_PID_ANY && me->match_id.pid != src_pid) diff --git a/lustre/portals/portals/module.c b/lustre/portals/portals/module.c index 5615a7244329138a1242d378fefc27ebcd12f8cf..d292a501a057de31388bd608e0c385be870287f5 100644 --- a/lustre/portals/portals/module.c +++ b/lustre/portals/portals/module.c @@ -50,6 +50,7 @@ #include <linux/kp30.h> #include <linux/kpr.h> #include <linux/portals_compat25.h> +#include <linux/lustre_idl.h> extern void (kping_client)(struct portal_ioctl_data *); @@ -83,7 +84,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); @@ -104,7 +105,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); diff --git a/lustre/portals/unals/tcpnal.c b/lustre/portals/unals/tcpnal.c index 34a9c9d5e3d43b6a57deccb423d7e116b0e2f70a..6e9cca99ca7647fb842cd3c3a592f4e1a46ee3ec 100644 --- a/lustre/portals/unals/tcpnal.c +++ b/lustre/portals/unals/tcpnal.c @@ -252,7 +252,7 @@ int tcpnal_init(bridge b) return(PTL_NAL_FAILED); } /* XXX cfs hack */ - b->lib_nal->libnal_ni.ni_pid.pid=0; +// b->lib_nal->libnal_ni.ni_pid.pid=0; b->lower=m; return(PTL_OK); } diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 2e0d889da6fd6386ac9b2a8e072a3c061ffa39af..646cb0721badd79a80f87a3e10fc9028157d478f 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -62,12 +62,12 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, CDEBUG(D_INFO, "peer is %s on %s\n", - ptlrpc_peernid2str(peer, str), peer->peer_ni->pni_name); + ptlrpc_id2str(peer, str), peer->peer_ni->pni_name); spin_lock(&conn_lock); list_for_each(tmp, &conn_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (peer->peer_nid == c->c_peer.peer_nid && + if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) && peer->peer_ni == c->c_peer.peer_ni) { ptlrpc_connection_addref(c); GOTO(out, c); @@ -76,7 +76,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, list_for_each_safe(tmp, pos, &conn_unused_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (peer->peer_nid == c->c_peer.peer_nid && + if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) && peer->peer_ni == c->c_peer.peer_ni) { ptlrpc_connection_addref(c); list_del(&c->c_link); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 254ae3093b16e722b770167a25f2f54d7e246367..8ea1813f70f34ea6faa07d583ca224c016f51b1a 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -197,8 +197,8 @@ void request_in_callback(ptl_event_t *ev) CERROR("Can't allocate incoming request descriptor: " "Dropping %s RPC from %s\n", service->srv_name, - portals_nid2str(srv_ni->sni_ni->pni_number, - ev->initiator.nid, str)); + portals_id2str(srv_ni->sni_ni->pni_number, + ev->initiator, str)); return; } } @@ -212,7 +212,7 @@ void request_in_callback(ptl_event_t *ev) ev->ni_fail_type == PTL_NI_OK) req->rq_reqlen = ev->mlength; do_gettimeofday(&req->rq_arrival_time); - req->rq_peer.peer_nid = ev->initiator.nid; + req->rq_peer.peer_id = ev->initiator; req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni; req->rq_rqbd = rqbd; @@ -361,7 +361,8 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) pni = &ptlrpc_interfaces[i]; if (pni->pni_number == peer_nal) { - peer->peer_nid = peer_nid; + peer->peer_id.nid = peer_nid; + peer->peer_id.pid = LUSTRE_SRV_PTL_PID; //#4165:only client will call this func. peer->peer_ni = pni; return (0); } @@ -409,20 +410,37 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni) /* notreached */ } +ptl_pid_t ptl_get_pid(void) +{ + ptl_pid_t pid; + +#ifndef __KERNEL__ + pid = getpid(); +#else + pid = LUSTRE_SRV_PTL_PID; +#endif + return pid; +} + int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) { int rc; char str[20]; ptl_handle_ni_t nih; - + ptl_pid_t pid; + + pid = ptl_get_pid(); + /* We're not passing any limits yet... */ - rc = PtlNIInit(number, 0, NULL, NULL, &nih); + rc = PtlNIInit(number, pid, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CDEBUG (D_NET, "Can't init network interface %s: %d\n", name, rc); return (-ENOENT); } + CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid()); + PtlSnprintHandle(str, sizeof(str), nih); CDEBUG (D_NET, "init %d %s: %s\n", number, name, str); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index b6282164d298fce6572259ebb8020827db5c0906..e8e46607ae54a269d01dadc2638dc3e87e369927 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -34,7 +34,6 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, ptl_ack_req_t ack, struct ptlrpc_cb_id *cbid, struct ptlrpc_connection *conn, int portal, __u64 xid) { - ptl_process_id_t remote_id; int rc; int rc2; ptl_md_t md; @@ -43,14 +42,10 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, LASSERT (portal != 0); LASSERT (conn != NULL); - CDEBUG (D_INFO, "conn=%p ni %s nid %s on %s\n", + CDEBUG (D_INFO, "conn=%p ni %s id %s on %s\n", conn, conn->c_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&conn->c_peer, str), + ptlrpc_id2str(&conn->c_peer, str), conn->c_peer.peer_ni->pni_name); - - remote_id.nid = conn->c_peer.peer_nid, - remote_id.pid = 0; - md.start = base; md.length = len; md.threshold = (ack == PTL_ACK_REQ) ? 2 : 1; @@ -76,13 +71,13 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n", len, portal, xid); - rc2 = PtlPut (*mdh, ack, remote_id, portal, 0, xid, 0, 0); + rc = PtlPut (*mdh, ack, conn->c_peer.peer_id, portal, 0, xid, 0, 0); if (rc != PTL_OK) { /* We're going to get an UNLINK event when I unlink below, * which will complete just like any other failed send, so * I fall through and return success here! */ CERROR("PtlPut(%s, %d, "LPD64") failed: %d\n", - ptlrpc_peernid2str(&conn->c_peer, str), + ptlrpc_id2str(&conn->c_peer, str), portal, xid, rc); rc2 = PtlMDUnlink(*mdh); LASSERT (rc2 == PTL_OK); @@ -96,7 +91,6 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) int rc; int rc2; struct ptlrpc_peer *peer; - ptl_process_id_t remote_id; ptl_md_t md; __u64 xid; char str[PTL_NALFMT_SIZE]; @@ -134,22 +128,19 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) /* Client's bulk and reply matchbits are the same */ xid = desc->bd_req->rq_xid; - remote_id.nid = peer->peer_nid; - remote_id.pid = 0; - CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d on %s " "nid %s pid %d xid "LPX64"\n", desc->bd_iov_count, desc->bd_nob, desc->bd_portal, peer->peer_ni->pni_name, - ptlrpc_peernid2str(peer, str), remote_id.pid, xid); + ptlrpc_id2str(peer, str), peer->peer_id.pid, xid); /* Network is about to get at the memory */ desc->bd_network_rw = 1; if (desc->bd_type == BULK_PUT_SOURCE) - rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, remote_id, + rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, peer->peer_id, desc->bd_portal, 0, xid, 0, 0); else - rc = PtlGet (desc->bd_md_h, remote_id, + rc = PtlGet (desc->bd_md_h, peer->peer_id, desc->bd_portal, 0, xid, 0); if (rc != PTL_OK) { @@ -157,7 +148,7 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) * event this creates will signal completion with failure, * so we return SUCCESS here! */ CERROR("Transfer(%s, %d, "LPX64") failed: %d\n", - ptlrpc_peernid2str(peer, str), + ptlrpc_id2str(peer, str), desc->bd_portal, xid, rc); rc2 = PtlMDUnlink(desc->bd_md_h); LASSERT (rc2 == PTL_OK); @@ -205,7 +196,6 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) struct ptlrpc_peer *peer; int rc; int rc2; - ptl_process_id_t source_id; ptl_handle_me_t me_h; ptl_md_t md; ENTRY; @@ -242,13 +232,10 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) LASSERT (!desc->bd_registered || req->rq_xid != desc->bd_last_xid); desc->bd_registered = 1; desc->bd_last_xid = req->rq_xid; - - source_id.nid = desc->bd_import->imp_connection->c_peer.peer_nid; - source_id.pid = PTL_PID_ANY; - + rc = PtlMEAttach(peer->peer_ni->pni_ni_h, - desc->bd_portal, source_id, req->rq_xid, 0, - PTL_UNLINK, PTL_INS_AFTER, &me_h); + desc->bd_portal, desc->bd_import->imp_connection->c_peer.peer_id, + req->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc); LASSERT (rc == PTL_NO_SPACE); @@ -397,7 +384,6 @@ int ptl_send_rpc(struct ptlrpc_request *request) int rc2; struct ptlrpc_connection *connection; unsigned long flags; - ptl_process_id_t source_id; ptl_handle_me_t reply_me_h; ptl_md_t reply_md; ENTRY; @@ -419,10 +405,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_reqmsg->handle = request->rq_import->imp_remote_handle; request->rq_reqmsg->type = PTL_RPC_MSG_REQUEST; request->rq_reqmsg->conn_cnt = request->rq_import->imp_conn_cnt; - - source_id.nid = connection->c_peer.peer_nid; - source_id.pid = PTL_PID_ANY; - + LASSERT (request->rq_replen != 0); if (request->rq_repmsg == NULL) OBD_ALLOC(request->rq_repmsg, request->rq_replen); @@ -431,7 +414,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) rc = PtlMEAttach(connection->c_peer.peer_ni->pni_ni_h, request->rq_reply_portal, /* XXX FIXME bug 249 */ - source_id, request->rq_xid, 0, PTL_UNLINK, + connection->c_peer.peer_id, request->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &reply_me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc);