From a63d0eba2836e6aa5d68e36edce422d1a4e1956b Mon Sep 17 00:00:00 2001 From: eeb <eeb> Date: Wed, 23 May 2007 11:25:46 +0000 Subject: [PATCH] * Print portals error string in ptllnd warnings/errors --- lnet/klnds/ptllnd/ptllnd.c | 15 ++++++++----- lnet/klnds/ptllnd/ptllnd_cb.c | 10 +++++---- lnet/klnds/ptllnd/ptllnd_peer.c | 20 ++++++++++------- lnet/klnds/ptllnd/ptllnd_rx_buf.c | 29 ++++++++++++++----------- lnet/ulnds/ptllnd/ptllnd.c | 12 +++++++---- lnet/ulnds/ptllnd/ptllnd_cb.c | 36 +++++++++++++++++++------------ 6 files changed, 75 insertions(+), 47 deletions(-) diff --git a/lnet/klnds/ptllnd/ptllnd.c b/lnet/klnds/ptllnd/ptllnd.c index f9361f900d..fd707f38a6 100755 --- a/lnet/klnds/ptllnd/ptllnd.c +++ b/lnet/klnds/ptllnd/ptllnd.c @@ -532,7 +532,8 @@ kptllnd_startup (lnet_ni_t *ni) * Which is ok. */ if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) { - CERROR ("PtlNIInit: error %d\n", ptl_rc); + CERROR ("PtlNIInit: error %s(%d)\n", + kptllnd_errtype2str(ptl_rc), ptl_rc); rc = -EINVAL; goto failed; } @@ -543,7 +544,8 @@ kptllnd_startup (lnet_ni_t *ni) kptllnd_eq_callback, /* handler callback */ &kptllnd_data.kptl_eqh); /* output handle */ if (ptl_rc != PTL_OK) { - CERROR("PtlEQAlloc failed %d\n", ptl_rc); + CERROR("PtlEQAlloc failed %s(%d)\n", + kptllnd_errtype2str(ptl_rc), ptl_rc); rc = -ENOMEM; goto failed; } @@ -554,7 +556,8 @@ kptllnd_startup (lnet_ni_t *ni) ptl_rc = PtlGetId(kptllnd_data.kptl_nih, &kptllnd_data.kptl_portals_id); if (ptl_rc != PTL_OK) { - CERROR ("PtlGetID: error %d\n", ptl_rc); + CERROR ("PtlGetID: error %s(%d)\n", + kptllnd_errtype2str(ptl_rc), ptl_rc); rc = -EINVAL; goto failed; } @@ -818,13 +821,15 @@ kptllnd_shutdown (lnet_ni_t *ni) if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) { prc = PtlEQFree(kptllnd_data.kptl_eqh); if (prc != PTL_OK) - CERROR("Error %d freeing portals EQ\n", prc); + CERROR("Error %s(%d) freeing portals EQ\n", + kptllnd_errtype2str(prc), prc); } if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) { prc = PtlNIFini(kptllnd_data.kptl_nih); if (prc != PTL_OK) - CERROR("Error %d finalizing portals NI\n", prc); + CERROR("Error %s(%d) finalizing portals NI\n", + kptllnd_errtype2str(prc), prc); } LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0); diff --git a/lnet/klnds/ptllnd/ptllnd_cb.c b/lnet/klnds/ptllnd/ptllnd_cb.c index 75344e17ae..d734ba8167 100644 --- a/lnet/klnds/ptllnd/ptllnd_cb.c +++ b/lnet/klnds/ptllnd/ptllnd_cb.c @@ -227,8 +227,9 @@ kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type, ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, PTL_UNLINK, &mdh); if (ptlrc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %d\n", - libcfs_id2str(peer->peer_id), ptlrc); + CERROR("PtlMDBind(%s) failed: %s(%d)\n", + libcfs_id2str(peer->peer_id), + kptllnd_errtype2str(ptlrc), ptlrc); tx->tx_status = -EIO; kptllnd_tx_decref(tx); return -EIO; @@ -271,8 +272,9 @@ kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type, 0); /* offset */ if (ptlrc != PTL_OK) { - CERROR("Ptl%s failed: %d\n", - (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get", ptlrc); + CERROR("Ptl%s failed: %s(%d)\n", + (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get", + kptllnd_errtype2str(ptlrc), ptlrc); kptllnd_peer_close(peer, -EIO); /* Everything (including this RDMA) queued on the peer will diff --git a/lnet/klnds/ptllnd/ptllnd_peer.c b/lnet/klnds/ptllnd/ptllnd_peer.c index 77b7191dfb..71329e2cf6 100644 --- a/lnet/klnds/ptllnd/ptllnd_peer.c +++ b/lnet/klnds/ptllnd/ptllnd_peer.c @@ -503,8 +503,9 @@ kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh); if (prc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %d\n", - libcfs_id2str(peer->peer_id), prc); + CERROR("PtlMDBind(%s) failed: %s(%d)\n", + libcfs_id2str(peer->peer_id), + kptllnd_errtype2str(prc), prc); tx->tx_status = -EIO; kptllnd_tx_decref(tx); return; @@ -664,16 +665,18 @@ kptllnd_peer_check_sends (kptl_peer_t *peer) PTL_INS_BEFORE, &meh); if (rc != PTL_OK) { - CERROR("PtlMEAttach(%s) failed: %d\n", - libcfs_id2str(peer->peer_id), rc); + CERROR("PtlMEAttach(%s) failed: %s(%d)\n", + libcfs_id2str(peer->peer_id), + kptllnd_errtype2str(rc), rc); goto failed; } rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK, &tx->tx_rdma_mdh); if (rc != PTL_OK) { - CERROR("PtlMDAttach(%s) failed: %d\n", - libcfs_id2str(tx->tx_peer->peer_id), rc); + CERROR("PtlMDAttach(%s) failed: %s(%d)\n", + libcfs_id2str(tx->tx_peer->peer_id), + kptllnd_errtype2str(rc), rc); rc = PtlMEUnlink(meh); LASSERT(rc == PTL_OK); tx->tx_rdma_mdh = PTL_INVALID_HANDLE; @@ -696,8 +699,9 @@ kptllnd_peer_check_sends (kptl_peer_t *peer) 0, /* offset */ 0); /* header data */ if (rc != PTL_OK) { - CERROR("PtlPut %s error %d\n", - libcfs_id2str(peer->peer_id), rc); + CERROR("PtlPut %s error %s(%d)\n", + libcfs_id2str(peer->peer_id), + kptllnd_errtype2str(rc), rc); goto failed; } diff --git a/lnet/klnds/ptllnd/ptllnd_rx_buf.c b/lnet/klnds/ptllnd/ptllnd_rx_buf.c index e897086a26..ce21e9559b 100644 --- a/lnet/klnds/ptllnd/ptllnd_rx_buf.c +++ b/lnet/klnds/ptllnd/ptllnd_rx_buf.c @@ -268,7 +268,8 @@ kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb) PTL_INS_AFTER, &meh); if (rc != PTL_OK) { - CERROR("PtlMeAttach rxb failed %d\n", rc); + CERROR("PtlMeAttach rxb failed %s(%d)\n", + kptllnd_errtype2str(rc), rc); goto failed; } @@ -296,7 +297,8 @@ kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb) return; } - CERROR("PtlMDAttach rxb failed %d\n", rc); + CERROR("PtlMDAttach rxb failed %s(%d)\n", + kptllnd_errtype2str(rc), rc); rc = PtlMEUnlink(meh); LASSERT(rc == PTL_OK); @@ -395,16 +397,15 @@ kptllnd_rx_buffer_callback (ptl_event_t *ev) LASSERT (ev->type == PTL_EVENT_UNLINK || ev->match_bits == LNET_MSG_MATCHBITS); - if (ev->ni_fail_type != PTL_NI_OK) + if (ev->ni_fail_type != PTL_NI_OK) { CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn", kptllnd_ptlid2str(ev->initiator), kptllnd_evtype2str(ev->type), ev->type, rxb, kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type, unlinked); - if (ev->type == PTL_EVENT_PUT_END && - ev->ni_fail_type == PTL_NI_OK && - !rxbp->rxbp_shutdown) { + } else if (ev->type == PTL_EVENT_PUT_END && + !rxbp->rxbp_shutdown) { /* rxbp_shutdown sampled without locking! I only treat it as a * hint since shutdown can start while rx's are queued on @@ -494,8 +495,9 @@ kptllnd_nak (kptl_rx_t *rx) rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh); if (rc != PTL_OK) { - CWARN("Can't NAK %s: bind failed %d\n", - kptllnd_ptlid2str(rx->rx_initiator), rc); + CWARN("Can't NAK %s: bind failed %s(%d)\n", + kptllnd_ptlid2str(rx->rx_initiator), + kptllnd_errtype2str(rc), rc); return; } @@ -504,8 +506,9 @@ kptllnd_nak (kptl_rx_t *rx) LNET_MSG_MATCHBITS, 0, 0); if (rc != PTL_OK) - CWARN("Can't NAK %s: put failed %d\n", - kptllnd_ptlid2str(rx->rx_initiator), rc); + CWARN("Can't NAK %s: put failed %s(%d)\n", + kptllnd_ptlid2str(rx->rx_initiator), + kptllnd_errtype2str(rc), rc); } void @@ -548,9 +551,11 @@ kptllnd_rx_parse(kptl_rx_t *rx) srcid.nid = msg->ptlm_srcnid; srcid.pid = msg->ptlm_srcpid; - CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks\n", + CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n", libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type), - msg->ptlm_credits, rx, rx->rx_rxb, jiffies - rx->rx_treceived); + msg->ptlm_credits, rx, rx->rx_rxb, + jiffies - rx->rx_treceived, + cfs_duration_sec(jiffies - rx->rx_treceived)); if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) { CERROR("Bad source id %s from %s\n", diff --git a/lnet/ulnds/ptllnd/ptllnd.c b/lnet/ulnds/ptllnd/ptllnd.c index a3d06f2081..ef882a1ced 100644 --- a/lnet/ulnds/ptllnd/ptllnd.c +++ b/lnet/ulnds/ptllnd/ptllnd.c @@ -691,7 +691,8 @@ ptllnd_startup (lnet_ni_t *ni) rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid, NULL, NULL, &plni->plni_nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR("PtlNIInit failed: %d\n", rc); + CERROR("PtlNIInit failed: %s(%d)\n", + ptllnd_errtype2str(rc), rc); rc = -ENODEV; goto failed2; } @@ -699,7 +700,8 @@ ptllnd_startup (lnet_ni_t *ni) rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size, PTL_EQ_HANDLER_NONE, &plni->plni_eqh); if (rc != PTL_OK) { - CERROR("PtlEQAlloc failed: %d\n", rc); + CERROR("PtlEQAlloc failed: %s(%d)\n", + ptllnd_errtype2str(rc), rc); rc = -ENODEV; goto failed3; } @@ -707,8 +709,10 @@ ptllnd_startup (lnet_ni_t *ni) /* * Fetch the Portals NID */ - if(rc != PtlGetId(plni->plni_nih,&plni->plni_portals_id)){ - CERROR ("PtlGetID failed : %d\n", rc); + rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id); + if (rc != PTL_OK) { + CERROR ("PtlGetID failed : %s(%d)\n", + ptllnd_errtype2str(rc), rc); rc = -EINVAL; goto failed4; } diff --git a/lnet/ulnds/ptllnd/ptllnd_cb.c b/lnet/ulnds/ptllnd/ptllnd_cb.c index ec6170f7f6..d2bab0750a 100644 --- a/lnet/ulnds/ptllnd/ptllnd_cb.c +++ b/lnet/ulnds/ptllnd/ptllnd_cb.c @@ -713,7 +713,8 @@ ptllnd_post_buffer(ptllnd_buffer_t *buf) anyid, LNET_MSG_MATCHBITS, 0, PTL_UNLINK, PTL_INS_AFTER, &meh); if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); + CERROR("PtlMEAttach failed: %s(%d)\n", + ptllnd_errtype2str(rc), rc); return -ENOMEM; } @@ -724,7 +725,8 @@ ptllnd_post_buffer(ptllnd_buffer_t *buf) if (rc == PTL_OK) return 0; - CERROR("PtlMDAttach failed: %d\n", rc); + CERROR("PtlMDAttach failed: %s(%d)\n", + ptllnd_errtype2str(rc), rc); buf->plb_posted = 0; plni->plni_nposted_buffers--; @@ -843,8 +845,9 @@ ptllnd_check_sends(ptllnd_peer_t *peer) rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh); if (rc != PTL_OK) { - CERROR("PtlMDBind for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); + CERROR("PtlMDBind for %s failed: %s(%d)\n", + libcfs_id2str(peer->plp_id), + ptllnd_errtype2str(rc), rc); tx->tx_status = -EIO; ptllnd_tx_done(tx); break; @@ -869,8 +872,9 @@ ptllnd_check_sends(ptllnd_peer_t *peer) rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid, plni->plni_portal, 0, LNET_MSG_MATCHBITS, 0, 0); if (rc != PTL_OK) { - CERROR("PtlPut for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); + CERROR("PtlPut for %s failed: %s(%d)\n", + libcfs_id2str(peer->plp_id), + ptllnd_errtype2str(rc), rc); tx->tx_status = -EIO; ptllnd_tx_done(tx); break; @@ -950,8 +954,9 @@ ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg, rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid, matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh); if (rc != PTL_OK) { - CERROR("PtlMEAttach for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); + CERROR("PtlMEAttach for %s failed: %s(%d)\n", + libcfs_id2str(peer->plp_id), + ptllnd_errtype2str(rc), rc); rc = -EIO; goto failed; } @@ -960,8 +965,9 @@ ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg, rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh); if (rc != PTL_OK) { - CERROR("PtlMDAttach for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); + CERROR("PtlMDAttach for %s failed: %s(%d)\n", + libcfs_id2str(peer->plp_id), + ptllnd_errtype2str(rc), rc); rc2 = PtlMEUnlink(meh); LASSERT (rc2 == PTL_OK); rc = -EIO; @@ -1051,8 +1057,9 @@ ptllnd_active_rdma(ptllnd_peer_t *peer, int type, rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh); if (rc != PTL_OK) { - CERROR("PtlMDBind for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); + CERROR("PtlMDBind for %s failed: %s(%d)\n", + libcfs_id2str(peer->plp_id), + ptllnd_errtype2str(rc), rc); rc = -EIO; goto failed; } @@ -1075,8 +1082,9 @@ ptllnd_active_rdma(ptllnd_peer_t *peer, int type, if (rc == PTL_OK) return 0; - CERROR("Can't initiate RDMA with %s: %d\n", - libcfs_id2str(peer->plp_id), rc); + CERROR("Can't initiate RDMA with %s: %s(%d)\n", + libcfs_id2str(peer->plp_id), + ptllnd_errtype2str(rc), rc); tx->tx_lnetmsg = NULL; failed: -- GitLab