Commit 7cc0b292 authored by Amir Shehata, committed by Oleg Drokin
Browse files

LU-12424 lnet: prevent loop in LNetPrimaryNID()



If discovery is disabled locally or at the remote end, then attempt
discovery only once. Do not update the internal database when
discovery is disabled and do not repeat discovery.

This change prevents LNet from getting hung waiting for
discovery to complete.
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: I4543b0f71e6cf297a1a5f058ebcc6bf74b8ac328
Reviewed-on: https://review.whamcloud.com/35191

Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Tested-by: Jenkins
Reviewed-by: Chris Horn <hornc@cray.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/38890

Reviewed-by: Chris Horn <chris.horn@hpe.com>
Tested-by: jenkins <devops@whamcloud.com>
parent 8a68af6d
......@@ -1077,6 +1077,35 @@ lnet_peer_primary_nid_locked(lnet_nid_t nid)
return primary_nid;
}
/*
 * Decide whether discovery should be treated as disabled for peer @lp.
 *
 * Returns true when any of the following holds:
 *   - the lnet_peer_discovery_disabled setting is non-zero,
 *   - LNET_PEER_NO_DISCOVERY is set in lp->lp_state,
 *   - LNET_PEER_MULTI_RAIL is not set in lp->lp_state.
 * Returns false otherwise.
 *
 * No locking is done here; lnet_is_discovery_disabled() calls this
 * with lp->lp_lock held.
 */
bool
lnet_is_discovery_disabled_locked(struct lnet_peer *lp)
{
	/* Discovery is switched off on this node. */
	if (lnet_peer_discovery_disabled)
		return true;

	/* The peer state is flagged with LNET_PEER_NO_DISCOVERY. */
	if (lp->lp_state & LNET_PEER_NO_DISCOVERY)
		return true;

	/* A peer not marked Multi-Rail is treated as discovery-disabled. */
	return !(lp->lp_state & LNET_PEER_MULTI_RAIL);
}
/*
* Peer Discovery
*/
/*
 * Locking wrapper around lnet_is_discovery_disabled_locked().
 *
 * Takes lp->lp_lock, evaluates whether discovery is disabled for @lp,
 * releases the lock and returns the result.
 */
bool
lnet_is_discovery_disabled(struct lnet_peer *lp)
{
	bool disabled;

	spin_lock(&lp->lp_lock);
	disabled = lnet_is_discovery_disabled_locked(lp);
	spin_unlock(&lp->lp_lock);

	return disabled;
}
lnet_nid_t
LNetPrimaryNID(lnet_nid_t nid)
{
......@@ -1093,11 +1122,16 @@ LNetPrimaryNID(lnet_nid_t nid)
goto out_unlock;
}
lp = lpni->lpni_peer_net->lpn_peer;
while (!lnet_peer_is_uptodate(lp)) {
rc = lnet_discover_peer_locked(lpni, cpt, true);
if (rc)
goto out_decref;
lp = lpni->lpni_peer_net->lpn_peer;
/* Only try once if discovery is disabled */
if (lnet_is_discovery_disabled(lp))
break;
}
primary_nid = lp->lp_primary_nid;
out_decref:
......@@ -1700,10 +1734,6 @@ out_mutex_unlock:
return lpni;
}
/*
* Peer Discovery
*/
/*
* Is a peer uptodate from the point of view of discovery?
*
......@@ -2036,6 +2066,7 @@ again:
if (lnet_peer_is_uptodate(lp))
break;
lnet_peer_queue_for_discovery(lp);
/*
* if caller requested a non-blocking operation then
* return immediately. Once discovery is complete then the
......@@ -2053,6 +2084,16 @@ again:
lnet_peer_decref_locked(lp);
/* Peer may have changed */
lp = lpni->lpni_peer_net->lpn_peer;
/*
* Wait for discovery to complete, but don't repeat if
* discovery is disabled. This is done to ensure we can
* use discovery as a standard ping as well for backwards
* compatibility with routers which do not have discovery
* or have discovery disabled
*/
if (lnet_is_discovery_disabled(lp))
break;
}
finish_wait(&lp->lp_dc_waitq, &wait);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment