diff --git a/lustre/ptlrpc/gss/gss_cli_upcall.c b/lustre/ptlrpc/gss/gss_cli_upcall.c
index 9a55329a7dd6f9a5191376440b88edb38452a0a4..fed7c482f0cd389a9ff67505b1de6ea010deb8c7 100644
--- a/lustre/ptlrpc/gss/gss_cli_upcall.c
+++ b/lustre/ptlrpc/gss/gss_cli_upcall.c
@@ -329,6 +329,8 @@ int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx)
         int                      rc;
         ENTRY;
 
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
         if (cli_ctx_is_error(ctx) || !cli_ctx_is_uptodate(ctx)) {
                 CDEBUG(D_SEC, "ctx %p(%u->%s) not uptodate, "
                        "don't send destroy rpc\n", ctx,
@@ -343,9 +345,6 @@ int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx)
                "server finishing reverse" : "client finishing forward",
                ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
 
-        /* context's refcount could be 0, steal one */
-        atomic_inc(&ctx->cc_refcount);
-
         gctx->gc_proc = PTLRPC_GSS_PROC_DESTROY;
 
         req = ptlrpc_prep_req_pool(imp, LUSTRE_OBD_VERSION, SEC_CTX_FINI,
@@ -353,7 +352,7 @@ int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx)
         if (!req) {
                 CWARN("ctx %p(%u): fail to prepare rpc, destroy locally\n",
                       ctx, ctx->cc_vcred.vc_uid);
-                GOTO(out_ref, rc = -ENOMEM);
+                GOTO(out, rc = -ENOMEM);
         }
 
         /* fix the user desc */
@@ -377,8 +376,7 @@ int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx)
         }
 
         ptlrpc_req_finished(req);
-out_ref:
-        atomic_dec(&ctx->cc_refcount);
+out:
         RETURN(rc);
 }
 
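A minimal userspace sketch (not Lustre code) of the contract change in the hunks above: gss_do_ctx_fini_rpc() no longer takes a defensive reference of its own ("steal one"); it now asserts that the caller already holds one for the duration of the call. The toy_ctx type and C11 atomics below are illustrative stand-ins for ptlrpc_cli_ctx and the kernel's atomic_t.

#include <assert.h>
#include <stdatomic.h>

struct toy_ctx {
        atomic_int refcount;
};

/* Old behaviour: the callee protected itself against a zero refcount. */
static void toy_fini_rpc_old(struct toy_ctx *ctx)
{
        atomic_fetch_add(&ctx->refcount, 1);    /* "steal one" */
        /* ... send the destroy RPC ... */
        atomic_fetch_sub(&ctx->refcount, 1);
}

/* New behaviour: the caller must guarantee a live reference throughout. */
static void toy_fini_rpc_new(struct toy_ctx *ctx)
{
        assert(atomic_load(&ctx->refcount) > 0);
        /* ... send the destroy RPC ... */
}

int main(void)
{
        struct toy_ctx ctx = { .refcount = 1 };

        toy_fini_rpc_old(&ctx);
        toy_fini_rpc_new(&ctx);
        return 0;
}
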
diff --git a/lustre/ptlrpc/gss/gss_keyring.c b/lustre/ptlrpc/gss/gss_keyring.c
index 02c45fefdfe5d2d4a2c5b399fe25926e0862668b..ca5d5292142a8ae5d58d66e15240c68befc9671c 100644
--- a/lustre/ptlrpc/gss/gss_keyring.c
+++ b/lustre/ptlrpc/gss/gss_keyring.c
@@ -226,10 +226,12 @@ static void ctx_destroy_kr(struct ptlrpc_cli_ctx *ctx)
         LASSERT(gctx_kr->gck_timer == NULL);
 
         rc = gss_cli_ctx_fini_common(sec, ctx);
+        if (rc < 0)
+                return;
 
         OBD_FREE_PTR(gctx_kr);
 
-        if (rc) {
+        if (rc > 0) {
                 CWARN("released the last ctx, proceed to destroy sec %s@%p\n",
                       sec->ps_policy->sp_name, sec);
                 sptlrpc_sec_destroy(sec);
diff --git a/lustre/ptlrpc/gss/gss_pipefs.c b/lustre/ptlrpc/gss/gss_pipefs.c
index 8cc7aeab9999ec6af0ffa45865f8e1292537e5c5..d0a3456445067ebf1daaea6f74bc9b83a3a37a2d 100644
--- a/lustre/ptlrpc/gss/gss_pipefs.c
+++ b/lustre/ptlrpc/gss/gss_pipefs.c
@@ -123,9 +123,12 @@ void ctx_destroy_pf(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx)
         int                 rc;
 
         rc = gss_cli_ctx_fini_common(sec, ctx);
+        if (rc < 0)
+                return;
+
         OBD_FREE_PTR(gctx);
 
-        if (rc) {
+        if (rc > 0) {
                 CWARN("released the last ctx, proceed to destroy sec %s@%p\n",
                       sec->ps_policy->sp_name, sec);
                 sptlrpc_sec_destroy(sec);
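The two hunks above change the callers to interpret the new tri-state return of gss_cli_ctx_fini_common(): a negative return means another reference holder will finish the teardown, zero means free only the ctx, and a positive return means the sec's busy count reached zero so the sec is destroyed as well. A hedged sketch of that caller-side handling, using toy types (toy_sec, toy_ctx, toy_fini_common are illustrative, not the Lustre API):

#include <stdio.h>
#include <stdlib.h>

struct toy_sec { int busy; };
struct toy_ctx { struct toy_sec *sec; };

/* Placeholder for gss_cli_ctx_fini_common(): returns -1, 0 or 1. */
static int toy_fini_common(struct toy_ctx *ctx)
{
        return --ctx->sec->busy == 0 ? 1 : 0;
}

static void toy_ctx_destroy(struct toy_ctx *ctx)
{
        struct toy_sec *sec = ctx->sec;
        int rc = toy_fini_common(ctx);

        if (rc < 0)             /* someone else finishes the destroy */
                return;

        free(ctx);              /* OBD_FREE_PTR(gctx) analogue */

        if (rc > 0) {           /* last ctx: tear down the sec too */
                printf("destroying sec %p\n", (void *)sec);
                free(sec);
        }
}

int main(void)
{
        struct toy_sec *sec = calloc(1, sizeof(*sec));
        struct toy_ctx *ctx = calloc(1, sizeof(*ctx));

        if (!sec || !ctx)
                return 1;
        sec->busy = 1;
        ctx->sec = sec;
        toy_ctx_destroy(ctx);
        return 0;
}
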
diff --git a/lustre/ptlrpc/gss/sec_gss.c b/lustre/ptlrpc/gss/sec_gss.c
index 11d2478a2a1288bcfffffc7e6cd34c264ffcc187..1042f324b4cacca30b7024fcec6e930bdc437aaf 100644
--- a/lustre/ptlrpc/gss/sec_gss.c
+++ b/lustre/ptlrpc/gss/sec_gss.c
@@ -415,8 +415,10 @@ void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx)
 static
 void gss_cli_ctx_finalize(struct gss_cli_ctx *gctx)
 {
-        if (gctx->gc_mechctx)
+        if (gctx->gc_mechctx) {
                 lgss_delete_sec_context(&gctx->gc_mechctx);
+                gctx->gc_mechctx = NULL;
+        }
 
         rawobj_free(&gctx->gc_handle);
 }
@@ -1116,8 +1118,10 @@ int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
 }
 
 /*
- * return 1 if the busy count of the sec dropped to zero, then usually caller
- * should destroy the sec too; otherwise return 0.
+ * return:
+ *  -1: destruction will be finished by another reference holder
+ *   0: proceed to destroy the ctx only
+ *   1: busy count dropped to 0, proceed to destroy both ctx and sec
  */
 int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
                             struct ptlrpc_cli_ctx *ctx)
@@ -1129,8 +1133,17 @@ int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
         LASSERT(atomic_read(&sec->ps_busy) > 0);
 
         if (gctx->gc_mechctx) {
+                /* The final context fini RPC uses this ctx as well, and it
+                 * completes asynchronously in request_out_callback(). Take
+                 * an extra reference here; whoever drops the refcount to 0
+                 * is responsible for the rest of the destruction. */
+                atomic_inc(&ctx->cc_refcount);
+
                 gss_do_ctx_fini_rpc(gctx);
                 gss_cli_ctx_finalize(gctx);
+
+                if (!atomic_dec_and_test(&ctx->cc_refcount))
+                        return -1;
         }
 
         if (sec_is_reverse(sec))
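
The refcount handoff added to gss_cli_ctx_fini_common() follows the usual pin-across-async-completion pattern: take a reference before starting the asynchronous destroy RPC, have both the fini path and the completion callback drop one, and let whichever side reaches zero do the final teardown. Below is a hedged userspace sketch of that generic pattern; toy_ctx, toy_ctx_put() and toy_rpc_out_callback() are illustrative stand-ins for ptlrpc_cli_ctx, the kernel atomic helpers and request_out_callback(), not the actual Lustre code.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_ctx {
        atomic_int refcount;
};

static void toy_ctx_free(struct toy_ctx *ctx)
{
        printf("final teardown of ctx %p\n", (void *)ctx);
        free(ctx);
}

/* Drop one reference; whoever drops the last one does the teardown. */
static void toy_ctx_put(struct toy_ctx *ctx)
{
        if (atomic_fetch_sub(&ctx->refcount, 1) == 1)
                toy_ctx_free(ctx);
}

/* Stand-in for queueing the asynchronous destroy RPC: the in-flight
 * request keeps its own reference on the ctx until it completes. */
static void toy_start_async_destroy(struct toy_ctx *ctx)
{
        atomic_fetch_add(&ctx->refcount, 1);
        /* ... hand the request to the network layer here ... */
}

/* Stand-in for request_out_callback(): runs when the RPC goes out,
 * possibly long after the fini path has returned. */
static void toy_rpc_out_callback(struct toy_ctx *ctx)
{
        toy_ctx_put(ctx);
}

int main(void)
{
        struct toy_ctx *ctx = calloc(1, sizeof(*ctx));

        if (!ctx)
                return 1;
        atomic_store(&ctx->refcount, 1);        /* destroyer's reference */
        toy_start_async_destroy(ctx);
        toy_ctx_put(ctx);               /* fini path drops its reference */
        toy_rpc_out_callback(ctx);      /* later: the RPC completes */
        return 0;
}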