diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 8a0f5513e90fd9815bf4d5192c985c6177ba3882..5b458bb1849a4ca5830bc05551f952230bcefff7 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -14,6 +14,15 @@ tbd Cluster File Systems, Inc. <info@clusterfs.com> * Recommended e2fsprogs version: 1.40.2-cfs4 * Note that reiserfs quotas are disabled on SLES 10 in this kernel. +Severity : major +Bugzilla : 14260 +Frequency : rare, at shutdown +Description: access to already freed / NULL obd_namespace. +Details : if client_disconnect_export was called without the force flag set + and a connect request is still in flight, this can cause access to a + NULL pointer (or an already freed pointer) when connect_interpret + stores ocd flags in obd_namespace. + Severity : minor Bugzilla : 14418 Frequency : only at startup diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index e58752865794a41bcd7e4ae2deb7eaac19f016c7..01b886b495c36b61f6bbd2ea8d3086fdfca63d18 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -474,14 +474,16 @@ int client_disconnect_export(struct obd_export *exp) NULL); ldlm_namespace_free_prior(obd->obd_namespace); to_be_freed = obd->obd_namespace; - obd->obd_namespace = NULL; } - /* Yeah, obd_force means "forced shutdown". */ - if (!obd->obd_force) - rc = ptlrpc_disconnect_import(imp, 0); + rc = ptlrpc_disconnect_import(imp, 0); ptlrpc_invalidate_import(imp); + /* set obd_namespace to NULL only after invalidate, because we can have + * some connect requests in flight, and they need to store the connect flags + * in obd_namespace. 
bug 14260 */ + obd->obd_namespace = NULL; + ptlrpc_free_rq_pool(imp->imp_rq_pool); class_destroy_import(imp); cli->cl_import = NULL; @@ -494,7 +496,7 @@ int client_disconnect_export(struct obd_export *exp) out_sem: mutex_up(&cli->cl_sem); if (to_be_freed) - ldlm_namespace_free_post(to_be_freed, obd->obd_no_recov); + ldlm_namespace_free_post(to_be_freed, obd->obd_force); RETURN(rc); } diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 3da90dcef608047bbf70090f52aeca0ea4caa148..ff67f74b64dd3ea2eeafc144582543357caa7566 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -755,9 +755,8 @@ static int lustre_stop_mgc(struct super_block *sb) GOTO(out, rc = -EBUSY); } - /* MGC should disconnect nicely so MGS won't print eviction messages */ - obd->obd_force = (lsi->lsi_flags & LSI_UMOUNT_FORCE) != 0; - /* The MGC has no recoverable data in any case. */ + /* The MGC has no recoverable data in any case. + * forced shutdown is set in umount_begin */ obd->obd_no_recov = 1; if (obd->u.cli.cl_mgc_mgsexp) @@ -1367,12 +1366,10 @@ static void server_put_super(struct super_block *sb) obd = class_name2obd(lsi->lsi_ldd->ldd_svname); if (obd) { CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name); - if (lsi->lsi_flags & LSI_UMOUNT_FORCE) - obd->obd_force = 1; if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER) obd->obd_fail = 1; /* We can't seem to give an error return code - to .put_super, so we better make sure we clean up! */ + * to .put_super, so we better make sure we clean up! 
*/ obd->obd_force = 1; class_manual_cleanup(obd); } else { diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 6fdc64b64c930e2fb782869649cd95c72d757c77..5744e6343de6f39d4ab6213afe7ffcc9532c581c 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1141,14 +1141,13 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) if (req->rq_bulk != NULL) ptlrpc_unregister_bulk (req); - req->rq_phase = RQ_PHASE_COMPLETE; - if (req->rq_interpret_reply != NULL) { int (*interpreter)(struct ptlrpc_request *,void *,int) = req->rq_interpret_reply; req->rq_status = interpreter(req, &req->rq_async_args, req->rq_status); } + req->rq_phase = RQ_PHASE_COMPLETE; CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:" "opc %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), @@ -1317,6 +1316,7 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || (req->rq_phase == RQ_PHASE_BULK) || (req->rq_phase == RQ_PHASE_NEW))) + continue; if (req->rq_timedout) /* already timed out */ continue; diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index b7cff711002e32c8f0362c8aacb1e841eea8104b..aa6a7668c414c0708e41743169b6a19aa98b7376 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1032,8 +1032,12 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) { struct ptlrpc_request *req; int rq_opc, rc = 0; + int nowait = imp->imp_obd->obd_force; ENTRY; + if (nowait) + GOTO(set_state, rc); + switch (imp->imp_connect_op) { case OST_CONNECT: rq_opc = OST_DISCONNECT; break; case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break; @@ -1094,6 +1098,7 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) ptlrpc_req_finished(req); } +set_state: spin_lock(&imp->imp_lock); out: if (noclose)