diff --git a/lustre/ChangeLog b/lustre/ChangeLog index f8df57820d5b2187874b9259388cac62b2938a6a..c89792fa526af0fdc8dae93a9fc8d96ea4070bf4 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -26,6 +26,8 @@ tbd Cluster File Systems, Inc. <info@clusterfs.com> - fix timeouts when evicting a client with a single lock held (2642) - set deadline for the initial HELLO message to drain (2634) - print out dotted-quad IP addresses in the socknal (2302) + * miscellanea + - additional debugging for MDS client eviction problem (2443) 2004-01-27 Cluster File Systems, Inc. <info@clusterfs.com> * version 1.0.3 diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 7d6f9eee02c1d2d2d13c7b76b2d22853ecb06cfe..bb8900edcea0f29cb1d68edc298683ad1452f644 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -295,7 +295,7 @@ struct ptlrpc_request { do { \ CDEBUG(level, "@@@ " fmt \ " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " \ - REQ_FLAGS_FMT"/%x/%x rc %x\n" , ## args, req, req->rq_xid, \ + REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid, \ req->rq_transno, \ req->rq_reqmsg ? req->rq_reqmsg->opc : -1, \ req->rq_import ? (char *)req->rq_import->imp_target_uuid.uuid : "<?>", \ @@ -308,7 +308,7 @@ CDEBUG(level, "@@@ " fmt \ DEBUG_REQ_FLAGS(req), \ req->rq_reqmsg ? req->rq_reqmsg->flags : 0, \ req->rq_repmsg ? req->rq_repmsg->flags : 0, \ - req->rq_status); \ + req->rq_status, req->rq_repmsg ? 
req->rq_repmsg->status : 0); \ } while (0) struct ptlrpc_bulk_page { diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 877018aca9287b5fef270f3807aff773a799f413..64dfb52a008dfbefa25d2235f85ab585082898be 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -49,7 +49,7 @@ extern int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req); static DECLARE_MUTEX(ldlm_ref_sem); static int ldlm_refcount = 0; -/* LDLM state */ +/* LDLM state */ static struct ldlm_state *ldlm ; @@ -125,6 +125,7 @@ static int expired_lock_main(void *arg) wake_up(&expired_lock_thread.elt_waitq); while (1) { + struct list_head *tmp, *n, work_list; l_wait_event(expired_lock_thread.elt_waitq, have_expired_locks() || expired_lock_thread.elt_state == ELT_TERMINATE, @@ -132,12 +133,32 @@ static int expired_lock_main(void *arg) spin_lock_bh(&expired_lock_thread.elt_lock); while (!list_empty(expired)) { - struct ldlm_lock *lock = list_entry(expired->next, - struct ldlm_lock, - l_pending_chain); + struct ldlm_lock *lock; + + list_add(&work_list, expired); + list_del_init(expired); + + list_for_each_entry(lock, &work_list, l_pending_chain) { + LDLM_DEBUG(lock, "moving to work list"); + } + spin_unlock_bh(&expired_lock_thread.elt_lock); - ptlrpc_fail_export(lock->l_export); + + list_for_each_safe(tmp, n, &work_list) { + lock = list_entry(tmp, struct ldlm_lock, + l_pending_chain); + ptlrpc_fail_export(lock->l_export); + } + + + if (!list_empty(&work_list)) { + list_for_each_entry(lock, &work_list, l_pending_chain) { + LDLM_ERROR(lock, "still on work list!"); + } + } + LASSERTF (list_empty(&work_list), + "some exports not failed properly\n"); spin_lock_bh(&expired_lock_thread.elt_lock); } @@ -1125,7 +1146,7 @@ static int ldlm_setup(void) ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - ldlm_callback_handler, "ldlm_cbd", + ldlm_callback_handler, "ldlm_cbd", ldlm_svc_proc_dir); if 
(!ldlm->ldlm_cb_service) { @@ -1137,7 +1158,7 @@ static int ldlm_setup(void) ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL, LDLM_CANCEL_REPLY_PORTAL, - ldlm_cancel_handler, "ldlm_canceld", + ldlm_cancel_handler, "ldlm_canceld", ldlm_svc_proc_dir); if (!ldlm->ldlm_cancel_service) { diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index ae9b202570613a6dbfd3de76e275451738ac8fbb..a8f006af78193088dd2de134a842c9a1bff45340 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -53,10 +53,12 @@ int ldlm_expired_completion_wait(void *data) "server code, just going back to sleep"); if (time_after(jiffies, next_dump)) { unsigned int debug = portal_debug; - next_dump = jiffies + 300 * HZ; portal_debug |= D_OTHER; ldlm_namespace_dump(lock->l_resource->lr_namespace); portal_debug = debug; + if (next_dump == 0) + portals_debug_dumplog(); + next_dump = jiffies + 300 * HZ; } RETURN(0); } @@ -440,7 +442,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) LDLM_DEBUG(lock, "client-side convert"); - req = ptlrpc_prep_req(class_exp2cliimp(lock->l_conn_export), + req = ptlrpc_prep_req(class_exp2cliimp(lock->l_conn_export), LDLM_CONVERT, 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -834,10 +836,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, } /* non-blocking function to manipulate a lock whose cb_data is being put away.*/ -void ldlm_change_cbdata(struct ldlm_namespace *ns, - struct ldlm_res_id *res_id, - ldlm_iterator_t iter, - void *data) +void ldlm_change_cbdata(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, + ldlm_iterator_t iter, void *data) { struct ldlm_resource *res; ENTRY;