From 2b8a823c4ab0ad089aefd903abc6c702b52bd420 Mon Sep 17 00:00:00 2001 From: shadow <shadow> Date: Thu, 21 Aug 2008 06:08:58 +0000 Subject: [PATCH] avoid I/O failures after umount during fail back Branch b1_6 b=16611 i=green i=rread --- lustre/ChangeLog | 8 ++++++++ lustre/ptlrpc/import.c | 33 ++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 32141756be..b29653bf2f 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -38,6 +38,14 @@ tbd Sun Microsystems, Inc. * Output of lfs quota has been made less detailed by default, old (verbose) output can be obtained by using -v option. +Severity : normal +Bugzilla : 16611 +Frequency : on recovery +Description: I/O failures after umount during fail back +Details : if a client reconnects to a restarted server, it needs to join + recovery instead of noticing that the server handle has changed + and evicting itself, cancelling all of its locks. + Severity : enhancement Bugzilla : 16633 Description: Update to RHEL5 kernel-2.6.18-92.1.10.el5. diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 0986cc11e7..5d7252d781 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -705,19 +705,34 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, if (memcmp(&imp->imp_remote_handle, lustre_msg_get_handle(request->rq_repmsg), sizeof(imp->imp_remote_handle))) { + int level = msg_flags & MSG_CONNECT_RECOVERING ? D_HA : + D_WARNING; + + /* Bug 16611/14775: if the server handle has changed, + * that means some sort of disconnection happened. + * If the server is not in recovery, that also means it + * already erased all of our state because of a previous + * eviction. 
If it is in recovery - we are safe to + * participate since we can reestablish all of our state + * with server again */ + CDEBUG(level,"%s@%s changed server handle from " + LPX64" to "LPX64"%s \n" "but is still in recovery \n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_remote_handle.cookie, + lustre_msg_get_handle(request->rq_repmsg)-> + cookie, + (MSG_CONNECT_RECOVERING & msg_flags) ? + "but is still in recovery" : ""); - CWARN("%s@%s changed server handle from " - LPX64" to "LPX64" - evicting.\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_remote_handle.cookie, - lustre_msg_get_handle(request->rq_repmsg)-> - cookie); imp->imp_remote_handle = *lustre_msg_get_handle(request->rq_repmsg); - IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); - GOTO(finish, rc = 0); + if (!(MSG_CONNECT_RECOVERING & msg_flags)) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); + GOTO(finish, rc = 0); + } + } else { CDEBUG(D_HA, "reconnected to %s@%s after partition\n", obd2cli_tgt(imp->imp_obd), -- GitLab