diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 7db70b298f2f713d0ae38eafc444e3a68d21ec4d..2ec269325008b79f5fb9f6fb58955cf33ee723a3 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -198,7 +198,8 @@ struct ldlm_ast_work { /* Per-export ldlm state. */ struct ldlm_export_data { - struct list_head led_held_locks; + struct list_head led_held_locks; + struct ptlrpc_client led_client; /* cached client for locks */ }; static inline struct ldlm_extent *ldlm_res2extent(struct ldlm_resource *res) diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/linux/lustre_ha.h index 31abdda57954eaa66f2dea6ac33f2870a39ae76d..4f0fa0caf7bb9aaf682be01341c93b122dc5fde0 100644 --- a/lustre/include/linux/lustre_ha.h +++ b/lustre/include/linux/lustre_ha.h @@ -5,22 +5,19 @@ #ifndef _LUSTRE_HA_H #define _LUSTRE_HA_H -#include <linux/lustre_net.h> - #define LUSTRE_HA_NAME "ptlrpc" -extern struct recovd_obd *ptlrpc_connmgr; - -struct connmgr_thread { - struct recovd_obd *mgr; - char *name; +struct recovd_data { + struct list_head rd_managed_chain; + int (*rd_recover)(struct recovd_data *); }; -int connmgr_connect(struct recovd_obd *mgr, struct ptlrpc_connection *conn); -int connmgr_handle(struct ptlrpc_request *req); -void recovd_cli_fail(struct ptlrpc_client *cli); -void recovd_cli_manage(struct recovd_obd *mgr, struct ptlrpc_client *cli); -void recovd_cli_fixed(struct ptlrpc_client *cli); +struct recovd_obd; +struct ptlrpc_connection; + +void recovd_conn_fail(struct ptlrpc_connection *conn); +void recovd_conn_manage(struct recovd_obd *mgr, struct ptlrpc_connection *conn); +void recovd_conn_fixed(struct ptlrpc_connection *conn); int recovd_setup(struct recovd_obd *mgr); int recovd_cleanup(struct recovd_obd *mgr); diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 6fecf2378f5dfc21adba3c51ea50c54cc0f03e55..ac0cf686b7ba7f31e5079ef2524f567691bfe59e 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -183,7 +183,7 @@ int mdc_rename(struct lustre_handle *conn, int mdc_create_client(char *uuid, struct ptlrpc_client *cl); extern int mds_client_add(struct mds_export_data *med, int cl_off); -extern int mds_client_free(struct mds_export_data *med); +extern int mds_client_free(struct obd_export *exp); /* mds/mds_fs.c */ struct mds_fs_operations { diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 8e43c9226b2b57493be03f49bf81102743eee8ed..5cdddeec37087c94549c1f0614c55a9fd9906b75 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -28,52 +28,59 @@ #include <linux/obd.h> #include <portals/p30.h> #include <linux/lustre_idl.h> +#include <linux/lustre_ha.h> /* default rpc ring length */ #define RPC_RING_LENGTH 10 struct ptlrpc_connection { - struct list_head c_link; - struct lustre_peer c_peer; - __u8 c_local_uuid[37]; /* XXX do we need this? */ - __u8 c_remote_uuid[37]; - - int c_level; - __u32 c_generation; /* changes upon new connection */ - __u32 c_epoch; /* changes when peer changes */ - __u32 c_bootcount; /* peer's boot count */ - - spinlock_t c_lock; - __u32 c_xid_in; - __u32 c_xid_out; - - atomic_t c_refcount; - __u64 c_token; - __u64 c_remote_conn; - __u64 c_remote_token; + struct list_head c_link; + struct lustre_peer c_peer; + __u8 c_local_uuid[37]; /* XXX do we need this? */ + __u8 c_remote_uuid[37]; + + int c_level; + __u32 c_generation; /* changes upon new connection */ + __u32 c_epoch; /* changes when peer changes */ + __u32 c_bootcount; /* peer's boot count */ + + spinlock_t c_lock; + __u32 c_xid_in; + __u32 c_xid_out; + + atomic_t c_refcount; + __u64 c_token; + __u64 c_remote_conn; + __u64 c_remote_token; + + __u64 c_last_xid; + __u64 c_last_committed; + struct list_head c_delayed_head; /* delayed until post-recovery */ + struct list_head c_sending_head; + struct list_head c_dying_head; + struct recovd_data c_recovd_data; + + struct list_head c_clients; /* XXXshaver will be c_imports */ + struct list_head c_exports; + + /* should this be in recovd_data? */ + struct recovd_obd *c_recovd; }; struct ptlrpc_client { - struct obd_device *cli_obd; - __u32 cli_request_portal; - __u32 cli_reply_portal; - - __u64 cli_last_xid; - __u64 cli_last_committed; - __u32 cli_target_devno; - - void *cli_data; - struct semaphore cli_rpc_sem; /* limits outstanding requests */ - - spinlock_t cli_lock; /* protects lists */ - struct list_head cli_delayed_head; /* delayed until after recovery */ - struct list_head cli_sending_head; - struct list_head cli_dying_head; - struct list_head cli_ha_item; - int (*cli_recover)(struct ptlrpc_client *); - - struct recovd_obd *cli_recovd; - char *cli_name; + struct obd_device *cli_obd; + __u32 cli_request_portal; + __u32 cli_reply_portal; + + __u32 cli_target_devno; + + struct ptlrpc_connection *cli_connection; + + void *cli_data; + struct semaphore cli_rpc_sem; /* limits outstanding requests */ + + struct list_head cli_client_chain; + char *cli_name; }; /* state flags of requests */ @@ -240,10 +247,8 @@ int ptl_send_rpc(struct ptlrpc_request *request); void ptlrpc_link_svc_me(struct ptlrpc_service *service, int i); /* rpc/client.c */ -void ptlrpc_init_client(struct recovd_obd *, - int (*recover)(struct ptlrpc_client *), - int req_portal, int rep_portal, - struct ptlrpc_client *); +void ptlrpc_init_client(int req_portal, int rep_portal, struct ptlrpc_client *, + struct ptlrpc_connection *); void ptlrpc_cleanup_client(struct ptlrpc_client *cli); __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req); struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid); @@ -253,8 +258,7 @@ void ptlrpc_continue_req(struct ptlrpc_request *req); int ptlrpc_replay_req(struct ptlrpc_request *req); void ptlrpc_restart_req(struct ptlrpc_request *req); -struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, - struct ptlrpc_connection *u, int opcode, +struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, int opcode, int count, int *lengths, char **bufs); void ptlrpc_free_req(struct ptlrpc_request *request); void ptlrpc_req_finished(struct ptlrpc_request *request); diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 73d00457780e98982e8b2a13e3d0df113e60c81f..eccf74f76e5d6059498e5898d1750ae6b04dfdfb 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -151,13 +151,11 @@ struct echo_obd { struct recovd_obd { time_t recovd_waketime; time_t recovd_timeout; - struct ptlrpc_service *recovd_service; - struct ptlrpc_client *recovd_client; - __u32 recovd_flags; - __u32 recovd_wakeup_flag; - spinlock_t recovd_lock; - struct list_head recovd_clients_lh; /* clients managed */ - struct list_head recovd_troubled_lh; /* clients in trouble */ + __u32 recovd_flags; + __u32 recovd_wakeup_flag; + spinlock_t recovd_lock; + struct list_head recovd_managed_items; /* items managed */ + struct list_head recovd_troubled_items; /* items in trouble */ wait_queue_head_t recovd_recovery_waitq; wait_queue_head_t recovd_ctl_waitq; wait_queue_head_t recovd_waitq; diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 1ad154bdb9f1805fd9a28ef793207d05e4bb6825..5ae5755f13dfa0bab4384430d553823d94c8ac09 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -64,11 +64,6 @@ extern void proc_lustre_remove_obd_entry(const char* name, */ #ifdef __KERNEL__ -extern struct obd_export *class_conn2export(struct lustre_handle *conn); -extern struct obd_device *class_conn2obd(struct lustre_handle *conn); -extern int class_rconn2export(struct lustre_handle *conn, - struct lustre_handle *rconn); - struct obd_export { __u64 exp_cookie; struct lustre_handle exp_rconnh; /* remote connection handle */ @@ -87,6 +82,11 @@ struct obd_export { uuid_t exp_uuid; }; +extern struct obd_export *class_conn2export(struct lustre_handle *conn); +extern struct obd_device *class_conn2obd(struct lustre_handle *conn); +extern int class_rconn2export(struct lustre_handle *conn, + struct lustre_handle *rconn); + struct obd_import { __u64 imp_cookie; struct lustre_handle imp_expconnh; @@ -711,6 +711,7 @@ int class_name2dev(char *name); int class_uuid2dev(char *name); struct obd_device *class_uuid2obd(char *name); struct obd_export *class_new_export(struct obd_device *obddev); +void class_destroy_export(struct obd_export *exp); int class_connect(struct lustre_handle *conn, struct obd_device *obd, char *cluuid); int class_disconnect(struct lustre_handle *conn); @@ -721,7 +722,12 @@ struct obd_export *class_conn2export(struct lustre_handle *); int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data); int class_multi_cleanup(struct obd_device *obddev); -extern void (*class_signal_client_failure)(struct ptlrpc_client *); +extern void (*class_signal_connection_failure)(struct ptlrpc_connection *); + +/* == mds_client_free if MDS running here */ +extern int (*mds_destroy_export)(struct obd_export *exp); +/* == ldlm_client_free if(?) DLM running here */ +extern int (*ldlm_destroy_export)(struct obd_export *exp); #endif diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 7d4be4e3e99973f5ff6f0b8b18f2128612ae07d1..22d74709cdd827c1abcc99e53af33edf1291855b 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -119,12 +119,10 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock, { struct ldlm_request *body; struct ptlrpc_request *req; - struct ptlrpc_client *cl; int rc = 0, size = sizeof(*body); ENTRY; - cl = &lock->l_resource->lr_namespace->ns_rpc_client; - req = ptlrpc_prep_req(cl, lock->l_export->exp_connection, + req = ptlrpc_prep_req(&lock->l_export->exp_ldlm_data.led_client, LDLM_BL_CALLBACK, 1, &size, NULL); if (!req) RETURN(-ENOMEM); @@ -149,7 +147,6 @@ static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags) { struct ldlm_request *body; struct ptlrpc_request *req; - struct ptlrpc_client *cl; int rc = 0, size = sizeof(*body); ENTRY; @@ -158,8 +155,7 @@ static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags) RETURN(-EINVAL); } - cl = &lock->l_resource->lr_namespace->ns_rpc_client; - req = ptlrpc_prep_req(cl, lock->l_export->exp_connection, + req = ptlrpc_prep_req(&lock->l_export->exp_ldlm_data.led_client, LDLM_CP_CALLBACK, 1, &size, NULL); if (!req) RETURN(-ENOMEM); @@ -517,13 +513,13 @@ static int ldlm_iocontrol(long cmd, struct lustre_handle *conn, int len, OBD_ALLOC(obddev->u.ldlm.ldlm_client, sizeof(*obddev->u.ldlm.ldlm_client)); - ptlrpc_init_client(NULL, NULL, - LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, - obddev->u.ldlm.ldlm_client); connection = ptlrpc_uuid_to_connection("ldlm"); if (!connection) CERROR("No LDLM UUID found: assuming ldlm is local.\n"); + ptlrpc_init_client(LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, + obddev->u.ldlm.ldlm_client, connection); + switch (cmd) { case IOC_LDLM_TEST: err = ldlm_test(obddev, conn); @@ -579,7 +575,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf) waiting_locks_timer.function = waiting_locks_callback; waiting_locks_timer.data = 0; init_timer(&waiting_locks_timer); - + RETURN(0); out_thread: diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 3e055149e0773f9a21bb50757fd434fa5592b1d1..a198bf3979d2d8228eb88e2730bd171326cc3a2a 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -12,6 +12,7 @@ #define DEBUG_SUBSYSTEM S_LDLM #include <linux/lustre_dlm.h> +#include <linux/obd_class.h> kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab; @@ -64,9 +65,6 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) } strcpy(ns->ns_name, name); - ptlrpc_init_client(NULL, NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, - &ns->ns_rpc_client); - INIT_LIST_HEAD(&ns->ns_root_list); l_lock_init(&ns->ns_lock); ns->ns_refcount = 0; @@ -170,13 +168,19 @@ int ldlm_namespace_free(struct ldlm_namespace *ns) vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */); obd_memory -= sizeof(*ns->ns_hash) * RES_HASH_SIZE; - ptlrpc_cleanup_client(&ns->ns_rpc_client); OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1); OBD_FREE(ns, sizeof(*ns)); return ELDLM_OK; } +int ldlm_client_free(struct obd_export *exp) +{ + struct ldlm_export_data *led = &exp->exp_ldlm_data; + ptlrpc_cleanup_client(&led->led_client); + RETURN(0); +} + static __u32 ldlm_hash_fn(struct ldlm_resource *parent, __u64 *name) { __u32 hash = 0; diff --git a/lustre/lib/l_net.c b/lustre/lib/l_net.c index 10041bbb9a4f14829b2ce40421362f16567e482e..8a1d0ab5465b1654f4b93c9a221435c1d6e09922 100644 --- a/lustre/lib/l_net.c +++ b/lustre/lib/l_net.c @@ -45,7 +45,6 @@ struct client_obd *client_conn2cli(struct lustre_handle *conn) LBUG(); return &export->exp_obd->u.cli; } -extern struct recovd_obd *ptlrpc_connmgr; int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) { @@ -95,14 +94,9 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) if (mdc->cl_ldlm_client == NULL) GOTO(out_client, rc = -ENOMEM); - /* XXX get recovery hooked in here again */ - //ptlrpc_init_client(ptlrpc_connmgr, ll_recover,... - - ptlrpc_init_client(ptlrpc_connmgr, NULL, rq_portal, rp_portal, - mdc->cl_client); - /* XXXshaver Should the LDLM have its own recover function? Probably. */ - ptlrpc_init_client(ptlrpc_connmgr, NULL, LDLM_REQUEST_PORTAL, - LDLM_REPLY_PORTAL, mdc->cl_ldlm_client); + ptlrpc_init_client(rq_portal, rp_portal, mdc->cl_client, mdc->cl_conn); + ptlrpc_init_client(LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, + mdc->cl_ldlm_client, mdc->cl_conn); mdc->cl_client->cli_name = "mdc"; mdc->cl_ldlm_client->cli_name = "ldlm"; mdc->cl_max_mdsize = sizeof(struct lov_mds_md); @@ -159,8 +153,7 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd, if (obd->obd_namespace == NULL) GOTO(out_disco, rc = -ENOMEM); - request = ptlrpc_prep_req(cli->cl_client, cli->cl_conn, rq_opc, 2, size, - tmp); + request = ptlrpc_prep_req(cli->cl_client, rq_opc, 2, size, tmp); if (!request) GOTO(out_ldlm, rc = -ENOMEM); @@ -291,6 +284,10 @@ int target_handle_connect(struct ptlrpc_request *req) req->rq_export = export; export->exp_connection = req->rq_connection; + ptlrpc_init_client(LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, + &export->exp_ldlm_data.led_client, + export->exp_connection); + #warning Peter: is this the right place to upgrade the server connection level? req->rq_connection->c_level = LUSTRE_CONN_FULL; out: diff --git a/lustre/lib/page.c b/lustre/lib/page.c index 7e9968b04d6e69d95d4d4891391e3c76acd3d2f9..51345080172a804050e0c986632c98cf2e87f3ed 100644 --- a/lustre/lib/page.c +++ b/lustre/lib/page.c @@ -58,13 +58,13 @@ static int sync_io_timeout(void *data) ENTRY; desc->b_connection->c_level = LUSTRE_CONN_RECOVD; desc->b_flags |= PTL_RPC_FL_TIMEOUT; - if (desc->b_client && desc->b_client->cli_recovd && - class_signal_client_failure) { + if (desc->b_connection && desc->b_connection->c_recovd && + class_signal_connection_failure) { /* XXXshaver Do we need a resend strategy, or do we just * XXXshaver return -ERESTARTSYS and punt it? */ - CERROR("signalling failure of client %p\n", desc->b_client); - class_signal_client_failure(desc->b_client); + CERROR("signalling failure of conn %p\n", desc->b_connection); + class_signal_connection_failure(desc->b_connection); /* We go back to sleep, until we're resumed or interrupted. */ RETURN(0); diff --git a/lustre/llite/recover.c b/lustre/llite/recover.c index e282daf29ea7ea23c81a9319d1ea1d1a86f7ae52..cdc9488282b0c3a32667a8526561f960fe1b4698 100644 --- a/lustre/llite/recover.c +++ b/lustre/llite/recover.c @@ -22,7 +22,6 @@ #include <linux/lustre_lite.h> #include <linux/lustre_ha.h> - static int ll_reconnect(struct ll_sb_info *sbi) { struct ll_fid rootfid; @@ -33,24 +32,16 @@ static int ll_reconnect(struct ll_sb_info *sbi) ptlrpc_readdress_connection(sbi2mdc(sbi)->cl_conn, "mds"); - err = connmgr_connect(ptlrpc_connmgr, sbi2mdc(sbi)->cl_conn); - if (err) { - CERROR("cannot connect to MDS: rc = %d\n", err); - ptlrpc_put_connection(sbi2mdc(sbi)->cl_conn); - GOTO(out_disc, err = -ENOTCONN); - } sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_CON; /* XXX: need to store the last_* values somewhere */ - err = mdc_getstatus(&sbi->ll_mdc_conn, - &rootfid, &last_committed, - &last_xid, - &request); + err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed, + &last_xid, &request); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); GOTO(out_disc, err = -ENOTCONN); } - sbi2mdc(sbi)->cl_client->cli_last_xid = last_xid; + sbi2mdc(sbi)->cl_conn->c_last_xid = last_xid; sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_RECOVD; out_disc: @@ -59,9 +50,14 @@ static int ll_reconnect(struct ll_sb_info *sbi) int ll_recover(struct ptlrpc_client *cli) { + RETURN(-ENOSYS); +#if 0 + /* XXXshaver this code needs to know about connection-driven recovery! */ + struct ptlrpc_request *req; struct list_head *tmp, *pos; struct ll_sb_info *sbi = cli->cli_data; + struct ptlrpc_connection *conn = cli->cli_connection; int rc = 0; ENTRY; @@ -69,17 +65,17 @@ int ll_recover(struct ptlrpc_client *cli) ll_reconnect(sbi); /* 2. walk the request list */ - spin_lock(&cli->cli_lock); - list_for_each_safe(tmp, pos, &cli->cli_sending_head) { + spin_lock(&conn->c_lock); + list_for_each_safe(tmp, pos, &conn->c_sending_head) { req = list_entry(tmp, struct ptlrpc_request, rq_list); /* replay what needs to be replayed */ if (req->rq_flags & PTL_RPC_FL_REPLAY) { - CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n", - req->rq_xid, cli->cli_last_xid); + CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n", + req->rq_xid, conn->c_last_xid); rc = ptlrpc_replay_req(req); if (rc) { - CERROR("recovery replay error %d for request %Ld\n", + CERROR("recovery replay error %d for req %Ld\n", rc, req->rq_xid); GOTO(out, rc); } @@ -87,20 +83,21 @@ int ll_recover(struct ptlrpc_client *cli) /* server has seen req, we have reply: skip */ if ((req->rq_flags & PTL_RPC_FL_REPLIED) && - req->rq_xid <= cli->cli_last_xid) { - CDEBUG(D_INODE, "req %Ld was complete: skip [last rcvd %Ld]\n", - req->rq_xid, cli->cli_last_xid); + req->rq_xid <= conn->c_last_xid) { + CDEBUG(D_INODE, + "req %Ld was complete: skip [last rcvd %Ld]\n", + req->rq_xid, conn->c_last_xid); continue; } /* server has lost req, we have reply: resend, ign reply */ if ((req->rq_flags & PTL_RPC_FL_REPLIED) && - req->rq_xid > cli->cli_last_xid) { - CDEBUG(D_INODE, "lost req %Ld have rep: replay [last rcvd %Ld]\n", - req->rq_xid, cli->cli_last_xid); + req->rq_xid > conn->c_last_xid) { + CDEBUG(D_INODE, "lost req %Ld have rep: replay [last " + "rcvd %Ld]\n", req->rq_xid, conn->c_last_xid); rc = ptlrpc_replay_req(req); if (rc) { - CERROR("request resend error %d for request %Ld\n", + CERROR("request resend error %d for req %Ld\n", rc, req->rq_xid); GOTO(out, rc); } @@ -108,33 +105,36 @@ int ll_recover(struct ptlrpc_client *cli) /* server has seen req, we have lost reply: -ERESTARTSYS */ if ( !(req->rq_flags & PTL_RPC_FL_REPLIED) && - req->rq_xid <= cli->cli_last_xid) { - CDEBUG(D_INODE, "lost rep %Ld srv did req: restart [last rcvd %Ld]\n", - req->rq_xid, cli->cli_last_xid); + req->rq_xid <= conn->c_last_xid) { + CDEBUG(D_INODE, "lost rep %Ld srv did req: restart " + "[last rcvd %Ld]\n", + req->rq_xid, conn->c_last_xid); ptlrpc_restart_req(req); } /* service has not seen req, no reply: resend */ if ( !(req->rq_flags & PTL_RPC_FL_REPLIED) && - req->rq_xid > cli->cli_last_xid) { - CDEBUG(D_INODE, "lost rep/req %Ld: resend [last rcvd %Ld]\n", - req->rq_xid, cli->cli_last_xid); + req->rq_xid > conn->c_last_xid) { + CDEBUG(D_INODE, + "lost rep/req %Ld: resend [last rcvd %Ld]\n", + req->rq_xid, conn->c_last_xid); ptlrpc_resend_req(req); } } sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_FULL; - recovd_cli_fixed(cli); + recovd_conn_fixed(conn); /* Finally, continue what we delayed since recovery started */ - list_for_each_safe(tmp, pos, &cli->cli_delayed_head) { + list_for_each_safe(tmp, pos, &conn->c_delayed_head) { req = list_entry(tmp, struct ptlrpc_request, rq_list); ptlrpc_continue_req(req); } EXIT; out: - spin_unlock(&cli->cli_lock); + spin_unlock(&conn->c_lock); return rc; +#endif } diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 116926b5374d32e212a68caef66ebe154ff4a75f..567d90dcdc0f81819284ef8f45af39291c9bc340 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -126,14 +126,6 @@ static struct super_block * ll_read_super(struct super_block *sb, GOTO(out_free, sb = NULL); } -#if 0 - err = connmgr_connect(ptlrpc_connmgr, sbi->ll_mdc_conn); - if (err) { - CERROR("cannot connect to MDC: rc = %d\n", err); - GOTO(out_rpc, sb = NULL); - } -#endif - err = obd_connect(&sbi->ll_mdc_conn, obd, sbi->ll_sb_uuid); if (err) { CERROR("cannot connect to %s: rc = %d\n", mdc, err); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index b179fa925613abe79f0302c57fd13be94b4e9d79..c590235aea014da55091cf4fe4a46dd4df95b56f 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -329,10 +329,6 @@ static int mds_disconnect(struct lustre_handle *conn) if (!exp) RETURN(-EINVAL); - rc = mds_client_free(&exp->exp_mds_data); - if (rc) - CERROR("error freeing client data: rc = %d\n", rc); - rc = class_disconnect(conn); if (!rc) MOD_DEC_USE_COUNT; @@ -1125,6 +1121,8 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) rc = mds_recover(obddev); if (rc) GOTO(err_thread, rc); + + mds_destroy_export = mds_client_free; RETURN(0); diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 25242181bc288fe132ebeeb4f4ae667c86fb3f3d..dce0ff075e9209b904ae2c08e9d48da69f75e12d 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -79,11 +79,15 @@ int mds_client_add(struct mds_export_data *med, int cl_off) return 0; } -int mds_client_free(struct mds_export_data *med) +int mds_client_free(struct obd_export *exp) { + struct mds_export_data *med = &exp->exp_mds_data; unsigned long *word; int bit; + if (!med->med_mcd) + RETURN(0); + CDEBUG(D_INFO, "freeing client at offset %d with UUID '%s'\n", med->med_off, med->med_mcd->mcd_uuid); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 4b2b07964fa74d03292cac5d9603e8bdd5409a66..1007cf33850cb4b8faefdd3d40de01ebea3e6113 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -597,7 +597,9 @@ static struct miscdevice obd_psdev = { &obd_psdev_fops }; -void (*class_signal_client_failure)(struct ptlrpc_client *); +void (*class_signal_connection_failure)(struct ptlrpc_connection *); +int (*mds_destroy_export)(struct obd_export *exp); +int (*ldlm_destroy_export)(struct obd_export *exp); EXPORT_SYMBOL(obd_dev); EXPORT_SYMBOL(obdo_cachep); @@ -612,6 +614,7 @@ EXPORT_SYMBOL(class_name2dev); EXPORT_SYMBOL(class_uuid2dev); EXPORT_SYMBOL(class_uuid2obd); EXPORT_SYMBOL(class_new_export); +EXPORT_SYMBOL(class_destroy_export); EXPORT_SYMBOL(class_connect); EXPORT_SYMBOL(class_conn2export); EXPORT_SYMBOL(class_rconn2export); @@ -623,7 +626,9 @@ EXPORT_SYMBOL(class_uuid_unparse); //EXPORT_SYMBOL(class_multi_setup); //EXPORT_SYMBOL(class_multi_cleanup); -EXPORT_SYMBOL(class_signal_client_failure); +EXPORT_SYMBOL(class_signal_connection_failure); +EXPORT_SYMBOL(mds_destroy_export); +EXPORT_SYMBOL(ldlm_destroy_export); static int __init init_obdclass(void) { diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 359fcfa4dc8cb579310e364f4d46db021822143b..483f0e57c6c4a7c6131147bb119e188b74578d8e 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: -* - * linux/fs/ext2_obd/sim_obd.c - * Copyright (C) 2001 Cluster File Systems, Inc. + * + * lustre/obdclass/genops.c + * Copyright (C) 2001-2002 Cluster File Systems, Inc. * * This code is issued under the GNU General Public License. * See the file COPYING in this distribution @@ -281,6 +281,33 @@ struct obd_export *class_new_export(struct obd_device *obddev) return export; } +void class_destroy_export(struct obd_export *exp) +{ + int rc; + ENTRY; + + spin_lock(&exp->exp_connection->c_lock); + list_del(&exp->exp_chain); + spin_unlock(&exp->exp_connection->c_lock); + + /* XXXshaver these bits want to be hung off the export, instead of + * XXXshaver hard-coded here. + */ + if (mds_destroy_export) { + rc = mds_destroy_export(exp); + if (rc) + CERROR("error freeing mds client data: rc = %d\n", rc); + } + if (ldlm_destroy_export) { + rc = ldlm_destroy_export(exp); + if (rc) + CERROR("error freeing dlm client data: rc = %d\n", rc); + } + kmem_cache_free(export_cachep, exp); + + EXIT; +} + /* a connection defines an export context in which preallocation can be managed. */ int class_connect (struct lustre_handle *conn, struct obd_device *obd, @@ -337,8 +364,8 @@ int class_disconnect(struct lustre_handle *conn) } else CDEBUG(D_IOCTL, "disconnect: addr %Lx cookie %Lx\n", (long long)conn->addr, (long long)conn->cookie); - list_del(&export->exp_chain); - kmem_cache_free(export_cachep, export); + + class_destroy_export(export); RETURN(0); } diff --git a/lustre/ptlrpc/Makefile.am b/lustre/ptlrpc/Makefile.am index 33a07340f1e7fc6e3abb066488d68ab7b4c74e47..4c922ff59d94d1d964721652f049f9a1b487d3c4 100644 --- a/lustre/ptlrpc/Makefile.am +++ b/lustre/ptlrpc/Makefile.am @@ -9,6 +9,6 @@ MODULE = ptlrpc modulefs_DATA = ptlrpc.o EXTRA_PROGRAMS = ptlrpc -ptlrpc_SOURCES = connmgr.c recovd.c connection.c rpc.c events.c service.c client.c niobuf.c pack_generic.c +ptlrpc_SOURCES = recovd.c connection.c rpc.c events.c service.c client.c niobuf.c pack_generic.c include $(top_srcdir)/Rules diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index c775a5ec176e904c85c4488731c33ae89609abd1..cce98a1af73e16437c8f770f05bc0b6aa0a5eab7 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -27,22 +27,20 @@ #include <linux/lustre_lib.h> #include <linux/lustre_ha.h> -void ptlrpc_init_client(struct recovd_obd *recovd, - int (*recover)(struct ptlrpc_client *recover), - int req_portal, - int rep_portal, struct ptlrpc_client *cl) +void ptlrpc_init_client(int req_portal, int rep_portal, struct ptlrpc_client *cl, + struct ptlrpc_connection *conn) { memset(cl, 0, sizeof(*cl)); - cl->cli_recover = recover; - if (recovd) - recovd_cli_manage(recovd, cl); + /* Some things, like the LDLM, can call us without a connection. + * I don't like it one bit. + */ + if (conn) { + cl->cli_connection = conn; + list_add(&cl->cli_client_chain, &conn->c_clients); + } cl->cli_obd = NULL; cl->cli_request_portal = req_portal; cl->cli_reply_portal = rep_portal; - INIT_LIST_HEAD(&cl->cli_delayed_head); - INIT_LIST_HEAD(&cl->cli_sending_head); - INIT_LIST_HEAD(&cl->cli_dying_head); - spin_lock_init(&cl->cli_lock); sema_init(&cl->cli_rpc_sem, 32); } @@ -153,11 +151,11 @@ void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk) } struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, - struct ptlrpc_connection *conn, int opcode, int count, int *lengths, char **bufs) { struct ptlrpc_request *request; + struct ptlrpc_connection *conn = cl->cli_connection; int rc; ENTRY; @@ -212,8 +210,7 @@ struct ptlrpc_request *ptlrpc_prep_req2(struct lustre_handle *conn, } clobd = &export->exp_obd->u.cli; - req = ptlrpc_prep_req(clobd->cl_client, clobd->cl_conn, - opcode, count, lengths, bufs); + req = ptlrpc_prep_req(clobd->cl_client, opcode, count, lengths, bufs); ptlrpc_hdl2req(req, &clobd->cl_exporth); return req; } @@ -246,10 +243,10 @@ void ptlrpc_free_req(struct ptlrpc_request *request) if (request->rq_reqmsg != NULL) OBD_FREE(request->rq_reqmsg, request->rq_reqlen); - if (request->rq_client) { - spin_lock(&request->rq_client->cli_lock); + if (request->rq_connection) { + spin_lock(&request->rq_connection->c_lock); list_del_init(&request->rq_list); - spin_unlock(&request->rq_client->cli_lock); + spin_unlock(&request->rq_connection->c_lock); } ptlrpc_put_connection(request->rq_connection); @@ -335,12 +332,12 @@ static int ptlrpc_abort(struct ptlrpc_request *request) } /* caller must lock cli */ -void ptlrpc_free_committed(struct ptlrpc_client *cli) +void ptlrpc_free_committed(struct ptlrpc_connection *conn) { struct list_head *tmp, *saved; struct ptlrpc_request *req; - list_for_each_safe(tmp, saved, &cli->cli_sending_head) { + list_for_each_safe(tmp, saved, &conn->c_sending_head) { req = list_entry(tmp, struct ptlrpc_request, rq_list); if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) { @@ -350,13 +347,13 @@ void ptlrpc_free_committed(struct ptlrpc_client *cli) } /* not yet committed */ - if (req->rq_transno > cli->cli_last_committed) + if (req->rq_transno > conn->c_last_committed) break; CDEBUG(D_INFO, "Marking request xid %Ld as committed (" "transno=%Lu, last_committed=%Lu\n", (long long)req->rq_xid, (long long)req->rq_transno, - (long long)cli->cli_last_committed); + (long long)conn->c_last_committed); if (atomic_dec_and_test(&req->rq_refcount)) { /* we do this to prevent free_req deadlock */ list_del_init(&req->rq_list); @@ -364,7 +361,7 @@ void ptlrpc_free_committed(struct ptlrpc_client *cli) ptlrpc_free_req(req); } else { list_del_init(&req->rq_list); - list_add(&req->rq_list, &cli->cli_dying_head); + list_add(&req->rq_list, &conn->c_dying_head); } } @@ -376,24 +373,34 @@ void ptlrpc_cleanup_client(struct ptlrpc_client *cli) { struct list_head *tmp, *saved; struct ptlrpc_request *req; + struct ptlrpc_connection *conn = cli->cli_connection; ENTRY; - spin_lock(&cli->cli_lock); - list_for_each_safe(tmp, saved, &cli->cli_sending_head) { + if (!conn) { + EXIT; + return; + } + + spin_lock(&conn->c_lock); + list_for_each_safe(tmp, saved, &conn->c_sending_head) { req = list_entry(tmp, struct ptlrpc_request, rq_list); + if (req->rq_client != cli) + continue; CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req); list_del_init(&req->rq_list); req->rq_client = NULL; ptlrpc_free_req(req); } - list_for_each_safe(tmp, saved, &cli->cli_dying_head) { + list_for_each_safe(tmp, saved, &conn->c_dying_head) { req = list_entry(tmp, struct ptlrpc_request, rq_list); + if (req->rq_client != cli) + continue; CERROR("Request %p is on the dying list at cleanup!\n", req); list_del_init(&req->rq_list); req->rq_client = NULL; ptlrpc_free_req(req); } - spin_unlock(&cli->cli_lock); + spin_unlock(&conn->c_lock); EXIT; return; @@ -444,8 +451,9 @@ static int expired_request(void *data) req->rq_connection->c_level = LUSTRE_CONN_RECOVD; req->rq_flags |= PTL_RPC_FL_TIMEOUT; /* Activate the recovd for this client, if there is one. */ - if (req->rq_client && req->rq_client->cli_recovd) - recovd_cli_fail(req->rq_client); + if (req->rq_client && req->rq_client->cli_connection && + req->rq_client->cli_connection->c_recovd) + recovd_conn_fail(req->rq_client->cli_connection); /* If this request is for recovery or other primordial tasks, * don't go back to sleep. @@ -468,6 +476,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) int rc = 0; struct l_wait_info lwi; struct ptlrpc_client *cli = req->rq_client; + struct ptlrpc_connection *conn = cli->cli_connection; ENTRY; init_waitqueue_head(&req->rq_wait_for_rep); @@ -480,19 +489,19 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) CERROR("process %d waiting for recovery (%d > %d)\n", current->pid, req->rq_level, req->rq_connection->c_level); - spin_lock(&cli->cli_lock); + spin_lock(&conn->c_lock); list_del_init(&req->rq_list); - list_add_tail(&req->rq_list, &cli->cli_delayed_head); - spin_unlock(&cli->cli_lock); + list_add_tail(&req->rq_list, &conn->c_delayed_head); + spin_unlock(&conn->c_lock); lwi = LWI_INTR(NULL, NULL); rc = l_wait_event(req->rq_wait_for_rep, req->rq_level <= req->rq_connection->c_level, &lwi); - spin_lock(&cli->cli_lock); + spin_lock(&conn->c_lock); list_del_init(&req->rq_list); - spin_unlock(&cli->cli_lock); + spin_unlock(&conn->c_lock); if (rc) RETURN(rc); @@ -512,10 +521,10 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) RETURN(-rc); } - spin_lock(&cli->cli_lock); + spin_lock(&conn->c_lock); list_del_init(&req->rq_list); - list_add_tail(&req->rq_list, &cli->cli_sending_head); - spin_unlock(&cli->cli_lock); + list_add_tail(&req->rq_list, &conn->c_sending_head); + spin_unlock(&conn->c_lock); CDEBUG(D_OTHER, "-- sleeping\n"); lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request, @@ -555,11 +564,11 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg, req->rq_replen, req->rq_repmsg->status); - spin_lock(&cli->cli_lock); - cli->cli_last_xid = req->rq_repmsg->last_xid; - cli->cli_last_committed = req->rq_repmsg->last_committed; - ptlrpc_free_committed(cli); - spin_unlock(&cli->cli_lock); + spin_lock(&conn->c_lock); + conn->c_last_xid = req->rq_repmsg->last_xid; + conn->c_last_committed = req->rq_repmsg->last_committed; + ptlrpc_free_committed(conn); + spin_unlock(&conn->c_lock); EXIT; out: diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index d093bbf3aab6ca3fd00d67ad8975f18d974ba6fb..4c3b3de1701926179de8c4eb47e9e4312e8b52a0 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -66,6 +66,11 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer) c->c_generation = 1; c->c_epoch = 1; c->c_bootcount = 0; + INIT_LIST_HEAD(&c->c_delayed_head); + INIT_LIST_HEAD(&c->c_sending_head); + INIT_LIST_HEAD(&c->c_dying_head); + INIT_LIST_HEAD(&c->c_clients); + INIT_LIST_HEAD(&c->c_exports); atomic_set(&c->c_refcount, 0); ptlrpc_connection_addref(c); spin_lock_init(&c->c_lock); diff --git a/lustre/ptlrpc/connmgr.c b/lustre/ptlrpc/connmgr.c deleted file mode 100644 index ec57fd9544ade82ca0cc30c37f659b2349687f27..0000000000000000000000000000000000000000 --- a/lustre/ptlrpc/connmgr.c +++ /dev/null @@ -1,154 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * obd/rpc/recovd.c - * - * Lustre High Availability Daemon - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - * - * by Peter Braam <braam@clusterfs.com> - * - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include <linux/kmod.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_ha.h> - -static int connmgr_unpack_body(struct ptlrpc_request *req) -{ - struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0); - if (b == NULL) { - LBUG(); - RETURN(-EINVAL); - } - - b->generation = NTOH__u32(b->generation); - - return 0; -} - -int connmgr_connect(struct recovd_obd *recovd, struct ptlrpc_connection *conn) -{ - struct ptlrpc_request *req; - struct ptlrpc_client *cl; - struct connmgr_body *body; - int rc, size = sizeof(*body); - ENTRY; - - if (!recovd) { - CERROR("no manager\n"); - LBUG(); - } - cl = recovd->recovd_client; - - req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0); - body->generation = HTON__u32(conn->c_generation); - body->conn = (__u64)(unsigned long)conn; - body->conn_token = conn->c_token; - strncpy(body->conn_uuid, conn->c_local_uuid, sizeof(body->conn_uuid)); - - req->rq_replen = lustre_msg_size(1, &size); - req->rq_level = LUSTRE_CONN_NEW; - - rc = ptlrpc_queue_wait(req); - rc = ptlrpc_check_status(req, rc); - if (!rc) { - rc = connmgr_unpack_body(req); - if (rc) - GOTO(out_free, rc); - body = lustre_msg_buf(req->rq_repmsg, 0); - CDEBUG(D_NET, "remote generation: %o\n", body->generation); - conn->c_level = LUSTRE_CONN_CON; - conn->c_remote_conn = body->conn; - conn->c_remote_token = body->conn_token; - strncpy(conn->c_remote_uuid, body->conn_uuid, - sizeof(conn->c_remote_uuid)); - } - - EXIT; - out_free: - ptlrpc_free_req(req); - out: - return rc; -} - -static int connmgr_handle_connect(struct ptlrpc_request *req) -{ - struct connmgr_body *body; - int rc, size = sizeof(*body); - ENTRY; - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("connmgr: out of memory\n"); - req->rq_status = -ENOMEM; - RETURN(0); - } - - body = lustre_msg_buf(req->rq_reqmsg, 0); - connmgr_unpack_body(req); - - req->rq_connection->c_remote_conn = body->conn; - req->rq_connection->c_remote_token = body->conn_token; - strncpy(req->rq_connection->c_remote_uuid, body->conn_uuid, - sizeof(req->rq_connection->c_remote_uuid)); - - CERROR("incoming generation %d\n", body->generation); - body = lustre_msg_buf(req->rq_repmsg, 0); - body->generation = 4711; - body->conn = (__u64)(unsigned long)req->rq_connection; - body->conn_token = req->rq_connection->c_token; - - req->rq_connection->c_level = LUSTRE_CONN_CON; - RETURN(0); -} - -int connmgr_handle(struct ptlrpc_request *req) -{ - int rc; - ENTRY; - - rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen); - if (rc) { - CERROR("Invalid request\n"); - GOTO(out, rc); - } - - if (req->rq_reqmsg->type != NTOH__u32(PTL_RPC_MSG_REQUEST)) { - CERROR("wrong packet type sent %d\n", - req->rq_reqmsg->type); - GOTO(out, rc = -EINVAL); - } - - switch (req->rq_reqmsg->opc) { - case CONNMGR_CONNECT: - CDEBUG(D_INODE, "connmgr connect\n"); - rc = connmgr_handle_connect(req); - break; - - default: - rc = ptlrpc_error(req->rq_svc, req); - RETURN(rc); - } - - EXIT; -out: - if (rc) { - ptlrpc_error(req->rq_svc, req); - } else { - CDEBUG(D_NET, "sending reply\n"); - ptlrpc_reply(req->rq_svc, req); - } - - return 0; -} diff --git a/lustre/ptlrpc/recovd.c b/lustre/ptlrpc/recovd.c index 194e2b44f74ff91fa88efeb0fb53e7a1fda585f3..ecbef4160b46fd25d6a20165a0a68f6fd5689f53 100644 --- a/lustre/ptlrpc/recovd.c +++ b/lustre/ptlrpc/recovd.c @@ -21,37 +21,39 @@ #include <linux/lustre_ha.h> #include <linux/obd_support.h> -struct recovd_obd *ptlrpc_connmgr; - -void recovd_cli_manage(struct recovd_obd *recovd, struct ptlrpc_client *cli) +void recovd_conn_manage(struct recovd_obd *recovd, + struct ptlrpc_connection *conn) { ENTRY; - cli->cli_recovd = recovd; + conn->c_recovd = recovd; spin_lock(&recovd->recovd_lock); - list_add(&cli->cli_ha_item, &recovd->recovd_clients_lh); + list_add(&conn->c_recovd_data.rd_managed_chain, + &recovd->recovd_managed_items); spin_unlock(&recovd->recovd_lock); EXIT; } -void recovd_cli_fail(struct ptlrpc_client *cli) +void recovd_conn_fail(struct ptlrpc_connection *conn) { ENTRY; - spin_lock(&cli->cli_recovd->recovd_lock); - cli->cli_recovd->recovd_flags |= RECOVD_FAIL; - cli->cli_recovd->recovd_wakeup_flag = 1; - list_del(&cli->cli_ha_item); - list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_troubled_lh); - spin_unlock(&cli->cli_recovd->recovd_lock); - wake_up(&cli->cli_recovd->recovd_waitq); + spin_lock(&conn->c_recovd->recovd_lock); + conn->c_recovd->recovd_flags |= RECOVD_FAIL; + conn->c_recovd->recovd_wakeup_flag = 1; + list_del(&conn->c_recovd_data.rd_managed_chain); + list_add(&conn->c_recovd_data.rd_managed_chain, + &conn->c_recovd->recovd_troubled_items); + spin_unlock(&conn->c_recovd->recovd_lock); + wake_up(&conn->c_recovd->recovd_waitq); EXIT; } -/* this function must be called with cli->cli_lock held */ -void recovd_cli_fixed(struct ptlrpc_client *cli) +/* this function must be called with conn->c_lock held */ +void recovd_conn_fixed(struct ptlrpc_connection *conn) { ENTRY; - list_del(&cli->cli_ha_item); - list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_clients_lh); + list_del(&conn->c_recovd_data.rd_managed_chain); + list_add(&conn->c_recovd_data.rd_managed_chain, + &conn->c_recovd->recovd_managed_items); EXIT; } @@ -129,15 +131,15 @@ static int recovd_handle_event(struct recovd_obd *recovd) if (recovd->recovd_flags & RECOVD_UPCALL_ANSWER) { CERROR("UPCALL_WAITING: upcall answer\n"); - while (!list_empty(&recovd->recovd_troubled_lh)) { - struct ptlrpc_client *cli = - list_entry(recovd->recovd_troubled_lh.next, - struct ptlrpc_client, cli_ha_item); + while (!list_empty(&recovd->recovd_troubled_items)) { + struct recovd_data *rd = + list_entry(recovd->recovd_troubled_items.next, + struct recovd_data, rd_managed_chain); - list_del(&cli->cli_ha_item); - if (cli->cli_recover) { + list_del(&rd->rd_managed_chain); + if (rd->rd_recover) { spin_unlock(&recovd->recovd_lock); - cli->cli_recover(cli); + rd->rd_recover(rd); spin_lock(&recovd->recovd_lock); } } @@ -195,12 +197,13 @@ static int recovd_main(void *arg) int recovd_setup(struct recovd_obd *recovd) { int rc; - extern void (*class_signal_client_failure)(struct ptlrpc_client *); + extern void (*class_signal_connection_failure) + (struct ptlrpc_connection *); ENTRY; - INIT_LIST_HEAD(&recovd->recovd_clients_lh); - INIT_LIST_HEAD(&recovd->recovd_troubled_lh); + INIT_LIST_HEAD(&recovd->recovd_managed_items); + INIT_LIST_HEAD(&recovd->recovd_troubled_items); spin_lock_init(&recovd->recovd_lock); init_waitqueue_head(&recovd->recovd_waitq); @@ -216,7 +219,7 @@ int recovd_setup(struct recovd_obd *recovd) wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & RECOVD_IDLE); /* exported and called by obdclass timeout handlers */ - class_signal_client_failure = recovd_cli_fail; + class_signal_connection_failure = recovd_conn_fail; RETURN(0); } diff --git a/lustre/ptlrpc/rpc.c b/lustre/ptlrpc/rpc.c index 9121f226512418f3934458b6d6b1d61d1e2c8f39..df72d83b9fad3bbabe0ad7211c1fd8b39b7d7b43 100644 --- a/lustre/ptlrpc/rpc.c +++ b/lustre/ptlrpc/rpc.c @@ -42,47 +42,13 @@ int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf) MOD_INC_USE_COUNT; memset(recovd, 0, sizeof(*recovd)); - OBD_ALLOC(recovd->recovd_client, sizeof(*recovd->recovd_client)); - if (!recovd) - GOTO(err_dec, err = -ENOMEM); - err = recovd_setup(recovd); - if (err) - GOTO(err_free, err); - - recovd->recovd_service = ptlrpc_init_svc(16* 1024, - CONNMGR_REQUEST_PORTAL, - CONNMGR_REPLY_PORTAL, - "self", connmgr_handle, - "connmgr"); - if (!recovd->recovd_service) { - CERROR("failed to start service\n"); - GOTO(err_recovd, err = -ENOMEM); - } - - ptlrpc_init_client(NULL, NULL, CONNMGR_REQUEST_PORTAL, - CONNMGR_REPLY_PORTAL, recovd->recovd_client); - recovd->recovd_client->cli_name = "connmgr"; - - err = ptlrpc_start_thread(obddev, recovd->recovd_service, - "lustre_connmgr"); if (err) { - CERROR("cannot start thread\n"); - GOTO(err_svc, err); + MOD_DEC_USE_COUNT; + RETURN(err); } - ptlrpc_connmgr = recovd; RETURN(0); - -err_svc: - ptlrpc_unregister_service(recovd->recovd_service); -err_recovd: - recovd_cleanup(recovd); -err_free: - OBD_FREE(recovd->recovd_client, sizeof(*recovd->recovd_client)); -err_dec: - MOD_DEC_USE_COUNT; - RETURN(err); } int connmgr_cleanup(struct obd_device *dev) @@ -94,10 +60,6 @@ int connmgr_cleanup(struct obd_device *dev) if (err) LBUG(); - ptlrpc_stop_all_threads(recovd->recovd_service); - ptlrpc_unregister_service(recovd->recovd_service); - ptlrpc_cleanup_client(recovd->recovd_client); - OBD_FREE(recovd->recovd_client, sizeof(*recovd->recovd_client)); MOD_DEC_USE_COUNT; RETURN(0); } @@ -126,7 +88,7 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *conn, int len, void *karg, static struct obd_ops recovd_obd_ops = { o_setup: connmgr_setup, o_cleanup: connmgr_cleanup, - o_iocontrol: connmgr_iocontrol, + o_iocontrol: connmgr_iocontrol, }; static int __init ptlrpc_init(void) @@ -148,12 +110,9 @@ static void __exit ptlrpc_exit(void) } /* connmgr.c */ -EXPORT_SYMBOL(ptlrpc_connmgr); -EXPORT_SYMBOL(connmgr_connect); -EXPORT_SYMBOL(connmgr_handle); -EXPORT_SYMBOL(recovd_cli_fail); -EXPORT_SYMBOL(recovd_cli_manage); -EXPORT_SYMBOL(recovd_cli_fixed); +EXPORT_SYMBOL(recovd_conn_fail); +EXPORT_SYMBOL(recovd_conn_manage); +EXPORT_SYMBOL(recovd_conn_fixed); EXPORT_SYMBOL(recovd_setup); EXPORT_SYMBOL(recovd_cleanup);