From ee2c39adb23a8060882f93a032f28f3927d7669b Mon Sep 17 00:00:00 2001 From: bwzhou <bwzhou> Date: Mon, 24 Dec 2007 03:20:03 +0000 Subject: [PATCH] Branch b1_6 b=13079 r=adilger, deen set an upper limit for ost recovery period --- lustre/include/lprocfs_status.h | 10 ++++++++++ lustre/include/obd.h | 7 +++++-- lustre/include/obd_support.h | 3 +++ lustre/ldlm/ldlm_lib.c | 6 ++++++ lustre/mds/mds_fs.c | 4 ++++ lustre/obdclass/lprocfs_status.c | 29 +++++++++++++++++++++++++++++ lustre/obdfilter/filter.c | 4 ++++ lustre/obdfilter/lproc_obdfilter.c | 4 ++++ 8 files changed, 65 insertions(+), 2 deletions(-) diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 9bfe431829..711788c71c 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -516,6 +516,16 @@ struct file_operations name##_fops = { \ struct ptlrpc_request; extern void target_print_req(void *seq_file, struct ptlrpc_request *req); +#ifdef CRAY_XT3 +/* bz13079: procfs read handler, defined in lprocfs_status.c; prints obd_recovery_max_time of the obd_device passed as data */ +int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off, + int count, int *eof, void *data); + +/* bz13079: procfs write handler, defined in lprocfs_status.c; stores the int produced by lprocfs_write_helper in obd_recovery_max_time */ +int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer, + unsigned long count, void *data); +#endif + #else /* LPROCFS is not defined */ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 6af873403f..ee38516f68 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -815,8 +815,11 @@ struct obd_device { cfs_timer_t obd_recovery_timer; struct list_head obd_recovery_queue; struct list_head obd_delayed_reply_queue; - time_t obd_recovery_start; - time_t obd_recovery_end; /* for lprocfs_status */ + time_t obd_recovery_start; /* seconds */ + time_t obd_recovery_end; /* seconds, for lprocfs_status */ +#ifdef CRAY_XT3 + time_t obd_recovery_max_time; /* seconds, bz13079: cap on recovery length, applied relative to obd_recovery_start */ +#endif int 
obd_recovery_timeout; union { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 09c2359af5..eaa1db826e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -54,6 +54,9 @@ extern unsigned int obd_alloc_fail_rate; /* Timeout definitions */ #define OBD_TIMEOUT_DEFAULT 100 #define LDLM_TIMEOUT_DEFAULT 20 +#ifdef CRAY_XT3 + #define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079: default value for obd_recovery_max_time, 18 times obd_timeout; expands to an expression, so obd_timeout is read at each use */ +#endif /* Time to wait for all clients to reconnect during recovery */ /* Should be very conservative; must catch the first reconnect after reboot */ #define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */ diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 01b886b495..68308dfa79 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1102,6 +1102,12 @@ static void reset_recovery_timer(struct obd_device *obd, int mintime) } /* Track the client's largest expected replay time */ obd->obd_recovery_timeout = max(mintime, obd->obd_recovery_timeout); +#ifdef CRAY_XT3 /* bz13079: shrink the timeout so recovery cannot run past obd_recovery_start + obd_recovery_max_time */ + if(cfs_time_current_sec() + obd->obd_recovery_timeout > + obd->obd_recovery_start + obd->obd_recovery_max_time) + obd->obd_recovery_timeout = obd->obd_recovery_start + + obd->obd_recovery_max_time - cfs_time_current_sec(); /* NOTE(review): the result is zero or negative once obd_recovery_start + obd_recovery_max_time has passed; confirm the timer arm below copes with that */ +#endif obd->obd_recovery_end = cfs_time_current_sec() + obd->obd_recovery_timeout; cfs_timer_arm(&obd->obd_recovery_timer, diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index b91d624c3a..d3209d3348 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -463,6 +463,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) obd->obd_recovery_start = 0; obd->obd_recovery_end = 0; obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout; +#ifdef CRAY_XT3 + /* bz13079: the MDS keeps this default cap; this patch adds the recovery_maxtime proc entry only to the obdfilter vars */ + obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME; +#endif } mds->mds_mount_count = mount_count + 1; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c 
index ff46954836..2fb75982e7 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1677,6 +1677,35 @@ out: } EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status); +#ifdef CRAY_XT3 /* bz13079: procfs accessors for obd->obd_recovery_max_time */ +int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off, + int count, int *eof, void *data) /* data is cast to the obd_device; start, off and eof are not used in this body */ +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->obd_recovery_max_time); /* NOTE(review): the field is declared time_t but printed with %lu; confirm the two match on every target */ +} +EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime); + +int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer, + unsigned long count, void *data) /* returns count on success, or the nonzero rc from lprocfs_write_helper; file is not used in this body */ +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + obd->obd_recovery_max_time = val; /* NOTE(review): val is a signed int stored without a range check; confirm whether zero or negative input should be rejected */ + return count; +} +EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime); +#endif /* CRAY_XT3 */ + EXPORT_SYMBOL(lprocfs_register); EXPORT_SYMBOL(lprocfs_srch); EXPORT_SYMBOL(lprocfs_remove); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 09b059f945..146a08e612 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -836,6 +836,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) obd->obd_recovery_start = 0; obd->obd_recovery_end = 0; obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout; +#ifdef CRAY_XT3 + /* b13079: default cap for the OST; it can be overwritten through the recovery_maxtime proc entry added to the obdfilter vars */ + obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME; +#endif } out: diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 17e73ef881..5d09fa7e6a 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -192,6 +192,10 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 }, { "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 }, { "recovery_status", 
lprocfs_obd_rd_recovery_status, 0, 0 }, +#ifdef CRAY_XT3 /* bz13079: recovery cap tunable, registered with both a read and a write handler */ + { "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime, + lprocfs_obd_wr_recovery_maxtime, 0}, +#endif { "evict_client", 0, lprocfs_wr_evict_client, 0, &lprocfs_evict_client_fops}, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, -- GitLab