diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
index 9bfe4318294457d6e7809fad495a3ccf5123be32..711788c71c34ecca8ce7dcda20397e80ba7be2e1 100644
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -516,6 +516,16 @@ struct file_operations name##_fops = { \
 struct ptlrpc_request;
 extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
 
+#ifdef CRAY_XT3
+/* lprocfs_status.c: read recovery max time bz13079 */
+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data);
+
+/* lprocfs_status.c: write recovery max time bz13079 */
+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
+                                    unsigned long count, void *data);
+#endif
+
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 6af873403f1885e15ece0c1f0fbc85de09081393..ee38516f680439fce5a12f9b688aac35ffabf5d1 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -815,8 +815,11 @@ struct obd_device {
         cfs_timer_t             obd_recovery_timer;
         struct list_head        obd_recovery_queue;
         struct list_head        obd_delayed_reply_queue;
-        time_t                  obd_recovery_start;
-        time_t                  obd_recovery_end; /* for lprocfs_status */
+        time_t                  obd_recovery_start; /* seconds */
+        time_t                  obd_recovery_end; /* seconds, for lprocfs_status */
+#ifdef CRAY_XT3
+        time_t                  obd_recovery_max_time; /* seconds, bz13079 */
+#endif
         int                     obd_recovery_timeout;
 
         union {
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 09c2359af58341e898ef0e2cdf306c8239408c95..eaa1db826e8fefc0531af8532e841a56045691d9 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -54,6 +54,9 @@ extern unsigned int obd_alloc_fail_rate;
 /* Timeout definitions */
 #define OBD_TIMEOUT_DEFAULT 100
 #define LDLM_TIMEOUT_DEFAULT 20
+#ifdef CRAY_XT3
+#define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
+#endif
 /* Time to wait for all clients to reconnect during recovery */
 /* Should be very conservative; must catch the first reconnect after reboot */
 #define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 01b886b495c36b61f6bbd2ea8d3086fdfca63d18..68308dfa79c5094f7dcd5e4ea70951d0b54fe22c 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1102,6 +1102,13 @@ static void reset_recovery_timer(struct obd_device *obd, int mintime)
         }
         /* Track the client's largest expected replay time */
         obd->obd_recovery_timeout = max(mintime, obd->obd_recovery_timeout);
+#ifdef CRAY_XT3
+        /* bz13079: cap the timeout so it ends by recovery_start + max_time */
+        if (cfs_time_current_sec() + obd->obd_recovery_timeout >
+            obd->obd_recovery_start + obd->obd_recovery_max_time)
+                obd->obd_recovery_timeout = obd->obd_recovery_start +
+                        obd->obd_recovery_max_time - cfs_time_current_sec();
+#endif
         obd->obd_recovery_end = cfs_time_current_sec() +
                 obd->obd_recovery_timeout;
         cfs_timer_arm(&obd->obd_recovery_timer,
diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c
index b91d624c3af2ea256ecc57960ac2d775fa43753e..d3209d3348e03ee223337b1e564b645b3e7bc375 100644
--- a/lustre/mds/mds_fs.c
+++ b/lustre/mds/mds_fs.c
@@ -463,6 +463,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 obd->obd_recovery_start = 0;
                 obd->obd_recovery_end = 0;
                 obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
+#ifdef CRAY_XT3
+                /* bz13079: this won't be changed for mds */
+                obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+#endif
         }
 
         mds->mds_mount_count = mount_count + 1;
diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c
index ff469548367824c7d5a71d526329f93e269cac8c..2fb75982e7d385cf3bb41550d898d33ee8471fad 100644
--- a/lustre/obdclass/lprocfs_status.c
+++ b/lustre/obdclass/lprocfs_status.c
@@ -1677,6 +1677,51 @@ out:
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
 
+#ifdef CRAY_XT3
+/*
+ * bz13079: proc read handler.  Prints obd_recovery_max_time (seconds) of
+ * the obd_device passed in @data, followed by a newline, into @page.
+ * Returns the snprintf() result.
+ */
+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+
+        /* time_t is signed; cast so the value always matches the format */
+        return snprintf(page, count, "%ld\n",
+                        (long)obd->obd_recovery_max_time);
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
+
+/*
+ * bz13079: proc write handler.  Parses an integer from @buffer and stores
+ * it in obd_recovery_max_time of the obd_device passed in @data.
+ * Returns @count on success, the lprocfs_write_helper() error if parsing
+ * fails, or -EINVAL for a negative value.
+ */
+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
+                                    unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* a negative limit would yield a negative recovery timeout */
+        if (val < 0)
+                return -EINVAL;
+
+        obd->obd_recovery_max_time = val;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
+#endif /* CRAY_XT3 */
+
 EXPORT_SYMBOL(lprocfs_register);
 EXPORT_SYMBOL(lprocfs_srch);
 EXPORT_SYMBOL(lprocfs_remove);
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c
index 09b059f9457b2feba8befa61221677ab268021e3..146a08e61263c64ce14ebaad91a027e9167cf8dd 100644
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -836,6 +836,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                 obd->obd_recovery_start = 0;
                 obd->obd_recovery_end = 0;
                 obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
+#ifdef CRAY_XT3
+                /* b13079: this should be set to desired value for ost */
+                obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+#endif
         }
 
 out:
diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c
index 17e73ef8815c878af1eced85b4865cabbe8d9943..5d09fa7e6ad05f4bb1989c46856d9d5005b4dee2 100644
--- a/lustre/obdfilter/lproc_obdfilter.c
+++ b/lustre/obdfilter/lproc_obdfilter.c
@@ -192,6 +192,10 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 },
         { "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 },
         { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
+#ifdef CRAY_XT3
+        { "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime,
+                              lprocfs_obd_wr_recovery_maxtime, 0},
+#endif
         { "evict_client", 0, lprocfs_wr_evict_client, 0,
                                 &lprocfs_evict_client_fops},
         { "num_exports", lprocfs_rd_num_exports, 0, 0 },