From ee2c39adb23a8060882f93a032f28f3927d7669b Mon Sep 17 00:00:00 2001
From: bwzhou <bwzhou>
Date: Mon, 24 Dec 2007 03:20:03 +0000
Subject: [PATCH] Branch b1_6 b=13079 r=adilger, deen

set an upper limit for ost recovery period
---
 lustre/include/lprocfs_status.h    | 10 ++++++++++
 lustre/include/obd.h               |  7 +++++--
 lustre/include/obd_support.h       |  3 +++
 lustre/ldlm/ldlm_lib.c             |  6 ++++++
 lustre/mds/mds_fs.c                |  4 ++++
 lustre/obdclass/lprocfs_status.c   | 29 +++++++++++++++++++++++++++++
 lustre/obdfilter/filter.c          |  4 ++++
 lustre/obdfilter/lproc_obdfilter.c |  4 ++++
 8 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
index 9bfe431829..711788c71c 100644
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -516,6 +516,16 @@ struct file_operations name##_fops = {                                     \
 struct ptlrpc_request;
 extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
 
+#ifdef CRAY_XT3
+/* lprocfs_status.c: print obd_recovery_max_time, in seconds (bz13079) */
+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data);
+
+/* lprocfs_status.c: set obd_recovery_max_time, in seconds (bz13079) */
+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
+                                    unsigned long count, void *data);
+#endif
+
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 6af873403f..ee38516f68 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -815,8 +815,11 @@ struct obd_device {
         cfs_timer_t                      obd_recovery_timer;
         struct list_head                 obd_recovery_queue;
         struct list_head                 obd_delayed_reply_queue;
-        time_t                           obd_recovery_start;
-        time_t                           obd_recovery_end; /* for lprocfs_status */
+        time_t                           obd_recovery_start; /* seconds */
+        time_t                           obd_recovery_end; /* seconds, for lprocfs_status */
+#ifdef CRAY_XT3
+        time_t                           obd_recovery_max_time; /* seconds, bz13079 */
+#endif
         int                              obd_recovery_timeout;
 
         union {
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 09c2359af5..eaa1db826e 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -54,6 +54,9 @@ extern unsigned int obd_alloc_fail_rate;
 /* Timeout definitions */
 #define OBD_TIMEOUT_DEFAULT 100
 #define LDLM_TIMEOUT_DEFAULT 20
+#ifdef CRAY_XT3
+ #define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079: default cap */
+#endif
 /* Time to wait for all clients to reconnect during recovery */
 /* Should be very conservative; must catch the first reconnect after reboot */
 #define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 01b886b495..68308dfa79 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1102,6 +1102,12 @@ static void reset_recovery_timer(struct obd_device *obd, int mintime)
         }
         /* Track the client's largest expected replay time */
         obd->obd_recovery_timeout = max(mintime, obd->obd_recovery_timeout);
+#ifdef CRAY_XT3
+        if(cfs_time_current_sec() + obd->obd_recovery_timeout >
+           obd->obd_recovery_start + obd->obd_recovery_max_time)
+                obd->obd_recovery_timeout = obd->obd_recovery_start + 
+                        obd->obd_recovery_max_time - cfs_time_current_sec();
+#endif
         obd->obd_recovery_end = cfs_time_current_sec() +
                 obd->obd_recovery_timeout;
         cfs_timer_arm(&obd->obd_recovery_timer, 
diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c
index b91d624c3a..d3209d3348 100644
--- a/lustre/mds/mds_fs.c
+++ b/lustre/mds/mds_fs.c
@@ -463,6 +463,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 obd->obd_recovery_start = 0;
                 obd->obd_recovery_end = 0;
                 obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
+#ifdef CRAY_XT3
+                /* bz13079: the MDS always uses this default limit */
+                obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+#endif
         }
 
         mds->mds_mount_count = mount_count + 1;
diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c
index ff46954836..2fb75982e7 100644
--- a/lustre/obdclass/lprocfs_status.c
+++ b/lustre/obdclass/lprocfs_status.c
@@ -1677,6 +1677,35 @@ out:
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
 
+#ifdef CRAY_XT3
+/* proc read: print obd_recovery_max_time (seconds) of the obd in @data */
+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+        *eof = 1; /* the whole value is produced by this single call */
+        return snprintf(page, count, "%ld\n", (long)obd->obd_recovery_max_time);
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
+
+/* proc write: set obd_recovery_max_time (seconds) of the obd in @data */
+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
+                                    unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc;
+
+        LASSERT(obd != NULL);
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc != 0 || val < 0) /* a negative time limit is meaningless */
+                return rc != 0 ? rc : -EINVAL;
+        obd->obd_recovery_max_time = val;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
+#endif /* CRAY_XT3 */
+
 EXPORT_SYMBOL(lprocfs_register);
 EXPORT_SYMBOL(lprocfs_srch);
 EXPORT_SYMBOL(lprocfs_remove);
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c
index 09b059f945..146a08e612 100644
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -836,6 +836,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                 obd->obd_recovery_start = 0;
                 obd->obd_recovery_end = 0;
                 obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
+#ifdef CRAY_XT3
+                /* b13079: default; tune via the recovery_maxtime proc entry */
+                obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+#endif
         }
 
 out:
diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c
index 17e73ef881..5d09fa7e6a 100644
--- a/lustre/obdfilter/lproc_obdfilter.c
+++ b/lustre/obdfilter/lproc_obdfilter.c
@@ -192,6 +192,10 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "tot_pending",  lprocfs_filter_rd_tot_pending, 0, 0 },
         { "tot_granted",  lprocfs_filter_rd_tot_granted, 0, 0 },
         { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
+#ifdef CRAY_XT3
+        { "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime,
+                              lprocfs_obd_wr_recovery_maxtime, 0},
+#endif
         { "evict_client", 0, lprocfs_wr_evict_client, 0,
                                 &lprocfs_evict_client_fops},
         { "num_exports",  lprocfs_rd_num_exports,   0, 0 },
-- 
GitLab