From 08ce3a8f30ab618ba4fc01dbaddaed0ec6e0ee4f Mon Sep 17 00:00:00 2001
From: tianzy <tianzy>
Date: Mon, 17 Nov 2008 06:47:30 +0000
Subject: [PATCH] Branch b1_6 decay qos ost/oss penalties if MDS is not
 creating objects i=nathan i=johann

---
 lustre/include/obd.h         |  4 +++-
 lustre/lov/lov_qos.c         | 32 ++++++++++++++++++++++++++++----
 lustre/ptlrpc/pack_generic.c |  2 +-
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 15470a12af..9db73b5760 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -579,10 +579,11 @@ struct echo_client_obd {
 struct lov_qos_oss {
         struct obd_uuid     lqo_uuid;       /* ptlrpc's c_remote_uuid */
         struct list_head    lqo_oss_list;   /* link to lov_qos */
-        __u32               lqo_ost_count;  /* number of osts on this oss */
         __u64               lqo_bavail;     /* total bytes avail on OSS */
         __u64               lqo_penalty;    /* current penalty */
         __u64               lqo_penalty_per_obj; /* penalty decrease every obj*/
+        time_t              lqo_used;       /* last used time, seconds */
+        __u32               lqo_ost_count;  /* number of osts on this oss */
 };
 
 struct ltd_qos {
@@ -590,6 +591,7 @@ struct ltd_qos {
         __u64               ltq_penalty;     /* current penalty */
         __u64               ltq_penalty_per_obj; /* penalty decrease every obj*/
         __u64               ltq_weight;      /* net weighting */
+        time_t              ltq_used;        /* last used time, seconds */
         unsigned int        ltq_usable:1;    /* usable for striping */
 };
 
diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c
index 501fab4644..2e807f3b79 100644
--- a/lustre/lov/lov_qos.c
+++ b/lustre/lov/lov_qos.c
@@ -161,6 +161,7 @@ static int qos_calc_ppo(struct obd_device *obd)
         __u64 ba_max, ba_min, temp;
         __u32 num_active;
         int rc, i, prio_wide;
+        time_t now, age;
         ENTRY;
 
         if (!lov->lov_qos.lq_dirty)
@@ -183,6 +184,7 @@ static int qos_calc_ppo(struct obd_device *obd)
 
         ba_min = (__u64)(-1);
         ba_max = 0;
+        now = cfs_time_current_sec();
         /* Calculate OST penalty per object */
         /* (lov ref taken in alloc_qos) */
         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
@@ -205,8 +207,17 @@ static int qos_calc_ppo(struct obd_device *obd)
                 lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj =
                         (temp * prio_wide) >> 8;
 
-                if (lov->lov_qos.lq_reset == 0)
+                age = (now - lov->lov_tgts[i]->ltd_qos.ltq_used) >> 3;
+                if (lov->lov_qos.lq_reset || age > 32 * lov->desc.ld_qos_maxage)
                         lov->lov_tgts[i]->ltd_qos.ltq_penalty = 0;
+                else if (age > lov->desc.ld_qos_maxage)
+                        /* Decay the penalty by half for every 8x the update
+                         * interval that the device has been idle.  That gives
+                         * lots of time for the statfs information to be
+                         * updated (which the penalty is only a proxy for),
+                         * and avoids penalizing OSS/OSTs under light load. */
+                        lov->lov_tgts[i]->ltd_qos.ltq_penalty >>=
+                                (age / lov->desc.ld_qos_maxage);
         }
 
         num_active = lov->lov_qos.lq_active_oss_count - 1;
@@ -215,7 +226,7 @@ static int qos_calc_ppo(struct obd_device *obd)
                    we have to double the OST penalty */
                 num_active = 1;
                 for (i = 0; i < lov->desc.ld_tgt_count; i++)
-                        if (lov->lov_tgts[i]) 
+                        if (lov->lov_tgts[i])
                             lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj <<= 1;
         }
 
@@ -224,8 +235,17 @@ static int qos_calc_ppo(struct obd_device *obd)
                 temp = oss->lqo_bavail >> 1;
                 do_div(temp, oss->lqo_ost_count * num_active);
                 oss->lqo_penalty_per_obj = (temp * prio_wide) >> 8;
-                if (lov->lov_qos.lq_reset == 0)
+
+                age = (now - oss->lqo_used) >> 3;
+                if (lov->lov_qos.lq_reset || age > 32 * lov->desc.ld_qos_maxage)
                         oss->lqo_penalty = 0;
+                else if (age > lov->desc.ld_qos_maxage)
+                        /* Decay the penalty by half for every 8x the update
+                         * interval that the device has been idle.  That gives
+                         * lots of time for the statfs information to be
+                         * updated (which the penalty is only a proxy for),
+                         * and avoids penalizing OSS/OSTs under light load. */
+                        oss->lqo_penalty >>= (age / lov->desc.ld_qos_maxage);
         }
 
         lov->lov_qos.lq_dirty = 0;
@@ -240,7 +260,7 @@ static int qos_calc_ppo(struct obd_device *obd)
                 /* Difference is less than 20% */
                 lov->lov_qos.lq_same_space = 1;
                 /* Reset weights for the next time we enter qos mode */
-                lov->lov_qos.lq_reset = 0;
+                lov->lov_qos.lq_reset = 1;
         }
         rc = 0;
 
@@ -282,6 +302,10 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
         lov->lov_tgts[index]->ltd_qos.ltq_penalty >>= 1;
         oss->lqo_penalty >>= 1;
 
+        /* mark the OSS and OST as recently used */
+        lov->lov_tgts[index]->ltd_qos.ltq_used =
+                oss->lqo_used = cfs_time_current_sec();
+
         /* Set max penalties for this OST and OSS */
         lov->lov_tgts[index]->ltd_qos.ltq_penalty +=
                 lov->lov_tgts[index]->ltd_qos.ltq_penalty_per_obj *
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c
index cc08ecadda..8603df3a8b 100644
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -2243,9 +2243,9 @@ void lustre_swab_lov_desc (struct lov_desc *ld)
         __swab32s (&ld->ld_tgt_count);
         __swab32s (&ld->ld_active_tgt_count);
         __swab32s (&ld->ld_default_stripe_count);
+        __swab32s (&ld->ld_pattern);
         __swab64s (&ld->ld_default_stripe_size);
         __swab64s (&ld->ld_default_stripe_offset);
-        __swab32s (&ld->ld_pattern);
         __swab32s (&ld->ld_qos_maxage);
         /* uuid endian insensitive */
 }
-- 
GitLab