From 62d9e07681e462ca633fe7b94547c33e8549220e Mon Sep 17 00:00:00 2001
From: tianzy <tianzy>
Date: Mon, 17 Nov 2008 07:06:45 +0000
Subject: [PATCH] Branch HEAD decay qos ost/oss penalties if MDS is not
 creating objects i=nathan i=johann

---
 lustre/include/obd.h         |  4 +++-
 lustre/lov/lov_qos.c         | 30 +++++++++++++++++++++++++++---
 lustre/ptlrpc/pack_generic.c |  2 +-
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 2a5b2d8701..5dd346e919 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -596,10 +596,11 @@ struct echo_client_obd {
 struct lov_qos_oss {
         struct obd_uuid     lqo_uuid;       /* ptlrpc's c_remote_uuid */
         struct list_head    lqo_oss_list;   /* link to lov_qos */
-        __u32               lqo_ost_count;  /* number of osts on this oss */
         __u64               lqo_bavail;     /* total bytes avail on OSS */
         __u64               lqo_penalty;    /* current penalty */
         __u64               lqo_penalty_per_obj; /* penalty decrease every obj*/
+        time_t              lqo_used;       /* last used time, seconds */
+        __u32               lqo_ost_count;  /* number of osts on this oss */
 };
 
 struct ltd_qos {
@@ -607,6 +608,7 @@ struct ltd_qos {
         __u64               ltq_penalty;     /* current penalty */
         __u64               ltq_penalty_per_obj; /* penalty decrease every obj*/
         __u64               ltq_weight;      /* net weighting */
+        time_t              ltq_used;        /* last used time, seconds */
         unsigned int        ltq_usable:1;    /* usable for striping */
 };
 
diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c
index b292fc3678..08aa2383d2 100644
--- a/lustre/lov/lov_qos.c
+++ b/lustre/lov/lov_qos.c
@@ -161,6 +161,7 @@ static int qos_calc_ppo(struct obd_device *obd)
         __u64 ba_max, ba_min, temp;
         __u32 num_active;
         int rc, i, prio_wide;
+        time_t now, age;
         ENTRY;
 
         if (!lov->lov_qos.lq_dirty)
@@ -183,6 +184,7 @@ static int qos_calc_ppo(struct obd_device *obd)
 
         ba_min = (__u64)(-1);
         ba_max = 0;
+        now = cfs_time_current_sec();
         /* Calculate OST penalty per object */
         /* (lov ref taken in alloc_qos) */
         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
@@ -205,8 +207,17 @@ static int qos_calc_ppo(struct obd_device *obd)
                 lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj =
                         (temp * prio_wide) >> 8;
 
-                if (lov->lov_qos.lq_reset == 0)
+                age = (now - lov->lov_tgts[i]->ltd_qos.ltq_used) >> 3;
+                if (lov->lov_qos.lq_reset || age > 32 * lov->desc.ld_qos_maxage)
                         lov->lov_tgts[i]->ltd_qos.ltq_penalty = 0;
+                else if (age > lov->desc.ld_qos_maxage)
+                        /* Decay the penalty by half for every 8x the update
+                         * interval that the device has been idle.  That gives
+                         * lots of time for the statfs information to be
+                         * updated (which the penalty is only a proxy for),
+                         * and avoids penalizing OSS/OSTs under light load. */
+                        lov->lov_tgts[i]->ltd_qos.ltq_penalty >>=
+                                (age / lov->desc.ld_qos_maxage);
         }
 
         num_active = lov->lov_qos.lq_active_oss_count - 1;
@@ -226,8 +237,17 @@ static int qos_calc_ppo(struct obd_device *obd)
                 temp = oss->lqo_bavail >> 1;
                 do_div(temp, oss->lqo_ost_count * num_active);
                 oss->lqo_penalty_per_obj = (temp * prio_wide) >> 8;
-                if (lov->lov_qos.lq_reset == 0)
+
+                age = (now - oss->lqo_used) >> 3;
+                if (lov->lov_qos.lq_reset || age > 32 * lov->desc.ld_qos_maxage)
                         oss->lqo_penalty = 0;
+                else if (age > lov->desc.ld_qos_maxage)
+                        /* Decay the penalty by half for every 8x the update
+                         * interval that the device has been idle.  That gives
+                         * lots of time for the statfs information to be
+                         * updated (which the penalty is only a proxy for),
+                         * and avoids penalizing OSS/OSTs under light load. */
+                        oss->lqo_penalty >>= (age / lov->desc.ld_qos_maxage);
         }
 
         lov->lov_qos.lq_dirty = 0;
@@ -242,7 +262,7 @@ static int qos_calc_ppo(struct obd_device *obd)
                 /* Difference is less than 20% */
                 lov->lov_qos.lq_same_space = 1;
                 /* Reset weights for the next time we enter qos mode */
-                lov->lov_qos.lq_reset = 0;
+                lov->lov_qos.lq_reset = 1;
         }
         rc = 0;
 
@@ -285,6 +305,10 @@ static int qos_used(struct lov_obd *lov, struct ost_pool *osts,
         lov->lov_tgts[index]->ltd_qos.ltq_penalty >>= 1;
         oss->lqo_penalty >>= 1;
 
+        /* mark the OSS and OST as recently used */
+        lov->lov_tgts[index]->ltd_qos.ltq_used =
+                oss->lqo_used = cfs_time_current_sec();
+
         /* Set max penalties for this OST and OSS */
         lov->lov_tgts[index]->ltd_qos.ltq_penalty +=
                 lov->lov_tgts[index]->ltd_qos.ltq_penalty_per_obj *
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c
index 61fbe258f8..2b9466a04b 100644
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -1936,9 +1936,9 @@ void lustre_swab_lov_desc (struct lov_desc *ld)
         __swab32s (&ld->ld_tgt_count);
         __swab32s (&ld->ld_active_tgt_count);
         __swab32s (&ld->ld_default_stripe_count);
+        __swab32s (&ld->ld_pattern);
         __swab64s (&ld->ld_default_stripe_size);
         __swab64s (&ld->ld_default_stripe_offset);
-        __swab32s (&ld->ld_pattern);
         __swab32s (&ld->ld_qos_maxage);
         /* uuid endian insensitive */
 }
-- 
GitLab