diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 15470a12af14226efb9a02725a22fddd4e3b130b..9db73b57606f7b8b364a5855c88999e748c873bf 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -579,10 +579,11 @@ struct echo_client_obd { struct lov_qos_oss { struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */ struct list_head lqo_oss_list; /* link to lov_qos */ - __u32 lqo_ost_count; /* number of osts on this oss */ __u64 lqo_bavail; /* total bytes avail on OSS */ __u64 lqo_penalty; /* current penalty */ __u64 lqo_penalty_per_obj; /* penalty decrease every obj*/ + time_t lqo_used; /* last used time, seconds */ + __u32 lqo_ost_count; /* number of osts on this oss */ }; struct ltd_qos { @@ -590,6 +591,7 @@ struct ltd_qos { __u64 ltq_penalty; /* current penalty */ __u64 ltq_penalty_per_obj; /* penalty decrease every obj*/ __u64 ltq_weight; /* net weighting */ + time_t ltq_used; /* last used time, seconds */ unsigned int ltq_usable:1; /* usable for striping */ }; diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index 501fab464459bbaa019db7d09380a4a8975a0701..2e807f3b792a7b317ce20c8db27387be247ad72b 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -161,6 +161,7 @@ static int qos_calc_ppo(struct obd_device *obd) __u64 ba_max, ba_min, temp; __u32 num_active; int rc, i, prio_wide; + time_t now, age; ENTRY; if (!lov->lov_qos.lq_dirty) @@ -183,6 +184,7 @@ static int qos_calc_ppo(struct obd_device *obd) ba_min = (__u64)(-1); ba_max = 0; + now = cfs_time_current_sec(); /* Calculate OST penalty per object */ /* (lov ref taken in alloc_qos) */ for (i = 0; i < lov->desc.ld_tgt_count; i++) { @@ -205,8 +207,17 @@ static int qos_calc_ppo(struct obd_device *obd) lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj = (temp * prio_wide) >> 8; - if (lov->lov_qos.lq_reset == 0) + age = (now - lov->lov_tgts[i]->ltd_qos.ltq_used) >> 3; + if (lov->lov_qos.lq_reset || age > 32 * lov->desc.ld_qos_maxage) lov->lov_tgts[i]->ltd_qos.ltq_penalty = 0; + else if (age > lov->desc.ld_qos_maxage) + /* Decay the penalty by half for every 8x the update + * interval that the device has been idle. That gives + * lots of time for the statfs information to be + * updated (which the penalty is only a proxy for), + * and avoids penalizing OSS/OSTs under light load. */ + lov->lov_tgts[i]->ltd_qos.ltq_penalty >>= + (age / lov->desc.ld_qos_maxage); } num_active = lov->lov_qos.lq_active_oss_count - 1; @@ -215,7 +226,7 @@ static int qos_calc_ppo(struct obd_device *obd) we have to double the OST penalty */ num_active = 1; for (i = 0; i < lov->desc.ld_tgt_count; i++) - if (lov->lov_tgts[i]) + if (lov->lov_tgts[i]) lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj <<= 1; } @@ -224,8 +235,17 @@ static int qos_calc_ppo(struct obd_device *obd) temp = oss->lqo_bavail >> 1; do_div(temp, oss->lqo_ost_count * num_active); oss->lqo_penalty_per_obj = (temp * prio_wide) >> 8; - if (lov->lov_qos.lq_reset == 0) + + age = (now - oss->lqo_used) >> 3; + if (lov->lov_qos.lq_reset || age > 32 * lov->desc.ld_qos_maxage) oss->lqo_penalty = 0; + else if (age > lov->desc.ld_qos_maxage) + /* Decay the penalty by half for every 8x the update + * interval that the device has been idle. That gives + * lots of time for the statfs information to be + * updated (which the penalty is only a proxy for), + * and avoids penalizing OSS/OSTs under light load. */ + oss->lqo_penalty >>= (age / lov->desc.ld_qos_maxage); } lov->lov_qos.lq_dirty = 0; @@ -240,7 +260,7 @@ static int qos_calc_ppo(struct obd_device *obd) /* Difference is less than 20% */ lov->lov_qos.lq_same_space = 1; /* Reset weights for the next time we enter qos mode */ - lov->lov_qos.lq_reset = 0; + lov->lov_qos.lq_reset = 1; } rc = 0; @@ -282,6 +302,10 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) lov->lov_tgts[index]->ltd_qos.ltq_penalty >>= 1; oss->lqo_penalty >>= 1; + /* mark the OSS and OST as recently used */ + lov->lov_tgts[index]->ltd_qos.ltq_used = + oss->lqo_used = cfs_time_current_sec(); + /* Set max penalties for this OST and OSS */ lov->lov_tgts[index]->ltd_qos.ltq_penalty += lov->lov_tgts[index]->ltd_qos.ltq_penalty_per_obj * diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index cc08ecadda84185f08c8d8db3121fc33d8ed42cb..8603df3a8b242c62ad596ad2b7aad8e8836575bd 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2243,9 +2243,9 @@ void lustre_swab_lov_desc (struct lov_desc *ld) __swab32s (&ld->ld_tgt_count); __swab32s (&ld->ld_active_tgt_count); __swab32s (&ld->ld_default_stripe_count); + __swab32s (&ld->ld_pattern); __swab64s (&ld->ld_default_stripe_size); __swab64s (&ld->ld_default_stripe_offset); - __swab32s (&ld->ld_pattern); __swab32s (&ld->ld_qos_maxage); /* uuid endian insensitive */ }