From 33d8a07a33e2bcb6b04a3650482931fd6e90af90 Mon Sep 17 00:00:00 2001 From: jxiong <jxiong> Date: Wed, 5 Nov 2008 02:47:41 +0000 Subject: [PATCH] b=15715 r=adilger,green Fixed the race between destroying and enqueuing an ldlm lock on the OST side. --- lustre/include/interval_tree.h | 11 +++++++++-- lustre/include/obd_support.h | 1 + lustre/ldlm/interval_tree.c | 4 ++++ lustre/ldlm/ldlm_extent.c | 25 +++++++++++++++++++++---- lustre/tests/sanityN.sh | 21 +++++++++++++++++++-- 5 files changed, 54 insertions(+), 8 deletions(-) diff --git a/lustre/include/interval_tree.h b/lustre/include/interval_tree.h index 117d5c0a9e..b50278b085 100644 --- a/lustre/include/interval_tree.h +++ b/lustre/include/interval_tree.h @@ -48,8 +48,10 @@ struct interval_node { struct interval_node *in_left; struct interval_node *in_right; struct interval_node *in_parent; - __u8 in_color; - __u8 res1[7]; /* tags, 8-bytes aligned */ + unsigned in_color:1, + in_intree:1, /** set if the node is in tree */ + in_res1:30; + __u8 in_res2[4]; /** tags, 8-bytes aligned */ __u64 in_max_high; struct interval_node_extent { __u64 start; @@ -62,6 +64,11 @@ enum interval_iter { INTERVAL_ITER_STOP = 2 }; +static inline int interval_is_intree(struct interval_node *node) +{ + return node->in_intree == 1; +} + static inline __u64 interval_low(struct interval_node *node) { return node->in_extent.start; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 0507ce63da..fbd4a9650a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -253,6 +253,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LDLM_CLOSE_THREAD 0x313 #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 #define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 +#define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x315 diff --git a/lustre/ldlm/interval_tree.c b/lustre/ldlm/interval_tree.c index 68480bb873..60dcbeb061 100644 --- 
a/lustre/ldlm/interval_tree.c +++ b/lustre/ldlm/interval_tree.c @@ -389,6 +389,7 @@ struct interval_node *interval_insert(struct interval_node *node, struct interval_node **p, *parent = NULL; ENTRY; + LASSERT(!interval_is_intree(node)); p = root; while (*p) { parent = *p; @@ -412,6 +413,7 @@ struct interval_node *interval_insert(struct interval_node *node, *p = node; interval_insert_color(node, root); + node->in_intree = 1; RETURN(NULL); } @@ -527,6 +529,8 @@ void interval_erase(struct interval_node *node, int color; ENTRY; + LASSERT(interval_is_intree(node)); + node->in_intree = 0; if (!node->in_left) { child = node->in_right; } else if (!node->in_right) { diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index 1541416591..ce51cd3071 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -49,6 +49,7 @@ #include <lustre_dlm.h> #include <obd_support.h> #include <obd.h> +#include <obd_class.h> #include <lustre_lib.h> #include "ldlm_internal.h" @@ -707,9 +708,24 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq, ldlm_resource_add_lock(res, &res->lr_waiting, lock); unlock_res(res); rc = ldlm_run_ast_work(&rpc_list, LDLM_WORK_BL_AST); - lock_res(res); + if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_OST_FAIL_RACE) && + !ns_is_client(res->lr_namespace)) + class_fail_export(lock->l_export); + + lock_res(res); if (rc == -ERESTART) { + + /* 15715: The lock was granted and destroyed after + * resource lock was dropped. Interval node was freed + * in ldlm_lock_destroy. Anyway, this always happens + * when a client is being evicted. So it would be + * ok to return an error. -jay */ + if (lock->l_destroyed) { + *err = -EAGAIN; + GOTO(out, rc = -EAGAIN); + } + /* lock was granted while resource was unlocked. 
*/ if (lock->l_granted_mode == lock->l_req_mode) { /* bug 11300: if the lock has been granted, @@ -798,6 +814,7 @@ void ldlm_interval_free(struct ldlm_interval *node) { if (node) { LASSERT(list_empty(&node->li_group)); + LASSERT(!interval_is_intree(&node->li_node)); OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node)); } } @@ -850,6 +867,7 @@ void ldlm_extent_add_lock(struct ldlm_resource *res, node = lock->l_tree_node; LASSERT(node != NULL); + LASSERT(!interval_is_intree(&node->li_node)); idx = lock_mode_to_index(lock->l_granted_mode); LASSERT(lock->l_granted_mode == 1 << idx); @@ -877,14 +895,13 @@ void ldlm_extent_add_lock(struct ldlm_resource *res, void ldlm_extent_unlink_lock(struct ldlm_lock *lock) { struct ldlm_resource *res = lock->l_resource; - struct ldlm_interval *node; + struct ldlm_interval *node = lock->l_tree_node; struct ldlm_interval_tree *tree; int idx; - if (lock->l_granted_mode != lock->l_req_mode) + if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */ return; - LASSERT(lock->l_tree_node != NULL); idx = lock_mode_to_index(lock->l_granted_mode); LASSERT(lock->l_granted_mode == 1 << idx); tree = &res->lr_itree[idx]; diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 3341f434a1..d4b2ac8513 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -598,7 +598,7 @@ test_30() { #bug #11110 run_test 30 "recreate file race =========" -test_31() { +test_31a() { mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" writes=`LANG=C dd if=/dev/zero of=$DIR/$tdir/$tfile count=1 2>&1 | awk 'BEGIN { FS="+" } /out/ {print $1}'` @@ -608,7 +608,24 @@ test_31() { awk 'BEGIN { FS="+" } /in/ {print $1}'` [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes } -run_test 31 "voluntary cancel / blocking ast race==============" +run_test 31a "voluntary cancel / blocking ast race==============" + +test_31b() { + remote_ost || { skip "local OST" && return 0; } + remote_ost_nodsh && skip "remote OST 
w/o dsh" && return 0 + mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" + lfs setstripe $DIR/$tdir/$tfile -i 0 -c 1 + cp /etc/hosts $DIR/$tdir/$tfile + #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 + lctl set_param fail_loc=0x314 + #define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 + do_facet ost1 lctl set_param fail_loc=0x316 + # Don't crash kernel + cat $DIR2/$tdir/$tfile > /dev/null 2>&1 + lctl set_param fail_loc=0 + do_facet ost1 lctl set_param fail_loc=0 +} +run_test 31b "voluntary OST cancel / blocking ast race==============" # enable/disable lockless truncate feature, depending on the arg 0/1 enable_lockless_truncate() { -- GitLab