diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index 8450683793719c292d0375b7ea9a59863146f740..4ee524fc153b4ff01cc78a8e3ca9b50cbadb7a18 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -143,6 +143,9 @@ typedef enum {
 #define LDLM_FL_BL_AST         0x10000000
 #define LDLM_FL_BL_DONE        0x20000000
 
+/* measure lock contention and return -EUSERS if locking contention is high */
+#define LDLM_FL_DENY_ON_CONTENTION 0x40000000
+
 /* The blocking callback is overloaded to perform two functions.  These flags
  * indicate which operation should be performed. */
 #define LDLM_CB_BLOCKING 1
@@ -294,9 +297,9 @@ typedef enum {
 
 /* default values for the "max_nolock_size", "contention_time"
  * and "contended_locks" namespace tunables */
-#define NS_DEFAULT_MAX_NOLOCK_BYTES 131072
+#define NS_DEFAULT_MAX_NOLOCK_BYTES 0
 #define NS_DEFAULT_CONTENTION_SECONDS 2
-#define NS_DEFAULT_CONTENDED_LOCKS 0
+#define NS_DEFAULT_CONTENDED_LOCKS 32
 
 struct ldlm_namespace {
         char *ns_name;
@@ -549,9 +552,6 @@ int ldlm_replay_locks(struct obd_import *imp);
 void ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *,
                            ldlm_iterator_t iter, void *data);
 
-/* measure lock contention and return -EBUSY if locking contention is high */
-#define LDLM_FL_DENY_ON_CONTENTION 0x10000000
-
 /* ldlm_flock.c */
 int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data);
 
diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c
index 90218859763c4bcec6ea84dd22d137adfef595a7..30631583a0da6cf77848e8777c1f1c17ab08c6f2 100644
--- a/lustre/ldlm/ldlm_extent.c
+++ b/lustre/ldlm/ldlm_extent.c
@@ -227,11 +227,14 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
         __u64 req_end = req->l_req_extent.end;
         int compat = 1;
         int scan = 0;
+        int check_contention;
         ENTRY;
 
         lockmode_verify(req_mode);
 
         list_for_each(tmp, queue) {
+                check_contention = 1;
+
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 
                 if (req == lock)
@@ -342,16 +345,21 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                             lock->l_policy_data.l_extent.start > req_end) {
                                 /* if a non group lock doesn't overlap skip it */
                                 continue;
-                        }
+                        } else if (lock->l_req_extent.end < req_start ||
+                                   lock->l_req_extent.start > req_end)
+                                /* false contention, the requests doesn't really overlap */
+                                check_contention = 0;
 
                         if (!work_list)
                                 RETURN(0);
 
                         /* don't count conflicting glimpse locks */
-                        *contended_locks +=
-                                !(lock->l_req_mode == LCK_PR &&
-                                  lock->l_policy_data.l_extent.start == 0 &&
-                                  lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF);
+                        if (lock->l_req_mode == LCK_PR &&
+                            lock->l_policy_data.l_extent.start == 0 &&
+                            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
+                                check_contention = 0;
+
+                        *contended_locks += check_contention;
 
                         compat = 0;
                         if (lock->l_blocking_ast)
@@ -364,7 +372,7 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                     req->l_req_mode != LCK_GROUP &&
                     req_end - req_start <=
                     req->l_resource->lr_namespace->ns_max_nolock_size)
-                        GOTO(destroylock, compat = -EBUSY);
+                        GOTO(destroylock, compat = -EUSERS);
 
         RETURN(compat);
 destroylock:
diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index a79b7d7a8a2040f8e549e4c5439a9ade32a254c8..365c4e34df25761a2b0fea55e20597795df4fc01 100644
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -1225,7 +1225,7 @@ static int ll_file_get_tree_lock(struct ll_lock_tree *tree, struct file *file,
         rc = ll_tree_lock(tree, node, buf, count, ast_flags);
         if (rc == 0)
                 tree_locked = 1;
-        else if (rc == -EBUSY)
+        else if (rc == -EUSERS)
                 ll_set_file_contended(inode);
         else
                 GOTO(out, rc);
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 4c3094fa36fa517d83cbb3a3d7f5b487fbe0bffc..9eb04f74cc6fea52721c46c5dc2ad0925a490a39 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -403,7 +403,8 @@ struct ll_async_page {
                          llap_defer_uptodate:1,
                          llap_origin:3,
                          llap_ra_used:1,
-                         llap_ignore_quota:1;
+                         llap_ignore_quota:1,
+                         llap_lockless_io_page:1;
         void            *llap_cookie;
         struct page     *llap_page;
         struct list_head llap_pending_write;
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c
index 8abfbee277c8e9d9da02b07b48870f3882d2df2d..691ccd40bf5e291d21d1797c29b1e4cb2cc0c810 100644
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -645,7 +645,8 @@ static struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
                 csum = crc32_le(csum, kmap(page), CFS_PAGE_SIZE);
                 kunmap(page);
                 if (origin == LLAP_ORIGIN_READAHEAD ||
-                    origin == LLAP_ORIGIN_READPAGE) {
+                    origin == LLAP_ORIGIN_READPAGE ||
+                    origin == LLAP_ORIGIN_LOCKLESS_IO) {
                         llap->llap_checksum = 0;
                 } else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
                            llap->llap_checksum == 0) {
@@ -899,11 +900,7 @@ int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
         RETURN(ret);
 }
 
-/* the kernel calls us here when a page is unhashed from the page cache.
- * the page will be locked and the kernel is holding a spinlock, so
- * we need to be careful.  we're just tearing down our book-keeping
- * here. */
-void ll_removepage(struct page *page)
+static void __ll_put_llap(struct page *page)
 {
         struct inode *inode = page->mapping->host;
         struct obd_export *exp;
@@ -912,17 +909,6 @@ void ll_removepage(struct page *page)
         int rc;
         ENTRY;
 
-        LASSERT(!in_interrupt());
-
-        /* sync pages or failed read pages can leave pages in the page
-         * cache that don't have our data associated with them anymore */
-        if (page_private(page) == 0) {
-                EXIT;
-                return;
-        }
-
-        LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
-
         exp = ll_i2obdexp(inode);
         if (exp == NULL) {
                 CERROR("page %p ind %lu gave null export\n", page, page->index);
@@ -955,6 +941,31 @@ void ll_removepage(struct page *page)
         sbi->ll_async_page_count--;
         spin_unlock(&sbi->ll_lock);
         OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
+
+        EXIT;
+}
+
+/* the kernel calls us here when a page is unhashed from the page cache.
+ * the page will be locked and the kernel is holding a spinlock, so
+ * we need to be careful.  we're just tearing down our book-keeping
+ * here. */
+void ll_removepage(struct page *page)
+{
+        ENTRY;
+
+        LASSERT(!in_interrupt());
+
+        /* sync pages or failed read pages can leave pages in the page
+         * cache that don't have our data associated with them anymore */
+        if (page_private(page) == 0) {
+                EXIT;
+                return;
+        }
+
+        LASSERT(!llap_cast_private(page)->llap_lockless_io_page);
+        LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
+        __ll_put_llap(page);
+
         EXIT;
 }
 
@@ -1530,13 +1541,13 @@ static void ll_file_put_pages(struct page **pages, int numpages)
         for (i = 0, pp = pages; i < numpages; i++, pp++) {
                 if (*pp) {
                         LL_CDEBUG_PAGE(D_PAGE, (*pp), "free\n");
-                        ll_removepage(*pp);
+                        __ll_put_llap(*pp);
                         if (page_private(*pp))
                                 CERROR("the llap wasn't freed\n");
                         (*pp)->mapping = NULL;
                         if (page_count(*pp) != 1)
                                 CERROR("page %p, flags %#lx, count %i, private %p\n",
-                                       (*pp), (*pp)->flags, page_count(*pp),
+                                       (*pp), (unsigned long)(*pp)->flags, page_count(*pp),
                                        (void*)page_private(*pp));
                         __free_pages(*pp, 0);
                 }
@@ -1570,6 +1581,7 @@ static struct page **ll_file_prepare_pages(int numpages, struct inode *inode,
                 llap = llap_from_page(page, LLAP_ORIGIN_LOCKLESS_IO);
                 if (IS_ERR(llap))
                         GOTO(err, rc = PTR_ERR(llap));
+                llap->llap_lockless_io_page = 1;
         }
         RETURN(pages);
 err:
@@ -1578,10 +1590,13 @@ err:
 }
 
 static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
-                                  char *buf, loff_t pos, size_t count, int rw)
+                                  char *buf, loff_t pos, size_t count,
+                                  int rw)
 {
         ssize_t amount = 0;
         int i;
+        int updatechecksum = ll_i2sbi(pages[0]->mapping->host)->ll_flags &
+                             LL_SBI_CHECKSUM;
         ENTRY;
 
         for (i = 0; i < numpages; i++) {
@@ -1595,10 +1610,18 @@ static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
                        "buf = %p, bytes = %u\n",
                        (rw == WRITE) ? "CFU" : "CTU",
                        vaddr + offset, buf, bytes);
-                if (rw == WRITE)
+                if (rw == WRITE) {
                         left = copy_from_user(vaddr + offset, buf, bytes);
-                else
+                        if (updatechecksum) {
+                                struct ll_async_page *llap;
+
+                                llap = llap_cast_private(pages[i]);
+                                llap->llap_checksum = crc32_le(0, vaddr,
+                                                               CFS_PAGE_SIZE);
+                        }
+                } else {
                         left = copy_to_user(buf, vaddr + offset, bytes);
+                }
                 kunmap(pages[i]);
                 amount += bytes;
                 if (left) {
diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c
index a8e43dcc858ed45ee11a833535ac9cda12fdd691..a59ddd4311f5ba2ab79625340a092be91cb94e2d 100644
--- a/lustre/lov/lov_request.c
+++ b/lustre/lov/lov_request.c
@@ -176,7 +176,8 @@ int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
                 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
                 if (lov->lov_tgts[req->rq_idx] &&
                     lov->lov_tgts[req->rq_idx]->ltd_active) {
-                        if (rc != -EINTR)
+                        /* -EUSERS used by OST to report file contention */
+                        if (rc != -EINTR && rc != -EUSERS)
                                 CERROR("enqueue objid "LPX64" subobj "
                                        LPX64" on OST idx %d: rc %d\n",
                                        set->set_oi->oi_md->lsm_object_id,
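Note on the contention heuristic above: a conflicting extent lock is only counted toward *contended_locks when its requested range genuinely overlaps the new request and it is not a whole-file PR glimpse lock (start 0, end OBD_OBJECT_EOF); once the resource looks contended and the request is no larger than the "max_nolock_size" tunable, the enqueue is denied with -EUSERS so the client falls back to lockless I/O. The C sketch below is a simplified, standalone illustration of that decision, not the Lustre code itself: the comparison against the "contended_locks" tunable is assumed from the namespace defaults (the hunk shows only part of the deny condition), and every name here (contention_ctx, count_conflict, deny_on_contention) is hypothetical.

#include <errno.h>
#include <stdio.h>

/* Hypothetical, simplified view of the request and the namespace tunables. */
struct contention_ctx {
        unsigned long long req_start, req_end;   /* requested extent */
        int contended_locks;                     /* running count of conflicts */
        int ns_contended_locks;                  /* "contended_locks" tunable */
        unsigned long long ns_max_nolock_size;   /* "max_nolock_size" tunable */
};

/* Count one conflicting lock the way the patched loop does: skip it when its
 * requested range does not really overlap ours (false contention) or when it
 * is a whole-file PR glimpse lock. */
void count_conflict(struct contention_ctx *c,
                    unsigned long long lock_req_start,
                    unsigned long long lock_req_end,
                    int is_whole_file_pr_glimpse)
{
        int check_contention = 1;

        if (lock_req_end < c->req_start || lock_req_start > c->req_end)
                check_contention = 0;
        if (is_whole_file_pr_glimpse)
                check_contention = 0;

        c->contended_locks += check_contention;
}

/* Deny with -EUSERS when the count passes the (assumed) threshold and the
 * request is small enough to be served locklessly; compare the patched GOTO
 * in ldlm_extent_compat_queue(). */
int deny_on_contention(const struct contention_ctx *c)
{
        if (c->contended_locks > c->ns_contended_locks &&
            c->req_end - c->req_start <= c->ns_max_nolock_size)
                return -EUSERS;
        return 0;
}

int main(void)
{
        struct contention_ctx c = {
                .req_start = 0, .req_end = 4095,
                .ns_contended_locks = 32,          /* NS_DEFAULT_CONTENDED_LOCKS */
                .ns_max_nolock_size = 128 * 1024,  /* a tuned max_nolock_size;
                                                    * the new default is 0 (off) */
        };
        int i;

        for (i = 0; i < 40; i++)                   /* 40 truly overlapping conflicts */
                count_conflict(&c, 0, 8191, 0);

        /* On Linux EUSERS is 87, so this prints contended_locks=40 rc=-87. */
        printf("contended_locks=%d rc=%d\n", c.contended_locks,
               deny_on_contention(&c));
        return 0;
}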