From dab8a7621beb2240db9c1883ff741b1c5584e09b Mon Sep 17 00:00:00 2001 From: johann <johann> Date: Thu, 3 Jul 2008 07:31:29 +0000 Subject: [PATCH] Branch b1_6 b=15950 i=wangdi i=shadow The direct IO path doesn't call check_rpcs to submit a new RPC once one is completed. As a result, some RPCs are stuck in the queue and are never sent. Merge brw_interpret() and brw_interpret_oap(). --- lustre/ChangeLog | 23 +++++++++----- lustre/include/obd_support.h | 1 + lustre/osc/osc_request.c | 61 +++++++++++++----------------------- lustre/tests/sanity.sh | 25 +++++++++++++++ 4 files changed, 62 insertions(+), 48 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 922e531136..8b6c22b72e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -18,10 +18,10 @@ tbd Sun Microsystems, Inc. removed cwd "./" (refer to Bugzilla 14399). * A new quota file format has been introduced in 1.6.5. The format conversion from prior releases is handled transparently, - but releases older than 1.4.12/1.6.5 will not understand this new - format. The automatic format conversion can be avoided by running + but releases older than 1.4.12/1.6.5 don't understand this new + format. The automatic format conversion can be avoided by running the following command on the MDS: - 'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'. + 'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'. For more information, please refer to bugzilla 13904. Severity : enhancement @@ -46,7 +46,7 @@ Severity : major Bugzilla : 15924 Description: do not process already freed flock Details : flock can possibly be freed by another thread before it reaches - to ldlm_flock_completion_ast. + to ldlm_flock_completion_ast. Severity : normal Bugzilla : 14480 @@ -71,7 +71,7 @@ Bugzilla : 14742 Frequency : rare Description: ASSERTION(CheckWriteback(page,cmd)) failed Details : badly clear PG_Writeback bit in ll_ap_completion can produce false - positive assertion. + positive assertion. Severity : normal Frequency : only with broken builds/installations @@ -149,8 +149,8 @@ Details : VM protocol want old IO finished before start new, in this case Severity : normal Frequency : rare Bugzilla : 12888 -Description: mds_mfd_close() ASSERTION(rc == 0) -Details : In mds_mfd_close(), we need protect inode's writecount change +Description: mds_mfd_close() ASSERTION(rc == 0) +Details : In mds_mfd_close(), we need protect inode's writecount change within its orphan write semaphore to prevent possible races. Severity : minor @@ -216,7 +216,7 @@ Severity : normal Bugzilla : 15953 Description: more ldlm soft lockups Details : In ldlm_resource_add_lock(), call to ldlm_resource_dump() - starve other threads from the resource lock for a long time in + starve other threads from the resource lock for a long time in case of long waiting queue, so change the debug level from D_OTHER to the less frequently used D_INFO. @@ -240,6 +240,13 @@ Description: this bug _only_ happens when inode quota limitation is very low Details : if remaining quota equates 1, it is a sign to demonstate that quota is effective now. So least quota qunit should be 2. +Severity : normal +Bugzilla : 15950 +Description: Hung threads in invalidate_inode_pages2_range +Details : The direct IO path doesn't call check_rpcs to submit a new RPC once + one is completed. As a result, some RPCs are stuck in the queue + and are never sent. + ------------------------------------------------------------------------------- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 03ba08f7b9..066a3f3f93 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -228,6 +228,7 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a #define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c +#define OBD_FAIL_OSC_DIO_PAUSE 0x40d #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 00a3e9a2a3..9c5a2db342 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -63,6 +63,7 @@ static quota_interface_t *quota_interface = NULL; extern quota_interface_t osc_quota_interface; static void osc_release_ppga(struct brw_page **ppga, obd_count count); +static int brw_interpret(struct ptlrpc_request *request, void *data, int rc); int osc_cleanup(struct obd_device *obd); static quota_interface_t *quota_interface; @@ -814,7 +815,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body) CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant); if (body->oa.o_valid & OBD_MD_FLGRANT) cli->cl_avail_grant += body->oa.o_grant; - /* waiters are woken in brw_interpret_oap */ + /* waiters are woken in brw_interpret */ client_obd_list_unlock(&cli->cl_loi_list_lock); } @@ -1425,33 +1426,6 @@ int osc_brw_redo_request(struct ptlrpc_request *request, RETURN(0); } -static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) -{ - struct osc_brw_async_args *aa = data; - int i; - ENTRY; - - rc = osc_brw_fini_request(request, rc); - CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc); - if (osc_recoverable_error(rc)) { - rc = osc_brw_redo_request(request, aa); - if (rc == 0) - RETURN(0); - } - client_obd_list_lock(&aa->aa_cli->cl_loi_list_lock); - if (lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE) - aa->aa_cli->cl_w_in_flight--; - else - aa->aa_cli->cl_r_in_flight--; - - for (i = 0; i < aa->aa_page_count; i++) - osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1); - client_obd_list_unlock(&aa->aa_cli->cl_loi_list_lock); - osc_release_ppga(aa->aa_ppga, aa->aa_page_count); - - RETURN(rc); -} - static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, struct brw_page **pga, struct ptlrpc_request_set *set) @@ -1487,6 +1461,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, cli->cl_w_in_flight); ptlrpc_lprocfs_brw(request, OST_WRITE, aa->aa_requested_nob); } + LASSERT(list_empty(&aa->aa_oaps)); if (rc == 0) { request->rq_interpret_reply = brw_interpret; @@ -1497,10 +1472,12 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, else cli->cl_w_in_flight++; client_obd_list_unlock(&cli->cl_loi_list_lock); + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DIO_PAUSE, 3); } else if (cmd == OBD_BRW_WRITE) { client_obd_list_lock(&cli->cl_loi_list_lock); for (i = 0; i < page_count; i++) osc_release_write_grant(cli, pga[i], 0); + osc_wake_cache_waiters(cli); client_obd_list_unlock(&cli->cl_loi_list_lock); } @@ -1957,10 +1934,9 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, EXIT; } -static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc) +static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) { struct osc_brw_async_args *aa = data; - struct osc_async_page *oap, *tmp; struct client_obd *cli; ENTRY; @@ -1983,19 +1959,24 @@ static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc) else cli->cl_r_in_flight--; - /* the caller may re-use the oap after the completion call so - * we need to clean it up a little */ - list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) { - list_del_init(&oap->oap_rpc_item); - osc_ap_completion(cli, aa->aa_oa, oap, 1, rc); + if (!list_empty(&aa->aa_oaps)) { /* from osc_send_oap_rpc() */ + struct osc_async_page *oap, *tmp; + /* the caller may re-use the oap after the completion call so + * we need to clean it up a little */ + list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) { + list_del_init(&oap->oap_rpc_item); + osc_ap_completion(cli, aa->aa_oa, oap, 1, rc); + } + OBDO_FREE(aa->aa_oa); + } else { /* from async_internal() */ + int i; + for (i = 0; i < aa->aa_page_count; i++) + osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1); } - osc_wake_cache_waiters(cli); osc_check_rpcs(cli); client_obd_list_unlock(&cli->cl_loi_list_lock); - OBDO_FREE(aa->aa_oa); - osc_release_ppga(aa->aa_ppga, aa->aa_page_count); RETURN(rc); } @@ -2295,7 +2276,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight", page_count, aa, cli->cl_r_in_flight, cli->cl_w_in_flight); - req->rq_interpret_reply = brw_interpret_oap; + req->rq_interpret_reply = brw_interpret; ptlrpcd_add_req(req); RETURN(1); } @@ -3810,7 +3791,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf) oscc_init(obd); /* We need to allocate a few requests more, because - brw_interpret_oap tries to create new requests before freeing + brw_interpret tries to create new requests before freeing previous ones. Ideally we want to have 2x max_rpcs_in_flight reserved, but I afraid that might be too much wasted RAM in fact, so 2 is just my guess and still should work. */ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 2d2f725095..9cea5c1731 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -4551,6 +4551,31 @@ test_119c() # bug 13099 } run_test 119c "Testing for direct read hitting hole" +test_119d() # bug 15950 +{ + MAX_RPCS_IN_FLIGHT=`$LCTL get_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight` + $LCTL set_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight 1 + BSIZE=1048576 + $SETSTRIPE $DIR/$tfile -i 0 -c 1 || error "setstripe failed" + $DIRECTIO write $DIR/$tfile 0 1 $BSIZE || error "first directio failed" + #define OBD_FAIL_OSC_DIO_PAUSE 0x40d + lctl set_param fail_loc=0x40d + $DIRECTIO write $DIR/$tfile 1 4 $BSIZE & + pid_dio=$! + sleep 1 + cat $DIR/$tfile > /dev/null & + lctl set_param fail_loc=0 + pid_reads=$! + wait $pid_dio + log "the DIO writes have completed, now wait for the reads (should not block very long)" + sleep 2 + [ -n "`ps h -p $pid_reads -o comm`" ] && \ + error "the read rpcs have not completed in 2s" + rm -f $DIR/$tfile + $LCTL set_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight $MAX_RPCS_IN_FLIGHT +} +run_test 119d "The DIO path should try to send a new rpc once one is completed" + test_120a() { mkdir -p $DIR/$tdir [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ -- GitLab