Commit 05f326a7 authored by Vladimir Saveliev, committed by Oleg Drokin
Browse files

LU-12687 osc: consume grants for direct I/O

The new IO engine implementation dropped grant consumption for direct I/O
writes. This led to a premature out-of-space condition during
direct I/O. The following illustrates the problem:
  # OSTSIZE=100000 sh llmount.sh
  # dd if=/dev/zero of=/mnt/lustre/file bs=4k count=100 oflag=direct
  dd: error writing ‘/mnt/lustre/file’: No space left on device

Consume grants for direct I/O.

Try to consume grants in osc_queue_sync_pages() when it is called for
pages which are being written in direct I/O.

Tests are added to verify grant consumption in buffered and direct i/o
and to verify direct i/o overwrite when ost is full.
The overwrite test is for ldiskfs only as zfs is unable to overwrite
when it is full.

Fixes: 9fe4b52a ("LU-1030 osc: new IO engine implementation")
Signed-off-by: Vladimir Saveliev <c17830@cray.com>
Change-Id: I9a199452c564e8e8ad02f79231e8481166f3666e
Cray-bug-id: LUS-7036
Reviewed-on: https://review.whamcloud.com/35896

Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
parent a2e38010
......@@ -487,6 +487,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721
#define OBD_FAIL_TGT_REPLY_DATA_RACE 0x722
#define OBD_FAIL_TGT_RECOVERY_CONNECT 0x724
#define OBD_FAIL_TGT_NO_GRANT 0x725
#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
......
......@@ -2610,6 +2610,28 @@ int osc_queue_sync_pages(const struct lu_env *env, const struct cl_io *io,
ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK);
ext->oe_ndelay = !!(brw_flags & OBD_BRW_NDELAY);
ext->oe_dio = !!(brw_flags & OBD_BRW_NOCACHE);
if (ext->oe_dio && !ext->oe_rw) { /* direct io write */
int grants;
int ppc;
ppc = 1 << (cli->cl_chunkbits - PAGE_SHIFT);
grants = cli->cl_grant_extent_tax;
grants += (1 << cli->cl_chunkbits) *
((page_count + ppc - 1) / ppc);
spin_lock(&cli->cl_loi_list_lock);
if (osc_reserve_grant(cli, grants) == 0) {
list_for_each_entry(oap, list, oap_pending_item) {
osc_consume_write_grant(cli,
&oap->oap_brw_page);
atomic_long_inc(&obd_dirty_pages);
}
osc_unreserve_grant_nolock(cli, grants, 0);
ext->oe_grants = grants;
}
spin_unlock(&cli->cl_loi_list_lock);
}
ext->oe_is_rdma_only = !!(brw_flags & OBD_BRW_RDMA_ONLY);
ext->oe_nr_pages = page_count;
ext->oe_mppr = mppr;
......
......@@ -902,6 +902,9 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
ENTRY;
if (OBD_FAIL_CHECK(OBD_FAIL_TGT_NO_GRANT))
RETURN(0);
/* When tgd_grant_compat_disable is set, we don't grant any space to
* clients not supporting OBD_CONNECT_GRANT_PARAM.
* Otherwise, space granted to such a client is inflated since it
......
......@@ -140,6 +140,9 @@ start_mds() {
for num in $(seq $MDSCOUNT); do
start_mdt $num $@ || return 94
done
for num in $(seq $MDSCOUNT); do
wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds${num} FULL
done
}
start_mgsmds() {
......@@ -165,6 +168,7 @@ stop_mgs() {
# Announce and start the ost1 service, then run wait_clients_import_state
# for ost1 with state FULL on $CLIENTS (or $HOSTNAME when CLIENTS is unset
# or empty).
# Any arguments are passed to start() after $OST_MOUNT_OPTS.
# Returns 95 when start() fails.
start_ost() {
	echo "start ost1 service on $(facet_active_host ost1)"
	# $@ is left unquoted on purpose: callers' arguments are word-split
	# exactly as the mount options are
	if ! start ost1 $(ostdevname 1) $OST_MOUNT_OPTS $@; then
		return 95
	fi
	wait_clients_import_state ${CLIENTS:-$HOSTNAME} ost1 FULL
}
stop_ost() {
......@@ -176,6 +180,7 @@ stop_ost() {
# Announce and start the ost2 service, then run wait_clients_import_state
# for ost2 with state FULL on $CLIENTS (or $HOSTNAME when CLIENTS is unset
# or empty).
# Any arguments are passed to start() after $OST_MOUNT_OPTS.
# Returns 92 when start() fails.
start_ost2() {
	echo "start ost2 service on $(facet_active_host ost2)"
	# $@ is left unquoted on purpose: callers' arguments are word-split
	# exactly as the mount options are
	if ! start ost2 $(ostdevname 2) $OST_MOUNT_OPTS $@; then
		return 92
	fi
	wait_clients_import_state ${CLIENTS:-$HOSTNAME} ost2 FULL
}
stop_ost2() {
......@@ -9053,6 +9058,35 @@ test_126() {
}
run_test 126 "mount in parallel shouldn't cause a crash"
# Write to a file on OST index 0 with direct I/O until dd fails, check that
# the osc then reports (almost) no available space, and finally overwrite
# the data already written, again with direct I/O.
# Skipped unless ost1 is ldiskfs.
test_127() {
[[ "$ost1_FSTYPE" == ldiskfs ]] || skip "ldiskfs only test"
# NOTE(review): presumably cleanup/setup give the test a freshly started
# filesystem - confirm against the helpers
cleanup
setup
zconf_umount_clients $RCLIENTS $MOUNT
wait_osp_active ost ${FSNAME}-OST0000 0 1
local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
# kbytesavail is captured into an array; the plain $avail1/$avail2 uses
# below read element 0
local avail1=($($LCTL get_param -n osc.${osc_tgt}.kbytesavail))
$LFS setstripe -i 0 $DIR/$tfile || error "failed creating $DIR/$tfile"
# no count= is given, so dd keeps writing until it fails; that failure is
# expected and deliberately ignored
dd if=/dev/zero of=$DIR/$tfile bs=1M oflag=direct || true
local avail2=($($LCTL get_param -n osc.${osc_tgt}.kbytesavail))
# integer arithmetic: the check fires once the remaining space is 2% or
# more of what was available before the write
if ((avail2 * 100 / avail1 > 1)); then
lfs df $DIR
ls -l $DIR/$tfile
error "more than 1% space left: before=$avail1 after=$avail2"
fi
# size of the written file in whole MiB
local mbs=$(($(stat -c %s $DIR/$tfile) / (1024 * 1024)))
# conv=notrunc keeps the existing file, so the $mbs MiB written here all
# land inside the range that was already written above
dd if=/dev/zero of=$DIR/$tfile bs=1M count=$mbs conv=notrunc \
oflag=direct || error "overwrite failed"
}
run_test 127 "direct io overwrite on full ost"
# Run "stop mgs" only when combined_mgs_mds returns non-zero.
# NOTE(review): presumably the MGS is a separately started facet only when
# it is not combined with the MDS - confirm against combined_mgs_mds
if ! combined_mgs_mds ; then
stop mgs
fi
......
......@@ -7839,18 +7839,25 @@ test_64c() {
}
run_test 64c "verify grant shrink"
 
# Print the second field of every line of the osc.<tgt>.import parameter
# that matches <param>.
# usage: import_param <osc target> <pattern>
#   $1 - osc device name used to build the parameter path osc.$1.import
#   $2 - awk regular expression selecting the line(s) of interest
# Output: the matching lines' second whitespace-separated field on stdout.
import_param() {
	local tgt=$1
	local param=$2

	# The pattern is handed to awk with -v instead of being spliced into
	# the awk program text, so a pattern containing '/' cannot break the
	# program. $LCTL stays unquoted so that it may carry extra words.
	$LCTL get_param "osc.$tgt.import" |
		awk -v pat="$param" '$0 ~ pat { print $2 }'
}
# this does exactly what osc_request.c:osc_announce_cached() does in
# order to calculate max amount of grants to ask from server
want_grant() {
local tgt=$1
 
local nrpages=$($LCTL get_param -n osc.${tgt}.max_pages_per_rpc)
local rpc_in_flight=$($LCTL get_param -n osc.${tgt}.max_rpcs_in_flight)
local nrpages=$($LCTL get_param -n osc.$tgt.max_pages_per_rpc)
local rpc_in_flight=$($LCTL get_param -n osc.$tgt.max_rpcs_in_flight)
 
((rpc_in_flight ++));
((rpc_in_flight++));
nrpages=$((nrpages * rpc_in_flight))
 
local dirty_max_pages=$($LCTL get_param -n osc.${tgt}.max_dirty_mb)
local dirty_max_pages=$($LCTL get_param -n osc.$tgt.max_dirty_mb)
 
dirty_max_pages=$((dirty_max_pages * 1024 * 1024 / PAGE_SIZE))
 
......@@ -7858,13 +7865,11 @@ want_grant() {
local undirty=$((nrpages * PAGE_SIZE))
 
local max_extent_pages
max_extent_pages=$($LCTL get_param osc.${tgt}.import |
grep grant_max_extent_size | awk '{print $2}')
max_extent_pages=$(import_param $tgt grant_max_extent_size)
max_extent_pages=$((max_extent_pages / PAGE_SIZE))
local nrextents=$(((nrpages + max_extent_pages - 1) / max_extent_pages))
local grant_extent_tax
grant_extent_tax=$($LCTL get_param osc.${tgt}.import |
grep grant_extent_tax | awk '{print $2}')
grant_extent_tax=$(import_param $tgt grant_extent_tax)
 
undirty=$((undirty + nrextents * grant_extent_tax))
 
......@@ -7878,56 +7883,171 @@ grant_chunk() {
local max_brw_size
local grant_extent_tax
 
max_brw_size=$($LCTL get_param osc.${tgt}.import |
grep max_brw_size | awk '{print $2}')
max_brw_size=$(import_param $tgt max_brw_size)
 
grant_extent_tax=$($LCTL get_param osc.${tgt}.import |
grep grant_extent_tax | awk '{print $2}')
grant_extent_tax=$(import_param $tgt grant_extent_tax)
 
echo $(((max_brw_size + grant_extent_tax) * 2))
}
 
test_64d() {
[ $OST1_VERSION -lt $(version_code 2.10.56) ] &&
[ $OST1_VERSION -ge $(version_code 2.10.56) ] ||
skip "OST < 2.10.55 doesn't limit grants enough"
 
local tgt=$($LCTL dl | grep "0000-osc-[^mM]" | awk '{print $4}')
local file=$DIR/$tfile
local tgt=$($LCTL dl | awk '/OST0000-osc-[^mM]/ { print $4 }')
 
[[ $($LCTL get_param osc.${tgt}.import |
grep "connect_flags:.*grant_param") ]] ||
[[ "$($LCTL get_param osc.${tgt}.import)" =~ "grant_param" ]] ||
skip "no grant_param connect flag"
 
local olddebug=$($LCTL get_param -n debug 2> /dev/null)
local olddebug="$($LCTL get_param -n debug 2> /dev/null)"
$LCTL set_param -n -n debug="$OLDDEBUG" || true
stack_trap "$LCTL set_param -n debug='$olddebug'" EXIT
 
$LCTL set_param debug="$OLDDEBUG" 2> /dev/null || true
 
local max_cur_granted=$(($(want_grant $tgt) + $(grant_chunk $tgt)))
stack_trap "rm -f $file" EXIT
stack_trap "rm -f $DIR/$tfile && wait_delete_completed" EXIT
 
$LFS setstripe $file -i 0 -c 1
dd if=/dev/zero of=$file bs=1M count=1000 &
$LFS setstripe $DIR/$tfile -i 0 -c 1
dd if=/dev/zero of=$DIR/$tfile bs=1M count=1000 &
ddpid=$!
 
while true
do
local cur_grant=$($LCTL get_param -n osc.${tgt}.cur_grant_bytes)
if [[ $cur_grant -gt $max_cur_granted ]]
then
while kill -0 $ddpid; do
local cur_grant=$($LCTL get_param -n osc.$tgt.cur_grant_bytes)
if [[ $cur_grant -gt $max_cur_granted ]]; then
kill $ddpid
error "cur_grant $cur_grant > $max_cur_granted"
fi
kill -0 $ddpid
[[ $? -ne 0 ]] && break;
sleep 2
done
 
rm -f $DIR/$tfile
wait_delete_completed
$LCTL set_param debug="$olddebug" 2> /dev/null || true
sleep 1
done
}
run_test 64d "check grant limit exceed"
 
# Compare the osc's cur_grant_bytes with an expected value and call error()
# when they differ.
# usage: check_grants <osc target> <expected bytes> <message prefix>
check_grants() {
	local osc_name=$1
	local want_bytes=$2
	local label=$3
	local have_bytes=$($LCTL get_param -n osc.$osc_name.cur_grant_bytes)

	# "! (( == ))" rather than "(( != ))": a value that does not evaluate
	# (e.g. empty output from lctl) must still end up in error()
	if ! ((have_bytes == want_bytes)); then
		error "$label: grants mismatch: $have_bytes, expected $want_bytes"
	fi
}
# Round $1 up to the nearest multiple of $2; $2 has to be a power of two
# for the mask arithmetic to give a meaningful result.
# Both arguments are expanded as text inside $(( )), so either may be a
# number or the name of a shell variable holding one.
round_up_p2() {
	local __p2_mask=$(($2 - 1))

	printf '%d\n' $((($1 + __p2_mask) & ~__p2_mask))
}
# Check how much grant a single write consumes while fail_loc 0x725
# (OBD_FAIL_TGT_NO_GRANT) is set on ost1: after the write the osc is
# expected to hold init_grants - (write size rounded up to
# grant_block_size + grant_extent_tax).
# The check is done twice, first for a direct I/O write via dd and then for
# a write via multiop, with a client remount before each.
# Skipped for parallel runs and for OSS versions older than 2.11.56.
test_64e() {
[ $PARALLEL == "yes" ] && skip "skip parallel run"
[ $OST1_VERSION -ge $(version_code 2.11.56) ] ||
skip "Need OSS version at least 2.11.56"
# Remount client to reset grant
remount_client $MOUNT || error "failed to remount client"
local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
local init_grants=$(import_param $osc_tgt initial_grant)
# right after the remount the osc must hold exactly the initial grant
check_grants $osc_tgt $init_grants "init grants"
local extent_tax=$(import_param $osc_tgt grant_extent_tax)
local max_brw_size=$(import_param $osc_tgt max_brw_size)
local gbs=$(import_param $osc_tgt grant_block_size)
# write random number of bytes from max_brw_size / 4 to max_brw_size
local write_bytes=$(shuf -i $((max_brw_size / 4))-$max_brw_size -n 1)
# align for direct io
# (PAGE_SIZE is passed by name; round_up_p2 expands it inside $(( )))
write_bytes=$(round_up_p2 $write_bytes PAGE_SIZE)
# round to grant consumption unit
local wb_round_up=$(round_up_p2 $write_bytes gbs)
# expected consumption: rounded-up data size plus one extent tax
local grants=$((wb_round_up + extent_tax))
$LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed"
# define OBD_FAIL_TGT_NO_GRANT 0x725
# make the server not grant more back
do_facet ost1 $LCTL set_param fail_loc=0x725
# NOTE(review): the exit status of dd is not checked here
dd if=/dev/zero of=$DIR/$tfile bs=$write_bytes count=1 oflag=direct
do_facet ost1 $LCTL set_param fail_loc=0
check_grants $osc_tgt $((init_grants - grants)) "dio w/o grant alloc"
rm -f $DIR/$tfile || error "rm failed"
# Remount client to reset grant
remount_client $MOUNT || error "failed to remount client"
# the osc device name is recomputed after the remount
osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
$LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed"
# define OBD_FAIL_TGT_NO_GRANT 0x725
# make the server not grant more back
do_facet ost1 $LCTL set_param fail_loc=0x725
# NOTE(review): presumably multiop opens write-only, writes write_bytes,
# syncs and closes - confirm against the multiop command letters
$MULTIOP $DIR/$tfile "oO_WRONLY:w${write_bytes}yc"
do_facet ost1 $LCTL set_param fail_loc=0
check_grants $osc_tgt $((init_grants - grants)) "buf io w/o grant alloc"
}
run_test 64e "check grant consumption (no grant allocation)"
# Check grant accounting for a single write when no fail_loc is set: once
# the write has completed, the osc is expected to hold
# init_grants - consumed grants + the value printed by grant_chunk.
# The check is done for a direct I/O write via dd and then for a write via
# multiop, where the grant is additionally checked while multiop is still
# running in the background.
# Skipped for parallel runs.
test_64f() {
[ $PARALLEL == "yes" ] && skip "skip parallel run"
# Remount client to reset grant
remount_client $MOUNT || error "failed to remount client"
local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
local init_grants=$(import_param $osc_tgt initial_grant)
local extent_tax=$(import_param $osc_tgt grant_extent_tax)
local max_brw_size=$(import_param $osc_tgt max_brw_size)
local gbs=$(import_param $osc_tgt grant_block_size)
local chunk=$(grant_chunk $osc_tgt)
# write random number of bytes from max_brw_size / 4 to max_brw_size
local write_bytes=$(shuf -i $((max_brw_size / 4))-$max_brw_size -n 1)
# align for direct io
# (PAGE_SIZE is passed by name; round_up_p2 expands it inside $(( )))
write_bytes=$(round_up_p2 $write_bytes PAGE_SIZE)
# round to grant consumption unit
local wb_round_up=$(round_up_p2 $write_bytes gbs)
# expected consumption: rounded-up data size plus one extent tax
local grants=$((wb_round_up + extent_tax))
$LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed"
dd if=/dev/zero of=$DIR/$tfile bs=$write_bytes count=1 oflag=direct ||
error "error writing to $DIR/$tfile"
check_grants $osc_tgt $((init_grants - grants + chunk)) \
"direct io with grant allocation"
rm -f $DIR/$tfile || error "rm failed"
# Remount client to reset grant
remount_client $MOUNT || error "failed to remount client"
# the osc device name is recomputed after the remount
osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
$LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed"
# NOTE(review): presumably the "_" makes multiop pause after the write
# until it receives SIGUSR1, before syncing and closing - confirm
local cmd="oO_WRONLY:w${write_bytes}_yc"
# multiop runs in the background so the grant can be checked while it is
# still running
$MULTIOP $DIR/$tfile $cmd &
# NOTE(review): MULTIPID is not declared local, so it is left set in the
# calling shell
MULTIPID=$!
sleep 1
# while multiop is still running the grant is expected to be consumed and
# not yet replenished by the chunk
check_grants $osc_tgt $((init_grants - grants)) \
"buffered io, not write rpc"
kill -USR1 $MULTIPID
# wait without arguments waits for all background jobs of this shell
wait
check_grants $osc_tgt $((init_grants - grants + chunk)) \
"buffered io, one RPC"
}
run_test 64f "check grant consumption (with grant allocation)"
# bug 1414 - set/get directories' stripe info
test_65a() {
[ $PARALLEL == "yes" ] && skip "skip parallel run"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment