From d11ae71ec97aab632df0b1aed20018da7a9db6a9 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 16 Jul 2013 06:08:41 +0200 Subject: [PATCH 10/21] block: fix I/O throttling accounting blind spot RH-Author: Fam Zheng Message-id: <1373954921-25694-1-git-send-email-famz@redhat.com> Patchwork-id: 52511 O-Subject: [PATCH RHEL-6.5 qemu-kvm v4 10/15] block: fix I/O throttling accounting blind spot Bugzilla: 956825 RH-Acked-by: Laszlo Ersek RH-Acked-by: Paolo Bonzini RH-Acked-by: Stefan Hajnoczi From: Stefan Hajnoczi I/O throttling relies on bdrv_acct_done() which is called when a request completes. This leaves a blind spot since we only charge for completed requests, not submitted requests. For example, if there is 1 operation remaining in this time slice the guest could submit 3 operations and they will all be submitted successfully since they don't actually get accounted for until they complete. Originally we probably thought this is okay since the requests will be accounted when the time slice is extended. In practice it causes fluctuations since the guest can exceed its I/O limit and it will be punished for this later on. Account for I/O upon submission so that I/O limits are enforced properly. Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf (cherry picked from commit 5905fbc9c94ccd744c1b249472eafcc2d827548a) Signed-off-by: Fam Zheng Conflicts: block.c include/block/block_int.h --- v4: zero out bs_new->slice_submitted [Laszlo] --- block.c | 23 +++++++++++------------ block_int.h | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) Signed-off-by: Miroslav Rezanina --- block.c | 23 +++++++++++------------ block_int.h | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/block.c b/block.c index 8c2f94b..3e582a8 100644 --- a/block.c +++ b/block.c @@ -136,7 +136,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs) bs->slice_start = 0; bs->slice_end = 0; bs->slice_time = 0; - memset(&bs->io_base, 0, sizeof(bs->io_base)); } static void bdrv_block_timer(void *opaque) @@ -1161,8 +1160,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) tmp.slice_time = bs_top->slice_time; tmp.slice_start = bs_top->slice_start; tmp.slice_end = bs_top->slice_end; + tmp.slice_submitted = bs_top->slice_submitted; tmp.io_limits = bs_top->io_limits; - tmp.io_base = bs_top->io_base; tmp.throttled_reqs = bs_top->throttled_reqs; tmp.block_timer = bs_top->block_timer; tmp.io_limits_enabled = bs_top->io_limits_enabled; @@ -1219,7 +1218,7 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) bs_new->dirty_bitmap = NULL; qemu_co_queue_init(&bs_new->throttled_reqs); - memset(&bs_new->io_base, 0, sizeof(bs_new->io_base)); + memset(&bs_new->slice_submitted, 0, sizeof(bs_new->slice_submitted)); memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits)); bdrv_iostatus_disable(bs_new); @@ -3622,9 +3621,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, slice_time = bs->slice_end - bs->slice_start; slice_time /= (NANOSECONDS_PER_SECOND); bytes_limit = bps_limit * slice_time; - bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write]; + bytes_base = bs->slice_submitted.bytes[is_write]; if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write]; + bytes_base += bs->slice_submitted.bytes[!is_write]; } /* bytes_base: the bytes of data which have been read/written; and @@ -3682,9 +3681,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, slice_time = bs->slice_end - bs->slice_start; slice_time /= (NANOSECONDS_PER_SECOND); ios_limit = iops_limit * slice_time; - ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write]; + ios_base = bs->slice_submitted.ios[is_write]; if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write]; + ios_base += bs->slice_submitted.ios[!is_write]; } if (ios_base + 1 <= ios_limit) { @@ -3729,11 +3728,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, bs->slice_start = now; bs->slice_end = now + bs->slice_time; - bs->io_base.bytes[is_write] = bs->nr_bytes[is_write]; - bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write]; - - bs->io_base.ios[is_write] = bs->nr_ops[is_write]; - bs->io_base.ios[!is_write] = bs->nr_ops[!is_write]; + memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); } elapsed_time = now - bs->slice_start; @@ -3761,6 +3756,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, *wait = 0; } + bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * + BDRV_SECTOR_SIZE; + bs->slice_submitted.ios[is_write]++; + return false; } diff --git a/block_int.h b/block_int.h index 0e405f5..abf6609 100644 --- a/block_int.h +++ b/block_int.h @@ -284,7 +284,7 @@ struct BlockDriverState { int64_t slice_start; int64_t slice_end; BlockIOLimit io_limits; - BlockIOBaseValue io_base; + BlockIOBaseValue slice_submitted; CoQueue throttled_reqs; QEMUTimer *block_timer; bool io_limits_enabled; -- 1.7.1