Skip to content

Commit

Permalink
bdev/qos: add the bandwidth rate limit control
Browse files Browse the repository at this point in the history
This patch adds the core control policy for the
bandwidth rate limit (max bytes per second). It renames
the existing functions to common, limit-agnostic names and
specially handles the case where IOPS and bandwidth rate
limiting are both enabled.

Change-Id: I9f4565958d472559ef6d8bea52b1fe2a5f3c8969
Signed-off-by: GangCao <[email protected]>
Reviewed-on: https://review.gerrithub.io/413821
Tested-by: SPDK Automated Test System <[email protected]>
Reviewed-by: Daniel Verkamp <[email protected]>
Reviewed-by: Jim Harris <[email protected]>
  • Loading branch information
Comphix authored and jimharris committed Jun 12, 2018
1 parent 8da7772 commit 7191c4b
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 17 deletions.
80 changes: 64 additions & 16 deletions lib/bdev/bdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ int __itt_init_ittlib(const char *, __itt_group_id);
/* Length of one QoS timeslice, in microseconds (1000 us = 1 ms). */
#define SPDK_BDEV_QOS_TIMESLICE_IN_USEC 1000
/* Number of microseconds in one second; converts per-second limits to per-timeslice quotas. */
#define SPDK_BDEV_SEC_TO_USEC 1000000ULL
/* Floor applied to the per-timeslice I/O quota when an IOPS rate limit is set. */
#define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE 1
/* Floor applied to the per-timeslice byte quota when a bandwidth rate limit is set. */
#define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE 512
/* NOTE(review): presumably the smallest IOPS rate limit a user may configure - confirm against the caller. */
#define SPDK_BDEV_QOS_MIN_IOS_PER_SEC 10000
/* NOTE(review): presumably the smallest bandwidth rate limit (in MB/s) a user may configure - confirm against the caller. */
#define SPDK_BDEV_QOS_MIN_BW_IN_MB_PER_SEC 10

Expand Down Expand Up @@ -130,9 +131,16 @@ struct spdk_bdev_qos {
* only valid for the master channel which manages the outstanding IOs. */
uint64_t max_ios_per_timeslice;

/** Maximum allowed bytes to be issued in one timeslice (e.g., 1ms) and
* only valid for the master channel which manages the outstanding IOs. */
uint64_t max_byte_per_timeslice;

/** Submitted IO in one timeslice (e.g., 1ms) */
uint64_t io_submitted_this_timeslice;

/** Submitted byte in one timeslice (e.g., 1ms) */
uint64_t byte_submitted_this_timeslice;

/** Poller that processes queued I/O commands each time slice. */
struct spdk_poller *poller;
};
Expand Down Expand Up @@ -862,6 +870,26 @@ spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
}
}

/**
 * Report the number of bytes a bdev I/O accounts for.
 *
 * NVMe passthru requests (admin, I/O, and I/O with metadata) report the byte
 * count stored in their passthru descriptor. Read, write, unmap and
 * write-zeroes requests report their block count multiplied by the block
 * length of the bdev the I/O targets. Every other I/O type reports 0.
 *
 * \param bdev_io I/O whose size is requested.
 * \return size of the I/O in bytes, or 0 for types that carry no payload size.
 */
static uint64_t
_spdk_bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io)
{
	uint64_t io_bytes = 0;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		/* Block-based requests: size is expressed in blocks of the target bdev. */
		io_bytes = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
		break;
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
	case SPDK_BDEV_IO_TYPE_NVME_IO:
	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		/* Passthru requests already carry an explicit byte count. */
		io_bytes = bdev_io->u.nvme_passthru.nbytes;
		break;
	default:
		/* Remaining types contribute nothing. */
		break;
	}

	return io_bytes;
}

static void
_spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch)
{
Expand All @@ -871,16 +899,23 @@ _spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch)
struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;

while (!TAILQ_EMPTY(&qos->queued)) {
if (qos->io_submitted_this_timeslice < qos->max_ios_per_timeslice) {
bdev_io = TAILQ_FIRST(&qos->queued);
TAILQ_REMOVE(&qos->queued, bdev_io, link);
qos->io_submitted_this_timeslice++;
ch->io_outstanding++;
shared_resource->io_outstanding++;
bdev->fn_table->submit_request(ch->channel, bdev_io);
} else {
if (qos->max_ios_per_timeslice > 0 &&
qos->io_submitted_this_timeslice >= qos->max_ios_per_timeslice) {
break;
}

if (qos->max_byte_per_timeslice > 0 &&
qos->byte_submitted_this_timeslice >= qos->max_byte_per_timeslice) {
break;
}

bdev_io = TAILQ_FIRST(&qos->queued);
TAILQ_REMOVE(&qos->queued, bdev_io, link);
qos->io_submitted_this_timeslice++;
qos->byte_submitted_this_timeslice += _spdk_bdev_get_io_size_in_byte(bdev_io);
ch->io_outstanding++;
shared_resource->io_outstanding++;
bdev->fn_table->submit_request(ch->channel, bdev_io);
}
}

Expand Down Expand Up @@ -1000,14 +1035,23 @@ spdk_bdev_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
}

static void
spdk_bdev_qos_update_max_ios_per_timeslice(struct spdk_bdev_qos *qos)
spdk_bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos)
{
uint64_t max_ios_per_timeslice = 0;
uint64_t max_ios_per_timeslice = 0, max_byte_per_timeslice = 0;

max_ios_per_timeslice = qos->iops_rate_limit * SPDK_BDEV_QOS_TIMESLICE_IN_USEC /
SPDK_BDEV_SEC_TO_USEC;
qos->max_ios_per_timeslice = spdk_max(max_ios_per_timeslice,
SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE);
if (qos->iops_rate_limit > 0) {
max_ios_per_timeslice = qos->iops_rate_limit * SPDK_BDEV_QOS_TIMESLICE_IN_USEC /
SPDK_BDEV_SEC_TO_USEC;
qos->max_ios_per_timeslice = spdk_max(max_ios_per_timeslice,
SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE);
}

if (qos->byte_rate_limit > 0) {
max_byte_per_timeslice = qos->byte_rate_limit * SPDK_BDEV_QOS_TIMESLICE_IN_USEC /
SPDK_BDEV_SEC_TO_USEC;
qos->max_byte_per_timeslice = spdk_max(max_byte_per_timeslice,
SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE);
}
}

static int
Expand All @@ -1017,6 +1061,7 @@ spdk_bdev_channel_poll_qos(void *arg)

/* Reset for next round of rate limiting */
qos->io_submitted_this_timeslice = 0;
qos->byte_submitted_this_timeslice = 0;

_spdk_bdev_qos_io_submit(qos->ch);

Expand Down Expand Up @@ -1075,8 +1120,9 @@ _spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
qos->thread = spdk_io_channel_get_thread(io_ch);

TAILQ_INIT(&qos->queued);
spdk_bdev_qos_update_max_ios_per_timeslice(qos);
spdk_bdev_qos_update_max_quota_per_timeslice(qos);
qos->io_submitted_this_timeslice = 0;
qos->byte_submitted_this_timeslice = 0;

qos->poller = spdk_poller_register(spdk_bdev_channel_poll_qos,
qos,
Expand Down Expand Up @@ -1266,7 +1312,9 @@ spdk_bdev_qos_destroy(struct spdk_bdev *bdev)
new_qos->ch = NULL;
new_qos->thread = NULL;
new_qos->max_ios_per_timeslice = 0;
new_qos->max_byte_per_timeslice = 0;
new_qos->io_submitted_this_timeslice = 0;
new_qos->byte_submitted_this_timeslice = 0;
new_qos->poller = NULL;
TAILQ_INIT(&new_qos->queued);

Expand Down Expand Up @@ -3077,7 +3125,7 @@ _spdk_bdev_update_qos_limit_iops_msg(void *cb_arg)
struct spdk_bdev *bdev = ctx->bdev;

pthread_mutex_lock(&bdev->mutex);
spdk_bdev_qos_update_max_ios_per_timeslice(bdev->qos);
spdk_bdev_qos_update_max_quota_per_timeslice(bdev->qos);
pthread_mutex_unlock(&bdev->mutex);

_spdk_bdev_set_qos_limit_done(ctx, 0);
Expand Down
17 changes: 16 additions & 1 deletion test/unit/lib/bdev/mt/bdev.c/bdev_ut.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,12 @@ basic_qos(void)
bdev->qos = calloc(1, sizeof(*bdev->qos));
SPDK_CU_ASSERT_FATAL(bdev->qos != NULL);
TAILQ_INIT(&bdev->qos->queued);
/*
* Enable both IOPS and bandwidth rate limits.
* In this case, both rate limits will take equal effect.
*/
bdev->qos->iops_rate_limit = 2000; /* 2 I/O per millisecond */
bdev->qos->byte_rate_limit = 8192000; /* 8K byte per millisecond with 4K block size */

g_get_io_channel = true;

Expand Down Expand Up @@ -732,7 +737,12 @@ io_during_qos_queue(void)
bdev->qos = calloc(1, sizeof(*bdev->qos));
SPDK_CU_ASSERT_FATAL(bdev->qos != NULL);
TAILQ_INIT(&bdev->qos->queued);
/*
* Enable both IOPS and bandwidth rate limits.
* In this case, IOPS rate limit will take effect first.
*/
bdev->qos->iops_rate_limit = 1000; /* 1000 I/O per second, or 1 per millisecond */
bdev->qos->byte_rate_limit = 8192000; /* 8K byte per millisecond with 4K block size */

g_get_io_channel = true;

Expand Down Expand Up @@ -815,7 +825,12 @@ io_during_qos_reset(void)
bdev->qos = calloc(1, sizeof(*bdev->qos));
SPDK_CU_ASSERT_FATAL(bdev->qos != NULL);
TAILQ_INIT(&bdev->qos->queued);
bdev->qos->iops_rate_limit = 1000; /* 1000 I/O per second, or 1 per millisecond */
/*
* Enable both IOPS and bandwidth rate limits.
* In this case, bandwidth rate limit will take effect first.
*/
bdev->qos->iops_rate_limit = 2000; /* 2000 I/O per second, or 2 per millisecond */
bdev->qos->byte_rate_limit = 4096000; /* 4K byte per millisecond with 4K block size */

g_get_io_channel = true;

Expand Down

0 comments on commit 7191c4b

Please sign in to comment.