Skip to content

Commit

Permalink
block/mq-deadline: Add cgroup support
Browse files Browse the repository at this point in the history
Maintain statistics per cgroup and export these to user space. These
statistics are essential for verifying whether the proper I/O priorities
have been assigned to requests. An example of the statistics data with
this patch applied:

$ cat /sys/fs/cgroup/io.stat
11:2 rbytes=0 wbytes=0 rios=3 wios=0 dbytes=0 dios=0 [NONE] dispatched=0 inserted=0 merged=171 [RT] dispatched=0 inserted=0 merged=0 [BE] dispatched=0 inserted=0 merged=0 [IDLE] dispatched=0 inserted=0 merged=0
8:32 rbytes=2142720 wbytes=0 rios=105 wios=0 dbytes=0 dios=0 [NONE] dispatched=0 inserted=0 merged=171 [RT] dispatched=0 inserted=0 merged=0 [BE] dispatched=0 inserted=0 merged=0 [IDLE] dispatched=0 inserted=0 merged=0

Cc: Damien Le Moal <[email protected]>
Cc: Hannes Reinecke <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Ming Lei <[email protected]>
Cc: Johannes Thumshirn <[email protected]>
Cc: Himanshu Madhani <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
bvanassche authored and axboe committed Jun 21, 2021
1 parent 38ba64d commit 08a9ad8
Show file tree
Hide file tree
Showing 5 changed files with 308 additions and 14 deletions.
6 changes: 6 additions & 0 deletions block/Kconfig.iosched
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ config MQ_IOSCHED_DEADLINE
help
MQ version of the deadline IO scheduler.

# Helper symbol without a prompt: it is not offered to the user and is enabled
# by default once both dependencies listed below are satisfied.
config MQ_IOSCHED_DEADLINE_CGROUP
tristate
default y
depends on MQ_IOSCHED_DEADLINE
depends on BLK_CGROUP

config MQ_IOSCHED_KYBER
tristate "Kyber I/O scheduler"
default y
Expand Down
2 changes: 2 additions & 0 deletions block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
# mq-deadline.o is a composite object: mq-deadline-main.o is always part of it,
# mq-deadline-cgroup.o only when CONFIG_MQ_IOSCHED_DEADLINE_CGROUP is enabled.
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
mq-deadline-y += mq-deadline-main.o
mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
Expand Down
126 changes: 126 additions & 0 deletions block/mq-deadline-cgroup.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/blk-cgroup.h>
#include <linux/ioprio.h>

#include "mq-deadline-cgroup.h"

/*
 * Forward declaration: the helpers below take &dd_blkcg_policy before the
 * initializer, which references those helpers, appears further down.
 */
static struct blkcg_policy dd_blkcg_policy;

/*
 * Allocate the per-cgroup mq-deadline data together with its per-CPU
 * statistics.
 *
 * @gfp: allocation flags supplied by the caller; used for both allocations.
 *
 * Returns a pointer to the embedded blkcg_policy_data, or NULL if either
 * allocation fails (nothing is leaked in that case).
 */
static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp)
{
	struct dd_blkcg *pd;

	pd = kzalloc(sizeof(*pd), gfp);
	if (!pd)
		return NULL;

	/*
	 * Use the caller's allocation context rather than a hard-coded
	 * GFP_KERNEL so that both allocations obey the same constraints.
	 */
	pd->stats = alloc_percpu_gfp(typeof(*pd->stats), gfp | __GFP_ZERO);
	if (!pd->stats) {
		kfree(pd);
		return NULL;
	}

	return &pd->cpd;
}

static void dd_cpd_free(struct blkcg_policy_data *cpd)
{
struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd);

free_percpu(dd_blkcg->stats);
kfree(dd_blkcg);
}

/*
 * Map the policy data of a blkg to the per-cgroup mq-deadline data of the
 * block cgroup that the blkg belongs to.
 */
static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd)
{
	struct blkcg *owner = pd->blkg->blkcg;
	struct blkcg_policy_data *policy_data;

	policy_data = blkcg_to_cpd(owner, &dd_blkcg_policy);
	return container_of(policy_data, struct dd_blkcg, cpd);
}

/*
 * Return the per-cgroup mq-deadline data for the block cgroup that @bio is
 * associated with through bio->bi_blkg, or NULL if that blkg has no policy
 * data for the mq-deadline blkcg policy.
 */
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
{
	struct blkg_policy_data *blkg_pd =
		blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy);

	return blkg_pd ? dd_blkcg_from_pd(blkg_pd) : NULL;
}

/*
 * Format the mq-deadline statistics of the cgroup that owns @pd.
 *
 * @pd:   policy data of the blkg whose cgroup statistics are reported.
 * @buf:  output buffer.
 * @size: size of @buf in bytes.
 *
 * For every I/O priority class one group of the form
 * " [CLASS] dispatched=<d> inserted=<i> merged=<m>" is appended, where
 *   <d> = dispatched + merged - completed,
 *   <i> = inserted - completed,
 *   <m> = merged.
 *
 * Returns the number of characters stored in @buf as reported by scnprintf().
 */
static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
{
	static const char *const prio_class_name[] = {
		[IOPRIO_CLASS_NONE]	= "NONE",
		[IOPRIO_CLASS_RT]	= "RT",
		[IOPRIO_CLASS_BE]	= "BE",
		[IOPRIO_CLASS_IDLE]	= "IDLE",
	};
	struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd);
	size_t res = 0;
	u8 prio;	/* must be u8: ddcg_sum() checks the type at build time */

	for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++) {
		/*
		 * Sum every counter exactly once so that the derived values
		 * below are computed from the same reading and the per-CPU
		 * walk is not repeated needlessly.
		 */
		const u32 n_dispatched = ddcg_sum(blkcg, dispatched, prio);
		const u32 n_inserted = ddcg_sum(blkcg, inserted, prio);
		const u32 n_merged = ddcg_sum(blkcg, merged, prio);
		const u32 n_completed = ddcg_sum(blkcg, completed, prio);

		res += scnprintf(buf + res, size - res,
				 " [%s] dispatched=%u inserted=%u merged=%u",
				 prio_class_name[prio],
				 n_dispatched + n_merged - n_completed,
				 n_inserted - n_completed,
				 n_merged);
	}

	return res;
}

/*
 * Allocate zero-initialized per-(cgroup, request queue) data using @gfp.
 * @q and @blkcg are not used here. Returns the embedded blkg_policy_data,
 * or NULL if the allocation fails.
 */
static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q,
					    struct blkcg *blkcg)
{
	struct dd_blkg *ddg = kzalloc(sizeof(struct dd_blkg), gfp);

	return ddg ? &ddg->pd : NULL;
}

static void dd_pd_free(struct blkg_policy_data *pd)
{
struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd);

kfree(dd_blkg);
}

/* Callback table of the mq-deadline blkcg policy. */
static struct blkcg_policy dd_blkcg_policy = {
	/* Per-(cgroup, request queue) data and statistics output. */
	.pd_alloc_fn	= dd_pd_alloc,
	.pd_free_fn	= dd_pd_free,
	.pd_stat_fn	= dd_pd_stat,

	/* Per-cgroup data. */
	.cpd_alloc_fn	= dd_cpd_alloc,
	.cpd_free_fn	= dd_cpd_free,
};

/*
 * Activate the mq-deadline blkcg policy on request queue @q.
 * Returns the result of blkcg_activate_policy().
 */
int dd_activate_policy(struct request_queue *q)
{
return blkcg_activate_policy(q, &dd_blkcg_policy);
}

/*
 * Counterpart of dd_activate_policy(): deactivate the mq-deadline blkcg
 * policy on request queue @q.
 */
void dd_deactivate_policy(struct request_queue *q)
{
blkcg_deactivate_policy(q, &dd_blkcg_policy);
}

/*
 * Register the mq-deadline blkcg policy.
 * Returns the result of blkcg_policy_register().
 */
int __init dd_blkcg_init(void)
{
return blkcg_policy_register(&dd_blkcg_policy);
}

/* Counterpart of dd_blkcg_init(): unregister the mq-deadline blkcg policy. */
void __exit dd_blkcg_exit(void)
{
blkcg_policy_unregister(&dd_blkcg_policy);
}
114 changes: 114 additions & 0 deletions block/mq-deadline-cgroup.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/* SPDX-License-Identifier: GPL-2.0 */

#if !defined(_MQ_DEADLINE_CGROUP_H_)
#define _MQ_DEADLINE_CGROUP_H_

#include <linux/blk-cgroup.h>

struct request_queue;

/**
 * struct io_stats_per_prio - I/O statistics per I/O priority class.
 * @inserted: Number of inserted requests.
 * @merged: Number of merged requests.
 * @dispatched: Number of dispatched requests.
 * @completed: Number of I/O completions.
 *
 * Each counter is a local_t: ddcg_count() increments the copy belonging to
 * the current CPU and ddcg_sum() adds the per-CPU copies together.
 */
struct io_stats_per_prio {
local_t inserted;
local_t merged;
local_t dispatched;
local_t completed;
};

/*
 * I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*).
 * The array is indexed by priority class; its four slots correspond to the
 * four class names (NONE, RT, BE, IDLE) printed by dd_pd_stat().
 */
struct blkcg_io_stats {
struct io_stats_per_prio stats[4];
};

/**
 * struct dd_blkcg - Per cgroup data.
 * @cpd: blkcg_policy_data structure. Code converts between the two with
 *	container_of().
 * @stats: Per-CPU I/O statistics, updated with ddcg_count() and read with
 *	ddcg_sum().
 */
struct dd_blkcg {
struct blkcg_policy_data cpd; /* must be the first member */
struct blkcg_io_stats __percpu *stats;
};

/*
 * Count one event of type 'event_type' and with I/O priority class
 * 'prio_class'.
 *
 * @ddcg:       struct dd_blkcg pointer (type enforced at build time); the
 *              macro does nothing when it is NULL.
 * @event_type: name of a struct io_stats_per_prio member, e.g. inserted.
 * @prio_class: index into the stats[] array; must have type u8 (enforced at
 *              build time).
 *
 * The counter copy of the current CPU is incremented between get_cpu_ptr()
 * and put_cpu_ptr().
 */
#define ddcg_count(ddcg, event_type, prio_class) do { \
if (ddcg) { \
struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats); \
\
BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
BUILD_BUG_ON(!__same_type((prio_class), u8)); \
local_inc(&io_stats->stats[(prio_class)].event_type); \
put_cpu_ptr(io_stats); \
} \
} while (0)

/*
 * Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls
 * across all CPUs. No locking or barriers since it is fine if the returned
 * sum is slightly outdated.
 *
 * @ddcg:       struct dd_blkcg pointer (type enforced at build time).
 * @event_type: name of a struct io_stats_per_prio member, e.g. inserted.
 * @prio:       index into the stats[] array; must have type u8 (enforced at
 *              build time).
 *
 * Every possible CPU is visited, not only the CPUs that are present right
 * now, so that events counted on a CPU that has since gone away still
 * contribute to the total. The result is accumulated in a u32.
 */
#define ddcg_sum(ddcg, event_type, prio) ({				\
	unsigned int cpu;						\
	u32 sum = 0;							\
									\
	BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *));		\
	BUILD_BUG_ON(!__same_type((prio), u8));				\
	for_each_possible_cpu(cpu)					\
		sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)->	\
				  stats[(prio)].event_type);		\
	sum;								\
})

#ifdef CONFIG_BLK_CGROUP

/**
 * struct dd_blkg - Per (cgroup, request queue) data.
 * @pd: blkg_policy_data structure. It is the only member; code converts
 *	between the two with container_of().
 */
struct dd_blkg {
struct blkg_policy_data pd; /* must be the first member */
};

/* Entry points implemented in mq-deadline-cgroup.c. */
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio);
int dd_activate_policy(struct request_queue *q);
void dd_deactivate_policy(struct request_queue *q);
int __init dd_blkcg_init(void);
void __exit dd_blkcg_exit(void);

#else /* CONFIG_BLK_CGROUP */

/*
 * Stubs for builds without CONFIG_BLK_CGROUP: the lookup returns NULL and
 * the remaining functions do nothing, returning 0 where a status is expected.
 */

/* Always returns NULL; ddcg_count() ignores a NULL dd_blkcg pointer. */
static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
{
return NULL;
}

/* Nothing to activate; always returns 0. */
static inline int dd_activate_policy(struct request_queue *q)
{
return 0;
}

/* Nothing to deactivate. */
static inline void dd_deactivate_policy(struct request_queue *q)
{
}

/* Nothing to register; always returns 0. */
static inline int dd_blkcg_init(void)
{
return 0;
}

/* Nothing to unregister. */
static inline void dd_blkcg_exit(void)
{
}

#endif /* CONFIG_BLK_CGROUP */

#endif /* _MQ_DEADLINE_CGROUP_H_ */
Loading

0 comments on commit 08a9ad8

Please sign in to comment.