Skip to content

Commit

Permalink
block: add scalable completion tracking of requests
Browse files Browse the repository at this point in the history
For legacy block, we simply track the stats in the request queue. For
blk-mq, we track them on a per-sw queue basis, which we can then
sum up through the hardware queues and finally to a per device
state.

The stats are tracked in, roughly, 0.1s interval windows.

Add sysfs files to display the stats.

The feature is off by default, to avoid any extra overhead. In-kernel
users of it can turn it on by setting QUEUE_FLAG_STATS in the queue
flags. We currently don't turn it on if someone just reads any of
the stats files; that is something we could add as well.

Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
axboe committed Nov 10, 2016
1 parent ebc4ff6 commit cf43e6b
Show file tree
Hide file tree
Showing 10 changed files with 427 additions and 3 deletions.
2 changes: 1 addition & 1 deletion block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
Expand Down
14 changes: 12 additions & 2 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2464,6 +2464,11 @@ void blk_start_request(struct request *req)
{
blk_dequeue_request(req);

if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
blk_stat_set_issue_time(&req->issue_stat);
req->rq_flags |= RQF_STATS;
}

/*
* We are now handing the request to the hardware, initialize
* resid_len to full count and add the timeout handler.
Expand Down Expand Up @@ -2683,8 +2688,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
*/
void blk_finish_request(struct request *req, int error)
{
struct request_queue *q = req->q;

if (req->rq_flags & RQF_STATS)
blk_stat_add(&q->rq_stats[rq_data_dir(req)], req);

if (req->rq_flags & RQF_QUEUED)
blk_queue_end_tag(req->q, req);
blk_queue_end_tag(q, req);

BUG_ON(blk_queued_rq(req));

Expand All @@ -2704,7 +2714,7 @@ void blk_finish_request(struct request *req, int error)
if (blk_bidi_rq(req))
__blk_put_request(req->next_rq->q, req->next_rq);

__blk_put_request(req->q, req);
__blk_put_request(q, req);
}
}
EXPORT_SYMBOL(blk_finish_request);
Expand Down
47 changes: 47 additions & 0 deletions block/blk-mq-sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret;
}

/*
 * Re-initialize the read and write statistics of every software queue
 * context visited by hctx_for_each_ctx() for @hctx.
 */
static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_ctx *sw_ctx;
	unsigned int idx;

	hctx_for_each_ctx(hctx, sw_ctx, idx) {
		struct blk_rq_stat *stats = sw_ctx->stat;

		blk_stat_init(&stats[BLK_STAT_READ]);
		blk_stat_init(&stats[BLK_STAT_WRITE]);
	}
}

/*
 * sysfs store handler for the per-hctx "stats" attribute.
 *
 * Any write clears the statistics of @hctx; the written data in @page is
 * not looked at. Returns @count so the whole write is reported as consumed.
 */
static ssize_t
blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx, const char *page,
			   size_t count)
{
	blk_mq_stat_clear(hctx);

	return count;
}

/*
 * Format one line of request statistics into @page.
 *
 * @page: destination buffer; the caller must guarantee there is enough room
 * @stat: statistics to print (sample count, mean, min, max)
 * @pre:  prefix placed at the start of the line
 *
 * Returns the number of characters written, as reported by sprintf().
 */
static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
{
	/*
	 * Each cast must match its conversion specifier: %llu takes an
	 * unsigned long long, %lld takes a long long.
	 */
	return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
			pre, (unsigned long long) stat->nr_samples,
			(long long) stat->mean, (long long) stat->min,
			(long long) stat->max);
}

/*
 * sysfs show handler for the per-hctx "stats" attribute.
 *
 * Starts from freshly initialized read/write stat slots, lets
 * blk_hctx_stat_get() fill them in for @hctx, and prints one "read :" line
 * followed by one "write:" line into @page.
 *
 * Returns the total number of characters written to @page.
 */
static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	struct blk_rq_stat totals[2];
	ssize_t len = 0;

	blk_stat_init(&totals[BLK_STAT_READ]);
	blk_stat_init(&totals[BLK_STAT_WRITE]);
	blk_hctx_stat_get(hctx, totals);

	/* The write line is appended directly after the read line. */
	len += print_stat(page + len, &totals[BLK_STAT_READ], "read :");
	len += print_stat(page + len, &totals[BLK_STAT_WRITE], "write:");

	return len;
}

static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
Expand Down Expand Up @@ -317,6 +358,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
.show = blk_mq_hw_sysfs_poll_show,
.store = blk_mq_hw_sysfs_poll_store,
};
/*
 * Per-hardware-queue "stats" attribute: readable by everyone, writable by
 * the owner. Reads go to the show handler, writes to the store handler.
 */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
	.attr	= {
		.name = "stats",
		.mode = S_IRUGO | S_IWUSR,
	},
	.show	= blk_mq_hw_sysfs_stat_show,
	.store	= blk_mq_hw_sysfs_stat_store,
};

static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
Expand All @@ -327,6 +373,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
&blk_mq_hw_sysfs_stat.attr,
NULL,
};

Expand Down
25 changes: 25 additions & 0 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-stat.h"

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
Expand Down Expand Up @@ -403,10 +404,27 @@ static void blk_mq_ipi_complete_request(struct request *rq)
put_cpu();
}

/*
 * Add @rq to the statistics of the software queue context looked up for
 * the CPU this code is running on. Requests that do not carry RQF_STATS
 * are ignored.
 */
static void blk_mq_stat_add(struct request *rq)
{
	struct blk_mq_ctx *local_ctx;

	if (!(rq->rq_flags & RQF_STATS))
		return;

	/*
	 * We could use rq->mq_ctx here, but there's less of a risk
	 * of races if we have the completion event add the stats
	 * to the local software queue.
	 */
	local_ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
	blk_stat_add(&local_ctx->stat[rq_data_dir(rq)], rq);
}

static void __blk_mq_complete_request(struct request *rq)
{
struct request_queue *q = rq->q;

blk_mq_stat_add(rq);

if (!q->softirq_done_fn)
blk_mq_end_request(rq, rq->errors);
else
Expand Down Expand Up @@ -450,6 +468,11 @@ void blk_mq_start_request(struct request *rq)
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);

if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
blk_stat_set_issue_time(&rq->issue_stat);
rq->rq_flags |= RQF_STATS;
}

blk_add_timer(rq);

/*
Expand Down Expand Up @@ -1784,6 +1807,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);

/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpu_online(i))
Expand Down
3 changes: 3 additions & 0 deletions block/blk-mq.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

#include "blk-stat.h"

struct blk_mq_tag_set;

struct blk_mq_ctx {
Expand All @@ -18,6 +20,7 @@ struct blk_mq_ctx {

/* incremented at completion time */
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
struct blk_rq_stat stat[2];

struct request_queue *queue;
struct kobject kobj;
Expand Down
Loading

0 comments on commit cf43e6b

Please sign in to comment.