block: add support for IO CPU affinity
This patch adds support for controlling the IO completion CPU of
either all requests on a queue, or on a per-request basis. We export
a sysfs variable (rq_affinity) which, if set, migrates completions
of requests to the CPU that originally submitted them. A bio helper
(bio_set_completion_cpu()) is also added, so that queuers can ask
for completion on a specific CPU.
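
For illustration only (this snippet is not part of the patch), a queuer
that wants a bio completed on, or close to, the CPU it submits from
could combine the new helper with the BIO_CPU_AFFINE flag roughly as
follows; the wrapper name is invented for the example:

#include <linux/bio.h>
#include <linux/smp.h>

/*
 * Sketch: hint that this bio should complete on (the group of) the
 * CPU it is submitted from. Whether the hint is honoured depends on
 * the queue settings and architecture support described below.
 */
static void submit_bio_on_this_cpu(int rw, struct bio *bio)
{
        int cpu = get_cpu();    /* stable CPU id while setting the hint */

        bio_set_completion_cpu(bio, cpu);
        bio->bi_flags |= 1 << BIO_CPU_AFFINE;
        put_cpu();

        submit_bio(rw, bio);
}

The queue-wide alternative is the new sysfs knob, e.g.
echo 1 > /sys/block/<dev>/queue/rq_affinity.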

In testing, this has been shown to cut system time by as much
as 20-40% on synthetic workloads where CPU affinity is desired.

This requires a little help from the architecture, so it'll only
work as designed for archs that are using the new generic smp
helper infrastructure.

Signed-off-by: Jens Axboe <[email protected]>
Jens Axboe committed Oct 9, 2008
1 parent 18887ad commit c7c22e4
Showing 9 changed files with 182 additions and 60 deletions.
46 changes: 23 additions & 23 deletions block/blk-core.c
@@ -110,7 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
memset(rq, 0, sizeof(*rq));

INIT_LIST_HEAD(&rq->queuelist);
-INIT_LIST_HEAD(&rq->donelist);
+rq->cpu = -1;
rq->q = q;
rq->sector = rq->hard_sector = (sector_t) -1;
INIT_HLIST_NODE(&rq->hash);
@@ -322,6 +322,21 @@ void blk_unplug(struct request_queue *q)
}
EXPORT_SYMBOL(blk_unplug);

static void blk_invoke_request_fn(struct request_queue *q)
{
/*
* one level of recursion is ok and is much faster than kicking
* the unplug handling
*/
if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
queue_flag_set(QUEUE_FLAG_PLUGGED, q);
kblockd_schedule_work(q, &q->unplug_work);
}
}

/**
* blk_start_queue - restart a previously stopped queue
* @q: The &struct request_queue in question
@@ -336,18 +351,7 @@ void blk_start_queue(struct request_queue *q)
WARN_ON(!irqs_disabled());

queue_flag_clear(QUEUE_FLAG_STOPPED, q);

-/*
- * one level of recursion is ok and is much faster than kicking
- * the unplug handling
- */
-if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-        q->request_fn(q);
-        queue_flag_clear(QUEUE_FLAG_REENTER, q);
-} else {
-        blk_plug_device(q);
-        kblockd_schedule_work(q, &q->unplug_work);
-}
+blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(blk_start_queue);

@@ -405,15 +409,8 @@ void __blk_run_queue(struct request_queue *q)
* Only recurse once to avoid overrunning the stack, let the unplug
* handling reinvoke the handler shortly if we already got there.
*/
-if (!elv_queue_empty(q)) {
-        if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-                q->request_fn(q);
-                queue_flag_clear(QUEUE_FLAG_REENTER, q);
-        } else {
-                blk_plug_device(q);
-                kblockd_schedule_work(q, &q->unplug_work);
-        }
-}
+if (!elv_queue_empty(q))
+        blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);

@@ -1056,6 +1053,7 @@ EXPORT_SYMBOL(blk_put_request);

void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cpu = bio->bi_comp_cpu;
req->cmd_type = REQ_TYPE_FS;

/*
@@ -1198,13 +1196,15 @@ static int __make_request(struct request_queue *q, struct bio *bio)
init_request_from_bio(req, bio);

spin_lock_irq(q->queue_lock);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = blk_cpu_to_group(smp_processor_id());
if (elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
if (sync)
__generic_unplug_device(q);

spin_unlock_irq(q->queue_lock);
return 0;

2 changes: 1 addition & 1 deletion block/blk-settings.c
@@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);

-static int __init blk_settings_init(void)
+int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
blk_max_pfn = max_pfn - 1;
126 changes: 95 additions & 31 deletions block/blk-softirq.c
@@ -13,6 +13,70 @@

static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

/*
* Softirq action handler - move entries to local list and loop over them
* while passing them to the queue registered handler.
*/
static void blk_done_softirq(struct softirq_action *h)
{
struct list_head *cpu_list, local_list;

local_irq_disable();
cpu_list = &__get_cpu_var(blk_cpu_done);
list_replace_init(cpu_list, &local_list);
local_irq_enable();

while (!list_empty(&local_list)) {
struct request *rq;

rq = list_entry(local_list.next, struct request, csd.list);
list_del_init(&rq->csd.list);
rq->q->softirq_done_fn(rq);
}
}

#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
static void trigger_softirq(void *data)
{
struct request *rq = data;
unsigned long flags;
struct list_head *list;

local_irq_save(flags);
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&rq->csd.list, list);

if (list->next == &rq->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);

local_irq_restore(flags);
}

/*
* Setup and invoke a run of 'trigger_softirq' on the given cpu.
*/
static int raise_blk_irq(int cpu, struct request *rq)
{
if (cpu_online(cpu)) {
struct call_single_data *data = &rq->csd;

data->func = trigger_softirq;
data->info = rq;
data->flags = 0;

__smp_call_function_single(cpu, data);
return 0;
}

return 1;
}
#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
static int raise_blk_irq(int cpu, struct request *rq)
{
return 1;
}
#endif

static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -33,33 +97,10 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}


-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
+static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};

-/*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-        struct list_head *cpu_list, local_list;
-
-        local_irq_disable();
-        cpu_list = &__get_cpu_var(blk_cpu_done);
-        list_replace_init(cpu_list, &local_list);
-        local_irq_enable();
-
-        while (!list_empty(&local_list)) {
-                struct request *rq;
-
-                rq = list_entry(local_list.next, struct request, donelist);
-                list_del_init(&rq->donelist);
-                rq->q->softirq_done_fn(rq);
-        }
-}

/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
@@ -71,25 +112,48 @@ static void blk_done_softirq(struct softirq_action *h)
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
**/

void blk_complete_request(struct request *req)
{
-struct list_head *cpu_list;
+struct request_queue *q = req->q;
unsigned long flags;
int ccpu, cpu, group_cpu;

-BUG_ON(!req->q->softirq_done_fn);
+BUG_ON(!q->softirq_done_fn);

local_irq_save(flags);
cpu = smp_processor_id();
group_cpu = blk_cpu_to_group(cpu);

-cpu_list = &__get_cpu_var(blk_cpu_done);
-list_add_tail(&req->donelist, cpu_list);
-raise_softirq_irqoff(BLOCK_SOFTIRQ);
/*
* Select completion CPU
*/
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
ccpu = req->cpu;
else
ccpu = cpu;

if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&req->csd.list, list);

/*
* if the list only contains our just added request,
* signal a raise of the softirq. If there are already
* entries there, someone already raised the irq but it
* hasn't run yet.
*/
if (list->next == &req->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
} else if (raise_blk_irq(ccpu, req))
goto do_local;

local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_complete_request);

-int __init blk_softirq_init(void)
+__init int blk_softirq_init(void)
{
int i;

31 changes: 31 additions & 0 deletions block/blk-sysfs.c
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
return ret;
}

static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);

return queue_var_show(set != 0, page);
}

static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
unsigned long val;

ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
else
queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
spin_unlock_irq(q->queue_lock);
#endif
return ret;
}

static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
.store = queue_nomerges_store,
};

static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};

static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
&queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
NULL,
};

12 changes: 12 additions & 0 deletions block/blk.h
@@ -59,4 +59,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)

#endif /* BLK_DEV_INTEGRITY */

static inline int blk_cpu_to_group(int cpu)
{
#ifdef CONFIG_SCHED_MC
cpumask_t mask = cpu_coregroup_map(cpu);
return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
return first_cpu(per_cpu(cpu_sibling_map, cpu));
#else
return cpu;
#endif
}

#endif
1 change: 1 addition & 0 deletions fs/bio.c
@@ -111,6 +111,7 @@ void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
bio->bi_comp_cpu = -1;
atomic_set(&bio->bi_cnt, 1);
}

11 changes: 11 additions & 0 deletions include/linux/bio.h
@@ -81,6 +81,8 @@ struct bio {

unsigned int bi_max_vecs; /* max bvl_vecs we can hold */

unsigned int bi_comp_cpu; /* completion CPU */

struct bio_vec *bi_io_vec; /* the actual vec list */

bio_end_io_t *bi_end_io;
@@ -105,6 +107,7 @@ struct bio {
#define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
#define BIO_EOPNOTSUPP 7 /* not supported */
#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))

/*
@@ -342,6 +345,14 @@ void zero_fill_bio(struct bio *bio);
extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
extern unsigned int bvec_nr_vecs(unsigned short idx);

/*
* Allow queuer to specify a completion CPU for this bio
*/
static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
{
bio->bi_comp_cpu = cpu;
}

/*
* bio_set is used to allow other portions of the IO system to
* allocate their own private memory pools for bio and iovec structures.
5 changes: 4 additions & 1 deletion include/linux/blkdev.h
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/stringify.h>
#include <linux/bsg.h>
#include <linux/smp.h>

#include <asm/scatterlist.h>

@@ -139,7 +140,8 @@ enum rq_flag_bits
*/
struct request {
struct list_head queuelist;
-struct list_head donelist;
+struct call_single_data csd;
+int cpu;

struct request_queue *q;

@@ -420,6 +422,7 @@ struct request_queue
#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */

static inline int queue_is_locked(struct request_queue *q)
{