Skip to content

Commit

Permalink
block: Fix a race between the cgroup code and request queue initializ…
Browse files Browse the repository at this point in the history
…ation

Initialize the request queue lock earlier such that the following
race can no longer occur:

blk_init_queue_node()             blkcg_print_blkgs()
  blk_alloc_queue_node (1)
    q->queue_lock = &q->__queue_lock (2)
    blkcg_init_queue(q) (3)
                                    spin_lock_irq(blkg->q->queue_lock) (4)
  q->queue_lock = lock (5)
                                    spin_unlock_irq(blkg->q->queue_lock) (6)

(1) allocate an uninitialized queue;
(2) initialize queue_lock to its default internal lock;
(3) initialize blkcg part of request queue, which will create blkg and
    then insert it to blkg_list;
(4) traverse blkg_list and find the created blkg, and then take its
    queue lock, here it is the default *internal lock*;
(5) *race window*, now queue_lock is overridden with *driver specified
    lock*;
(6) now unlock *driver specified lock*, not the locked *internal lock*,
    unlock balance breaks.

The changes in this patch are as follows:
- Move the .queue_lock initialization from blk_init_queue_node() into
  blk_alloc_queue_node().
- Only override the .queue_lock pointer for legacy queues because it
  is not useful for blk-mq queues to override this pointer.
- For all all block drivers that initialize .queue_lock explicitly,
  change the blk_alloc_queue() call in the driver into a
  blk_alloc_queue_node() call and remove the explicit .queue_lock
  initialization. Additionally, initialize the spin lock that will
  be used as queue lock earlier if necessary.

Reported-by: Joseph Qi <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
Reviewed-by: Joseph Qi <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Philipp Reisner <[email protected]>
Cc: Ulf Hansson <[email protected]>
Cc: Kees Cook <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
KAGA-KOKO authored and axboe committed Feb 28, 2018
1 parent 5ee0524 commit 498f665
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 14 deletions.
24 changes: 16 additions & 8 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,19 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
kblockd_schedule_work(&q->timeout_work);
}

/**
* blk_alloc_queue_node - allocate a request queue
* @gfp_mask: memory allocation flags
* @node_id: NUMA node to allocate memory from
* @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
* serialize calls to the legacy .request_fn() callback. Ignored for
* blk-mq request queues.
*
* Note: pass the queue lock as the third argument to this function instead of
* setting the queue lock pointer explicitly to avoid triggering a sporadic
* crash in the blkcg code. This function namely calls blkcg_init_queue() and
* the queue lock pointer must be set before blkcg_init_queue() is called.
*/
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
spinlock_t *lock)
{
Expand Down Expand Up @@ -940,11 +953,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);

/*
* By default initialize queue_lock to internal lock and driver can
* override it later if need be.
*/
q->queue_lock = &q->__queue_lock;
if (!q->mq_ops)
q->queue_lock = lock ? : &q->__queue_lock;

/*
* A queue starts its life with bypass turned on to avoid
Expand Down Expand Up @@ -1031,13 +1041,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
struct request_queue *q;

q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL);
q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
if (!q)
return NULL;

q->request_fn = rfn;
if (lock)
q->queue_lock = lock;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return NULL;
Expand Down
3 changes: 1 addition & 2 deletions drivers/block/drbd/drbd_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig

drbd_init_set_defaults(device);

q = blk_alloc_queue(GFP_KERNEL);
q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
if (!q)
goto out_no_q;
device->rq_queue = q;
Expand Down Expand Up @@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
q->queue_lock = &resource->req_lock;

device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
Expand Down
7 changes: 3 additions & 4 deletions drivers/block/umem.c
Original file line number Diff line number Diff line change
Expand Up @@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->Active = -1; /* no page is active */
card->bio = NULL;
card->biotail = &card->bio;
spin_lock_init(&card->lock);

card->queue = blk_alloc_queue(GFP_KERNEL);
card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
&card->lock);
if (!card->queue)
goto failed_alloc;

blk_queue_make_request(card->queue, mm_make_request);
card->queue->queue_lock = &card->lock;
card->queue->queuedata = card;

tasklet_init(&card->tasklet, process_page, (unsigned long)card);
Expand Down Expand Up @@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
dev_printk(KERN_INFO, &card->dev->dev,
"Window size %d bytes, IRQ %d\n", data, dev->irq);

spin_lock_init(&card->lock);

pci_set_drvdata(dev, card);

if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
Expand Down

0 comments on commit 498f665

Please sign in to comment.