Skip to content

Commit

Permalink
block: drop barrier ordering by queue draining
Browse files Browse the repository at this point in the history
Filesystems will take all the responsibilities for ordering requests
around commit writes and will only indicate how the commit writes
themselves should be handled by block layers.  This patch drops
barrier ordering by queue draining from block layer.  Ordering by
draining implementation was somewhat invasive to request handling.
A list of notable changes follows.

* Each queue has 1 bit color which is flipped on each barrier issue.
  This is used to track whether a given request is issued before the
  current barrier or not.  REQ_ORDERED_COLOR flag and coloring
  implementation in __elv_add_request() are removed.

* Requests which shouldn't be processed yet for draining were stalled
  by returning -EAGAIN from blk_do_ordered() according to the test
  result between blk_ordered_req_seq() and blk_ordered_cur_seq().
  This logic is removed.

* Draining completion logic in elv_completed_request() is removed.

* All barrier sequence requests were queued to request queue and then
  trickled to the lower layer according to progress and thus maintaining
  request orders during requeue was necessary.  This is replaced by
  queueing the next request in the barrier sequence only after the
  current one is complete from blk_ordered_complete_seq(), which
  removes the need for multiple proxy requests in struct request_queue
  and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of
  elv_insert().

* As barriers no longer have ordering constraints, there's no need to
  dump the whole elevator onto the dispatch queue on each barrier.
  Insert barriers at the front instead.

* If other barrier requests come to the front of the dispatch queue
  while one is already in progress, they are stored in
  q->pending_barriers and restored to dispatch queue one-by-one after
  each barrier completion from blk_ordered_complete_seq().

Signed-off-by: Tejun Heo <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
htejun authored and Jens Axboe committed Sep 10, 2010
1 parent dd83100 commit 28e7d18
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 220 deletions.
220 changes: 86 additions & 134 deletions block/blk-barrier.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

#include "blk.h"

static struct request *queue_next_ordseq(struct request_queue *q);

/*
* Cache flushing for ordered writes handling
*/
Expand All @@ -19,55 +21,33 @@ unsigned blk_ordered_cur_seq(struct request_queue *q)
return 1 << ffz(q->ordseq);
}

unsigned blk_ordered_req_seq(struct request *rq)
{
struct request_queue *q = rq->q;

BUG_ON(q->ordseq == 0);

if (rq == &q->pre_flush_rq)
return QUEUE_ORDSEQ_PREFLUSH;
if (rq == &q->bar_rq)
return QUEUE_ORDSEQ_BAR;
if (rq == &q->post_flush_rq)
return QUEUE_ORDSEQ_POSTFLUSH;

/*
* !fs requests don't need to follow barrier ordering. Always
* put them at the front. This fixes the following deadlock.
*
* http://thread.gmane.org/gmane.linux.kernel/537473
*/
if (rq->cmd_type != REQ_TYPE_FS)
return QUEUE_ORDSEQ_DRAIN;

if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
(q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
return QUEUE_ORDSEQ_DRAIN;
else
return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
static struct request *blk_ordered_complete_seq(struct request_queue *q,
unsigned seq, int error)
{
struct request *rq;
struct request *next_rq = NULL;

if (error && !q->orderr)
q->orderr = error;

BUG_ON(q->ordseq & seq);
q->ordseq |= seq;

if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
return false;

/*
* Okay, sequence complete.
*/
q->ordseq = 0;
rq = q->orig_bar_rq;
__blk_end_request_all(rq, q->orderr);
return true;
if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
/* not complete yet, queue the next ordered sequence */
next_rq = queue_next_ordseq(q);
} else {
/* complete this barrier request */
__blk_end_request_all(q->orig_bar_rq, q->orderr);
q->orig_bar_rq = NULL;
q->ordseq = 0;

/* dispatch the next barrier if there's one */
if (!list_empty(&q->pending_barriers)) {
next_rq = list_entry_rq(q->pending_barriers.next);
list_move(&next_rq->queuelist, &q->queue_head);
}
}
return next_rq;
}

static void pre_flush_end_io(struct request *rq, int error)
Expand All @@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error)
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
static void queue_flush(struct request_queue *q, struct request *rq,
rq_end_io_fn *end_io)
{
struct request *rq;
rq_end_io_fn *end_io;

if (which == QUEUE_ORDERED_DO_PREFLUSH) {
rq = &q->pre_flush_rq;
end_io = pre_flush_end_io;
} else {
rq = &q->post_flush_rq;
end_io = post_flush_end_io;
}

blk_rq_init(q, rq);
rq->cmd_type = REQ_TYPE_FS;
rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
rq->cmd_flags = REQ_FLUSH;
rq->rq_disk = q->orig_bar_rq->rq_disk;
rq->end_io = end_io;

elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline struct request *start_ordered(struct request_queue *q,
struct request *rq)
static struct request *queue_next_ordseq(struct request_queue *q)
{
unsigned skip = 0;

q->orderr = 0;
q->ordered = q->next_ordered;
q->ordseq |= QUEUE_ORDSEQ_STARTED;

/*
* For an empty barrier, there's no actual BAR request, which
* in turn makes POSTFLUSH unnecessary. Mask them off.
*/
if (!blk_rq_sectors(rq))
q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
QUEUE_ORDERED_DO_POSTFLUSH);

/* stash away the original request */
blk_dequeue_request(rq);
q->orig_bar_rq = rq;
rq = NULL;

/*
* Queue ordered sequence. As we stack them at the head, we
* need to queue in reverse order. Note that we rely on that
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
* request gets inbetween ordered sequence.
*/
if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
rq = &q->post_flush_rq;
} else
skip |= QUEUE_ORDSEQ_POSTFLUSH;
struct request *rq = &q->bar_rq;

if (q->ordered & QUEUE_ORDERED_DO_BAR) {
rq = &q->bar_rq;
switch (blk_ordered_cur_seq(q)) {
case QUEUE_ORDSEQ_PREFLUSH:
queue_flush(q, rq, pre_flush_end_io);
break;

case QUEUE_ORDSEQ_BAR:
/* initialize proxy request and queue it */
blk_rq_init(q, rq);
init_request_from_bio(rq, q->orig_bar_rq->bio);
rq->cmd_flags &= ~REQ_HARDBARRIER;
if (q->ordered & QUEUE_ORDERED_DO_FUA)
rq->cmd_flags |= REQ_FUA;
rq->end_io = bar_end_io;

elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
} else
skip |= QUEUE_ORDSEQ_BAR;
break;

if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
rq = &q->pre_flush_rq;
} else
skip |= QUEUE_ORDSEQ_PREFLUSH;
case QUEUE_ORDSEQ_POSTFLUSH:
queue_flush(q, rq, post_flush_end_io);
break;

if (queue_in_flight(q))
rq = NULL;
else
skip |= QUEUE_ORDSEQ_DRAIN;

/*
* Complete skipped sequences. If whole sequence is complete,
* return %NULL to tell elevator that this request is gone.
*/
if (blk_ordered_complete_seq(q, skip, 0))
rq = NULL;
default:
BUG();
}
return rq;
}

struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
{
const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
(rq->cmd_flags & REQ_HARDBARRIER);

if (!q->ordseq) {
if (!is_barrier)
return rq;

if (q->next_ordered != QUEUE_ORDERED_NONE)
return start_ordered(q, rq);
else {
/*
* Queue ordering not supported. Terminate
* with prejudice.
*/
blk_dequeue_request(rq);
__blk_end_request_all(rq, -EOPNOTSUPP);
return NULL;
}
unsigned skip = 0;

if (!(rq->cmd_flags & REQ_HARDBARRIER))
return rq;

if (q->ordseq) {
/*
* Barrier is already in progress and they can't be
* processed in parallel. Queue for later processing.
*/
list_move_tail(&rq->queuelist, &q->pending_barriers);
return NULL;
}

if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
/*
* Queue ordering not supported. Terminate
* with prejudice.
*/
blk_dequeue_request(rq);
__blk_end_request_all(rq, -EOPNOTSUPP);
return NULL;
}

/*
* Ordered sequence in progress
* Start a new ordered sequence
*/
q->orderr = 0;
q->ordered = q->next_ordered;
q->ordseq |= QUEUE_ORDSEQ_STARTED;

/* Special requests are not subject to ordering rules. */
if (rq->cmd_type != REQ_TYPE_FS &&
rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
return rq;
/*
* For an empty barrier, there's no actual BAR request, which
* in turn makes POSTFLUSH unnecessary. Mask them off.
*/
if (!blk_rq_sectors(rq))
q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
QUEUE_ORDERED_DO_POSTFLUSH);

/* Ordered by draining. Wait for turn. */
WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
rq = ERR_PTR(-EAGAIN);
/* stash away the original request */
blk_dequeue_request(rq);
q->orig_bar_rq = rq;

return rq;
if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
skip |= QUEUE_ORDSEQ_PREFLUSH;

if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
skip |= QUEUE_ORDSEQ_BAR;

if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
skip |= QUEUE_ORDSEQ_POSTFLUSH;

/* complete skipped sequences and return the first sequence */
return blk_ordered_complete_seq(q, skip, 0);
}

static void bio_end_empty_barrier(struct bio *bio, int err)
Expand Down
11 changes: 9 additions & 2 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
init_timer(&q->unplug_timer);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->pending_barriers);
INIT_WORK(&q->unplug_work, blk_unplug_work);

kobject_init(&q->kobj, &blk_queue_ktype);
Expand Down Expand Up @@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const bool sync = (bio->bi_rw & REQ_SYNC);
const bool unplug = (bio->bi_rw & REQ_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
int where = ELEVATOR_INSERT_SORT;
int rw_flags;

/* REQ_HARDBARRIER is no more */
Expand All @@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)

spin_lock_irq(q->queue_lock);

if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
if (bio->bi_rw & REQ_HARDBARRIER) {
where = ELEVATOR_INSERT_FRONT;
goto get_rq;
}

if (elv_queue_empty(q))
goto get_rq;

el_ret = elv_merge(q, &req, bio);
Expand Down Expand Up @@ -1303,7 +1310,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)

/* insert the request into the elevator */
drive_stat_acct(req, 1);
__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
__elv_add_request(q, req, where, 0);
out:
if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
Expand Down
2 changes: 1 addition & 1 deletion block/blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
rq = list_entry_rq(q->queue_head.next);
rq = blk_do_ordered(q, rq);
if (rq)
return !IS_ERR(rq) ? rq : NULL;
return rq;
}

if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
Expand Down
Loading

0 comments on commit 28e7d18

Please sign in to comment.