Skip to content

Commit

Permalink
block: Implement support for WRITE SAME
Browse files Browse the repository at this point in the history
The WRITE SAME command supported on some SCSI devices allows the same
block to be efficiently replicated throughout a block range. Only a
single logical block is transferred from the host and the storage device
writes the same data to all blocks described by the I/O.

This patch implements support for WRITE SAME in the block layer. The
blkdev_issue_write_same() function can be used by filesystems and block
drivers to replicate a buffer across a block range. This can be used to
efficiently initialize software RAID devices, etc.

Signed-off-by: Martin K. Petersen <[email protected]>
Acked-by: Mike Snitzer <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
martinkpetersen authored and axboe committed Sep 20, 2012
1 parent f31dc1c commit 4363ac7
Show file tree
Hide file tree
Showing 11 changed files with 181 additions and 6 deletions.
14 changes: 14 additions & 0 deletions Documentation/ABI/testing/sysfs-block
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,17 @@ Description:
when a discarded area is read the discard_zeroes_data
parameter will be set to one. Otherwise it will be 0 and
the result of reading a discarded area is undefined.

What: /sys/block/<disk>/queue/write_same_max_bytes
Date: January 2012
Contact: Martin K. Petersen <[email protected]>
Description:
Some devices support a write same operation in which a
single data block can be written to a range of several
contiguous blocks on storage. This can be used to wipe
areas on disk or to initialize drives in a RAID
configuration. write_same_max_bytes indicates how many
bytes can be written in a single write same command. If
write_same_max_bytes is 0, write same is not supported
by the device.

14 changes: 12 additions & 2 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}

if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
err = -EOPNOTSUPP;
goto end_io;
}

/*
* Various block parts want %current->io_context and lazy ioc
* allocation ends up trading a lot of pain for a small amount of
Expand Down Expand Up @@ -1809,15 +1814,20 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);

bio->bi_rw |= rw;

/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
unsigned int count;

if (unlikely(rw & REQ_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);

if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
Expand Down
74 changes: 74 additions & 0 deletions block/blk-lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);

/**
* blkdev_issue_write_same - queue a write same operation
* @bdev: target blockdev
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @page: page containing data to write
*
* Description:
* Issue a write same request for the sectors in question.
*/
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask,
struct page *page)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_write_same_sectors;
struct bio_batch bb;
struct bio *bio;
int ret = 0;

if (!q)
return -ENXIO;

max_write_same_sectors = q->limits.max_write_same_sectors;

if (max_write_same_sectors == 0)
return -EOPNOTSUPP;

atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;

while (nr_sects) {
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
ret = -ENOMEM;
break;
}

bio->bi_sector = sector;
bio->bi_end_io = bio_batch_end_io;
bio->bi_bdev = bdev;
bio->bi_private = &bb;
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);

if (nr_sects > max_write_same_sectors) {
bio->bi_size = max_write_same_sectors << 9;
nr_sects -= max_write_same_sectors;
sector += max_write_same_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}

atomic_inc(&bb.done);
submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
}

/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);

if (!test_bit(BIO_UPTODATE, &bb.flags))
ret = -ENOTSUPP;

return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);

/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
Expand Down
9 changes: 9 additions & 0 deletions block/blk-merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;

if (req->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(req->bio, next->bio))
return 0;

/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
Expand Down Expand Up @@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;

/* must be using the same buffer */
if (rq->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
return false;

return true;
}

Expand Down
16 changes: 16 additions & 0 deletions block/blk-settings.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
Expand Down Expand Up @@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);

Expand Down Expand Up @@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);

/**
* blk_queue_max_write_same_sectors - set max sectors for a single write same
* @q: the request queue for the device
* @max_write_same_sectors: maximum number of sectors to write per command
**/
void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors)
{
q->limits.max_write_same_sectors = max_write_same_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);

/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
Expand Down Expand Up @@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,

t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);

t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
Expand Down
13 changes: 13 additions & 0 deletions block/blk-sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}

static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)q->limits.max_write_same_sectors << 9);
}


static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
Expand Down Expand Up @@ -385,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};

static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};

static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
Expand Down Expand Up @@ -432,6 +444,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
Expand Down
1 change: 1 addition & 0 deletions drivers/md/raid0.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);

/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
Expand Down
9 changes: 6 additions & 3 deletions fs/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1487,9 +1487,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)

bp->bv1 = bi->bi_io_vec[0];
bp->bv2 = bi->bi_io_vec[0];
bp->bv2.bv_offset += first_sectors << 9;
bp->bv2.bv_len -= first_sectors << 9;
bp->bv1.bv_len = first_sectors << 9;

if (bio_is_rw(bi)) {
bp->bv2.bv_offset += first_sectors << 9;
bp->bv2.bv_len -= first_sectors << 9;
bp->bv1.bv_len = first_sectors << 9;
}

bp->bio1.bi_io_vec = &bp->bv1;
bp->bio2.bi_io_vec = &bp->bv2;
Expand Down
3 changes: 3 additions & 0 deletions include/linux/bio.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ static inline bool bio_is_rw(struct bio *bio)
if (!bio_has_data(bio))
return false;

if (bio->bi_rw & REQ_WRITE_SAME)
return false;

return true;
}

Expand Down
5 changes: 4 additions & 1 deletion include/linux/blk_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ enum rq_flag_bits {
__REQ_PRIO, /* boost priority in cfq */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_WRITE_SAME, /* write same block many times */

__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_FUA, /* forced unit access */
Expand Down Expand Up @@ -185,13 +186,15 @@ enum rq_flag_bits {
#define REQ_META (1 << __REQ_META)
#define REQ_PRIO (1 << __REQ_PRIO)
#define REQ_DISCARD (1 << __REQ_DISCARD)
#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)

#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK

/* This mask is used for both bio and request merge checking */
Expand Down
29 changes: 29 additions & 0 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;

Expand Down Expand Up @@ -614,9 +615,20 @@ static inline bool blk_check_merge_flags(unsigned int flags1,
if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE))
return false;

if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
return false;

return true;
}

static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
if (bio_data(a) == bio_data(b))
return true;

return false;
}

/*
* q->prep_rq_fn return values
*/
Expand Down Expand Up @@ -818,6 +830,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(cmd_flags & REQ_DISCARD))
return q->limits.max_discard_sectors;

if (unlikely(cmd_flags & REQ_WRITE_SAME))
return q->limits.max_write_same_sectors;

return q->limits.max_sectors;
}

Expand Down Expand Up @@ -886,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
Expand Down Expand Up @@ -1016,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
Expand Down Expand Up @@ -1193,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
return queue_discard_zeroes_data(bdev_get_queue(bdev));
}

static inline unsigned int bdev_write_same(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);

if (q)
return q->limits.max_write_same_sectors;

return 0;
}

static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
Expand Down

0 comments on commit 4363ac7

Please sign in to comment.