Merge tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
 "Pretty quiet round this time around. This contains:

   - NVMe updates via Keith:
        - nvme fabrics spec updates (Guixin, Max)
        - nvme target updates (Guixin, Evan)
        - nvme attribute refactoring (Daniel)
        - nvme-fc numa fix (Keith)

   - MD updates via Song:
        - Fix/Cleanup RCU usage from conf->disks[i].rdev (Yu Kuai)
        - Fix raid5 hang issue (Junxiao Bi)
        - Add Yu Kuai as Reviewer of the md subsystem
        - Remove deprecated flavors (Song Liu)
        - raid1 read error check support (Li Nan)
        - Better handle events off-by-1 case (Alex Lyakas)

   - Efficiency improvements for passthrough (Kundan)

   - Support for mapping integrity data directly (Keith)

   - Zoned write fix (Damien)

   - rnbd fixes (Kees, Santosh, Supriti)

   - Default to a sane discard size granularity (Christoph)

   - Make the default max transfer size naming less confusing
     (Christoph)

   - Remove support for deprecated host aware zoned model (Christoph)

   - Misc fixes (me, Li, Matthew, Min, Ming, Randy, liyouhong, Daniel,
     Bart, Christoph)"

* tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux: (78 commits)
  block: Treat sequential write preferred zone type as invalid
  block: remove disk_clear_zoned
  sd: remove the !ZBC && blk_queue_is_zoned case in sd_read_block_characteristics
  drivers/block/xen-blkback/common.h: Fix spelling typo in comment
  blk-cgroup: fix rcu lockdep warning in blkg_lookup()
  blk-cgroup: don't use removal safe list iterators
  block: floor the discard granularity to the physical block size
  mtd_blkdevs: use the default discard granularity
  bcache: use the default discard granularity
  zram: use the default discard granularity
  null_blk: use the default discard granularity
  nbd: use the default discard granularity
  ubd: use the default discard granularity
  block: default the discard granularity to sector size
  bcache: discard_granularity should not be smaller than a sector
  block: remove two comments in bio_split_discard
  block: rename and document BLK_DEF_MAX_SECTORS
  loop: don't abuse BLK_DEF_MAX_SECTORS
  aoe: don't abuse BLK_DEF_MAX_SECTORS
  null_blk: don't cap max_hw_sectors to BLK_DEF_MAX_SECTORS
  ...
torvalds committed Jan 11, 2024
2 parents d05e626 + 587371e commit 01d550f
Showing 79 changed files with 1,254 additions and 2,660 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
@@ -20079,6 +20079,7 @@ F: include/linux/property.h

SOFTWARE RAID (Multiple Disks) SUPPORT
M: Song Liu <[email protected]>
R: Yu Kuai <[email protected]>
L: [email protected]
S: Supported
Q: https://patchwork.kernel.org/project/linux-raid/list/
1 change: 0 additions & 1 deletion arch/um/drivers/ubd_kern.c
@@ -798,7 +798,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
ubd_dev->cow.fd = err;
}
if (ubd_dev->no_trim == 0) {
ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
}
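
The ubd hunk above drops the explicit discard_granularity assignment because the block layer now supplies a sane default, per the "default the discard granularity to sector size" and "floor the discard granularity to the physical block size" commits in this pull. A rough sketch of that default, using the usual struct queue_limits fields; the helper name is hypothetical and this is not the exact code from the series:

        /*
         * Hedged sketch only: if a driver enables discard without choosing a
         * granularity, fall back to the sector size and never let it drop
         * below the physical block size.
         */
        static void sketch_discard_granularity_default(struct queue_limits *lim)
        {
                if (!lim->discard_granularity)
                        lim->discard_granularity = SECTOR_SIZE;
                if (lim->discard_granularity < lim->physical_block_size)
                        lim->discard_granularity = lim->physical_block_size;
        }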
218 changes: 216 additions & 2 deletions block/bio-integrity.c
@@ -69,15 +69,15 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,

memset(bip, 0, sizeof(*bip));

/* always report as many vecs as asked explicitly, not inline vecs */
bip->bip_max_vcnt = nr_vecs;
if (nr_vecs > inline_vecs) {
bip->bip_max_vcnt = nr_vecs;
bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
&bip->bip_max_vcnt, gfp_mask);
if (!bip->bip_vec)
goto err;
} else {
bip->bip_vec = bip->bip_inline_vecs;
bip->bip_max_vcnt = inline_vecs;
}

bip->bip_bio = bio;
@@ -91,6 +91,47 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
}
EXPORT_SYMBOL(bio_integrity_alloc);

static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
bool dirty)
{
int i;

for (i = 0; i < nr_vecs; i++) {
if (dirty && !PageCompound(bv[i].bv_page))
set_page_dirty_lock(bv[i].bv_page);
unpin_user_page(bv[i].bv_page);
}
}

static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
{
unsigned short nr_vecs = bip->bip_max_vcnt - 1;
struct bio_vec *copy = &bip->bip_vec[1];
size_t bytes = bip->bip_iter.bi_size;
struct iov_iter iter;
int ret;

iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
WARN_ON_ONCE(ret != bytes);

bio_integrity_unpin_bvec(copy, nr_vecs, true);
}

static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
{
bool dirty = bio_data_dir(bip->bip_bio) == READ;

if (bip->bip_flags & BIP_COPY_USER) {
if (dirty)
bio_integrity_uncopy_user(bip);
kfree(bvec_virt(bip->bip_vec));
return;
}

bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
}

/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
Expand All @@ -105,6 +146,8 @@ void bio_integrity_free(struct bio *bio)

if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
kfree(bvec_virt(bip->bip_vec));
else if (bip->bip_flags & BIP_INTEGRITY_USER)
bio_integrity_unmap_user(bip);

__bio_integrity_free(bs, bip);
bio->bi_integrity = NULL;
@@ -160,6 +203,177 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_integrity_add_page);

static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
int nr_vecs, unsigned int len,
unsigned int direction, u32 seed)
{
bool write = direction == ITER_SOURCE;
struct bio_integrity_payload *bip;
struct iov_iter iter;
void *buf;
int ret;

buf = kmalloc(len, GFP_KERNEL);
if (!buf)
return -ENOMEM;

if (write) {
iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
if (!copy_from_iter_full(buf, len, &iter)) {
ret = -EFAULT;
goto free_buf;
}

bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
} else {
memset(buf, 0, len);

/*
* We need to preserve the original bvec and the number of vecs
* in it for completion handling
*/
bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
}

if (IS_ERR(bip)) {
ret = PTR_ERR(bip);
goto free_buf;
}

if (write)
bio_integrity_unpin_bvec(bvec, nr_vecs, false);
else
memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf));
if (ret != len) {
ret = -ENOMEM;
goto free_bip;
}

bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
bip->bip_iter.bi_sector = seed;
return 0;
free_bip:
bio_integrity_free(bio);
free_buf:
kfree(buf);
return ret;
}

static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
int nr_vecs, unsigned int len, u32 seed)
{
struct bio_integrity_payload *bip;

bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
if (IS_ERR(bip))
return PTR_ERR(bip);

memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
bip->bip_flags |= BIP_INTEGRITY_USER;
bip->bip_iter.bi_sector = seed;
bip->bip_iter.bi_size = len;
return 0;
}

static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
int nr_vecs, ssize_t bytes, ssize_t offset)
{
unsigned int nr_bvecs = 0;
int i, j;

for (i = 0; i < nr_vecs; i = j) {
size_t size = min_t(size_t, bytes, PAGE_SIZE - offset);
struct folio *folio = page_folio(pages[i]);

bytes -= size;
for (j = i + 1; j < nr_vecs; j++) {
size_t next = min_t(size_t, PAGE_SIZE, bytes);

if (page_folio(pages[j]) != folio ||
pages[j] != pages[j - 1] + 1)
break;
unpin_user_page(pages[j]);
size += next;
bytes -= next;
}

bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset);
offset = 0;
nr_bvecs++;
}

return nr_bvecs;
}

int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
u32 seed)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
unsigned int direction, nr_bvecs;
struct iov_iter iter;
int ret, nr_vecs;
size_t offset;
bool copy;

if (bio_integrity(bio))
return -EINVAL;
if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
return -E2BIG;

if (bio_data_dir(bio) == READ)
direction = ITER_DEST;
else
direction = ITER_SOURCE;

iov_iter_ubuf(&iter, direction, ubuf, bytes);
nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
if (nr_vecs > BIO_MAX_VECS)
return -E2BIG;
if (nr_vecs > UIO_FASTIOV) {
bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
if (!bvec)
return -ENOMEM;
pages = NULL;
}

copy = !iov_iter_is_aligned(&iter, align, align);
ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
if (unlikely(ret < 0))
goto free_bvec;

nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
if (pages != stack_pages)
kvfree(pages);
if (nr_bvecs > queue_max_integrity_segments(q))
copy = true;

if (copy)
ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
direction, seed);
else
ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
if (ret)
goto release_pages;
if (bvec != stack_vec)
kfree(bvec);

return 0;

release_pages:
bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
free_bvec:
if (bvec != stack_vec)
kfree(bvec);
return ret;
}
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
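
bio_integrity_map_user() is the new entry point exported above for attaching user-space integrity metadata to a passthrough bio ("Support for mapping integrity data directly" in the pull message). A hedged caller sketch follows; only bio_integrity_map_user() and its error codes come from this diff, the surrounding names are made up for illustration:

        /* Hypothetical submission-side helper, not taken from this commit. */
        static int prep_user_pi(struct bio *bio, void __user *meta_ubuf,
                                ssize_t meta_len, u32 seed)
        {
                int ret;

                /* Pin (or bounce-buffer) the user metadata onto the bio. */
                ret = bio_integrity_map_user(bio, meta_ubuf, meta_len, seed);
                if (ret)        /* -EINVAL, -E2BIG, -ENOMEM or -EFAULT */
                        return ret;

                /* Unpin/copy-back is handled by bio_integrity_free() at completion. */
                return 0;
        }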

/**
* bio_integrity_process - Process integrity metadata for a bio
* @bio: bio to generate/verify integrity metadata for
53 changes: 29 additions & 24 deletions block/bio.c
@@ -944,7 +944,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,

if ((addr1 | mask) != (addr2 | mask))
return false;
if (bv->bv_len + len > queue_max_segment_size(q))
if (len > queue_max_segment_size(q) - bv->bv_len)
return false;
return bvec_try_merge_page(bv, page, len, offset, same_page);
}
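
The rewritten check in bvec_try_merge_hw_page() appears to guard against unsigned wraparound: bv->bv_len + len can overflow and slip under queue_max_segment_size(q), whereas subtracting the already-validated bv->bv_len from the limit cannot. A small stand-alone illustration with made-up numbers:

        #include <limits.h>
        #include <stdio.h>

        int main(void)
        {
                unsigned int max = UINT_MAX;        /* e.g. an "unlimited" segment size */
                unsigned int cur = UINT_MAX - 100;  /* current bv_len, already <= max */
                unsigned int len = 200;             /* bytes we try to merge */

                /* old form: cur + len wraps to 99, so the limit check passes wrongly */
                printf("old check rejects merge: %d\n", cur + len > max);
                /* new form: 200 > 100, so the oversized merge is rejected as intended */
                printf("new check rejects merge: %d\n", len > max - cur);
                return 0;
        }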
@@ -966,10 +966,13 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page)
{
unsigned int max_size = max_sectors << SECTOR_SHIFT;

if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;

if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
len = min3(len, max_size, queue_max_segment_size(q));
if (len > max_size - bio->bi_iter.bi_size)
return 0;

if (bio->bi_vcnt > 0) {
@@ -1145,13 +1148,22 @@ EXPORT_SYMBOL(bio_add_folio);

void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
struct bvec_iter_all iter_all;
struct bio_vec *bvec;
struct folio_iter fi;

bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
bio_release_page(bio, bvec->bv_page);
bio_for_each_folio_all(fi, bio) {
struct page *page;
size_t done = 0;

if (mark_dirty) {
folio_lock(fi.folio);
folio_mark_dirty(fi.folio);
folio_unlock(fi.folio);
}
page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
do {
bio_release_page(bio, page++);
done += PAGE_SIZE;
} while (done < fi.length);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1439,18 +1451,12 @@ EXPORT_SYMBOL(bio_free_pages);
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
*
* The problem is that we cannot run set_page_dirty() from interrupt context
* The problem is that we cannot run folio_mark_dirty() from interrupt context
* because the required locks are not interrupt-safe. So what we can do is to
* mark the pages dirty _before_ performing IO. And in interrupt context,
* check that the pages are still dirty. If so, fine. If not, redirty them
* in process context.
*
* We special-case compound pages here: normally this means reads into hugetlb
* pages. The logic in here doesn't really work right for compound pages
* because the VM does not uniformly chase down the head page in all cases.
* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
* handle them at all. So we skip compound pages here at an early stage.
*
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1466,12 +1472,12 @@ EXPORT_SYMBOL(bio_free_pages);
*/
void bio_set_pages_dirty(struct bio *bio)
{
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
struct folio_iter fi;

bio_for_each_segment_all(bvec, bio, iter_all) {
if (!PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
bio_for_each_folio_all(fi, bio) {
folio_lock(fi.folio);
folio_mark_dirty(fi.folio);
folio_unlock(fi.folio);
}
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
@@ -1515,12 +1521,11 @@ static void bio_dirty_fn(struct work_struct *work)

void bio_check_pages_dirty(struct bio *bio)
{
struct bio_vec *bvec;
struct folio_iter fi;
unsigned long flags;
struct bvec_iter_all iter_all;

bio_for_each_segment_all(bvec, bio, iter_all) {
if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
bio_for_each_folio_all(fi, bio) {
if (!folio_test_dirty(fi.folio))
goto defer;
}

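
The reworked comment and the folio conversion above keep the long-standing contract of bio_set_pages_dirty()/bio_check_pages_dirty() intact. A hedged sketch of the usual direct-IO read pattern these helpers support; the function names here are illustrative, not from this diff:

        /* Hypothetical direct-IO read path, showing the intended usage only. */
        static void dio_submit_read(struct bio *bio)
        {
                bio_set_pages_dirty(bio);       /* process context, before I/O */
                submit_bio(bio);
        }

        static void dio_read_end_io(struct bio *bio)
        {
                /*
                 * May run in interrupt context: re-check the folios and let the
                 * helper redirty any that were cleaned, deferring to a workqueue
                 * when needed; it also releases the pages and drops the bio
                 * reference itself, so no extra bio_put() here.
                 */
                bio_check_pages_dirty(bio);
        }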