Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull iov_iter updates from Al Viro:

 - bio_{map,copy}_user_iov() series; those are cleanups - fixes from the
   same pile went into mainline (and stable) in late September.

 - fs/iomap.c iov_iter-related fixes

 - new primitive - iov_iter_for_each_range(), which applies a function
   to kernel-mapped segments of an iov_iter.

   Usable for kvec- and bvec-backed iterators; for bvec ones it does
   kmap()/kunmap() around the callback. _Not_ usable for iovec- or
   pipe-backed iov_iter; the latter is not hard to fix if the need
   ever appears, the former is by design. (A usage sketch follows
   this list.)

   Another related primitive will have to wait for the next cycle - it
   passes page + offset + size instead of pointer + size, and that one
   will be usable for everything _except_ kvec. Unfortunately, that one
   didn't get exposure in -next yet, so...

 - a bit more lustre iov_iter work, including a use case for
   iov_iter_for_each_range() (checksum calculation)

 - a vhost/scsi page leak fix in a failure exit path

 - misc cleanups and detritectomy...
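
A minimal usage sketch of the new primitive, assuming the signature this
series gives it - int iov_iter_for_each_range(struct iov_iter *i, size_t
bytes, int (*f)(struct kvec *vec, void *context), void *context) - with a
crc32-based callback that is illustrative, not lustre's actual one:

#include <linux/crc32.h>
#include <linux/uio.h>

/* The callback sees each segment already kernel-mapped, as a kvec;
 * returning non-zero stops the walk. */
static int csum_seg(struct kvec *v, void *context)
{
	u32 *csum = context;

	*csum = crc32_le(*csum, v->iov_base, v->iov_len);
	return 0;
}

/* Checksum the first @nob bytes of a kvec- or bvec-backed iterator. */
static u32 csum_iter(struct iov_iter *to, size_t nob)
{
	u32 csum = ~0;

	iov_iter_for_each_range(to, nob, csum_seg, &csum);
	return csum;
}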

* 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (21 commits)
  iomap_dio_actor(): fix iov_iter bugs
  switch ksocknal_lib_recv_...() to use of iov_iter_for_each_range()
  lustre: switch struct ksock_conn to iov_iter
  vhost/scsi: switch to iov_iter_get_pages()
  fix a page leak in vhost_scsi_iov_to_sgl() error recovery
  new primitive: iov_iter_for_each_range()
  lnet_return_rx_credits_locked: don't abuse list_entry
  xen: don't open-code iov_iter_kvec()
  orangefs: remove detritus from struct orangefs_kiocb_s
  kill iov_shorten()
  bio_alloc_map_data(): do bmd->iter setup right there
  bio_copy_user_iov(): saner bio size calculation
  bio_map_user_iov(): get rid of copying iov_iter
  bio_copy_from_iter(): get rid of copying iov_iter
  move more stuff down into bio_copy_user_iov()
  blk_rq_map_user_iov(): move iov_iter_advance() down
  bio_map_user_iov(): get rid of the iov_for_each()
  bio_map_user_iov(): move alignment check into the main loop
  don't rely upon subsequent bio_add_pc_page() calls failing
  ... and with iov_iter_get_pages_alloc() it becomes even simpler
  ...
torvalds committed Nov 17, 2017
2 parents 93f30c7 + cfe057f commit 16382e1
Showing 15 changed files with 202 additions and 440 deletions.
192 changes: 73 additions & 119 deletions block/bio.c
@@ -1062,14 +1062,21 @@ struct bio_map_data {
 	struct iovec iov[];
 };
 
-static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
+static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
 					       gfp_t gfp_mask)
 {
-	if (iov_count > UIO_MAXIOV)
+	struct bio_map_data *bmd;
+	if (data->nr_segs > UIO_MAXIOV)
 		return NULL;
 
-	return kmalloc(sizeof(struct bio_map_data) +
-		       sizeof(struct iovec) * iov_count, gfp_mask);
+	bmd = kmalloc(sizeof(struct bio_map_data) +
+		      sizeof(struct iovec) * data->nr_segs, gfp_mask);
+	if (!bmd)
+		return NULL;
+	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+	bmd->iter = *data;
+	bmd->iter.iov = bmd->iov;
+	return bmd;
 }
 
 /**
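
The hunk above folds the old open-coded copying into the allocator. The
invariant it preserves, as a hedged standalone sketch (hypothetical helper,
not the kernel function):

#include <linux/string.h>
#include <linux/uio.h>

/* A stashed iov_iter must own its iovec array: the caller's array is
 * often on-stack and can be long gone by the time bio_uncopy_user()
 * walks the saved copy at completion time. */
static void snapshot_iov_iter(struct iov_iter *dst, struct iovec *stash,
			      const struct iov_iter *src)
{
	memcpy(stash, src->iov, sizeof(struct iovec) * src->nr_segs);
	*dst = *src;		/* copies type, iov_offset, count, nr_segs */
	dst->iov = stash;	/* ...but point at our own array */
}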
@@ -1080,7 +1087,7 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
  * Copy all pages from iov_iter to bio.
  * Returns 0 on success, or error on failure.
  */
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
 {
 	int i;
 	struct bio_vec *bvec;
@@ -1091,9 +1098,9 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
 		ret = copy_page_from_iter(bvec->bv_page,
 					  bvec->bv_offset,
 					  bvec->bv_len,
-					  &iter);
+					  iter);
 
-		if (!iov_iter_count(&iter))
+		if (!iov_iter_count(iter))
 			break;
 
 		if (ret < bvec->bv_len)
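
This hunk and the previous one are the same fix: the iterator is now passed
by pointer, so the advancing done inside copy_page_from_iter() stays visible
to the caller. A toy illustration (not kernel API):

#include <linux/uio.h>

static void advance_by_value(struct iov_iter it, size_t n)
{
	iov_iter_advance(&it, n);	/* only the local copy moves */
}

static void advance_by_pointer(struct iov_iter *it, size_t n)
{
	iov_iter_advance(it, n);	/* the caller's iterator moves */
}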
@@ -1187,40 +1194,18 @@ int bio_uncopy_user(struct bio *bio)
  */
 struct bio *bio_copy_user_iov(struct request_queue *q,
 			      struct rq_map_data *map_data,
-			      const struct iov_iter *iter,
+			      struct iov_iter *iter,
 			      gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
 	struct page *page;
 	struct bio *bio;
-	int i, ret;
-	int nr_pages = 0;
+	int i = 0, ret;
+	int nr_pages;
 	unsigned int len = iter->count;
 	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
 
-	for (i = 0; i < iter->nr_segs; i++) {
-		unsigned long uaddr;
-		unsigned long end;
-		unsigned long start;
-
-		uaddr = (unsigned long) iter->iov[i].iov_base;
-		end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
-			>> PAGE_SHIFT;
-		start = uaddr >> PAGE_SHIFT;
-
-		/*
-		 * Overflow, abort
-		 */
-		if (end < start)
-			return ERR_PTR(-EINVAL);
-
-		nr_pages += end - start;
-	}
-
-	if (offset)
-		nr_pages++;
-
-	bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
+	bmd = bio_alloc_map_data(iter, gfp_mask);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
@@ -1230,9 +1215,10 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	 * shortlived one.
 	 */
 	bmd->is_our_pages = map_data ? 0 : 1;
-	memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
-	bmd->iter = *iter;
-	bmd->iter.iov = bmd->iov;
+
+	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	if (nr_pages > BIO_MAX_PAGES)
+		nr_pages = BIO_MAX_PAGES;
 
 	ret = -ENOMEM;
 	bio = bio_kmalloc(gfp_mask, nr_pages);
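
A worked example of the "saner bio size calculation", assuming 4 KiB pages
(numbers illustrative):

#include <linux/kernel.h>	/* DIV_ROUND_UP */
#include <linux/mm.h>		/* PAGE_SIZE */

/* A 10000-byte buffer starting 100 bytes into a page needs
 * DIV_ROUND_UP(100 + 10000, 4096) = 3 bounce pages.  The old code
 * summed per-segment user-page spans instead, so ten 1000-byte
 * segments adding up to the same 10000 bytes were counted as at
 * least ten pages. */
static unsigned int bounce_pages_needed(unsigned int offset, unsigned int len)
{
	return DIV_ROUND_UP(offset + len, PAGE_SIZE);
}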
@@ -1281,17 +1267,24 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	if (ret)
 		goto cleanup;
 
+	if (map_data)
+		map_data->offset += bio->bi_iter.bi_size;
+
 	/*
 	 * success
 	 */
 	if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
 	    (map_data && map_data->from_user)) {
-		ret = bio_copy_from_iter(bio, *iter);
+		ret = bio_copy_from_iter(bio, iter);
 		if (ret)
 			goto cleanup;
+	} else {
+		iov_iter_advance(iter, bio->bi_iter.bi_size);
 	}
 
 	bio->bi_private = bmd;
+	if (map_data && map_data->null_mapped)
+		bio_set_flag(bio, BIO_NULL_MAPPED);
 	return bio;
 cleanup:
 	if (!map_data)
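
Together with the blk-map.c hunk further down (which deletes exactly these
fixups from the caller), the contract becomes: bio_copy_user_iov() consumes
what it maps. A hedged sketch of the resulting caller loop (hypothetical
helper, bio chaining elided):

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/err.h>
#include <linux/uio.h>

static int map_everything(struct request_queue *q,
			  struct rq_map_data *map_data,
			  struct iov_iter *iter, gfp_t gfp_mask)
{
	/* bio_copy_from_iter() advances the iterator while copying;
	 * the non-copying path advances it explicitly, so each pass
	 * through the loop starts where the previous bio ended. */
	while (iov_iter_count(iter)) {
		struct bio *bio = bio_copy_user_iov(q, map_data, iter,
						    gfp_mask);

		if (IS_ERR(bio))
			return PTR_ERR(bio);
		/* attach @bio to the request here (elided) */
	}
	return 0;
}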
@@ -1312,111 +1305,74 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
  * device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user_iov(struct request_queue *q,
-			     const struct iov_iter *iter,
+			     struct iov_iter *iter,
 			     gfp_t gfp_mask)
 {
 	int j;
-	int nr_pages = 0;
-	struct page **pages;
 	struct bio *bio;
-	int cur_page = 0;
-	int ret, offset;
-	struct iov_iter i;
-	struct iovec iov;
+	int ret;
 	struct bio_vec *bvec;
 
-	iov_for_each(iov, i, *iter) {
-		unsigned long uaddr = (unsigned long) iov.iov_base;
-		unsigned long len = iov.iov_len;
-		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long start = uaddr >> PAGE_SHIFT;
-
-		/*
-		 * Overflow, abort
-		 */
-		if (end < start)
-			return ERR_PTR(-EINVAL);
-
-		nr_pages += end - start;
-		/*
-		 * buffer must be aligned to at least logical block size for now
-		 */
-		if (uaddr & queue_dma_alignment(q))
-			return ERR_PTR(-EINVAL);
-	}
-
-	if (!nr_pages)
+	if (!iov_iter_count(iter))
 		return ERR_PTR(-EINVAL);
 
-	bio = bio_kmalloc(gfp_mask, nr_pages);
+	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
-	ret = -ENOMEM;
-	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
-	if (!pages)
-		goto out;
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		ssize_t bytes;
+		size_t offs, added = 0;
+		int npages;
 
-	iov_for_each(iov, i, *iter) {
-		unsigned long uaddr = (unsigned long) iov.iov_base;
-		unsigned long len = iov.iov_len;
-		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long start = uaddr >> PAGE_SHIFT;
-		const int local_nr_pages = end - start;
-		const int page_limit = cur_page + local_nr_pages;
-
-		ret = get_user_pages_fast(uaddr, local_nr_pages,
-				(iter->type & WRITE) != WRITE,
-				&pages[cur_page]);
-		if (unlikely(ret < local_nr_pages)) {
-			for (j = cur_page; j < page_limit; j++) {
-				if (!pages[j])
-					break;
-				put_page(pages[j]);
-			}
-			ret = -EFAULT;
+		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+		if (unlikely(bytes <= 0)) {
+			ret = bytes ? bytes : -EFAULT;
 			goto out_unmap;
 		}
 
-		offset = offset_in_page(uaddr);
-		for (j = cur_page; j < page_limit; j++) {
-			unsigned int bytes = PAGE_SIZE - offset;
-			unsigned short prev_bi_vcnt = bio->bi_vcnt;
+		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
 
-			if (len <= 0)
-				break;
-
-			if (bytes > len)
-				bytes = len;
-
-			/*
-			 * sorry...
-			 */
-			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
-					    bytes)
-				break;
-
-			/*
-			 * check if vector was merged with previous
-			 * drop page reference if needed
-			 */
-			if (bio->bi_vcnt == prev_bi_vcnt)
-				put_page(pages[j]);
-
-			len -= bytes;
-			offset = 0;
-		}
-
-		cur_page = j;
+		if (unlikely(offs & queue_dma_alignment(q))) {
+			ret = -EINVAL;
+			j = 0;
+		} else {
+			for (j = 0; j < npages; j++) {
+				struct page *page = pages[j];
+				unsigned int n = PAGE_SIZE - offs;
+				unsigned short prev_bi_vcnt = bio->bi_vcnt;
+
+				if (n > bytes)
+					n = bytes;
+
+				if (!bio_add_pc_page(q, bio, page, n, offs))
+					break;
+
+				/*
+				 * check if vector was merged with previous
+				 * drop page reference if needed
+				 */
+				if (bio->bi_vcnt == prev_bi_vcnt)
+					put_page(page);
+
+				added += n;
+				bytes -= n;
+				offs = 0;
+			}
+			iov_iter_advance(iter, added);
+		}
 		/*
 		 * release the pages we didn't map into the bio, if any
 		 */
-		while (j < page_limit)
+		while (j < npages)
 			put_page(pages[j++]);
+		kvfree(pages);
+		/* couldn't stuff something into bio? */
+		if (bytes)
+			break;
 	}
 
-	kfree(pages);
-
 	bio_set_flag(bio, BIO_USER_MAPPED);
 
 	/*
Expand All @@ -1432,8 +1388,6 @@ struct bio *bio_map_user_iov(struct request_queue *q,
bio_for_each_segment_all(bvec, bio, j) {
put_page(bvec->bv_page);
}
out:
kfree(pages);
bio_put(bio);
return ERR_PTR(ret);
}
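
The rewritten loop, distilled into a standalone skeleton (hypothetical
helper, error unwinding elided). The reference-counting rules carry over: a
page that bio_add_pc_page() merged into the previous bvec gives up its extra
reference, and pinned pages never handed to the bio are released before
bailing out:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/uio.h>

static ssize_t pin_batch_into_bio(struct request_queue *q, struct bio *bio,
				  struct iov_iter *iter)
{
	struct page **pages;
	size_t offs, added = 0;
	ssize_t bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
	int j, npages;

	if (bytes <= 0)
		return bytes ? bytes : -EFAULT;
	npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

	for (j = 0; j < npages; j++) {
		unsigned int n = min_t(size_t, PAGE_SIZE - offs, bytes);
		unsigned short prev_bi_vcnt = bio->bi_vcnt;

		if (!bio_add_pc_page(q, bio, pages[j], n, offs))
			break;			/* bio full: stop here */
		if (bio->bi_vcnt == prev_bi_vcnt)
			put_page(pages[j]);	/* merged: ref not consumed */
		added += n;
		bytes -= n;
		offs = 0;
	}
	iov_iter_advance(iter, added);		/* consume only what we added */
	while (j < npages)
		put_page(pages[j++]);		/* pages the bio never took */
	kvfree(pages);
	return added;
}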
7 changes: 0 additions & 7 deletions block/blk-map.c
@@ -67,13 +67,6 @@ static int __blk_rq_map_user_iov(struct request *rq,
 	bio->bi_opf &= ~REQ_OP_MASK;
 	bio->bi_opf |= req_op(rq);
 
-	if (map_data && map_data->null_mapped)
-		bio_set_flag(bio, BIO_NULL_MAPPED);
-
-	iov_iter_advance(iter, bio->bi_iter.bi_size);
-	if (map_data)
-		map_data->offset += bio->bi_iter.bi_size;
-
 	orig_bio = bio;
 
 	/*
4 changes: 2 additions & 2 deletions drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -1683,10 +1683,10 @@ ksocknal_destroy_conn(struct ksock_conn *conn)
 	case SOCKNAL_RX_LNET_PAYLOAD:
 		last_rcv = conn->ksnc_rx_deadline -
 			   cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
-		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n",
+		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
 		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
 		       &conn->ksnc_ipaddr, conn->ksnc_port,
-		       conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
+		       iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
 		       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
 						     last_rcv)));
 		lnet_finalize(conn->ksnc_peer->ksnp_ni,
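
The specifier change above follows from the type change: the deleted
ksnc_rx_nob_wanted was an int, while iov_iter_count() returns size_t, which
needs the %z length modifier. A minimal illustration (hypothetical helper,
not driver code):

#include <linux/printk.h>
#include <linux/uio.h>

static void report_partial_rx(struct iov_iter *to, int nob_left)
{
	/* size_t takes %zu/%zd; plain %d breaks wherever size_t is
	 * wider than int, i.e. all 64-bit configs. */
	pr_err("wanted: %zd, left: %d\n", iov_iter_count(to), nob_left);
}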
9 changes: 2 additions & 7 deletions drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -358,11 +358,7 @@ struct ksock_conn {
 	__u8 ksnc_rx_scheduled;	/* being progressed */
 	__u8 ksnc_rx_state;	/* what is being read */
 	int ksnc_rx_nob_left;	/* # bytes to next hdr/body */
-	int ksnc_rx_nob_wanted;	/* bytes actually wanted */
-	int ksnc_rx_niov;	/* # iovec frags */
-	struct kvec *ksnc_rx_iov;	/* the iovec frags */
-	int ksnc_rx_nkiov;	/* # page frags */
-	struct bio_vec *ksnc_rx_kiov;	/* the page frags */
+	struct iov_iter ksnc_rx_to;	/* copy destination */
 	union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */
 	__u32 ksnc_rx_csum;	/* partial checksum for incoming
 				 * data
@@ -701,8 +697,7 @@ int ksocknal_lib_setup_sock(struct socket *so);
 int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
 int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
 void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-int ksocknal_lib_recv_iov(struct ksock_conn *conn);
-int ksocknal_lib_recv_kiov(struct ksock_conn *conn);
+int ksocknal_lib_recv(struct ksock_conn *conn);
 int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
 				   int *rxmem, int *nagle);
 
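
What replaces the five deleted fields, sketched below. The helper name and
the kvec/bvec selection are illustrative, and the ITER_* flag or'ed into the
direction argument is this kernel generation's convention (an assumption
worth checking against the actual lustre conversion):

#include <linux/uio.h>
#include "socklnd.h"	/* driver-local header; provides struct ksock_conn */

static void conn_setup_rx_iter(struct ksock_conn *conn,
			       struct kvec *iov, int niov,
			       struct bio_vec *kiov, int nkiov,
			       size_t nob)
{
	/* One iov_iter replaces ksnc_rx_niov/ksnc_rx_iov and
	 * ksnc_rx_nkiov/ksnc_rx_kiov; generic iov_iter code now does
	 * the fragment bookkeeping the driver did by hand. */
	if (kiov)
		iov_iter_bvec(&conn->ksnc_rx_to, READ | ITER_BVEC,
			      kiov, nkiov, nob);
	else
		iov_iter_kvec(&conn->ksnc_rx_to, READ | ITER_KVEC,
			      iov, niov, nob);
}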