Skip to content

Commit

Permalink
Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Support directly accessing host page cache from virtiofs. This can
   improve I/O performance for various workloads, as well as reducing
   the memory requirement by eliminating double caching. Thanks to Vivek
   Goyal for doing most of the work on this.

 - Allow automatic submounting inside virtiofs. This allows unique
   st_dev/ st_ino values to be assigned inside the guest to files
   residing on different filesystems on the host. Thanks to Max Reitz
   for the patches.

 - Fix an old use after free bug found by Pradeep P V K.

* tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits)
  virtiofs: calculate number of scatter-gather elements accurately
  fuse: connection remove fix
  fuse: implement crossmounts
  fuse: Allow fuse_fill_super_common() for submounts
  fuse: split fuse_mount off of fuse_conn
  fuse: drop fuse_conn parameter where possible
  fuse: store fuse_conn in fuse_req
  fuse: add submount support to <uapi/linux/fuse.h>
  fuse: fix page dereference after free
  virtiofs: add logic to free up a memory range
  virtiofs: maintain a list of busy elements
  virtiofs: serialize truncate/punch_hole and dax fault path
  virtiofs: define dax address space operations
  virtiofs: add DAX mmap support
  virtiofs: implement dax read/write operations
  virtiofs: introduce setupmapping/removemapping commands
  virtiofs: implement FUSE_INIT map_alignment field
  virtiofs: keep a list of free dax memory ranges
  virtiofs: add a mount option to enable dax
  virtiofs: set up virtio_fs dax_device
  ...
  • Loading branch information
torvalds committed Oct 19, 2020
2 parents 922a763 + 42d3e2d commit 6945653
Show file tree
Hide file tree
Showing 20 changed files with 2,689 additions and 496 deletions.
2 changes: 1 addition & 1 deletion Documentation/filesystems/fuse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ filesystems. A good example is sshfs: a secure network filesystem
using the sftp protocol.

The userspace library and utilities are available from the
`FUSE homepage: <http://fuse.sourceforge.net/>`_
`FUSE homepage: <https://github.com/libfuse/>`_

Filesystem type
===============
Expand Down
2 changes: 1 addition & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -7238,7 +7238,7 @@ FUSE: FILESYSTEM IN USERSPACE
M: Miklos Szeredi <[email protected]>
L: [email protected]
S: Maintained
W: http://fuse.sourceforge.net/
W: https://github.com/libfuse/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
F: Documentation/filesystems/fuse.rst
F: fs/fuse/
Expand Down
3 changes: 2 additions & 1 deletion drivers/dax/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock);
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff)
{
phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
phys_addr_t phys_off = (start_sect + sector) * 512;

if (pgoff)
*pgoff = PHYS_PFN(phys_off);
Expand Down
29 changes: 23 additions & 6 deletions fs/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
}

/**
* dax_layout_busy_page - find first pinned page in @mapping
* dax_layout_busy_page_range - find first pinned page in @mapping
* @mapping: address space to scan for a page with ref count > 1
* @start: Starting offset. Page containing 'start' is included.
* @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
* pages from 'start' till the end of file are included.
*
* DAX requires ZONE_DEVICE mapped pages. These pages are never
* 'onlined' to the page allocator so they are considered idle when
Expand All @@ -573,12 +576,15 @@ static void *grab_mapping_entry(struct xa_state *xas,
* to be able to run unmap_mapping_range() and subsequently not race
* mapping_mapped() becoming true.
*/
struct page *dax_layout_busy_page(struct address_space *mapping)
struct page *dax_layout_busy_page_range(struct address_space *mapping,
loff_t start, loff_t end)
{
XA_STATE(xas, &mapping->i_pages, 0);
void *entry;
unsigned int scanned = 0;
struct page *page = NULL;
pgoff_t start_idx = start >> PAGE_SHIFT;
pgoff_t end_idx;
XA_STATE(xas, &mapping->i_pages, start_idx);

/*
* In the 'limited' case get_user_pages() for dax is disabled.
Expand All @@ -589,22 +595,27 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
if (!dax_mapping(mapping) || !mapping_mapped(mapping))
return NULL;

/* If end == LLONG_MAX, all pages from start to till end of file */
if (end == LLONG_MAX)
end_idx = ULONG_MAX;
else
end_idx = end >> PAGE_SHIFT;
/*
* If we race get_user_pages_fast() here either we'll see the
* elevated page count in the iteration and wait, or
* get_user_pages_fast() will see that the page it took a reference
* against is no longer mapped in the page tables and bail to the
* get_user_pages() slow path. The slow path is protected by
* pte_lock() and pmd_lock(). New references are not taken without
* holding those locks, and unmap_mapping_range() will not zero the
* holding those locks, and unmap_mapping_pages() will not zero the
* pte or pmd without holding the respective lock, so we are
* guaranteed to either see new references or prevent new
* references from being established.
*/
unmap_mapping_range(mapping, 0, 0, 0);
unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);

xas_lock_irq(&xas);
xas_for_each(&xas, entry, ULONG_MAX) {
xas_for_each(&xas, entry, end_idx) {
if (WARN_ON_ONCE(!xa_is_value(entry)))
continue;
if (unlikely(dax_is_locked(entry)))
Expand All @@ -625,6 +636,12 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
xas_unlock_irq(&xas);
return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);

struct page *dax_layout_busy_page(struct address_space *mapping)
{
return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);

static int __dax_invalidate_entry(struct address_space *mapping,
Expand Down
16 changes: 15 additions & 1 deletion fs/fuse/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ config FUSE_FS

There's also a companion library: libfuse2. This library is available
from the FUSE homepage:
<http://fuse.sourceforge.net/>
<https://github.com/libfuse/>
although chances are your distribution already has that library
installed if you've installed the "fuse" package itself.

Expand Down Expand Up @@ -38,3 +38,17 @@ config VIRTIO_FS

If you want to share files between guests or with the host, answer Y
or M.

config FUSE_DAX
bool "Virtio Filesystem Direct Host Memory Access support"
default y
select INTERVAL_TREE
depends on VIRTIO_FS
depends on FS_DAX
depends on DAX_DRIVER
help
This allows bypassing guest page cache and allows mapping host page
cache directly in guest address space.

If you want to allow mounting a Virtio Filesystem with the "dax"
option, answer Y.
6 changes: 4 additions & 2 deletions fs/fuse/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o

fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
virtiofs-y += virtio_fs.o
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
fuse-$(CONFIG_FUSE_DAX) += dax.o

virtiofs-y := virtio_fs.o
20 changes: 15 additions & 5 deletions fs/fuse/control.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
{
unsigned val;
struct fuse_conn *fc;
struct fuse_mount *fm;
ssize_t ret;

ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
Expand All @@ -174,18 +175,27 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
if (!fc)
goto out;

down_read(&fc->killsb);
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val;
if (fc->sb) {

/*
* Get any fuse_mount belonging to this fuse_conn; s_bdi is
* shared between all of them
*/

if (!list_empty(&fc->mounts)) {
fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
if (fc->num_background < fc->congestion_threshold) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
} else {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
}
}
spin_unlock(&fc->bg_lock);
up_read(&fc->killsb);
fuse_conn_put(fc);
out:
return ret;
Expand Down
21 changes: 12 additions & 9 deletions fs/fuse/cuse.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

struct cuse_conn {
struct list_head list; /* linked on cuse_conntbl */
struct fuse_mount fm; /* Dummy mount referencing fc */
struct fuse_conn fc; /* fuse connection */
struct cdev *cdev; /* associated character device */
struct device *dev; /* device representing @cdev */
Expand Down Expand Up @@ -134,7 +135,7 @@ static int cuse_open(struct inode *inode, struct file *file)
* Generic permission check is already done against the chrdev
* file, proceed to open.
*/
rc = fuse_do_open(&cc->fc, 0, file, 0);
rc = fuse_do_open(&cc->fm, 0, file, 0);
if (rc)
fuse_conn_put(&cc->fc);
return rc;
Expand All @@ -143,10 +144,10 @@ static int cuse_open(struct inode *inode, struct file *file)
static int cuse_release(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
struct fuse_mount *fm = ff->fm;

fuse_sync_release(NULL, ff, file->f_flags);
fuse_conn_put(fc);
fuse_conn_put(fm->fc);

return 0;
}
Expand All @@ -155,7 +156,7 @@ static long cuse_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fuse_file *ff = file->private_data;
struct cuse_conn *cc = fc_to_cc(ff->fc);
struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
unsigned int flags = 0;

if (cc->unrestricted_ioctl)
Expand All @@ -168,7 +169,7 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fuse_file *ff = file->private_data;
struct cuse_conn *cc = fc_to_cc(ff->fc);
struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
unsigned int flags = FUSE_IOCTL_COMPAT;

if (cc->unrestricted_ioctl)
Expand Down Expand Up @@ -313,9 +314,10 @@ struct cuse_init_args {
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
*/
static void cuse_process_init_reply(struct fuse_conn *fc,
static void cuse_process_init_reply(struct fuse_mount *fm,
struct fuse_args *args, int error)
{
struct fuse_conn *fc = fm->fc;
struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
Expand Down Expand Up @@ -424,7 +426,7 @@ static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
struct page *page;
struct fuse_conn *fc = &cc->fc;
struct fuse_mount *fm = &cc->fm;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;

Expand Down Expand Up @@ -460,7 +462,7 @@ static int cuse_send_init(struct cuse_conn *cc)
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;

rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
err_free_page:
Expand Down Expand Up @@ -506,7 +508,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
* Limit the cuse channel to requests that can
* be represented in file->f_cred->user_ns.
*/
fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
&fuse_dev_fiq_ops, NULL);

fud = fuse_dev_alloc_install(&cc->fc);
if (!fud) {
Expand Down
Loading

0 comments on commit 6945653

Please sign in to comment.