Skip to content

Commit

Permalink
[PATCH] r/o bind mounts: elevate write count for open()s
Browse files Browse the repository at this point in the history
This is the first really tricky patch in the series.  It elevates the writer
count on a mount each time a non-special file is opened for write.

We used to do this in may_open(), but Miklos pointed out that __dentry_open()
is used as well to create filps.  This will cover even those cases, while a
call in may_open() would not have.

There is also an elevated count around the vfs_create() call in open_namei().
See the comments for more details, but we need this to fix a 'create, remount,
fail r/w open()' race.

Some filesystems forego the use of normal vfs calls to create
struct files.   Make sure that these users elevate the mnt
writer count because they will get __fput(), and we need
to make sure they're balanced.

Acked-by: Al Viro <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Dave Hansen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Al Viro <[email protected]>
  • Loading branch information
hansendc authored and Al Viro committed Apr 19, 2008
1 parent 42a74f2 commit 4a3fd21
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 14 deletions.
14 changes: 14 additions & 0 deletions fs/file_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,17 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
file->f_mapping = dentry->d_inode->i_mapping;
file->f_mode = mode;
file->f_op = fop;

/*
* These mounts don't really matter in practice
* for r/o bind mounts. They aren't userspace-
* visible. We do this for consistency, and so
* that we can do debugging checks at __fput()
*/
if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
error = mnt_want_write(mnt);
WARN_ON(error);
}
return error;
}
EXPORT_SYMBOL(init_file);
Expand All @@ -221,10 +232,13 @@ EXPORT_SYMBOL(fput);
*/
void drop_file_write_access(struct file *file)
{
struct vfsmount *mnt = file->f_path.mnt;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;

put_write_access(inode);
if (!special_file(inode->i_mode))
mnt_drop_write(mnt);
}
EXPORT_SYMBOL_GPL(drop_file_write_access);

Expand Down
75 changes: 65 additions & 10 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
return -EACCES;

flag &= ~O_TRUNC;
} else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
return -EROFS;
}

error = vfs_permission(nd, acc_mode);
if (error)
Expand Down Expand Up @@ -1724,18 +1723,32 @@ static inline int open_to_namei_flags(int flag)
return flag;
}

static int open_will_write_to_fs(int flag, struct inode *inode)
{
/*
* We'll never write to the fs underlying
* a device file.
*/
if (special_file(inode->i_mode))
return 0;
return (flag & O_TRUNC);
}

/*
* Note that the low bits of "flag" aren't the same as in the open
* system call. See open_to_namei_flags().
* Note that the low bits of the passed in "open_flag"
* are not the same as in the local variable "flag". See
* open_to_namei_flags() for more details.
*/
struct file *do_filp_open(int dfd, const char *pathname,
int open_flag, int mode)
{
struct file *filp;
struct nameidata nd;
int acc_mode, error;
struct path path;
struct dentry *dir;
int count = 0;
int will_write;
int flag = open_to_namei_flags(open_flag);

acc_mode = ACC_MODE(flag);
Expand Down Expand Up @@ -1791,17 +1804,30 @@ struct file *do_filp_open(int dfd, const char *pathname,
}

if (IS_ERR(nd.intent.open.file)) {
mutex_unlock(&dir->d_inode->i_mutex);
error = PTR_ERR(nd.intent.open.file);
goto exit_dput;
goto exit_mutex_unlock;
}

/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
error = __open_namei_create(&nd, &path, flag, mode);
/*
* This write is needed to ensure that a
* ro->rw transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
*/
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit_mutex_unlock;
error = __open_namei_create(&nd, &path, flag, mode);
if (error) {
mnt_drop_write(nd.path.mnt);
goto exit;
return nameidata_to_filp(&nd, open_flag);
}
filp = nameidata_to_filp(&nd, open_flag);
mnt_drop_write(nd.path.mnt);
return filp;
}

/*
Expand Down Expand Up @@ -1831,11 +1857,40 @@ struct file *do_filp_open(int dfd, const char *pathname,
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
/*
* Consider:
* 1. may_open() truncates a file
* 2. a rw->ro mount transition occurs
* 3. nameidata_to_filp() fails due to
* the ro mount.
* That would be inconsistent, and should
* be avoided. Taking this mnt write here
* ensures that (2) can not occur.
*/
will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
if (will_write) {
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit;
}
error = may_open(&nd, acc_mode, flag);
if (error)
if (error) {
if (will_write)
mnt_drop_write(nd.path.mnt);
goto exit;
return nameidata_to_filp(&nd, open_flag);
}
filp = nameidata_to_filp(&nd, open_flag);
/*
* It is now safe to drop the mnt write
* because the filp has had a write taken
* on its behalf.
*/
if (will_write)
mnt_drop_write(nd.path.mnt);
return filp;

exit_mutex_unlock:
mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
path_put_conditional(&path, &nd);
exit:
Expand Down
36 changes: 34 additions & 2 deletions fs/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,35 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
return error;
}

/*
* You have to be very careful that these write
* counts get cleaned up in error cases and
* upon __fput(). This should probably never
* be called outside of __dentry_open().
*/
static inline int __get_file_write_access(struct inode *inode,
struct vfsmount *mnt)
{
int error;
error = get_write_access(inode);
if (error)
return error;
/*
* Do not take mount writer counts on
* special files since no writes to
* the mount itself will occur.
*/
if (!special_file(inode->i_mode)) {
/*
* Balanced in __fput()
*/
error = mnt_want_write(mnt);
if (error)
put_write_access(inode);
}
return error;
}

static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f,
int (*open)(struct inode *, struct file *))
Expand All @@ -742,7 +771,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
error = __get_file_write_access(inode, mnt);
if (error)
goto cleanup_file;
}
Expand Down Expand Up @@ -784,8 +813,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,

cleanup_all:
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITE)
if (f->f_mode & FMODE_WRITE) {
put_write_access(inode);
if (!special_file(inode->i_mode))
mnt_drop_write(mnt);
}
file_kill(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
Expand Down
16 changes: 14 additions & 2 deletions ipc/mqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
int oflag, mode_t mode, struct mq_attr __user *u_attr)
{
struct mq_attr attr;
struct file *result;
int ret;

if (u_attr) {
Expand All @@ -612,13 +613,24 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
}

mode &= ~current->fs->umask;
ret = mnt_want_write(mqueue_mnt);
if (ret)
goto out;
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
dentry->d_fsdata = NULL;
if (ret)
goto out;
goto out_drop_write;

return dentry_open(dentry, mqueue_mnt, oflag);
result = dentry_open(dentry, mqueue_mnt, oflag);
/*
* dentry_open() took a persistent mnt_want_write(),
* so we can now drop this one.
*/
mnt_drop_write(mqueue_mnt);
return result;

out_drop_write:
mnt_drop_write(mqueue_mnt);
out:
dput(dentry);
mntput(mqueue_mnt);
Expand Down

0 comments on commit 4a3fd21

Please sign in to comment.