Skip to content

Commit

Permalink
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git…
Browse files Browse the repository at this point in the history
…/viro/vfs

Pull vfs d_inode/d_flags memory ordering fixes from Al Viro:
 "Fallout from tree-wide audit for ->d_inode/->d_flags barriers use.
  Basically, the problem is that negative pinned dentries require
  careful treatment - unless ->d_lock is locked or parent is held at
  least shared, another thread can make them positive right under us.

  Most of the uses turned out to be safe - the main surprises as far as
  filesystems are concerned were

   - race in dget_parent() fastpath, that might end up with the caller
     observing the returned dentry _negative_, due to insufficient
     barriers. It is positive in memory, but we could end up seeing the
     wrong value of ->d_inode in CPU cache. Fixed.

   - manual checks that result of lookup_one_len_unlocked() is positive
     (and rejection of negatives). Again, insufficient barriers (we
     might end up with inconsistent observed values of ->d_inode and
     ->d_flags). Fixed by switching to a new primitive that does the
     checks itself and returns ERR_PTR(-ENOENT) instead of a negative
     dentry. That way we get rid of boilerplate converting negatives
     into ERR_PTR(-ENOENT) in the callers and have a single place to
     deal with the barrier-related mess - inside fs/namei.c rather than
     in every caller out there.

  The guts of pathname resolution *do* need to be careful - the race
  found by Ritesh is real, as well as several similar races.
  Fortunately, it turns out that we can take care of that with fairly
  local changes in there.

  The tree-wide audit had not been fun, and I hate the idea of repeating
  it. I think the right approach would be to annotate the places where
  we are _not_ guaranteed ->d_inode/->d_flags stability and have sparse
  catch regressions. But I'm still not sure what would be the least
  invasive way of doing that and it's clearly the next cycle fodder"

* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  fs/namei.c: fix missing barriers when checking positivity
  fix dget_parent() fastpath race
  new helper: lookup_positive_unlocked()
  fs/namei.c: pull positivity check into follow_managed()
  • Loading branch information
torvalds committed Dec 6, 2019
2 parents b0d4bea + 2fa6b1e commit 0aecba6
Show file tree
Hide file tree
Showing 11 changed files with 56 additions and 74 deletions.
7 changes: 1 addition & 6 deletions fs/cifs/cifsfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -730,11 +730,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
struct inode *dir = d_inode(dentry);
struct dentry *child;

if (!dir) {
dput(dentry);
dentry = ERR_PTR(-ENOENT);
break;
}
if (!S_ISDIR(dir->i_mode)) {
dput(dentry);
dentry = ERR_PTR(-ENOTDIR);
Expand All @@ -751,7 +746,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
while (*s && *s != sep)
s++;

child = lookup_one_len_unlocked(p, dentry, s - p);
child = lookup_positive_unlocked(p, dentry, s - p);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
Expand Down
6 changes: 4 additions & 2 deletions fs/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
flags = READ_ONCE(dentry->d_flags);
flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
flags |= type_flags;
WRITE_ONCE(dentry->d_flags, flags);
smp_store_release(&dentry->d_flags, flags);
}

static inline void __d_clear_type_and_inode(struct dentry *dentry)
Expand Down Expand Up @@ -903,17 +903,19 @@ struct dentry *dget_parent(struct dentry *dentry)
{
int gotref;
struct dentry *ret;
unsigned seq;

/*
* Do optimistic parent lookup without any
* locking.
*/
rcu_read_lock();
seq = raw_seqcount_begin(&dentry->d_seq);
ret = READ_ONCE(dentry->d_parent);
gotref = lockref_get_not_zero(&ret->d_lockref);
rcu_read_unlock();
if (likely(gotref)) {
if (likely(ret == READ_ONCE(dentry->d_parent)))
if (!read_seqcount_retry(&dentry->d_seq, seq))
return ret;
dput(ret);
}
Expand Down
6 changes: 1 addition & 5 deletions fs/debugfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,9 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;

dentry = lookup_one_len_unlocked(name, parent, strlen(name));
dentry = lookup_positive_unlocked(name, parent, strlen(name));
if (IS_ERR(dentry))
return NULL;
if (!d_really_is_positive(dentry)) {
dput(dentry);
return NULL;
}
return dentry;
}
EXPORT_SYMBOL_GPL(debugfs_lookup);
Expand Down
2 changes: 1 addition & 1 deletion fs/kernfs/mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
dput(dentry);
return ERR_PTR(-EINVAL);
}
dtmp = lookup_one_len_unlocked(kntmp->name, dentry,
dtmp = lookup_positive_unlocked(kntmp->name, dentry,
strlen(kntmp->name));
dput(dentry);
if (IS_ERR(dtmp))
Expand Down
56 changes: 32 additions & 24 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1210,25 +1210,25 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* - Flagged as automount point
*
* This may only be called in refwalk mode.
* On success path->dentry is known positive.
*
* Serialization is taken care of in namespace.c
*/
static int follow_managed(struct path *path, struct nameidata *nd)
{
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
unsigned managed;
unsigned flags;
bool need_mntput = false;
int ret = 0;

/* Given that we're not holding a lock here, we retain the value in a
* local variable for each dentry as we look at it so that we don't see
* the components of that value change under us */
while (managed = READ_ONCE(path->dentry->d_flags),
managed &= DCACHE_MANAGED_DENTRY,
unlikely(managed != 0)) {
while (flags = smp_load_acquire(&path->dentry->d_flags),
unlikely(flags & DCACHE_MANAGED_DENTRY)) {
/* Allow the filesystem to manage the transit without i_mutex
* being held. */
if (managed & DCACHE_MANAGE_TRANSIT) {
if (flags & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false);
Expand All @@ -1237,7 +1237,7 @@ static int follow_managed(struct path *path, struct nameidata *nd)
}

/* Transit to a mounted filesystem. */
if (managed & DCACHE_MOUNTED) {
if (flags & DCACHE_MOUNTED) {
struct vfsmount *mounted = lookup_mnt(path);
if (mounted) {
dput(path->dentry);
Expand All @@ -1256,7 +1256,7 @@ static int follow_managed(struct path *path, struct nameidata *nd)
}

/* Handle an automount point */
if (managed & DCACHE_NEED_AUTOMOUNT) {
if (flags & DCACHE_NEED_AUTOMOUNT) {
ret = follow_automount(path, nd, &need_mntput);
if (ret < 0)
break;
Expand All @@ -1269,10 +1269,12 @@ static int follow_managed(struct path *path, struct nameidata *nd)

if (need_mntput && path->mnt == mnt)
mntput(path->mnt);
if (ret == -EISDIR || !ret)
ret = 1;
if (need_mntput)
nd->flags |= LOOKUP_JUMPED;
if (ret == -EISDIR || !ret)
ret = 1;
if (ret > 0 && unlikely(d_flags_negative(flags)))
ret = -ENOENT;
if (unlikely(ret < 0))
path_put_conditional(path, nd);
return ret;
Expand Down Expand Up @@ -1621,10 +1623,6 @@ static int lookup_fast(struct nameidata *nd,
dput(dentry);
return status;
}
if (unlikely(d_is_negative(dentry))) {
dput(dentry);
return -ENOENT;
}

path->mnt = mnt;
path->dentry = dentry;
Expand Down Expand Up @@ -1811,11 +1809,6 @@ static int walk_component(struct nameidata *nd, int flags)
if (unlikely(err < 0))
return err;

if (unlikely(d_is_negative(path.dentry))) {
path_to_nameidata(&path, nd);
return -ENOENT;
}

seq = 0; /* we are already out of RCU mode */
inode = d_backing_inode(path.dentry);
}
Expand Down Expand Up @@ -2568,6 +2561,26 @@ struct dentry *lookup_one_len_unlocked(const char *name,
}
EXPORT_SYMBOL(lookup_one_len_unlocked);

/*
* Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
* on negatives. Returns known positive or ERR_PTR(); that's what
* most of the users want. Note that pinned negative with unlocked parent
* _can_ become positive at any time, so callers of lookup_one_len_unlocked()
* need to be very careful; pinned positives have ->d_inode stable, so
* this one avoids such problems.
*/
struct dentry *lookup_positive_unlocked(const char *name,
struct dentry *base, int len)
{
struct dentry *ret = lookup_one_len_unlocked(name, base, len);
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
dput(ret);
ret = ERR_PTR(-ENOENT);
}
return ret;
}
EXPORT_SYMBOL(lookup_positive_unlocked);

#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
Expand Down Expand Up @@ -2662,7 +2675,7 @@ mountpoint_last(struct nameidata *nd)
return PTR_ERR(path.dentry);
}
}
if (d_is_negative(path.dentry)) {
if (d_flags_negative(smp_load_acquire(&path.dentry->d_flags))) {
dput(path.dentry);
return -ENOENT;
}
Expand Down Expand Up @@ -3356,11 +3369,6 @@ static int do_last(struct nameidata *nd,
if (unlikely(error < 0))
return error;

if (unlikely(d_is_negative(path.dentry))) {
path_to_nameidata(&path, nd);
return -ENOENT;
}

/*
* create/update audit record if it already exists.
*/
Expand Down
4 changes: 1 addition & 3 deletions fs/nfsd/nfs3xdr.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,13 +863,11 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
dchild = lookup_one_len_unlocked(name, dparent, namlen);
dchild = lookup_positive_unlocked(name, dparent, namlen);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
goto out;
if (d_really_is_negative(dchild))
goto out;
if (dchild->d_inode->i_ino != ino)
goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
Expand Down
11 changes: 1 addition & 10 deletions fs/nfsd/nfs4xdr.c
Original file line number Diff line number Diff line change
Expand Up @@ -2991,18 +2991,9 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
__be32 nfserr;
int ignore_crossmnt = 0;

dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
if (d_really_is_negative(dentry)) {
/*
* we're not holding the i_mutex here, so there's
* a window where this directory entry could have gone
* away.
*/
dput(dentry);
return nfserr_noent;
}

exp_get(exp);
/*
Expand Down
24 changes: 8 additions & 16 deletions fs/overlayfs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,14 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
int err;
bool last_element = !post[0];

this = lookup_one_len_unlocked(name, base, namelen);
this = lookup_positive_unlocked(name, base, namelen);
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
if (err == -ENOENT || err == -ENAMETOOLONG)
goto out;
goto out_err;
}
if (!this->d_inode)
goto put_and_out;

if (ovl_dentry_weird(this)) {
/* Don't support traversing automounts and other weirdness */
Expand Down Expand Up @@ -651,17 +649,15 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
if (err)
return ERR_PTR(err);

index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
kfree(name.name);
if (IS_ERR(index)) {
if (PTR_ERR(index) == -ENOENT)
index = NULL;
return index;
}

if (d_is_negative(index))
err = 0;
else if (ovl_is_whiteout(index))
if (ovl_is_whiteout(index))
err = -ESTALE;
else if (ovl_dentry_weird(index))
err = -EIO;
Expand All @@ -685,7 +681,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
if (err)
return ERR_PTR(err);

index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
if (err == -ENOENT) {
Expand All @@ -700,9 +696,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
}

inode = d_inode(index);
if (d_is_negative(index)) {
goto out_dput;
} else if (ovl_is_whiteout(index) && !verify) {
if (ovl_is_whiteout(index) && !verify) {
/*
* When index lookup is called with !verify for decoding an
* overlay file handle, a whiteout index implies that decode
Expand Down Expand Up @@ -1131,7 +1125,7 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct dentry *lowerdir = poe->lowerstack[i].dentry;

this = lookup_one_len_unlocked(name->name, lowerdir,
this = lookup_positive_unlocked(name->name, lowerdir,
name->len);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
Expand All @@ -1148,10 +1142,8 @@ bool ovl_lower_positive(struct dentry *dentry)
break;
}
} else {
if (this->d_inode) {
positive = !ovl_is_whiteout(this);
done = true;
}
positive = !ovl_is_whiteout(this);
done = true;
dput(this);
}
}
Expand Down
8 changes: 1 addition & 7 deletions fs/quota/dquot.c
Original file line number Diff line number Diff line change
Expand Up @@ -2487,21 +2487,15 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;

dentry = lookup_one_len_unlocked(qf_name, sb->s_root, strlen(qf_name));
dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name));
if (IS_ERR(dentry))
return PTR_ERR(dentry);

if (d_really_is_negative(dentry)) {
error = -ENOENT;
goto out;
}

error = security_quota_on(dentry);
if (!error)
error = dquot_load_quota_inode(d_inode(dentry), type, format_id,
DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

out:
dput(dentry);
return error;
}
Expand Down
5 changes: 5 additions & 0 deletions include/linux/dcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,11 @@ static inline bool d_is_negative(const struct dentry *dentry)
return d_is_miss(dentry);
}

static inline bool d_flags_negative(unsigned flags)
{
return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE;
}

static inline bool d_is_positive(const struct dentry *dentry)
{
return !d_is_negative(dentry);
Expand Down
1 change: 1 addition & 0 deletions include/linux/namei.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);

extern int follow_down_one(struct path *);
extern int follow_down(struct path *);
Expand Down

0 comments on commit 0aecba6

Please sign in to comment.