summaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c658
1 files changed, 469 insertions, 189 deletions
diff --git a/fs/namei.c b/fs/namei.c
index df9946e83db4..8f77a8cea289 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put);
* to restart the path walk from the beginning in ref-walk mode.
*/
-static inline void lock_rcu_walk(void)
-{
- br_read_lock(&vfsmount_lock);
- rcu_read_lock();
-}
-
-static inline void unlock_rcu_walk(void)
-{
- rcu_read_unlock();
- br_read_unlock(&vfsmount_lock);
-}
-
/**
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
@@ -508,56 +496,75 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
- int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
- want_root = 1;
- spin_lock(&fs->lock);
- if (nd->root.mnt != fs->root.mnt ||
- nd->root.dentry != fs->root.dentry)
- goto err_root;
+
+ /*
+ * After legitimizing the bastards, terminate_walk()
+ * will do the right thing for non-RCU mode, and all our
+ * subsequent exit cases should rcu_read_unlock()
+ * before returning. Do vfsmount first; if dentry
+ * can't be legitimized, just set nd->path.dentry to NULL
+ * and rely on dput(NULL) being a no-op.
+ */
+ if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
+ return -ECHILD;
+ nd->flags &= ~LOOKUP_RCU;
+
+ if (!lockref_get_not_dead(&parent->d_lockref)) {
+ nd->path.dentry = NULL;
+ rcu_read_unlock();
+ return -ECHILD;
}
- spin_lock(&parent->d_lock);
+
+ /*
+ * For a negative lookup, the lookup sequence point is the parents
+ * sequence point, and it only needs to revalidate the parent dentry.
+ *
+ * For a positive lookup, we need to move both the parent and the
+ * dentry from the RCU domain to be properly refcounted. And the
+ * sequence number in the dentry validates *both* dentry counters,
+ * since we checked the sequence number of the parent after we got
+ * the child sequence number. So we know the parent must still
+ * be valid if the child sequence number is still valid.
+ */
if (!dentry) {
- if (!__d_rcu_to_refcount(parent, nd->seq))
- goto err_parent;
+ if (read_seqcount_retry(&parent->d_seq, nd->seq))
+ goto out;
BUG_ON(nd->inode != parent->d_inode);
} else {
- if (dentry->d_parent != parent)
- goto err_parent;
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (!__d_rcu_to_refcount(dentry, nd->seq))
- goto err_child;
- /*
- * If the sequence check on the child dentry passed, then
- * the child has not been removed from its parent. This
- * means the parent dentry must be valid and able to take
- * a reference at this point.
- */
- BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
- BUG_ON(!parent->d_lockref.count);
- parent->d_lockref.count++;
- spin_unlock(&dentry->d_lock);
+ if (!lockref_get_not_dead(&dentry->d_lockref))
+ goto out;
+ if (read_seqcount_retry(&dentry->d_seq, nd->seq))
+ goto drop_dentry;
}
- spin_unlock(&parent->d_lock);
- if (want_root) {
+
+ /*
+ * Sequence counts matched. Now make sure that the root is
+ * still valid and get it if required.
+ */
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ spin_lock(&fs->lock);
+ if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
+ goto unlock_and_drop_dentry;
path_get(&nd->root);
spin_unlock(&fs->lock);
}
- mntget(nd->path.mnt);
- unlock_rcu_walk();
- nd->flags &= ~LOOKUP_RCU;
+ rcu_read_unlock();
return 0;
-err_child:
- spin_unlock(&dentry->d_lock);
-err_parent:
- spin_unlock(&parent->d_lock);
-err_root:
- if (want_root)
- spin_unlock(&fs->lock);
+unlock_and_drop_dentry:
+ spin_unlock(&fs->lock);
+drop_dentry:
+ rcu_read_unlock();
+ dput(dentry);
+ goto drop_root_mnt;
+out:
+ rcu_read_unlock();
+drop_root_mnt:
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
return -ECHILD;
}
@@ -585,16 +592,23 @@ static int complete_walk(struct nameidata *nd)
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- spin_lock(&dentry->d_lock);
- if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
- spin_unlock(&dentry->d_lock);
- unlock_rcu_walk();
+
+ if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+ rcu_read_unlock();
+ return -ECHILD;
+ }
+ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
+ rcu_read_unlock();
+ mntput(nd->path.mnt);
+ return -ECHILD;
+ }
+ if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
+ rcu_read_unlock();
+ dput(dentry);
+ mntput(nd->path.mnt);
return -ECHILD;
}
- BUG_ON(nd->inode != dentry->d_inode);
- spin_unlock(&dentry->d_lock);
- mntget(nd->path.mnt);
- unlock_rcu_walk();
+ rcu_read_unlock();
}
if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -636,29 +650,6 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
}
}
-static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
-{
- int ret;
-
- if (IS_ERR(link))
- goto fail;
-
- if (*link == '/') {
- set_root(nd);
- path_put(&nd->path);
- nd->path = nd->root;
- path_get(&nd->root);
- nd->flags |= LOOKUP_JUMPED;
- }
- nd->inode = nd->path.dentry->d_inode;
-
- ret = link_path_walk(link, nd);
- return ret;
-fail:
- path_put(&nd->path);
- return PTR_ERR(link);
-}
-
static void path_put_conditional(struct path *path, struct nameidata *nd)
{
dput(path->dentry);
@@ -850,7 +841,20 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
error = 0;
s = nd_get_link(nd);
if (s) {
- error = __vfs_follow_link(nd, s);
+ if (unlikely(IS_ERR(s))) {
+ path_put(&nd->path);
+ put_link(nd, link, *p);
+ return PTR_ERR(s);
+ }
+ if (*s == '/') {
+ set_root(nd);
+ path_put(&nd->path);
+ nd->path = nd->root;
+ path_get(&nd->root);
+ nd->flags |= LOOKUP_JUMPED;
+ }
+ nd->inode = nd->path.dentry->d_inode;
+ error = link_path_walk(s, nd);
if (unlikely(error))
put_link(nd, link, *p);
}
@@ -895,15 +899,15 @@ int follow_up(struct path *path)
struct mount *parent;
struct dentry *mountpoint;
- br_read_lock(&vfsmount_lock);
+ read_seqlock_excl(&mount_lock);
parent = mnt->mnt_parent;
if (parent == mnt) {
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
return 0;
}
mntget(&parent->mnt);
mountpoint = dget(mnt->mnt_mountpoint);
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
@@ -1034,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags)
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
- * namespace got unmounted before we managed to get the
- * vfsmount_lock */
+ * namespace got unmounted before lookup_mnt() could
+ * get it */
}
/* Handle an automount point */
@@ -1097,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
if (!d_mountpoint(path->dentry))
break;
- mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+ mounted = __lookup_mnt(path->mnt, path->dentry);
if (!mounted)
break;
path->mnt = &mounted->mnt;
@@ -1118,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd)
{
while (d_mountpoint(nd->path.dentry)) {
struct mount *mounted;
- mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
+ mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
if (!mounted)
break;
nd->path.mnt = &mounted->mnt;
@@ -1160,7 +1164,7 @@ failed:
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- unlock_rcu_walk();
+ rcu_read_unlock();
return -ECHILD;
}
@@ -1294,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
}
/*
- * Call i_op->lookup on the dentry. The dentry must be negative but may be
- * hashed if it was pouplated with DCACHE_NEED_LOOKUP.
+ * Call i_op->lookup on the dentry. The dentry must be negative and
+ * unhashed.
*
* dir->d_inode->i_mutex must be held
*/
@@ -1487,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd)
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- unlock_rcu_walk();
+ rcu_read_unlock();
}
}
@@ -1497,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*/
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
{
- if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
- if (likely(inode->i_op->follow_link))
- return follow;
-
- /* This gets set once for the inode lifetime */
- spin_lock(&inode->i_lock);
- inode->i_opflags |= IOP_NOFOLLOW;
- spin_unlock(&inode->i_lock);
- }
- return 0;
+ return unlikely(d_is_symlink(dentry)) ? follow : 0;
}
static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1538,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
if (!inode)
goto out_path_put;
- if (should_follow_link(inode, follow)) {
+ if (should_follow_link(path->dentry, follow)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
err = -ECHILD;
@@ -1597,26 +1592,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
}
/*
- * We really don't want to look at inode->i_op->lookup
- * when we don't have to. So we keep a cache bit in
- * the inode ->i_opflags field that says "yes, we can
- * do lookup on this inode".
- */
-static inline int can_lookup(struct inode *inode)
-{
- if (likely(inode->i_opflags & IOP_LOOKUP))
- return 1;
- if (likely(!inode->i_op->lookup))
- return 0;
-
- /* We do this once for the lifetime of the inode */
- spin_lock(&inode->i_lock);
- inode->i_opflags |= IOP_LOOKUP;
- spin_unlock(&inode->i_lock);
- return 1;
-}
-
-/*
* We can do the critical dentry name comparison and hashing
* operations one word at a time, but we are limited to:
*
@@ -1819,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (err)
return err;
}
- if (!can_lookup(nd->inode)) {
+ if (!d_is_directory(nd->path.dentry)) {
err = -ENOTDIR;
break;
}
@@ -1837,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
if (flags & LOOKUP_ROOT) {
- struct inode *inode = nd->root.dentry->d_inode;
+ struct dentry *root = nd->root.dentry;
+ struct inode *inode = root->d_inode;
if (*name) {
- if (!can_lookup(inode))
+ if (!d_is_directory(root))
return -ENOTDIR;
retval = inode_permission(inode, MAY_EXEC);
if (retval)
@@ -1848,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
- lock_rcu_walk();
+ rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
@@ -1858,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->root.mnt = NULL;
+ nd->m_seq = read_seqbegin(&mount_lock);
if (*name=='/') {
if (flags & LOOKUP_RCU) {
- lock_rcu_walk();
+ rcu_read_lock();
set_root_rcu(nd);
} else {
set_root(nd);
@@ -1872,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
struct fs_struct *fs = current->fs;
unsigned seq;
- lock_rcu_walk();
+ rcu_read_lock();
do {
seq = read_seqcount_begin(&fs->seq);
@@ -1893,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
dentry = f.file->f_path.dentry;
if (*name) {
- if (!can_lookup(dentry->d_inode)) {
+ if (!d_is_directory(dentry)) {
fdput(f);
return -ENOTDIR;
}
@@ -1904,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (f.need_put)
*fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- lock_rcu_walk();
+ rcu_read_lock();
} else {
path_get(&nd->path);
fdput(f);
@@ -1975,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) {
- if (!can_lookup(nd->inode)) {
+ if (!d_is_directory(nd->path.dentry)) {
path_put(&nd->path);
err = -ENOTDIR;
}
@@ -2184,6 +2162,198 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd,
return s;
}
+/**
+ * mountpoint_last - look up last component for umount
+ * @nd: pathwalk nameidata - currently pointing at parent directory of "last"
+ * @path: pointer to container for result
+ *
+ * This is a special lookup_last function just for umount. In this case, we
+ * need to resolve the path without doing any revalidation.
+ *
+ * The nameidata should be the result of doing a LOOKUP_PARENT pathwalk. Since
+ * mountpoints are always pinned in the dcache, their ancestors are too. Thus,
+ * in almost all cases, this lookup will be served out of the dcache. The only
+ * cases where it won't are if nd->last refers to a symlink or the path is
+ * bogus and it doesn't exist.
+ *
+ * Returns:
+ * -error: if there was an error during lookup. This includes -ENOENT if the
+ * lookup found a negative dentry. The nd->path reference will also be
+ * put in this case.
+ *
+ * 0: if we successfully resolved nd->path and found it to not to be a
+ * symlink that needs to be followed. "path" will also be populated.
+ * The nd->path reference will also be put.
+ *
+ * 1: if we successfully resolved nd->last and found it to be a symlink
+ * that needs to be followed. "path" will be populated with the path
+ * to the link, and nd->path will *not* be put.
+ */
+static int
+mountpoint_last(struct nameidata *nd, struct path *path)
+{
+ int error = 0;
+ struct dentry *dentry;
+ struct dentry *dir = nd->path.dentry;
+
+ /* If we're in rcuwalk, drop out of it to handle last component */
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlazy_walk(nd, NULL)) {
+ error = -ECHILD;
+ goto out;
+ }
+ }
+
+ nd->flags &= ~LOOKUP_PARENT;
+
+ if (unlikely(nd->last_type != LAST_NORM)) {
+ error = handle_dots(nd, nd->last_type);
+ if (error)
+ goto out;
+ dentry = dget(nd->path.dentry);
+ goto done;
+ }
+
+ mutex_lock(&dir->d_inode->i_mutex);
+ dentry = d_lookup(dir, &nd->last);
+ if (!dentry) {
+ /*
+ * No cached dentry. Mounted dentries are pinned in the cache,
+ * so that means that this dentry is probably a symlink or the
+ * path doesn't actually point to a mounted dentry.
+ */
+ dentry = d_alloc(dir, &nd->last);
+ if (!dentry) {
+ error = -ENOMEM;
+ mutex_unlock(&dir->d_inode->i_mutex);
+ goto out;
+ }
+ dentry = lookup_real(dir->d_inode, dentry, nd->flags);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
+ mutex_unlock(&dir->d_inode->i_mutex);
+ goto out;
+ }
+ }
+ mutex_unlock(&dir->d_inode->i_mutex);
+
+done:
+ if (!dentry->d_inode) {
+ error = -ENOENT;
+ dput(dentry);
+ goto out;
+ }
+ path->dentry = dentry;
+ path->mnt = mntget(nd->path.mnt);
+ if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
+ return 1;
+ follow_mount(path);
+ error = 0;
+out:
+ terminate_walk(nd);
+ return error;
+}
+
+/**
+ * path_mountpoint - look up a path to be umounted
+ * @dfd: directory file descriptor to start walk from
+ * @name: full pathname to walk
+ * @path: pointer to container for result
+ * @flags: lookup flags
+ *
+ * Look up the given name, but don't attempt to revalidate the last component.
+ * Returns 0 and "path" will be valid on success; Returns error otherwise.
+ */
+static int
+path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+{
+ struct file *base = NULL;
+ struct nameidata nd;
+ int err;
+
+ err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
+ if (unlikely(err))
+ return err;
+
+ current->total_link_count = 0;
+ err = link_path_walk(name, &nd);
+ if (err)
+ goto out;
+
+ err = mountpoint_last(&nd, path);
+ while (err > 0) {
+ void *cookie;
+ struct path link = *path;
+ err = may_follow_link(&link, &nd);
+ if (unlikely(err))
+ break;
+ nd.flags |= LOOKUP_PARENT;
+ err = follow_link(&link, &nd, &cookie);
+ if (err)
+ break;
+ err = mountpoint_last(&nd, path);
+ put_link(&nd, &link, cookie);
+ }
+out:
+ if (base)
+ fput(base);
+
+ if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
+ path_put(&nd.root);
+
+ return err;
+}
+
+static int
+filename_mountpoint(int dfd, struct filename *s, struct path *path,
+ unsigned int flags)
+{
+ int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+ if (unlikely(error == -ECHILD))
+ error = path_mountpoint(dfd, s->name, path, flags);
+ if (unlikely(error == -ESTALE))
+ error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+ if (likely(!error))
+ audit_inode(s, path->dentry, 0);
+ return error;
+}
+
+/**
+ * user_path_mountpoint_at - lookup a path from userland in order to umount it
+ * @dfd: directory file descriptor
+ * @name: pathname from userland
+ * @flags: lookup flags
+ * @path: pointer to container to hold result
+ *
+ * A umount is a special case for path walking. We're not actually interested
+ * in the inode in this situation, and ESTALE errors can be a problem. We
+ * simply want track down the dentry and vfsmount attached at the mountpoint
+ * and avoid revalidating the last component.
+ *
+ * Returns 0 and populates "path" on success.
+ */
+int
+user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
+ struct path *path)
+{
+ struct filename *s = getname(name);
+ int error;
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+ error = filename_mountpoint(dfd, s, path, flags);
+ putname(s);
+ return error;
+}
+
+int
+kern_path_mountpoint(int dfd, const char *name, struct path *path,
+ unsigned int flags)
+{
+ struct filename s = {.name = name};
+ return filename_mountpoint(dfd, &s, path, flags);
+}
+EXPORT_SYMBOL(kern_path_mountpoint);
+
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
@@ -2220,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
-static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
{
+ struct inode *inode = victim->d_inode;
int error;
- if (!victim->d_inode)
+ if (d_is_negative(victim))
return -ENOENT;
+ BUG_ON(!inode);
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2235,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+
+ if (check_sticky(dir, inode) || IS_APPEND(inode) ||
+ IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
return -EPERM;
if (isdir) {
- if (!S_ISDIR(victim->d_inode->i_mode))
+ if (!d_is_directory(victim) && !d_is_autodir(victim))
return -ENOTDIR;
if (IS_ROOT(victim))
return -EBUSY;
- } else if (S_ISDIR(victim->d_inode->i_mode))
+ } else if (d_is_directory(victim) || d_is_autodir(victim))
return -EISDIR;
if (IS_DEADDIR(dir))
return -ENOENT;
@@ -2452,6 +2625,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
int acc_mode;
int create_error = 0;
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+ bool excl;
BUG_ON(dentry->d_inode);
@@ -2465,10 +2639,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
mode &= ~current_umask();
- if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
+ excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT);
+ if (excl)
open_flag &= ~O_TRUNC;
- *opened |= FILE_CREATED;
- }
/*
* Checking write permission is tricky, bacuse we don't know if we are
@@ -2521,12 +2694,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
goto out;
}
- acc_mode = op->acc_mode;
- if (*opened & FILE_CREATED) {
- fsnotify_create(dir, dentry);
- acc_mode = MAY_OPEN;
- }
-
if (error) { /* returned 1, that is */
if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
error = -EIO;
@@ -2536,9 +2703,19 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
dput(dentry);
dentry = file->f_path.dentry;
}
- if (create_error && dentry->d_inode == NULL) {
- error = create_error;
- goto out;
+ if (*opened & FILE_CREATED)
+ fsnotify_create(dir, dentry);
+ if (!dentry->d_inode) {
+ WARN_ON(*opened & FILE_CREATED);
+ if (create_error) {
+ error = create_error;
+ goto out;
+ }
+ } else {
+ if (excl && !(*opened & FILE_CREATED)) {
+ error = -EEXIST;
+ goto out;
+ }
}
goto looked_up;
}
@@ -2547,6 +2724,12 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
* We didn't have the inode before the open, so check open permission
* here.
*/
+ acc_mode = op->acc_mode;
+ if (*opened & FILE_CREATED) {
+ WARN_ON(!(open_flag & O_CREAT));
+ fsnotify_create(dir, dentry);
+ acc_mode = MAY_OPEN;
+ }
error = may_open(&file->f_path, acc_mode, open_flag);
if (error)
fput(file);
@@ -2768,7 +2951,7 @@ retry_lookup:
/*
* create/update audit record if it already exists.
*/
- if (path->dentry->d_inode)
+ if (d_is_positive(path->dentry))
audit_inode(name, path->dentry, 0);
/*
@@ -2797,12 +2980,12 @@ retry_lookup:
finish_lookup:
/* we _can_ be in RCU mode here */
error = -ENOENT;
- if (!inode) {
+ if (d_is_negative(path->dentry)) {
path_to_nameidata(path, nd);
goto out;
}
- if (should_follow_link(inode, !symlink_ok)) {
+ if (should_follow_link(path->dentry, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
error = -ECHILD;
@@ -2831,10 +3014,11 @@ finish_open:
}
audit_inode(name, nd->path.dentry, 0);
error = -EISDIR;
- if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+ if ((open_flag & O_CREAT) &&
+ (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
goto out;
error = -ENOTDIR;
- if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
+ if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
goto out;
if (!S_ISREG(nd->inode->i_mode))
will_truncate = false;
@@ -3060,7 +3244,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
nd.root.mnt = mnt;
nd.root.dentry = dentry;
- if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+ if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
return ERR_PTR(-ELOOP);
file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3110,8 +3294,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
goto unlock;
error = -EEXIST;
- if (dentry->d_inode)
+ if (d_is_positive(dentry))
goto fail;
+
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
@@ -3432,8 +3617,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
return do_rmdir(AT_FDCWD, pathname);
}
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir: parent directory
+ * @dentry: victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode. The caller
+ * should then break the delegation on that inode and retry. Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
{
+ struct inode *target = dentry->d_inode;
int error = may_delete(dir, dentry, 0);
if (error)
@@ -3442,22 +3646,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
if (!dir->i_op->unlink)
return -EPERM;
- mutex_lock(&dentry->d_inode->i_mutex);
+ mutex_lock(&target->i_mutex);
if (d_mountpoint(dentry))
error = -EBUSY;
else {
error = security_inode_unlink(dir, dentry);
if (!error) {
+ error = try_break_deleg(target, delegated_inode);
+ if (error)
+ goto out;
error = dir->i_op->unlink(dir, dentry);
if (!error)
dont_mount(dentry);
}
}
- mutex_unlock(&dentry->d_inode->i_mutex);
+out:
+ mutex_unlock(&target->i_mutex);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
- fsnotify_link_count(dentry->d_inode);
+ fsnotify_link_count(target);
d_delete(dentry);
}
@@ -3477,6 +3685,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
+ struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3491,7 +3700,7 @@ retry:
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit1;
-
+retry_deleg:
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
error = PTR_ERR(dentry);
@@ -3500,19 +3709,25 @@ retry:
if (nd.last.name[nd.last.len])
goto slashes;
inode = dentry->d_inode;
- if (!inode)
+ if (d_is_negative(dentry))
goto slashes;
ihold(inode);
error = security_path_unlink(&nd.path, dentry);
if (error)
goto exit2;
- error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+ error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
exit2:
dput(dentry);
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (inode)
iput(inode); /* truncate the inode here */
+ inode = NULL;
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
@@ -3525,8 +3740,12 @@ exit1:
return error;
slashes:
- error = !dentry->d_inode ? -ENOENT :
- S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+ if (d_is_negative(dentry))
+ error = -ENOENT;
+ else if (d_is_directory(dentry) || d_is_autodir(dentry))
+ error = -EISDIR;
+ else
+ error = -ENOTDIR;
goto exit2;
}
@@ -3602,7 +3821,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
return sys_symlinkat(oldname, AT_FDCWD, newname);
}
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry: object to be linked
+ * @dir: new parent
+ * @new_dentry: where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode. The caller should then break the delegation
+ * and retry. Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
{
struct inode *inode = old_dentry->d_inode;
unsigned max_links = dir->i_sb->s_max_links;
@@ -3638,8 +3876,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
error = -ENOENT;
else if (max_links && inode->i_nlink >= max_links)
error = -EMLINK;
- else
- error = dir->i_op->link(old_dentry, dir, new_dentry);
+ else {
+ error = try_break_deleg(inode, delegated_inode);
+ if (!error)
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
+ }
if (!error && (inode->i_state & I_LINKABLE)) {
spin_lock(&inode->i_lock);
@@ -3666,6 +3907,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
{
struct dentry *new_dentry;
struct path old_path, new_path;
+ struct inode *delegated_inode = NULL;
int how = 0;
int error;
@@ -3704,9 +3946,14 @@ retry:
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+ error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
out_dput:
done_path_create(&new_path, new_dentry);
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, how)) {
how |= LOOKUP_REVAL;
goto retry;
@@ -3731,7 +3978,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* That's where 4.4 screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _three_ objects - parents and victim (if it exists).
+ * c) we have to lock _four_ objects - parents and victim (if it exists),
+ * and source (if it is not a directory).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -3804,9 +4052,11 @@ out:
}
static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ struct inode **delegated_inode)
{
struct inode *target = new_dentry->d_inode;
+ struct inode *source = old_dentry->d_inode;
int error;
error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3814,13 +4064,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
return error;
dget(new_dentry);
- if (target)
- mutex_lock(&target->i_mutex);
+ lock_two_nondirectories(source, target);
error = -EBUSY;
if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
goto out;
+ error = try_break_deleg(source, delegated_inode);
+ if (error)
+ goto out;
+ if (target) {
+ error = try_break_deleg(target, delegated_inode);
+ if (error)
+ goto out;
+ }
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
goto out;
@@ -3830,17 +4087,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
d_move(old_dentry, new_dentry);
out:
- if (target)
- mutex_unlock(&target->i_mutex);
+ unlock_two_nondirectories(source, target);
dput(new_dentry);
return error;
}
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir: parent of source
+ * @old_dentry: source
+ * @new_dir: parent of destination
+ * @new_dentry: destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename()).
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode. The caller should then
+ * break the delegation and retry. Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ struct inode **delegated_inode)
{
int error;
- int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+ int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
const unsigned char *old_name;
if (old_dentry->d_inode == new_dentry->d_inode)
@@ -3865,7 +4143,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (is_dir)
error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
else
- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
if (!error)
fsnotify_move(old_dir, new_dir, old_name, is_dir,
new_dentry->d_inode, old_dentry);
@@ -3881,6 +4159,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
struct nameidata oldnd, newnd;
+ struct inode *delegated_inode = NULL;
struct filename *from;
struct filename *to;
unsigned int lookup_flags = 0;
@@ -3920,6 +4199,7 @@ retry:
newnd.flags &= ~LOOKUP_PARENT;
newnd.flags |= LOOKUP_RENAME_TARGET;
+retry_deleg:
trap = lock_rename(new_dir, old_dir);
old_dentry = lookup_hash(&oldnd);
@@ -3928,10 +4208,10 @@ retry:
goto exit3;
/* source must exist */
error = -ENOENT;
- if (!old_dentry->d_inode)
+ if (d_is_negative(old_dentry))
goto exit4;
/* unless the source is a directory trailing slashes give -ENOTDIR */
- if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+ if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
error = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
goto exit4;
@@ -3956,13 +4236,19 @@ retry:
if (error)
goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
+ new_dir->d_inode, new_dentry,
+ &delegated_inode);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
mnt_drop_write(oldnd.path.mnt);
exit2:
if (retry_estale(error, lookup_flags))
@@ -4025,11 +4311,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
return res;
}
-int vfs_follow_link(struct nameidata *nd, const char *link)
-{
- return __vfs_follow_link(nd, link);
-}
-
/* get the link contents into pagecache */
static char *page_getlink(struct dentry * dentry, struct page **ppage)
{
@@ -4141,7 +4422,6 @@ EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
-EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);