summaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c329
1 files changed, 197 insertions, 132 deletions
diff --git a/fs/namei.c b/fs/namei.c
index caa28051e197..3531deebad30 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put);
* to restart the path walk from the beginning in ref-walk mode.
*/
-static inline void lock_rcu_walk(void)
-{
- br_read_lock(&vfsmount_lock);
- rcu_read_lock();
-}
-
-static inline void unlock_rcu_walk(void)
-{
- rcu_read_unlock();
- br_read_unlock(&vfsmount_lock);
-}
-
/**
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
@@ -512,26 +500,22 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
BUG_ON(!(nd->flags & LOOKUP_RCU));
/*
- * Get a reference to the parent first: we're
- * going to make "path_put(nd->path)" valid in
- * non-RCU context for "terminate_walk()".
- *
- * If this doesn't work, return immediately with
- * RCU walking still active (and then we will do
- * the RCU walk cleanup in terminate_walk()).
+ * After legitimizing the bastards, terminate_walk()
+ * will do the right thing for non-RCU mode, and all our
+ * subsequent exit cases should rcu_read_unlock()
+ * before returning. Do vfsmount first; if dentry
+ * can't be legitimized, just set nd->path.dentry to NULL
+ * and rely on dput(NULL) being a no-op.
*/
- if (!lockref_get_not_dead(&parent->d_lockref))
+ if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
return -ECHILD;
-
- /*
- * After the mntget(), we terminate_walk() will do
- * the right thing for non-RCU mode, and all our
- * subsequent exit cases should unlock_rcu_walk()
- * before returning.
- */
- mntget(nd->path.mnt);
nd->flags &= ~LOOKUP_RCU;
+ if (!lockref_get_not_dead(&parent->d_lockref)) {
+ nd->path.dentry = NULL;
+ goto out;
+ }
+
/*
* For a negative lookup, the lookup sequence point is the parents
* sequence point, and it only needs to revalidate the parent dentry.
@@ -566,17 +550,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
spin_unlock(&fs->lock);
}
- unlock_rcu_walk();
+ rcu_read_unlock();
return 0;
unlock_and_drop_dentry:
spin_unlock(&fs->lock);
drop_dentry:
- unlock_rcu_walk();
+ rcu_read_unlock();
dput(dentry);
goto drop_root_mnt;
out:
- unlock_rcu_walk();
+ rcu_read_unlock();
drop_root_mnt:
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
@@ -608,17 +592,22 @@ static int complete_walk(struct nameidata *nd)
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
+ if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+ rcu_read_unlock();
+ return -ECHILD;
+ }
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
- unlock_rcu_walk();
+ rcu_read_unlock();
+ mntput(nd->path.mnt);
return -ECHILD;
}
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
- unlock_rcu_walk();
+ rcu_read_unlock();
dput(dentry);
+ mntput(nd->path.mnt);
return -ECHILD;
}
- mntget(nd->path.mnt);
- unlock_rcu_walk();
+ rcu_read_unlock();
}
if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -909,15 +898,15 @@ int follow_up(struct path *path)
struct mount *parent;
struct dentry *mountpoint;
- br_read_lock(&vfsmount_lock);
+ read_seqlock_excl(&mount_lock);
parent = mnt->mnt_parent;
if (parent == mnt) {
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
return 0;
}
mntget(&parent->mnt);
mountpoint = dget(mnt->mnt_mountpoint);
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
@@ -1048,8 +1037,8 @@ static int follow_managed(struct path *path, unsigned flags)
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
- * namespace got unmounted before we managed to get the
- * vfsmount_lock */
+ * namespace got unmounted before lookup_mnt() could
+ * get it */
}
/* Handle an automount point */
@@ -1111,7 +1100,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
if (!d_mountpoint(path->dentry))
break;
- mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+ mounted = __lookup_mnt(path->mnt, path->dentry);
if (!mounted)
break;
path->mnt = &mounted->mnt;
@@ -1132,7 +1121,7 @@ static void follow_mount_rcu(struct nameidata *nd)
{
while (d_mountpoint(nd->path.dentry)) {
struct mount *mounted;
- mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
+ mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
if (!mounted)
break;
nd->path.mnt = &mounted->mnt;
@@ -1174,7 +1163,7 @@ failed:
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- unlock_rcu_walk();
+ rcu_read_unlock();
return -ECHILD;
}
@@ -1308,8 +1297,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
}
/*
- * Call i_op->lookup on the dentry. The dentry must be negative but may be
- * hashed if it was pouplated with DCACHE_NEED_LOOKUP.
+ * Call i_op->lookup on the dentry. The dentry must be negative and
+ * unhashed.
*
* dir->d_inode->i_mutex must be held
*/
@@ -1501,7 +1490,7 @@ static void terminate_walk(struct nameidata *nd)
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- unlock_rcu_walk();
+ rcu_read_unlock();
}
}
@@ -1511,18 +1500,9 @@ static void terminate_walk(struct nameidata *nd)
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*/
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
{
- if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
- if (likely(inode->i_op->follow_link))
- return follow;
-
- /* This gets set once for the inode lifetime */
- spin_lock(&inode->i_lock);
- inode->i_opflags |= IOP_NOFOLLOW;
- spin_unlock(&inode->i_lock);
- }
- return 0;
+ return unlikely(d_is_symlink(dentry)) ? follow : 0;
}
static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1552,7 +1532,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
if (!inode)
goto out_path_put;
- if (should_follow_link(inode, follow)) {
+ if (should_follow_link(path->dentry, follow)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
err = -ECHILD;
@@ -1611,26 +1591,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
}
/*
- * We really don't want to look at inode->i_op->lookup
- * when we don't have to. So we keep a cache bit in
- * the inode ->i_opflags field that says "yes, we can
- * do lookup on this inode".
- */
-static inline int can_lookup(struct inode *inode)
-{
- if (likely(inode->i_opflags & IOP_LOOKUP))
- return 1;
- if (likely(!inode->i_op->lookup))
- return 0;
-
- /* We do this once for the lifetime of the inode */
- spin_lock(&inode->i_lock);
- inode->i_opflags |= IOP_LOOKUP;
- spin_unlock(&inode->i_lock);
- return 1;
-}
-
-/*
* We can do the critical dentry name comparison and hashing
* operations one word at a time, but we are limited to:
*
@@ -1638,11 +1598,6 @@ static inline int can_lookup(struct inode *inode)
* do a "get_unaligned()" if this helps and is sufficiently
* fast.
*
- * - Little-endian machines (so that we can generate the mask
- * of low bytes efficiently). Again, we *could* do a byte
- * swapping load on big-endian architectures if that is not
- * expensive enough to make the optimization worthless.
- *
* - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
* do not trap on the (extremely unlikely) case of a page
* crossing operation.
@@ -1686,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
if (!len)
goto done;
}
- mask = ~(~0ul << len*8);
+ mask = bytemask_from_count(len);
hash += mask & a;
done:
return fold_hash(hash);
@@ -1833,7 +1788,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (err)
return err;
}
- if (!can_lookup(nd->inode)) {
+ if (!d_is_directory(nd->path.dentry)) {
err = -ENOTDIR;
break;
}
@@ -1851,9 +1806,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
if (flags & LOOKUP_ROOT) {
- struct inode *inode = nd->root.dentry->d_inode;
+ struct dentry *root = nd->root.dentry;
+ struct inode *inode = root->d_inode;
if (*name) {
- if (!can_lookup(inode))
+ if (!d_is_directory(root))
return -ENOTDIR;
retval = inode_permission(inode, MAY_EXEC);
if (retval)
@@ -1862,8 +1818,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
- lock_rcu_walk();
+ rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
@@ -1872,9 +1829,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->root.mnt = NULL;
+ nd->m_seq = read_seqbegin(&mount_lock);
if (*name=='/') {
if (flags & LOOKUP_RCU) {
- lock_rcu_walk();
+ rcu_read_lock();
set_root_rcu(nd);
} else {
set_root(nd);
@@ -1886,7 +1844,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
struct fs_struct *fs = current->fs;
unsigned seq;
- lock_rcu_walk();
+ rcu_read_lock();
do {
seq = read_seqcount_begin(&fs->seq);
@@ -1907,7 +1865,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
dentry = f.file->f_path.dentry;
if (*name) {
- if (!can_lookup(dentry->d_inode)) {
+ if (!d_is_directory(dentry)) {
fdput(f);
return -ENOTDIR;
}
@@ -1918,7 +1876,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (f.need_put)
*fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- lock_rcu_walk();
+ rcu_read_lock();
} else {
path_get(&nd->path);
fdput(f);
@@ -1989,7 +1947,7 @@ static int path_lookupat(int dfd, const char *name,
err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) {
- if (!can_lookup(nd->inode)) {
+ if (!d_is_directory(nd->path.dentry)) {
path_put(&nd->path);
err = -ENOTDIR;
}
@@ -2281,7 +2239,7 @@ done:
}
path->dentry = dentry;
path->mnt = mntget(nd->path.mnt);
- if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+ if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
return 1;
follow_mount(path);
error = 0;
@@ -2426,12 +2384,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
-static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
{
+ struct inode *inode = victim->d_inode;
int error;
- if (!victim->d_inode)
+ if (d_is_negative(victim))
return -ENOENT;
+ BUG_ON(!inode);
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2441,15 +2401,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+
+ if (check_sticky(dir, inode) || IS_APPEND(inode) ||
+ IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
return -EPERM;
if (isdir) {
- if (!S_ISDIR(victim->d_inode->i_mode))
+ if (!d_is_directory(victim) && !d_is_autodir(victim))
return -ENOTDIR;
if (IS_ROOT(victim))
return -EBUSY;
- } else if (S_ISDIR(victim->d_inode->i_mode))
+ } else if (d_is_directory(victim) || d_is_autodir(victim))
return -EISDIR;
if (IS_DEADDIR(dir))
return -ENOENT;
@@ -2468,6 +2429,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
@@ -2983,7 +2945,7 @@ retry_lookup:
/*
* create/update audit record if it already exists.
*/
- if (path->dentry->d_inode)
+ if (d_is_positive(path->dentry))
audit_inode(name, path->dentry, 0);
/*
@@ -3012,12 +2974,12 @@ retry_lookup:
finish_lookup:
/* we _can_ be in RCU mode here */
error = -ENOENT;
- if (!inode) {
+ if (d_is_negative(path->dentry)) {
path_to_nameidata(path, nd);
goto out;
}
- if (should_follow_link(inode, !symlink_ok)) {
+ if (should_follow_link(path->dentry, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
error = -ECHILD;
@@ -3046,10 +3008,11 @@ finish_open:
}
audit_inode(name, nd->path.dentry, 0);
error = -EISDIR;
- if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+ if ((open_flag & O_CREAT) &&
+ (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
goto out;
error = -ENOTDIR;
- if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
+ if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
goto out;
if (!S_ISREG(nd->inode->i_mode))
will_truncate = false;
@@ -3275,7 +3238,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
nd.root.mnt = mnt;
nd.root.dentry = dentry;
- if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+ if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
return ERR_PTR(-ELOOP);
file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3325,8 +3288,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
goto unlock;
error = -EEXIST;
- if (dentry->d_inode)
+ if (d_is_positive(dentry))
goto fail;
+
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
@@ -3647,8 +3611,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
return do_rmdir(AT_FDCWD, pathname);
}
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir: parent directory
+ * @dentry: victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode. The caller
+ * should then break the delegation on that inode and retry. Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
{
+ struct inode *target = dentry->d_inode;
int error = may_delete(dir, dentry, 0);
if (error)
@@ -3657,22 +3640,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
if (!dir->i_op->unlink)
return -EPERM;
- mutex_lock(&dentry->d_inode->i_mutex);
+ mutex_lock(&target->i_mutex);
if (d_mountpoint(dentry))
error = -EBUSY;
else {
error = security_inode_unlink(dir, dentry);
if (!error) {
+ error = try_break_deleg(target, delegated_inode);
+ if (error)
+ goto out;
error = dir->i_op->unlink(dir, dentry);
if (!error)
dont_mount(dentry);
}
}
- mutex_unlock(&dentry->d_inode->i_mutex);
+out:
+ mutex_unlock(&target->i_mutex);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
- fsnotify_link_count(dentry->d_inode);
+ fsnotify_link_count(target);
d_delete(dentry);
}
@@ -3692,6 +3679,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
+ struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3706,7 +3694,7 @@ retry:
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit1;
-
+retry_deleg:
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
error = PTR_ERR(dentry);
@@ -3715,19 +3703,25 @@ retry:
if (nd.last.name[nd.last.len])
goto slashes;
inode = dentry->d_inode;
- if (!inode)
+ if (d_is_negative(dentry))
goto slashes;
ihold(inode);
error = security_path_unlink(&nd.path, dentry);
if (error)
goto exit2;
- error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+ error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
exit2:
dput(dentry);
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (inode)
iput(inode); /* truncate the inode here */
+ inode = NULL;
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
@@ -3740,8 +3734,12 @@ exit1:
return error;
slashes:
- error = !dentry->d_inode ? -ENOENT :
- S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+ if (d_is_negative(dentry))
+ error = -ENOENT;
+ else if (d_is_directory(dentry) || d_is_autodir(dentry))
+ error = -EISDIR;
+ else
+ error = -ENOTDIR;
goto exit2;
}
@@ -3817,7 +3815,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
return sys_symlinkat(oldname, AT_FDCWD, newname);
}
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry: object to be linked
+ * @dir: new parent
+ * @new_dentry: where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode. The caller should then break the delegation
+ * and retry. Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
{
struct inode *inode = old_dentry->d_inode;
unsigned max_links = dir->i_sb->s_max_links;
@@ -3853,8 +3870,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
error = -ENOENT;
else if (max_links && inode->i_nlink >= max_links)
error = -EMLINK;
- else
- error = dir->i_op->link(old_dentry, dir, new_dentry);
+ else {
+ error = try_break_deleg(inode, delegated_inode);
+ if (!error)
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
+ }
if (!error && (inode->i_state & I_LINKABLE)) {
spin_lock(&inode->i_lock);
@@ -3881,6 +3901,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
{
struct dentry *new_dentry;
struct path old_path, new_path;
+ struct inode *delegated_inode = NULL;
int how = 0;
int error;
@@ -3919,9 +3940,14 @@ retry:
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+ error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
out_dput:
done_path_create(&new_path, new_dentry);
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, how)) {
how |= LOOKUP_REVAL;
goto retry;
@@ -3946,7 +3972,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* That's where 4.4 screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _three_ objects - parents and victim (if it exists).
+ * c) we have to lock _four_ objects - parents and victim (if it exists),
+ * and source (if it is not a directory).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4019,9 +4046,11 @@ out:
}
static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ struct inode **delegated_inode)
{
struct inode *target = new_dentry->d_inode;
+ struct inode *source = old_dentry->d_inode;
int error;
error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4029,13 +4058,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
return error;
dget(new_dentry);
- if (target)
- mutex_lock(&target->i_mutex);
+ lock_two_nondirectories(source, target);
error = -EBUSY;
if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
goto out;
+ error = try_break_deleg(source, delegated_inode);
+ if (error)
+ goto out;
+ if (target) {
+ error = try_break_deleg(target, delegated_inode);
+ if (error)
+ goto out;
+ }
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
goto out;
@@ -4045,17 +4081,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
d_move(old_dentry, new_dentry);
out:
- if (target)
- mutex_unlock(&target->i_mutex);
+ unlock_two_nondirectories(source, target);
dput(new_dentry);
return error;
}
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir: parent of source
+ * @old_dentry: source
+ * @new_dir: parent of destination
+ * @new_dentry: destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename()).
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode. The caller should then
+ * break the delegation and retry. Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ struct inode **delegated_inode)
{
int error;
- int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+ int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
const unsigned char *old_name;
if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4080,7 +4137,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (is_dir)
error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
else
- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
if (!error)
fsnotify_move(old_dir, new_dir, old_name, is_dir,
new_dentry->d_inode, old_dentry);
@@ -4096,6 +4153,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
struct nameidata oldnd, newnd;
+ struct inode *delegated_inode = NULL;
struct filename *from;
struct filename *to;
unsigned int lookup_flags = 0;
@@ -4135,6 +4193,7 @@ retry:
newnd.flags &= ~LOOKUP_PARENT;
newnd.flags |= LOOKUP_RENAME_TARGET;
+retry_deleg:
trap = lock_rename(new_dir, old_dir);
old_dentry = lookup_hash(&oldnd);
@@ -4143,10 +4202,10 @@ retry:
goto exit3;
/* source must exist */
error = -ENOENT;
- if (!old_dentry->d_inode)
+ if (d_is_negative(old_dentry))
goto exit4;
/* unless the source is a directory trailing slashes give -ENOTDIR */
- if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+ if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
error = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
goto exit4;
@@ -4171,13 +4230,19 @@ retry:
if (error)
goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
+ new_dir->d_inode, new_dentry,
+ &delegated_inode);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
mnt_drop_write(oldnd.path.mnt);
exit2:
if (retry_estale(error, lookup_flags))