diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 423 |
1 files changed, 244 insertions, 179 deletions
diff --git a/fs/namei.c b/fs/namei.c index 46ea9cc16647..c651f02c9fec 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -15,7 +15,8 @@ */ #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/kernel.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/namei.h> @@ -116,52 +117,42 @@ * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ -static int do_getname(const char __user *filename, char *page) -{ - int retval; - unsigned long len = PATH_MAX; - - if (!segment_eq(get_fs(), KERNEL_DS)) { - if ((unsigned long) filename >= TASK_SIZE) - return -EFAULT; - if (TASK_SIZE - (unsigned long) filename < PATH_MAX) - len = TASK_SIZE - (unsigned long) filename; - } - - retval = strncpy_from_user(page, filename, len); - if (retval > 0) { - if (retval < len) - return 0; - return -ENAMETOOLONG; - } else if (!retval) - retval = -ENOENT; - return retval; -} - static char *getname_flags(const char __user *filename, int flags, int *empty) { - char *result = __getname(); - int retval; + char *result = __getname(), *err; + int len; - if (!result) + if (unlikely(!result)) return ERR_PTR(-ENOMEM); - retval = do_getname(filename, result); - if (retval < 0) { - if (retval == -ENOENT && empty) + len = strncpy_from_user(result, filename, PATH_MAX); + err = ERR_PTR(len); + if (unlikely(len < 0)) + goto error; + + /* The empty path is special. */ + if (unlikely(!len)) { + if (empty) *empty = 1; - if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { - __putname(result); - return ERR_PTR(retval); - } + err = ERR_PTR(-ENOENT); + if (!(flags & LOOKUP_EMPTY)) + goto error; } - audit_getname(result); - return result; + + err = ERR_PTR(-ENAMETOOLONG); + if (likely(len < PATH_MAX)) { + audit_getname(result); + return result; + } + +error: + __putname(result); + return err; } char *getname(const char __user * filename) { - return getname_flags(filename, 0, 0); + return getname_flags(filename, 0, NULL); } #ifdef CONFIG_AUDITSYSCALL @@ -228,10 +219,7 @@ static int acl_permission_check(struct inode *inode, int mask) { unsigned int mode = inode->i_mode; - if (current_user_ns() != inode_userns(inode)) - goto other_perms; - - if (likely(current_fsuid() == inode->i_uid)) + if (likely(uid_eq(current_fsuid(), inode->i_uid))) mode >>= 6; else { if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { @@ -244,7 +232,6 @@ static int acl_permission_check(struct inode *inode, int mask) mode >>= 3; } -other_perms: /* * If the DACs are ok we don't need any capability check. */ @@ -280,10 +267,10 @@ int generic_permission(struct inode *inode, int mask) if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ - if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) + if (inode_capable(inode, CAP_DAC_OVERRIDE)) return 0; if (!(mask & MAY_WRITE)) - if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) + if (inode_capable(inode, CAP_DAC_READ_SEARCH)) return 0; return -EACCES; } @@ -293,7 +280,7 @@ int generic_permission(struct inode *inode, int mask) * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) - if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) + if (inode_capable(inode, CAP_DAC_OVERRIDE)) return 0; /* @@ -301,7 +288,7 @@ int generic_permission(struct inode *inode, int mask) */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) - if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) + if (inode_capable(inode, CAP_DAC_READ_SEARCH)) return 0; return -EACCES; @@ -642,7 +629,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) cond_resched(); current->total_link_count++; - touch_atime(link->mnt, dentry); + touch_atime(link); nd_set_link(nd, NULL); error = security_inode_follow_link(link->dentry, nd); @@ -1054,53 +1041,65 @@ static void follow_dotdot(struct nameidata *nd) } /* - * Allocate a dentry with name and parent, and perform a parent - * directory ->lookup on it. Returns the new dentry, or ERR_PTR - * on error. parent->d_inode->i_mutex must be held. d_lookup must - * have verified that no child exists while under i_mutex. + * This looks up the name in dcache, possibly revalidates the old dentry and + * allocates a new one if not found or not valid. In the need_lookup argument + * returns whether i_op->lookup is necessary. + * + * dir->d_inode->i_mutex must be held */ -static struct dentry *d_alloc_and_lookup(struct dentry *parent, - struct qstr *name, struct nameidata *nd) +static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, + struct nameidata *nd, bool *need_lookup) { - struct inode *inode = parent->d_inode; struct dentry *dentry; - struct dentry *old; + int error; - /* Don't create child dentry for a dead directory. */ - if (unlikely(IS_DEADDIR(inode))) - return ERR_PTR(-ENOENT); + *need_lookup = false; + dentry = d_lookup(dir, name); + if (dentry) { + if (d_need_lookup(dentry)) { + *need_lookup = true; + } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { + error = d_revalidate(dentry, nd); + if (unlikely(error <= 0)) { + if (error < 0) { + dput(dentry); + return ERR_PTR(error); + } else if (!d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; + } + } + } + } - dentry = d_alloc(parent, name); - if (unlikely(!dentry)) - return ERR_PTR(-ENOMEM); + if (!dentry) { + dentry = d_alloc(dir, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); - old = inode->i_op->lookup(inode, dentry, nd); - if (unlikely(old)) { - dput(dentry); - dentry = old; + *need_lookup = true; } return dentry; } /* - * We already have a dentry, but require a lookup to be performed on the parent - * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error. - * parent->d_inode->i_mutex must be held. d_lookup must have verified that no - * child exists while under i_mutex. + * Call i_op->lookup on the dentry. The dentry must be negative but may be + * hashed if it was pouplated with DCACHE_NEED_LOOKUP. + * + * dir->d_inode->i_mutex must be held */ -static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry, - struct nameidata *nd) +static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - struct inode *inode = parent->d_inode; struct dentry *old; /* Don't create child dentry for a dead directory. */ - if (unlikely(IS_DEADDIR(inode))) { + if (unlikely(IS_DEADDIR(dir))) { dput(dentry); return ERR_PTR(-ENOENT); } - old = inode->i_op->lookup(inode, dentry, nd); + old = dir->i_op->lookup(dir, dentry, nd); if (unlikely(old)) { dput(dentry); dentry = old; @@ -1108,6 +1107,19 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr return dentry; } +static struct dentry *__lookup_hash(struct qstr *name, + struct dentry *base, struct nameidata *nd) +{ + bool need_lookup; + struct dentry *dentry; + + dentry = lookup_dcache(name, base, nd, &need_lookup); + if (!need_lookup) + return dentry; + + return lookup_real(base->d_inode, dentry, nd); +} + /* * It's more convoluted than I'd like it to be, but... it's still fairly * small and for now I'd prefer to have fast path as straight as possible. @@ -1129,16 +1141,31 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, */ if (nd->flags & LOOKUP_RCU) { unsigned seq; - *inode = nd->inode; - dentry = __d_lookup_rcu(parent, name, &seq, inode); + dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); if (!dentry) goto unlazy; - /* Memory barrier in read_seqcount_begin of child is enough */ + /* + * This sequence count validates that the inode matches + * the dentry name information from lookup. + */ + *inode = dentry->d_inode; + if (read_seqcount_retry(&dentry->d_seq, seq)) + return -ECHILD; + + /* + * This sequence count validates that the parent had no + * changes while we did the lookup of the dentry above. + * + * The memory barrier in read_seqcount_begin of child is + * enough, we can use __read_seqcount_retry here. + */ if (__read_seqcount_retry(&parent->d_seq, nd->seq)) return -ECHILD; nd->seq = seq; + if (unlikely(d_need_lookup(dentry))) + goto unlazy; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { status = d_revalidate(dentry, nd); if (unlikely(status <= 0)) { @@ -1147,8 +1174,6 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, goto unlazy; } } - if (unlikely(d_need_lookup(dentry))) - goto unlazy; path->mnt = mnt; path->dentry = dentry; if (unlikely(!__follow_mount_rcu(nd, path, inode))) @@ -1163,38 +1188,14 @@ unlazy: dentry = __d_lookup(parent, name); } - if (dentry && unlikely(d_need_lookup(dentry))) { + if (unlikely(!dentry)) + goto need_lookup; + + if (unlikely(d_need_lookup(dentry))) { dput(dentry); - dentry = NULL; - } -retry: - if (unlikely(!dentry)) { - struct inode *dir = parent->d_inode; - BUG_ON(nd->inode != dir); - - mutex_lock(&dir->i_mutex); - dentry = d_lookup(parent, name); - if (likely(!dentry)) { - dentry = d_alloc_and_lookup(parent, name, nd); - if (IS_ERR(dentry)) { - mutex_unlock(&dir->i_mutex); - return PTR_ERR(dentry); - } - /* known good */ - need_reval = 0; - status = 1; - } else if (unlikely(d_need_lookup(dentry))) { - dentry = d_inode_lookup(parent, dentry, nd); - if (IS_ERR(dentry)) { - mutex_unlock(&dir->i_mutex); - return PTR_ERR(dentry); - } - /* known good */ - need_reval = 0; - status = 1; - } - mutex_unlock(&dir->i_mutex); + goto need_lookup; } + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) status = d_revalidate(dentry, nd); if (unlikely(status <= 0)) { @@ -1204,12 +1205,10 @@ retry: } if (!d_invalidate(dentry)) { dput(dentry); - dentry = NULL; - need_reval = 1; - goto retry; + goto need_lookup; } } - +done: path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd->flags); @@ -1221,6 +1220,16 @@ retry: nd->flags |= LOOKUP_JUMPED; *inode = path->dentry->d_inode; return 0; + +need_lookup: + BUG_ON(nd->inode != parent->d_inode); + + mutex_lock(&parent->d_inode->i_mutex); + dentry = __lookup_hash(name, parent, nd); + mutex_unlock(&parent->d_inode->i_mutex); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + goto done; } static inline int may_lookup(struct nameidata *nd) @@ -1374,6 +1383,100 @@ static inline int can_lookup(struct inode *inode) return 1; } +/* + * We can do the critical dentry name comparison and hashing + * operations one word at a time, but we are limited to: + * + * - Architectures with fast unaligned word accesses. We could + * do a "get_unaligned()" if this helps and is sufficiently + * fast. + * + * - Little-endian machines (so that we can generate the mask + * of low bytes efficiently). Again, we *could* do a byte + * swapping load on big-endian architectures if that is not + * expensive enough to make the optimization worthless. + * + * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we + * do not trap on the (extremely unlikely) case of a page + * crossing operation. + * + * - Furthermore, we need an efficient 64-bit compile for the + * 64-bit case in order to generate the "number of bytes in + * the final mask". Again, that could be replaced with a + * efficient population count instruction or similar. + */ +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#include <asm/word-at-a-time.h> + +#ifdef CONFIG_64BIT + +static inline unsigned int fold_hash(unsigned long hash) +{ + hash += hash >> (8*sizeof(int)); + return hash; +} + +#else /* 32-bit case */ + +#define fold_hash(x) (x) + +#endif + +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long a, mask; + unsigned long hash = 0; + + for (;;) { + a = load_unaligned_zeropad(name); + if (len < sizeof(unsigned long)) + break; + hash += a; + hash *= 9; + name += sizeof(unsigned long); + len -= sizeof(unsigned long); + if (!len) + goto done; + } + mask = ~(~0ul << len*8); + hash += mask & a; +done: + return fold_hash(hash); +} +EXPORT_SYMBOL(full_name_hash); + +/* + * Calculate the length and hash of the path component, and + * return the length of the component; + */ +static inline unsigned long hash_name(const char *name, unsigned int *hashp) +{ + unsigned long a, b, adata, bdata, mask, hash, len; + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + + hash = a = 0; + len = -sizeof(unsigned long); + do { + hash = (hash + a) * 9; + len += sizeof(unsigned long); + a = load_unaligned_zeropad(name+len); + b = a ^ REPEAT_BYTE('/'); + } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); + + adata = prep_zero_mask(a, adata, &constants); + bdata = prep_zero_mask(b, bdata, &constants); + + mask = create_zero_mask(adata | bdata); + + hash += a & zero_bytemask(mask); + *hashp = fold_hash(hash); + + return len + find_zero(mask); +} + +#else + unsigned int full_name_hash(const unsigned char *name, unsigned int len) { unsigned long hash = init_name_hash(); @@ -1402,6 +1505,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) return len; } +#endif + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1723,59 +1828,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, return err; } -static struct dentry *__lookup_hash(struct qstr *name, - struct dentry *base, struct nameidata *nd) -{ - struct inode *inode = base->d_inode; - struct dentry *dentry; - int err; - - err = inode_permission(inode, MAY_EXEC); - if (err) - return ERR_PTR(err); - - /* - * Don't bother with __d_lookup: callers are for creat as - * well as unlink, so a lot of the time it would cost - * a double lookup. - */ - dentry = d_lookup(base, name); - - if (dentry && d_need_lookup(dentry)) { - /* - * __lookup_hash is called with the parent dir's i_mutex already - * held, so we are good to go here. - */ - dentry = d_inode_lookup(base, dentry, nd); - if (IS_ERR(dentry)) - return dentry; - } - - if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) { - int status = d_revalidate(dentry, nd); - if (unlikely(status <= 0)) { - /* - * The dentry failed validation. - * If d_revalidate returned 0 attempt to invalidate - * the dentry otherwise d_revalidate is asking us - * to return a fail status. - */ - if (status < 0) { - dput(dentry); - return ERR_PTR(status); - } else if (!d_invalidate(dentry)) { - dput(dentry); - dentry = NULL; - } - } - } - - if (!dentry) - dentry = d_alloc_and_lookup(base, name, nd); - - return dentry; -} - /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. Doesn't follow mounts. @@ -1801,6 +1853,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; unsigned int c; + int err; WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); @@ -1825,6 +1878,10 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) return ERR_PTR(err); } + err = inode_permission(base->d_inode, MAY_EXEC); + if (err) + return ERR_PTR(err); + return __lookup_hash(&this, base, NULL); } @@ -1849,7 +1906,7 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { - return user_path_at_empty(dfd, name, flags, path, 0); + return user_path_at_empty(dfd, name, flags, path, NULL); } static int user_path_parent(int dfd, const char __user *path, @@ -1876,19 +1933,15 @@ static int user_path_parent(int dfd, const char __user *path, */ static inline int check_sticky(struct inode *dir, struct inode *inode) { - uid_t fsuid = current_fsuid(); + kuid_t fsuid = current_fsuid(); if (!(dir->i_mode & S_ISVTX)) return 0; - if (current_user_ns() != inode_userns(inode)) - goto other_userns; - if (inode->i_uid == fsuid) + if (uid_eq(inode->i_uid, fsuid)) return 0; - if (dir->i_uid == fsuid) + if (uid_eq(dir->i_uid, fsuid)) return 0; - -other_userns: - return !ns_capable(inode_userns(inode), CAP_FOWNER); + return !inode_capable(inode, CAP_FOWNER); } /* @@ -2476,8 +2529,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && - !ns_capable(inode_userns(dir), CAP_MKNOD)) + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) @@ -2569,6 +2621,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int error = may_create(dir, dentry); + unsigned max_links = dir->i_sb->s_max_links; if (error) return error; @@ -2581,6 +2634,9 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (error) return error; + if (max_links && dir->i_nlink >= max_links) + return -EMLINK; + error = dir->i_op->mkdir(dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); @@ -2622,7 +2678,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /* * The dentry_unhash() helper will try to drop the dentry early: we - * should have a usage count of 2 if we're the only user of this + * should have a usage count of 1 if we're the only user of this * dentry, and if that is true (possibly after pruning the dcache), * then we drop the dentry now. * @@ -2911,6 +2967,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { struct inode *inode = old_dentry->d_inode; + unsigned max_links = dir->i_sb->s_max_links; int error; if (!inode) @@ -2941,6 +2998,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de /* Make sure we don't allow creating hardlink to an unlinked file */ if (inode->i_nlink == 0) error = -ENOENT; + else if (max_links && inode->i_nlink >= max_links) + error = -EMLINK; else error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&inode->i_mutex); @@ -3050,6 +3109,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, { int error = 0; struct inode *target = new_dentry->d_inode; + unsigned max_links = new_dir->i_sb->s_max_links; /* * If we are going to change the parent - check write permissions, @@ -3073,6 +3133,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; + error = -EMLINK; + if (max_links && !target && new_dir != old_dir && + new_dir->i_nlink >= max_links) + goto out; + if (target) shrink_dcache_parent(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); @@ -3371,9 +3436,9 @@ retry: if (err) goto fail; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr, symname, len-1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, page, fsdata); |