diff options
Diffstat (limited to 'fs')
67 files changed, 901 insertions, 352 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b1dc51888048..699941e90667 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1368,9 +1368,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); - if (!new_valid_dev(rdev)) - return -EINVAL; - /* build extension */ if (S_ISBLK(mode)) sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index e8aa57dc8d6d..cb899af1babc 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -829,9 +829,6 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); - if (!new_valid_dev(rdev)) - return -EINVAL; - v9ses = v9fs_inode2v9ses(dir); dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b0b9a779f9ac..3a93755e880f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include <linux/utsname.h> #include <linux/coredump.h> #include <linux/sched.h> +#include <linux/dax.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> @@ -1236,6 +1237,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (vma->vm_flags & VM_DONTDUMP) return 0; + /* support for DAX */ + if (vma_is_dax(vma)) { + if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) + goto whole; + if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) + goto whole; + return 0; + } + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d3634bfb7fe1..b1adb92e69de 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -35,6 +35,7 @@ #include <linux/elf-fdpic.h> #include <linux/elfcore.h> #include <linux/coredump.h> +#include <linux/dax.h> #include <asm/uaccess.h> #include <asm/param.h> @@ -103,19 +104,36 @@ static void __exit exit_elf_fdpic_binfmt(void) core_initcall(init_elf_fdpic_binfmt); module_exit(exit_elf_fdpic_binfmt); -static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) +static int is_elf(struct elfhdr *hdr, struct file *file) { if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0) return 0; if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN) return 0; - if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr)) + if (!elf_check_arch(hdr)) return 0; if (!file->f_op->mmap) return 0; return 1; } +#ifndef elf_check_fdpic +#define elf_check_fdpic(x) 0 +#endif + +#ifndef elf_check_const_displacement +#define elf_check_const_displacement(x) 0 +#endif + +static int is_constdisp(struct elfhdr *hdr) +{ + if (!elf_check_fdpic(hdr)) + return 1; + if (elf_check_const_displacement(hdr)) + return 1; + return 0; +} + /*****************************************************************************/ /* * read the program headers table into memory @@ -191,8 +209,18 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* check that this is a binary we know how to deal with */ retval = -ENOEXEC; - if (!is_elf_fdpic(&exec_params.hdr, bprm->file)) + if (!is_elf(&exec_params.hdr, bprm->file)) + goto error; + if (!elf_check_fdpic(&exec_params.hdr)) { +#ifdef CONFIG_MMU + /* binfmt_elf handles non-fdpic elf except on nommu */ goto error; +#else + /* nommu can only load ET_DYN (PIE) ELF */ + if (exec_params.hdr.e_type != ET_DYN) + goto error; +#endif + } /* read the program header table */ retval = elf_fdpic_fetch_phdrs(&exec_params, bprm->file); @@ -269,13 +297,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) } - if (elf_check_const_displacement(&exec_params.hdr)) + if (is_constdisp(&exec_params.hdr)) exec_params.flags |= ELF_FDPIC_FLAG_CONSTDISP; /* perform insanity checks on the interpreter */ if (interpreter_name) { retval = -ELIBBAD; - if (!is_elf_fdpic(&interp_params.hdr, interpreter)) + if (!is_elf(&interp_params.hdr, interpreter)) goto error; interp_params.flags = ELF_FDPIC_FLAG_PRESENT; @@ -306,9 +334,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) retval = -ENOEXEC; if (stack_size == 0) - goto error; + stack_size = 131072UL; /* same as exec.c's default commit */ - if (elf_check_const_displacement(&interp_params.hdr)) + if (is_constdisp(&interp_params.hdr)) interp_params.flags |= ELF_FDPIC_FLAG_CONSTDISP; /* flush all traces of the currently running executable */ @@ -319,7 +347,10 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* there's now no turning back... the old userspace image is dead, * defunct, deceased, etc. */ - set_personality(PER_LINUX_FDPIC); + if (elf_check_fdpic(&exec_params.hdr)) + set_personality(PER_LINUX_FDPIC); + else + set_personality(PER_LINUX); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; @@ -374,10 +405,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) PAGE_ALIGN(current->mm->start_brk); #else - /* create a stack and brk area big enough for everyone - * - the brk heap starts at the bottom and works up - * - the stack starts at the top and works down - */ + /* create a stack area and zero-size brk area */ stack_size = (stack_size + PAGE_SIZE - 1) & PAGE_MASK; if (stack_size < PAGE_SIZE * 2) stack_size = PAGE_SIZE * 2; @@ -400,8 +428,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->brk = current->mm->start_brk; current->mm->context.end_brk = current->mm->start_brk; - current->mm->context.end_brk += - (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0; current->mm->start_stack = current->mm->start_brk + stack_size; #endif @@ -1206,6 +1232,20 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) return 0; } + /* support for DAX */ + if (vma_is_dax(vma)) { + if (vma->vm_flags & VM_SHARED) { + dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags); + kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + } else { + dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags); + kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + } + return dump_ok; + } + /* By default, dump shared memory if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) { if (file_inode(vma->vm_file)->i_nlink == 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4439fbb4ff45..0e4f2bfcc37d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6360,9 +6360,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - if (!new_valid_dev(rdev)) - return -EINVAL; - /* * 2 for inode item and ref * 2 for dir items diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 48851f6ea6ec..dcf26537c935 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -686,7 +686,7 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, if (get_user(nmsgs, &udata->nmsgs)) return -EFAULT; - if (nmsgs > I2C_RDRW_IOCTL_MAX_MSGS) + if (nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; if (get_user(datap, &udata->msgs)) diff --git a/fs/direct-io.c b/fs/direct-io.c index 18e7554cf94c..cb5337d8c273 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -109,6 +109,8 @@ struct dio_submit { struct dio { int flags; /* doesn't change */ int rw; + blk_qc_t bio_cookie; + struct block_device *bio_bdev; struct inode *inode; loff_t i_size; /* i_size when submitted */ dio_iodone_t *end_io; /* IO completion function */ @@ -397,11 +399,14 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); - if (sdio->submit_io) + dio->bio_bdev = bio->bi_bdev; + + if (sdio->submit_io) { sdio->submit_io(dio->rw, bio, dio->inode, sdio->logical_offset_in_bio); - else - submit_bio(dio->rw, bio); + dio->bio_cookie = BLK_QC_T_NONE; + } else + dio->bio_cookie = submit_bio(dio->rw, bio); sdio->bio = NULL; sdio->boundary = 0; @@ -440,7 +445,8 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - io_schedule(); + if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) + io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 09a6bb1ad63c..994e078da4bb 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -80,9 +80,6 @@ static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, struct inode *inode; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - inode = exofs_new_inode(dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8d15febd0aa3..4c69c94cafd8 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -684,6 +684,9 @@ struct ext2_inode_info { struct rw_semaphore xattr_sem; #endif rwlock_t i_meta_lock; +#ifdef CONFIG_FS_DAX + struct rw_semaphore dax_sem; +#endif /* * truncate_mutex is for serialising ext2_truncate() against @@ -699,6 +702,14 @@ struct ext2_inode_info { #endif }; +#ifdef CONFIG_FS_DAX +#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem) +#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem) +#else +#define dax_sem_down_write(ext2_inode) +#define dax_sem_up_write(ext2_inode) +#endif + /* * Inode dynamic state flags */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 1982c3f11aec..11a42c5a09ae 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -27,27 +27,103 @@ #include "acl.h" #ifdef CONFIG_FS_DAX +/* + * The lock ordering for ext2 DAX fault paths is: + * + * mmap_sem (MM) + * sb_start_pagefault (vfs, freeze) + * ext2_inode_info->dax_sem + * address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX) + * ext2_inode_info->truncate_mutex + * + * The default page_lock and i_size verification done by non-DAX fault paths + * is sufficient because ext2 doesn't support hole punching. + */ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + if (vmf->flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + down_read(&ei->dax_sem); + + ret = __dax_fault(vma, vmf, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + if (vmf->flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + return ret; } static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { - return dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + if (flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + down_read(&ei->dax_sem); + + ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + if (flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + return ret; } static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&ei->dax_sem); + + ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + sb_end_pagefault(inode->i_sb); + return ret; +} + +static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret = VM_FAULT_NOPAGE; + loff_t size; + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&ei->dax_sem); + + /* check that the faulting page hasn't raced with truncate */ + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + + up_read(&ei->dax_sem); + sb_end_pagefault(inode->i_sb); + return ret; } static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, .pmd_fault = ext2_dax_pmd_fault, .page_mkwrite = ext2_dax_mkwrite, - .pfn_mkwrite = dax_pfn_mkwrite, + .pfn_mkwrite = ext2_dax_pfn_mkwrite, }; static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c60a248c640c..0aa9bf6e6e53 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1085,6 +1085,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de ext2_free_data(inode, p, q); } +/* dax_sem must be held when calling this function */ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; @@ -1100,6 +1101,10 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) blocksize = inode->i_sb->s_blocksize; iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); +#ifdef CONFIG_FS_DAX + WARN_ON(!rwsem_is_locked(&ei->dax_sem)); +#endif + n = ext2_block_to_path(inode, iblock, offsets, NULL); if (n == 0) return; @@ -1185,7 +1190,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; + + dax_sem_down_write(EXT2_I(inode)); __ext2_truncate_blocks(inode, offset); + dax_sem_up_write(EXT2_I(inode)); } static int ext2_setsize(struct inode *inode, loff_t newsize) @@ -1213,8 +1221,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) if (error) return error; + dax_sem_down_write(EXT2_I(inode)); truncate_setsize(inode, newsize); __ext2_truncate_blocks(inode, newsize); + dax_sem_up_write(EXT2_I(inode)); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; if (inode_needs_sync(inode)) { diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index b4841e3066a5..3267a80dbbe2 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -143,9 +143,6 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, struct inode * inode; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - err = dquot_initialize(dir); if (err) return err; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 900e19cf9ef6..3a71cea68420 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -192,6 +192,9 @@ static void init_once(void *foo) init_rwsem(&ei->xattr_sem); #endif mutex_init(&ei->truncate_mutex); +#ifdef CONFIG_FS_DAX + init_rwsem(&ei->dax_sem); +#endif inode_init_once(&ei->vfs_inode); } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b4b3c1f91814..61eaf74dca37 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3333,8 +3333,8 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, atomic_inc(&pa->pa_count); return pa; } - cur_distance = abs64(goal_block - cpa->pa_pstart); - new_distance = abs64(goal_block - pa->pa_pstart); + cur_distance = abs(goal_block - cpa->pa_pstart); + new_distance = abs(goal_block - pa->pa_pstart); if (cur_distance <= new_distance) return cpa; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e48b80c49090..2c32110f9fc0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -481,9 +481,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; - if (!new_valid_dev(rdev)) - return -EINVAL; - f2fs_balance_fs(sbi); inode = f2fs_new_inode(dir, mode); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 487527b42d94..ad8a5b757cc7 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -388,8 +388,13 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) */ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { - __be64 *hc = ip->i_hash_cache; + __be64 *hc; + + spin_lock(&ip->i_inode.i_lock); + hc = ip->i_hash_cache; ip->i_hash_cache = NULL; + spin_unlock(&ip->i_inode.i_lock); + kvfree(hc); } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9287a2d17b8c..5e425469f0c2 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -897,8 +897,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) { i_size_write(inode, pos + count); - /* Marks the inode as dirty */ file_update_time(file); + mark_inode_dirty(inode); } return generic_write_sync(file, pos, count); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9bd1244caf38..32e74710b1aa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -246,8 +246,8 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) */ static int do_promote(struct gfs2_glock *gl) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh, *tmp; @@ -260,10 +260,10 @@ restart: if (may_grant(gl, gh)) { if (gh->gh_list.prev == &gl->gl_holders && glops->go_lock) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); /* FIXME: eliminate this eventually */ ret = glops->go_lock(gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (ret) { if (ret == 1) return 2; @@ -361,7 +361,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) unsigned state = ret & LM_OUT_ST_MASK; int rv; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -405,7 +405,7 @@ retry: pr_err("wanted %u got %u\n", gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return; } @@ -414,9 +414,9 @@ retry: gfs2_demote_wake(gl); if (state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); rv = glops->go_xmote_bh(gl, gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (rv) { do_error(gl, rv); goto out; @@ -429,7 +429,7 @@ retry: out: clear_bit(GLF_LOCK, &gl->gl_flags); out_locked: - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -441,8 +441,8 @@ out_locked: */ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -464,7 +464,7 @@ __acquires(&gl->gl_spin) (gl->gl_state == LM_ST_EXCLUSIVE) || (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (glops->go_sync) glops->go_sync(gl); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) @@ -485,7 +485,7 @@ __acquires(&gl->gl_spin) gfs2_glock_put(gl); } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); } /** @@ -513,8 +513,8 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) */ static void run_queue(struct gfs2_glock *gl, const int nonblock) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { struct gfs2_holder *gh = NULL; int ret; @@ -596,7 +596,7 @@ static void glock_work_func(struct work_struct *work) finish_xmote(gl, gl->gl_reply); drop_ref = 1; } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { @@ -612,7 +612,7 @@ static void glock_work_func(struct work_struct *work) } } run_queue(gl, 0); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!delay) gfs2_glock_put(gl); else { @@ -876,8 +876,8 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) */ static inline void add_to_queue(struct gfs2_holder *gh) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -926,10 +926,10 @@ fail: do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) sdp->sd_lockstruct.ls_ops->lm_cancel(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); } return; @@ -967,7 +967,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (test_bit(GLF_LRU, &gl->gl_flags)) gfs2_glock_remove_from_lru(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { @@ -977,7 +977,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) gl->gl_lockref.count--; } run_queue(gl, 1); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!(gh->gh_flags & GL_ASYNC)) error = gfs2_glock_wait(gh); @@ -1010,7 +1010,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) unsigned delay = 0; int fast_path = 0; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1018,9 +1018,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); glops->go_unlock(gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); } if (list_empty(&gl->gl_holders) && @@ -1033,7 +1033,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_add_to_lru(gl); trace_gfs2_glock_queue(gh, 0); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (likely(fast_path)) return; @@ -1217,9 +1217,9 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) delay = gl->gl_hold_time; } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); handle_callback(gl, state, delay, true); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1259,7 +1259,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) * @gl: Pointer to the glock * @ret: The return value from the dlm * - * The gl_reply field is under the gl_spin lock so that it is ok + * The gl_reply field is under the gl_lockref.lock lock so that it is ok * to use a bitfield shared with other glock state fields. */ @@ -1267,20 +1267,20 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return; } } gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); @@ -1326,14 +1326,14 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); - if (!spin_trylock(&gl->gl_spin)) { + if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: list_add(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); continue; } if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } clear_bit(GLF_LRU, &gl->gl_flags); @@ -1343,7 +1343,7 @@ add_back_to_lru: WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } } @@ -1461,10 +1461,10 @@ static void clear_glock(struct gfs2_glock *gl) { gfs2_glock_remove_from_lru(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1482,9 +1482,9 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) { - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gfs2_dump_glock(seq, gl); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } static void dump_glock_func(struct gfs2_glock *gl) @@ -1518,10 +1518,10 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) ret = gfs2_truncatei_resume(ip); gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); run_queue(gl, 1); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } static const char *state2str(unsigned state) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 32572f71f027..f7cdaa8b4c83 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -141,7 +141,7 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * struct pid *pid; /* Look in glock's list of holders for one with current task as owner */ - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) @@ -151,7 +151,7 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * } gh = NULL; out: - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return gh; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1f6c9c3fe5cb..f348cfb6b69a 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -146,11 +146,11 @@ static void rgrp_go_sync(struct gfs2_glock *gl) struct gfs2_rgrpd *rgd; int error; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_rgrp_brelse(rgd); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; @@ -162,11 +162,11 @@ static void rgrp_go_sync(struct gfs2_glock *gl) mapping_set_error(mapping, error); gfs2_ail_empty_gl(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_free_clones(rgd); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -542,7 +542,7 @@ static int freeze_go_demote_ok(const struct gfs2_glock *gl) * iopen_go_callback - schedule the dcache entry for the inode to be deleted * @gl: the glock * - * gl_spin lock is held while calling this + * gl_lockref.lock lock is held while calling this */ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 121ed08d9d9f..de7b4f97ac75 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -334,9 +334,8 @@ struct gfs2_glock { struct lm_lockname gl_name; struct lockref gl_lockref; -#define gl_spin gl_lockref.lock - /* State fields protected by gl_spin */ + /* State fields protected by gl_lockref.lock */ unsigned int gl_state:2, /* Current state */ gl_target:2, /* Target state */ gl_demote_state:2, /* State requested by remote node */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 284c1542783e..8b907c5cc913 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -50,7 +50,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, s64 delta = sample - s->stats[index]; s->stats[index] += (delta >> 3); index++; - s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2); + s->stats[index] += ((abs(delta) - s->stats[index]) >> 2); } /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 241a399bf83d..fb2b42cf46b5 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -50,7 +50,7 @@ static void gfs2_init_glock_once(void *foo) struct gfs2_glock *gl = foo; INIT_HLIST_BL_NODE(&gl->gl_list); - spin_lock_init(&gl->gl_spin); + spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 02586e7eb964..baab99b69d8a 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1291,6 +1291,9 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, up_write(&s->s_umount); blkdev_put(bdev, mode); down_write(&s->s_umount); + } else { + /* s_mode must be set before deactivate_locked_super calls */ + s->s_mode = mode; } memset(&args, 0, sizeof(args)); @@ -1314,7 +1317,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 475985d14758..c134c0462cee 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -729,9 +729,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gl->gl_object = NULL; - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); gfs2_glock_add_to_lru(gl); gfs2_glock_put(gl); } @@ -933,8 +933,9 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; - rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; - rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; + rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK; + rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr + + rgd->rd_length) * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index b95d0d625f32..0c1bde395062 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -176,6 +176,8 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) unlock_buffer(bh); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops); + else + bd = bh->b_private; lock_buffer(bh); gfs2_log_lock(sdp); } @@ -236,6 +238,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) lock_page(bh->b_page); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops); + else + bd = bh->b_private; unlock_page(bh->b_page); lock_buffer(bh); gfs2_log_lock(sdp); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9e92c9c2d319..ae4d5a1fa4c9 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -227,8 +227,6 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de int err; if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM; - if (!new_valid_dev(rdev)) - return -EINVAL; hpfs_lock(dir->i_sb); err = -ENOSPC; fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 35976bdccafc..9d7551f5c32a 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1372,9 +1372,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, tid_t tid; struct tblock *tblk; - if (!new_valid_dev(rdev)) - return -EINVAL; - jfs_info("jfs_mknod: %pd", dentry); rc = dquot_initialize(dir); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4cd9798f4948..8f9176caf098 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -496,9 +496,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); - if (!new_valid_dev(sb->s_bdev->bd_dev)) - return -EOVERFLOW; - sbi = kzalloc(sizeof(struct jfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 356816e7bc90..f0e3e9e747dd 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1165,8 +1165,6 @@ out: static int ncp_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - if (!new_valid_dev(rdev)) - return -EINVAL; if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { ncp_dbg(1, "mode = 0%ho\n", mode); return ncp_create_new(dir, dentry, mode, rdev, 0); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9cd4eb3a1e22..ddd0138f410c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -229,7 +229,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) struct parallel_io *par; loff_t f_offset = header->args.offset; size_t bytes_left = header->args.count; - unsigned int pg_offset, pg_len; + unsigned int pg_offset = header->args.pgbase, pg_len; struct page **pages = header->args.pages; int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); @@ -262,7 +262,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length = be.be_length - (isect - be.be_f_offset); } - pg_offset = f_offset & ~PAGE_CACHE_MASK; if (is_dio) { if (pg_offset + bytes_left > PAGE_CACHE_SIZE) pg_len = PAGE_CACHE_SIZE - pg_offset; @@ -273,9 +272,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) pg_len = PAGE_CACHE_SIZE; } - isect += (pg_offset >> SECTOR_SHIFT); - extent_length -= (pg_offset >> SECTOR_SHIFT); - if (is_hole(&be)) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ @@ -301,6 +297,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length -= (pg_len >> SECTOR_SHIFT); f_offset += pg_len; bytes_left -= pg_len; + pg_offset = 0; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { header->res.eof = 1; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f7c0a7538a..a7f2e6e33305 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv) } #if defined(CONFIG_NFS_V4_1) -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); -} - /* * The callback service for NFSv4.1 callbacks */ @@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - return 0; -} - static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { @@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc svc_shutdown_net(serv, net); } -static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, + struct net *net, struct rpc_xprt *xprt) { struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; @@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n goto err_bind; } - switch (minorversion) { - case 0: - ret = nfs4_callback_up_net(serv, net); - break; - case 1: - case 2: - ret = nfs41_callback_up_net(serv, net); - break; - default: - printk(KERN_ERR "NFS: unknown callback version: %d\n", - minorversion); - ret = -EINVAL; - break; - } + ret = -EPROTONOSUPPORT; + if (minorversion == 0) + ret = nfs4_callback_up_net(serv, net); + else if (xprt->ops->bc_up) + ret = xprt->ops->bc_up(serv, net); if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); @@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - ret = nfs_callback_up_net(minorversion, serv, net); + ret = nfs_callback_up_net(minorversion, serv, net, xprt); if (ret < 0) goto err_net; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 84326e9fb47a..ff8195bd75ea 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -61,7 +61,6 @@ struct cb_compound_hdr_res { }; struct cb_getattrargs { - struct sockaddr *addr; struct nfs_fh fh; uint32_t bitmap[2]; }; @@ -76,7 +75,6 @@ struct cb_getattrres { }; struct cb_recallargs { - struct sockaddr *addr; struct nfs_fh fh; nfs4_stateid stateid; uint32_t truncate; @@ -119,9 +117,6 @@ extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps); -extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); - #define RCA4_TYPE_MASK_RDATA_DLG 0 #define RCA4_TYPE_MASK_WDATA_DLG 1 #define RCA4_TYPE_MASK_DIR_DLG 2 @@ -134,7 +129,6 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, #define RCA4_TYPE_MASK_ALL 0xf31f struct cb_recallanyargs { - struct sockaddr *craa_addr; uint32_t craa_objs_to_keep; uint32_t craa_type_mask; }; @@ -144,7 +138,6 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, struct cb_process_state *cps); struct cb_recallslotargs { - struct sockaddr *crsa_addr; uint32_t crsa_target_highest_slotid; }; extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, @@ -152,7 +145,6 @@ extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, struct cb_process_state *cps); struct cb_layoutrecallargs { - struct sockaddr *cbl_addr; uint32_t cbl_recall_type; uint32_t cbl_layout_type; uint32_t cbl_layoutchanged; @@ -196,9 +188,6 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion, struct net *net); -extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); -extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #endif /* CONFIG_NFS_V4 */ /* * nfs41: Callbacks are expected to not cause substantial latency, @@ -209,6 +198,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; -extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b85cf7a30232..807eb6ef4f91 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -17,9 +17,7 @@ #include "nfs4session.h" #include "nfs4trace.h" -#ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK -#endif __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res, diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 6b1697a01dde..646cdac73488 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,19 +18,21 @@ #include "internal.h" #include "nfs4session.h" -#define CB_OP_TAGLEN_MAXSZ (512) -#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) -#define CB_OP_GETATTR_BITMAP_MAXSZ (4) -#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - CB_OP_GETATTR_BITMAP_MAXSZ + \ - 2 + 2 + 3 + 3) -#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status +#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + /* change, size, ctime, mtime */\ + (2 + 2 + 3 + 3) * 4) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - 4 + 1 + 3) + NFS4_MAX_SESSIONID_LEN + \ + (1 + 3) * 4) // seqid, 3 slotids #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ @@ -157,7 +159,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (unlikely(status != 0)) return status; /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) { printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); @@ -198,7 +200,6 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr status = decode_fh(xdr, &args->fh); if (unlikely(status != 0)) goto out; - args->addr = svc_addr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); @@ -210,7 +211,6 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, __be32 *p; __be32 status; - args->addr = svc_addr(rqstp); status = decode_stateid(xdr, &args->stateid); if (unlikely(status != 0)) goto out; @@ -236,7 +236,6 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, __be32 status = 0; uint32_t iomode; - args->cbl_addr = svc_addr(rqstp); p = read_buf(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); @@ -383,13 +382,12 @@ static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = read_buf(xdr, len); + p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(sid->data, p, len); + memcpy(sid->data, p, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -500,7 +498,6 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, uint32_t bitmap[2]; __be32 *p, status; - args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -519,7 +516,6 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, { __be32 *p; - args->crsa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -684,13 +680,12 @@ static __be32 encode_sessionid(struct xdr_stream *xdr, const struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = xdr_reserve_space(xdr, len); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(p, sid, len); + memcpy(p, sid, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -704,7 +699,9 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, if (unlikely(status != 0)) goto out; - encode_sessionid(xdr, &res->csr_sessionid); + status = encode_sessionid(xdr, &res->csr_sessionid); + if (status) + goto out; p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57c5a02f6213..d6d5d2a48e83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->time_delta = fsinfo->time_delta; + server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index be806ead7f4d..5166adcfc0fb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -721,14 +721,12 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; - filemap_flush(inode->i_mapping); - rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL) goto out_enoent; - - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + if (stateid != NULL && + !clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3d8e4ffa0a33..ce5a21861074 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1714,9 +1714,6 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); - if (!new_valid_dev(rdev)) - return -EINVAL; - attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index fbc5a56de875..03516c80855a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -339,6 +339,19 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) } } +static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) +{ + struct nfs4_deviceid_node *node; + int i; + + if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) + return; + for (i = 0; i < fls->mirror_array_cnt; i++) { + node = &fls->mirror_array[i]->mirror_ds->id_node; + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + } +} + static struct pnfs_layout_segment * ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_layoutget_res *lgr, @@ -499,6 +512,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = ff_layout_check_layout(lgr); if (rc) goto out_err_free; + ff_layout_mark_devices_valid(fls); ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); @@ -741,17 +755,17 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, +ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) { - struct nfs4_ff_layout_segment *fls; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_pnfs_ds *ds; int idx; - fls = FF_LAYOUT_LSEG(pgio->pg_lseg); /* mirrors are sorted by efficiency */ - for (idx = 0; idx < fls->mirror_array_cnt; idx++) { - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); + for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { + ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); if (ds) { *best_idx = idx; return ds; @@ -782,7 +796,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, if (pgio->pg_lseg == NULL) goto out_mds; - ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); + ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); if (!ds) goto out_mds; mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); @@ -1035,7 +1049,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: - if (ff_layout_has_available_ds(lseg)) + if (ff_layout_no_fallback_to_mds(lseg) || + ff_layout_has_available_ds(lseg)) return -NFS4ERR_RESET_TO_PNFS; reset: dprintk("%s Retry through MDS. Error %d\n", __func__, @@ -1153,7 +1168,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, } /* NFS_PROTO call done callback routines */ - static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { @@ -1171,6 +1185,10 @@ static int ff_layout_read_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_PNFS: + if (ff_layout_choose_best_ds_for_read(hdr->lseg, + hdr->pgio_mirror_idx + 1, + &hdr->pgio_mirror_idx)) + goto out_eagain; set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &hdr->lseg->pls_layout->plh_flags); pnfs_read_resend_pnfs(hdr); @@ -1179,11 +1197,13 @@ static int ff_layout_read_done_cb(struct rpc_task *task, ff_layout_reset_read(hdr); return task->tk_status; case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; + goto out_eagain; } return 0; +out_eagain: + rpc_restart_call_prepare(task); + return -EAGAIN; } static bool diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 68cc0d9828f9..2bb08bc6aaf0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -10,6 +10,7 @@ #define FS_NFS_NFS4FLEXFILELAYOUT_H #define FF_FLAGS_NO_LAYOUTCOMMIT 1 +#define FF_FLAGS_NO_IO_THRU_MDS 2 #include "../pnfs.h" @@ -146,6 +147,12 @@ FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) } static inline bool +ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg) +{ + return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS; +} + +static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { return nfs4_test_deviceid_unavailable(node); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 99a45283b9ee..09b190015df4 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -16,9 +16,7 @@ #include <linux/nfs_fs.h> #include "internal.h" -#ifdef NFS_DEBUG -# define NFSDBG_FACILITY NFSDBG_MOUNT -#endif +#define NFSDBG_FACILITY NFSDBG_MOUNT /* * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 814c1255f1d2..b587ccd31083 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); +int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0f020e4d8421..3e92a3cde15d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return PTR_ERR(task); return 0; } + +static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + struct file *dst_f, loff_t src_offset, + loff_t dst_offset, loff_t count) +{ + struct inode *src_inode = file_inode(src_f); + struct inode *dst_inode = file_inode(dst_f); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct nfs42_clone_args args = { + .src_fh = NFS_FH(src_inode), + .dst_fh = NFS_FH(dst_inode), + .src_offset = src_offset, + .dst_offset = dst_offset, + .dst_bitmask = server->cache_consistency_bitmask, + }; + struct nfs42_clone_res res = { + .server = server, + }; + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + if (status) + return status; + + status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); + if (status) + return status; + + res.dst_fattr = nfs_alloc_fattr(); + if (!res.dst_fattr) + return -ENOMEM; + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + + kfree(res.dst_fattr); + return status; +} + +int nfs42_proc_clone(struct file *src_f, struct file *dst_f, + loff_t src_offset, loff_t dst_offset, loff_t count) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE], + }; + struct inode *inode = file_inode(src_f); + struct nfs_server *server = NFS_SERVER(file_inode(src_f)); + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_CLONE)) + return -EOPNOTSUPP; + + do { + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, + dst_offset, count); + if (err == -ENOTSUPP || err == -EOPNOTSUPP) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; + return -EOPNOTSUPP; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; + +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0eb29e14070d..0ca482a51e53 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -34,6 +34,12 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_clone_maxsz (encode_stateid_maxsz + \ + encode_stateid_maxsz + \ + 2 /* src offset */ + \ + 2 /* dst offset */ + \ + 2 /* count */) +#define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -65,7 +71,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) - +#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_clone_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_clone_maxsz + \ + decode_getattr_maxsz) static void encode_fallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args) @@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr, encode_uint32(xdr, 0); } +static void encode_clone(struct xdr_stream *xdr, + struct nfs42_clone_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + p = reserve_space(xdr, 3*8); + p = xdr_encode_hyper(p, args->src_offset); + p = xdr_encode_hyper(p, args->dst_offset); + xdr_encode_hyper(p, args->count); +} + /* * Encode ALLOCATE request */ @@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode CLONE request + */ +static void nfs4_xdr_enc_clone(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_clone_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_clone(xdr, args, &hdr); + encode_getfattr(xdr, args->dst_bitmask, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTSTATS); } +static int decode_clone(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_CLONE); +} + /* * Decode ALLOCATE request */ @@ -351,4 +411,39 @@ out: return status; } +/* + * Decode CLONE request + */ +static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_clone_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_clone(xdr); + if (status) + goto out; + status = decode_getfattr(xdr, res->dst_fattr, res->server); + +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 50cfc4ca7a02..4afdee420d25 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -183,10 +183,12 @@ struct nfs4_state { struct nfs4_exception { - long timeout; - int retry; struct nfs4_state *state; struct inode *inode; + long timeout; + unsigned char delay : 1, + recovering : 1, + retry : 1; }; struct nfs4_state_recovery_ops { diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index b0dbe0abed53..4aa571956cd6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -4,6 +4,7 @@ * Copyright (C) 1992 Rick Sladkey */ #include <linux/fs.h> +#include <linux/file.h> #include <linux/falloc.h> #include <linux/nfs_fs.h> #include "delegation.h" @@ -192,8 +193,138 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_deallocate(filep, offset, len); return nfs42_proc_allocate(filep, offset, len); } + +static noinline long +nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + struct inode *dst_inode = file_inode(dst_file); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct fd src_file; + struct inode *src_inode; + unsigned int bs = server->clone_blksize; + int ret; + + /* dst file must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) + return -EINVAL; + + ret = mnt_want_write_file(dst_file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + src_inode = file_inode(src_file.file); + + /* src and dst must be different files */ + ret = -EINVAL; + if (src_inode == dst_inode) + goto out_fput; + + /* src file must be opened for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* src and dst must be regular files */ + ret = -EISDIR; + if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src_file.file->f_path.mnt != dst_file->f_path.mnt || + src_inode->i_sb != dst_inode->i_sb) + goto out_fput; + + /* check alignment w.r.t. clone_blksize */ + ret = -EINVAL; + if (bs) { + if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) + goto out_fput; + if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) + goto out_fput; + } + + /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ + if (dst_inode < src_inode) { + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD); + } + + /* flush all pending writes on both src and dst so that server + * has the latest data */ + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; + + ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); + + /* truncate inode page cache of the dst range so that future reads can fetch + * new data from server */ + if (!ret) + truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); + +out_unlock: + if (dst_inode < src_inode) { + mutex_unlock(&src_inode->i_mutex); + mutex_unlock(&dst_inode->i_mutex); + } else { + mutex_unlock(&dst_inode->i_mutex); + mutex_unlock(&src_inode->i_mutex); + } +out_fput: + fdput(src_file); +out_drop_write: + mnt_drop_write_file(dst_file); + return ret; +} + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + struct nfs_ioctl_clone_range_args args; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count); +} +#else +static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + return -ENOTTY; +} + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + return -ENOTTY; +} #endif /* CONFIG_NFS_V4_2 */ +long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + + switch (cmd) { + case NFS_IOC_CLONE: + return nfs42_ioctl_clone(file, arg, 0, 0, 0); + case NFS_IOC_CLONE_RANGE: + return nfs42_ioctl_clone_range(file, argp); + } + + return -ENOTTY; +} + const struct file_operations nfs4_file_operations = { #ifdef CONFIG_NFS_V4_2 .llseek = nfs4_file_llseek, @@ -216,4 +347,9 @@ const struct file_operations nfs4_file_operations = { #endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, +#ifdef CONFIG_COMPAT + .unlocked_ioctl = nfs4_ioctl, +#else + .compat_ioctl = nfs4_ioctl, +#endif /* CONFIG_COMPAT */ }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e5ff69455c7..ff5bddc49a2a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,6 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -239,6 +238,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[3] = { @@ -344,13 +344,16 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_do_handle_exception(struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; struct inode *inode = exception->inode; int ret = errorcode; + exception->delay = 0; + exception->recovering = 0; exception->retry = 0; switch(errorcode) { case 0: @@ -359,11 +362,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode && nfs4_have_delegation(inode, FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - return 0; - } + if (inode && nfs_async_inode_return_delegation(inode, + NULL) == 0) + goto wait_on_recovery; if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); @@ -409,11 +410,12 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ ret = -EBUSY; break; } - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client, &exception->timeout); - if (ret != 0) - break; + nfs_inc_server_stats(server, NFSIOS_DELAY); + case -NFS4ERR_GRACE: + exception->delay = 1; + return 0; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; @@ -434,14 +436,85 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ /* We failed to handle the error */ return nfs4_map_errors(ret); wait_on_recovery: - ret = nfs4_wait_clnt_recover(clp); + exception->recovering = 1; + return 0; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + ret = nfs4_delay(server->client, &exception->timeout); + goto out_retry; + } + if (exception->recovering) { + ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; + goto out_retry; + } + return ret; +out_retry: + if (ret == 0) + exception->retry = 1; + return ret; +} + +static int +nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + rpc_delay(task, nfs4_update_delay(&exception->timeout)); + goto out_retry; + } + if (exception->recovering) { + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + goto out_retry; + } if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - return -EIO; + ret = -EIO; + return ret; +out_retry: if (ret == 0) exception->retry = 1; return ret; } +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, + struct nfs4_state *state, long *timeout) +{ + struct nfs4_exception exception = { + .state = state, + }; + + if (task->tk_status >= 0) + return 0; + if (timeout) + exception.timeout = *timeout; + task->tk_status = nfs4_async_handle_exception(task, server, + task->tk_status, + &exception); + if (exception.delay && timeout) + *timeout = exception.timeout; + if (exception.retry) + return -EAGAIN; + return 0; +} + /* * Return 'true' if 'clp' is using an rpc_client that is integrity protected * or 'false' otherwise. @@ -4530,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4674,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4766,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_return_delegation(inode); @@ -4955,79 +5027,6 @@ out: #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, - struct nfs4_state *state, long *timeout) -{ - struct nfs_client *clp = server->nfs_client; - - if (task->tk_status >= 0) - return 0; - switch(task->tk_status) { - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - if (state == NULL) - break; - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - } - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_lease_recovery(clp); - goto wait_on_recovery; - case -NFS4ERR_MOVED: - if (nfs4_schedule_migration_recovery(server) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - goto wait_on_recovery; -#if defined(CONFIG_NFS_V4_1) - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR %d, Reset session\n", __func__, - task->tk_status); - nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto wait_on_recovery; -#endif /* CONFIG_NFS_V4_1 */ - case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); - rpc_delay(task, nfs4_update_delay(timeout)); - goto restart_call; - case -NFS4ERR_GRACE: - rpc_delay(task, NFS4_POLL_RETRY_MAX); - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_OLD_STATEID: - goto restart_call; - } - task->tk_status = nfs4_map_errors(task->tk_status); - return 0; -recovery_failed: - task->tk_status = -EIO; - return 0; -wait_on_recovery: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - goto recovery_failed; -restart_call: - task->tk_status = 0; - return -EAGAIN; -} - static void nfs4_init_boot_verifier(const struct nfs_client *clp, nfs4_verifier *bootverf) { @@ -5522,7 +5521,7 @@ struct nfs4_unlockdata { struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; }; @@ -8718,7 +8717,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK - | NFS_CAP_LAYOUTSTATS, + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 788adf3897c7..dfed4f5c8fcc 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun *p = cpu_to_be32(FATTR4_WORD0_ACL); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); - xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } static void @@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, args->acl_pgbase, args->acl_len); + args->acl_pages, 0, args->acl_len); encode_nops(&hdr); } @@ -4375,6 +4375,11 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) goto xdr_error; if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0) goto xdr_error; if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0) @@ -4574,6 +4579,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_mode(xdr, bitmap, &fmode); if (status < 0) goto xdr_error; @@ -4627,6 +4636,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); if (status < 0) goto xdr_error; @@ -4764,6 +4777,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +/* + * The granularity of a CLONE operation. + */ +static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4789,15 +4824,28 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) goto xdr_error; fsinfo->wtpref = fsinfo->wtmax; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); if (status != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); if (status) goto xdr_error; + status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize); + if (status) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -7465,6 +7513,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC(CLONE, enc_clone, dec_clone), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 9bc9f04fb7f6..89a15dbe5efc 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -90,7 +90,7 @@ #define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ -static char nfs_root_parms[256] __initdata = ""; +static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = ""; /* Text-based mount options passed to super.c */ static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8abe27165ad0..93496c059837 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1912,12 +1912,13 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); - if (!hdr->pnfs_error) { + if (likely(!hdr->pnfs_error)) { pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_write_error(hdr); hdr->mds_ops->rpc_release(hdr); } @@ -2028,11 +2029,12 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(hdr); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_read_error(hdr); hdr->mds_ops->rpc_release(hdr); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 01b8cc8e8cfc..0a5e33f33b5c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -246,6 +246,13 @@ static void nfs_readpage_retry(struct rpc_task *task, nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Yes, so retry the read at the end of the hdr */ hdr->mds_offset += resp->count; argp->offset += resp->count; @@ -268,7 +275,7 @@ static void nfs_readpage_result(struct rpc_task *task, hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (hdr->res.count != hdr->args.count) + } else if (hdr->res.count < hdr->args.count) nfs_readpage_retry(task, hdr); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 383a027de452..f1268280244e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2816,7 +2816,6 @@ out_invalid_transport_udp: * NFS client for backwards compatibility */ unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600; /* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ @@ -2827,7 +2826,6 @@ char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); -EXPORT_SYMBOL_GPL(nfs_callback_tcpport); EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 75ab7622e0cc..7b9316406930 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1505,6 +1505,13 @@ static void nfs_writeback_result(struct rpc_task *task, task->tk_status = -EIO; return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 37dd6b05b1b5..c9a1a491aa91 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -120,9 +120,6 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) struct nilfs_transaction_info ti; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 5f1c9c29eb8c..47f96988fdd4 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -712,9 +712,6 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - if (!new_valid_dev(rdev)) - return -EINVAL; - retval = dquot_initialize(dir); if (retval) return retval; diff --git a/fs/stat.c b/fs/stat.c index cccc1aab9a8b..d4a61d8dc021 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -367,8 +367,6 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) INIT_STRUCT_STAT64_PADDING(tmp); #ifdef CONFIG_MIPS /* mips has weird padding, so we don't get 64 bits there */ - if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev)) - return -EOVERFLOW; tmp.st_dev = new_encode_dev(stat->dev); tmp.st_rdev = new_encode_dev(stat->rdev); #else diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index ba66d508006a..7ff7712f284e 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -35,3 +35,18 @@ config UBIFS_FS_ZLIB default y help Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. + +config UBIFS_ATIME_SUPPORT + bool "Access time support" if UBIFS_FS + depends on UBIFS_FS + default n + help + Originally UBIFS did not support atime, because it looked like a bad idea due + increased flash wear. This option adds atime support and it is disabled by default + to preserve the old behavior. If you enable this option, UBIFS starts updating atime, + which means that file-system read operations will cause writes (inode atime + updates). This may affect file-system performance and increase flash device wear, + so be careful. How often atime is updated depends on the selected strategy: + strictatime is the "heavy", relatime is "lighter", etc. + + If unsure, say 'N' diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4c46a9865fa7..595ca0debe11 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2573,7 +2573,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, { int err, failing; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; failing = power_cut_emulated(c, lnum, 1); @@ -2595,7 +2595,7 @@ int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 1)) return -EROFS; @@ -2611,7 +2611,7 @@ int dbg_leb_unmap(struct ubifs_info *c, int lnum) { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 0)) return -EROFS; @@ -2627,7 +2627,7 @@ int dbg_leb_map(struct ubifs_info *c, int lnum) { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 0)) return -EROFS; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c27c66c224a..e49bd2808bf3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -449,13 +449,14 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } out: + kfree(file->private_data); + file->private_data = NULL; + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; } - kfree(file->private_data); - file->private_data = NULL; /* 2 is a special value indicating that there are no more direntries */ ctx->pos = 2; return 0; @@ -787,9 +788,6 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); - if (!new_valid_dev(rdev)) - return -EINVAL; - if (S_ISBLK(mode) || S_ISCHR(mode)) { dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); if (!dev) @@ -1188,6 +1186,9 @@ const struct inode_operations ubifs_dir_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct file_operations ubifs_dir_operations = { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a3dfe2ae79f2..0edc12856147 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1354,6 +1354,47 @@ static inline int mctime_update_needed(const struct inode *inode, return 0; } +#ifdef CONFIG_UBIFS_ATIME_SUPPORT +/** + * ubifs_update_time - update time of inode. + * @inode: inode to update + * + * This function updates time of the inode. + */ +int ubifs_update_time(struct inode *inode, struct timespec *time, + int flags) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; + int iflags = I_DIRTY_TIME; + int err, release; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&ui->ui_mutex); + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + + if (!(inode->i_sb->s_flags & MS_LAZYTIME)) + iflags |= I_DIRTY_SYNC; + + release = ui->dirty; + __mark_inode_dirty(inode, iflags); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_budget(c, &req); + return 0; +} +#endif + /** * update_ctime - update mtime and ctime of an inode. * @inode: inode to update @@ -1537,6 +1578,9 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) if (err) return err; vma->vm_ops = &ubifs_file_vm_ops; +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + file_accessed(file); +#endif return 0; } @@ -1557,6 +1601,9 @@ const struct inode_operations ubifs_file_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct inode_operations ubifs_symlink_inode_operations = { @@ -1568,6 +1615,9 @@ const struct inode_operations ubifs_symlink_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct file_operations ubifs_file_operations = { diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index dc9f27e9d61b..9a517109da0f 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1498,11 +1498,10 @@ static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, } /* nnode is being committed, so copy it */ - n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + n = kmemdup(nnode, sizeof(struct ubifs_nnode), GFP_NOFS); if (unlikely(!n)) return ERR_PTR(-ENOMEM); - memcpy(n, nnode, sizeof(struct ubifs_nnode)); n->cnext = NULL; __set_bit(DIRTY_CNODE, &n->flags); __clear_bit(COW_CNODE, &n->flags); @@ -1549,11 +1548,10 @@ static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, } /* pnode is being committed, so copy it */ - p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + p = kmemdup(pnode, sizeof(struct ubifs_pnode), GFP_NOFS); if (unlikely(!p)) return ERR_PTR(-ENOMEM); - memcpy(p, pnode, sizeof(struct ubifs_pnode)); p->cnext = NULL; __set_bit(DIRTY_CNODE, &p->flags); __clear_bit(COW_CNODE, &p->flags); diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index ee7cb5ebb6e8..8ece6ca58c0b 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -155,13 +155,8 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) */ static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) { - if (new_valid_dev(rdev)) { - dev->new = cpu_to_le32(new_encode_dev(rdev)); - return sizeof(dev->new); - } else { - dev->huge = cpu_to_le64(huge_encode_dev(rdev)); - return sizeof(dev->huge); - } + dev->new = cpu_to_le32(new_encode_dev(rdev)); + return sizeof(dev->new); } /** diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 695fc71d5244..586d59347fff 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -789,7 +789,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ ubifs_err(c, "corruption %d", ret); - ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + ubifs_scan_a_node(c, buf, len, lnum, offs, 0); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; @@ -1331,8 +1331,7 @@ void ubifs_destroy_size_tree(struct ubifs_info *c) struct size_entry *e, *n; rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { - if (e->inode) - iput(e->inode); + iput(e->inode); kfree(e); } @@ -1533,8 +1532,7 @@ int ubifs_recover_size(struct ubifs_info *c) err = fix_size_in_place(c, e); if (err) return err; - if (e->inode) - iput(e->inode); + iput(e->inode); } } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 9547a27868ad..8ee3133dd8e4 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -128,7 +128,10 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) if (err) goto out_ino; - inode->i_flags |= (S_NOCMTIME | S_NOATIME); + inode->i_flags |= S_NOCMTIME; +#ifndef CONFIG_UBIFS_ATIME_SUPPORT + inode->i_flags |= S_NOATIME; +#endif set_nlink(inode, le32_to_cpu(ino->nlink)); i_uid_write(inode, le32_to_cpu(ino->uid)); i_gid_write(inode, le32_to_cpu(ino->gid)); @@ -2139,7 +2142,12 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, if (err) goto out_deact; /* We do not support atime */ - sb->s_flags |= MS_ACTIVE | MS_NOATIME; + sb->s_flags |= MS_ACTIVE; +#ifndef CONFIG_UBIFS_ATIME_SUPPORT + sb->s_flags |= MS_NOATIME; +#else + ubifs_msg(c, "full atime support is enabled."); +#endif } /* 'fill_super()' opens ubi again so we must close it here */ diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 957f5757f374..fa9a20cc60d6 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -198,11 +198,10 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, { struct ubifs_znode *zn; - zn = kmalloc(c->max_znode_sz, GFP_NOFS); + zn = kmemdup(znode, c->max_znode_sz, GFP_NOFS); if (unlikely(!zn)) return ERR_PTR(-ENOMEM); - memcpy(zn, znode, c->max_znode_sz); zn->cnext = NULL; __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index de759022f3d6..01142e129d16 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -858,9 +858,9 @@ struct ubifs_compressor { * @mod_dent: non-zero if the operation removes or modifies an existing * directory entry * @new_ino: non-zero if the operation adds a new inode - * @new_ino_d: now much data newly created inode contains + * @new_ino_d: how much data newly created inode contains * @dirtied_ino: how many inodes the operation makes dirty - * @dirtied_ino_d: now much data dirtied inode contains + * @dirtied_ino_d: how much data dirtied inode contains * @idx_growth: how much the index will supposedly grow * @data_growth: how much new data the operation will supposedly add * @dd_growth: how much data that makes other data dirty the operation will @@ -1746,6 +1746,9 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); +#ifdef CONFIG_UBIFS_ATIME_SUPPORT +int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); +#endif /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index fd65b3f1923c..259fbabf143d 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -200,6 +200,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, int err; struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); + void *buf = NULL; struct ubifs_budget_req req = { .dirtied_ino = 2, .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; @@ -208,14 +209,17 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, if (err) return err; - kfree(ui->data); - ui->data = kmemdup(value, size, GFP_NOFS); - if (!ui->data) { + buf = kmemdup(value, size, GFP_NOFS); + if (!buf) { err = -ENOMEM; goto out_free; } + mutex_lock(&ui->ui_mutex); + kfree(ui->data); + ui->data = buf; inode->i_size = ui->ui_size = size; ui->data_len = size; + mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); host->i_ctime = ubifs_current_time(host); @@ -409,6 +413,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, ubifs_assert(inode->i_size == ui->data_len); ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); + mutex_lock(&ui->ui_mutex); if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { @@ -423,6 +428,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, err = ui->data_len; out_iput: + mutex_unlock(&ui->ui_mutex); iput(inode); out_unlock: kfree(xent); |