Diffstat (limited to 'fs')
69 files changed, 1886 insertions, 1210 deletions
@@ -1297,20 +1297,10 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr, static long read_events(struct kioctx *ctx, long min_nr, long nr, struct io_event __user *event, - struct timespec __user *timeout) + ktime_t until) { - ktime_t until = KTIME_MAX; long ret = 0; - if (timeout) { - struct timespec ts; - - if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) - return -EFAULT; - - until = timespec_to_ktime(ts); - } - /* * Note that aio_read_events() is being called as the conditional - i.e. * we're calling it after prepare_to_wait() has set task state to @@ -1826,6 +1816,25 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return ret; } +static long do_io_getevents(aio_context_t ctx_id, + long min_nr, + long nr, + struct io_event __user *events, + struct timespec64 *ts) +{ + ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX; + struct kioctx *ioctx = lookup_ioctx(ctx_id); + long ret = -EINVAL; + + if (likely(ioctx)) { + if (likely(min_nr <= nr && min_nr >= 0)) + ret = read_events(ioctx, min_nr, nr, events, until); + percpu_ref_put(&ioctx->users); + } + + return ret; +} + /* io_getevents: * Attempts to read at least min_nr events and up to nr events from * the completion queue for the aio_context specified by ctx_id. If @@ -1844,15 +1853,14 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, struct io_event __user *, events, struct timespec __user *, timeout) { - struct kioctx *ioctx = lookup_ioctx(ctx_id); - long ret = -EINVAL; + struct timespec64 ts; - if (likely(ioctx)) { - if (likely(min_nr <= nr && min_nr >= 0)) - ret = read_events(ioctx, min_nr, nr, events, timeout); - percpu_ref_put(&ioctx->users); + if (timeout) { + if (unlikely(get_timespec64(&ts, timeout))) + return -EFAULT; } - return ret; + + return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); } #ifdef CONFIG_COMPAT @@ -1862,17 +1870,14 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, struct io_event __user *, events, struct compat_timespec __user *, timeout) { - struct timespec t; - struct timespec __user *ut = NULL; + struct timespec64 t; if (timeout) { - if (compat_get_timespec(&t, timeout)) + if (compat_get_timespec64(&t, timeout)) return -EFAULT; - ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t))) - return -EFAULT; } - return sys_io_getevents(ctx_id, min_nr, nr, events, ut); + + return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&t : NULL); } #endif diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 5429b035e249..429326b6e2e7 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1498,7 +1498,9 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm) struct vm_area_struct *vma; for (vma = current->mm->mmap; vma; vma = vma->vm_next) { +#ifdef CONFIG_MMU unsigned long addr; +#endif if (!maydump(vma, cprm->mm_flags)) continue; diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index a37f003530d7..1175a1722411 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -447,8 +447,7 @@ int venus_fsync(struct super_block *sb, struct CodaFid *fid) UPARG(CODA_FSYNC); inp->coda_fsync.VFid = *fid; - error = coda_upcall(coda_vcp(sb), sizeof(union inputArgs), - &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index bd5d91e119ca..5fc5dc660600 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -54,8 +54,6 @@ #include <linux/if_tun.h> #include <linux/ctype.h> #include <linux/syscalls.h> -#include <linux/i2c.h> -#include <linux/i2c-dev.h> #include <linux/atalk.h> #include <linux/gfp.h> #include <linux/cec.h> @@ -137,22 +135,6 @@ static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return vfs_ioctl(file, cmd, arg); } -static int w_long(struct file *file, - unsigned int cmd, compat_ulong_t __user *argp) -{ - int err; - unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp)); - - if (valp == NULL) - return -EFAULT; - err = do_ioctl(file, cmd, (unsigned long)valp); - if (err) - return err; - if (convert_in_user(valp, argp)) - return -EFAULT; - return 0; -} - struct compat_video_event { int32_t type; compat_time_t timestamp; @@ -671,96 +653,6 @@ static int serial_struct_ioctl(struct file *file, return err; } -/* - * I2C layer ioctls - */ - -struct i2c_msg32 { - u16 addr; - u16 flags; - u16 len; - compat_caddr_t buf; -}; - -struct i2c_rdwr_ioctl_data32 { - compat_caddr_t msgs; /* struct i2c_msg __user *msgs */ - u32 nmsgs; -}; - -struct i2c_smbus_ioctl_data32 { - u8 read_write; - u8 command; - u32 size; - compat_caddr_t data; /* union i2c_smbus_data *data */ -}; - -struct i2c_rdwr_aligned { - struct i2c_rdwr_ioctl_data cmd; - struct i2c_msg msgs[0]; -}; - -static int do_i2c_rdwr_ioctl(struct file *file, - unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata) -{ - struct i2c_rdwr_aligned __user *tdata; - struct i2c_msg __user *tmsgs; - struct i2c_msg32 __user *umsgs; - compat_caddr_t datap; - u32 nmsgs; - int i; - - if (get_user(nmsgs, &udata->nmsgs)) - return -EFAULT; - if (nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) - return -EINVAL; - - if (get_user(datap, &udata->msgs)) - return -EFAULT; - umsgs = compat_ptr(datap); - - tdata = compat_alloc_user_space(sizeof(*tdata) + - nmsgs * sizeof(struct i2c_msg)); - tmsgs = &tdata->msgs[0]; - - if (put_user(nmsgs, &tdata->cmd.nmsgs) || - put_user(tmsgs, &tdata->cmd.msgs)) - return -EFAULT; - - for (i = 0; i < nmsgs; i++) { - if (copy_in_user(&tmsgs[i].addr, &umsgs[i].addr, 3*sizeof(u16))) - return -EFAULT; - if (get_user(datap, &umsgs[i].buf) || - put_user(compat_ptr(datap), &tmsgs[i].buf)) - return -EFAULT; - } - return do_ioctl(file, cmd, (unsigned long)tdata); -} - -static int do_i2c_smbus_ioctl(struct file *file, - unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata) -{ - struct i2c_smbus_ioctl_data __user *tdata; - union { - /* beginnings of those have identical layouts */ - struct 
i2c_smbus_ioctl_data32 data32; - struct i2c_smbus_ioctl_data data; - } v; - - tdata = compat_alloc_user_space(sizeof(*tdata)); - if (tdata == NULL) - return -ENOMEM; - - memset(&v, 0, sizeof(v)); - if (copy_from_user(&v.data32, udata, sizeof(v.data32))) - return -EFAULT; - v.data.data = compat_ptr(v.data32.data); - - if (copy_to_user(tdata, &v.data, sizeof(v.data))) - return -EFAULT; - - return do_ioctl(file, cmd, (unsigned long)tdata); -} - #define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) #define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t) #define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) @@ -1283,13 +1175,6 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER) COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO) COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM) COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE) -/* i2c */ -COMPATIBLE_IOCTL(I2C_SLAVE) -COMPATIBLE_IOCTL(I2C_SLAVE_FORCE) -COMPATIBLE_IOCTL(I2C_TENBIT) -COMPATIBLE_IOCTL(I2C_PEC) -COMPATIBLE_IOCTL(I2C_RETRIES) -COMPATIBLE_IOCTL(I2C_TIMEOUT) /* hiddev */ COMPATIBLE_IOCTL(HIDIOCGVERSION) COMPATIBLE_IOCTL(HIDIOCAPPLICATION) @@ -1464,13 +1349,6 @@ static long do_ioctl_trans(unsigned int cmd, case TIOCGSERIAL: case TIOCSSERIAL: return serial_struct_ioctl(file, cmd, argp); - /* i2c */ - case I2C_FUNCS: - return w_long(file, cmd, argp); - case I2C_RDWR: - return do_i2c_rdwr_ioctl(file, cmd, argp); - case I2C_SMBUS: - return do_i2c_smbus_ioctl(file, cmd, argp); /* Not implemented in the native kernel */ case RTC_IRQP_READ32: case RTC_IRQP_SET32: @@ -1580,6 +1458,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, case FICLONE: case FICLONERANGE: case FIDEDUPERANGE: + case FS_IOC_FIEMAP: goto do_ioctl; case FIBMAP: diff --git a/fs/coredump.c b/fs/coredump.c index 52c63d6c9143..1e2c87acac9b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -680,16 +680,11 @@ void do_coredump(const siginfo_t *siginfo) * privs and don't want to unlink another user's coredump. */ if (!need_suid_safe) { - mm_segment_t old_fs; - - old_fs = get_fs(); - set_fs(KERNEL_DS); /* * If it doesn't exist, that's fine. If there's some * other problem, we'll catch it at the filp_open(). */ - (void) sys_unlink((const char __user *)cn.corename); - set_fs(old_fs); + do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); } /* diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index 11b29d491b7c..f937082f3244 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -1,6 +1,5 @@ config CRAMFS - tristate "Compressed ROM file system support (cramfs) (OBSOLETE)" - depends on BLOCK + tristate "Compressed ROM file system support (cramfs)" select ZLIB_INFLATE help Saying Y here includes support for CramFs (Compressed ROM File @@ -16,7 +15,39 @@ config CRAMFS cramfs. Note that the root file system (the one containing the directory /) cannot be compiled as a module. - This filesystem is obsoleted by SquashFS, which is much better - in terms of performance and features. + This filesystem is limited in capabilities and performance on + purpose to remain small and low on RAM usage. It is most suitable + for small embedded systems. If you have ample RAM to spare, you may + consider a more capable compressed filesystem such as SquashFS + which is much better in terms of performance and features. + + If unsure, say N. + +config CRAMFS_BLOCKDEV + bool "Support CramFs image over a regular block device" if EXPERT + depends on CRAMFS && BLOCK + default y + help + This option allows the CramFs driver to load data from a regular + block device such as a disk partition or a ramdisk.
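The backend enabled by this option and by the CRAMFS_MTD option that follows are selected at mount time rather than by #ifdef: the driver tries a directly mapped MTD image first and falls back to the block layer, and IS_ENABLED() lets the compiler drop whichever path is configured out. For orientation while reading the hunks, this is the shape of the dispatch, condensed from the cramfs_mount() change in fs/cramfs/inode.c further down this diff:

static struct dentry *cramfs_mount(struct file_system_type *fs_type, int flags,
				   const char *dev_name, void *data)
{
	struct dentry *ret = ERR_PTR(-ENOPROTOOPT);

	/* Prefer a directly mapped MTD image when that backend is built in. */
	if (IS_ENABLED(CONFIG_CRAMFS_MTD)) {
		ret = mount_mtd(fs_type, flags, dev_name, data,
				cramfs_mtd_fill_super);
		if (!IS_ERR(ret))
			return ret;
	}
	/* Otherwise fall back to the regular block device backend. */
	if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) {
		ret = mount_bdev(fs_type, flags, dev_name, data,
				 cramfs_blkdev_fill_super);
	}
	return ret;
}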
+ +config CRAMFS_MTD + bool "Support CramFs image directly mapped in physical memory" + depends on CRAMFS && MTD + default y if !CRAMFS_BLOCKDEV + help + This option allows the CramFs driver to load data directly from + a linearly addressed memory range (usually non-volatile memory + like flash) instead of going through the block device layer. + This saves some memory since no intermediate buffering is + necessary. + + The location of the CramFs image is determined by an + MTD device capable of direct memory mapping e.g. from + the 'physmap' map driver or a resulting MTD partition. + For example, this would mount the cramfs image stored in + the MTD partition named "xip_fs" on the /mnt mountpoint: + + mount -t cramfs mtd:xip_fs /mnt If unsure, say N. diff --git a/fs/cramfs/README b/fs/cramfs/README index 9d4e7ea311f4..d71b27e0ff15 100644 --- a/fs/cramfs/README +++ b/fs/cramfs/README @@ -49,17 +49,46 @@ same as the start of the (i+1)'th <block> if there is one). The first <block> immediately follows the last <block_pointer> for the file. <block_pointer>s are each 32 bits long. +When the CRAMFS_FLAG_EXT_BLOCK_POINTERS capability bit is set, each +<block_pointer>'s top bits may contain special flags as follows: + +CRAMFS_BLK_FLAG_UNCOMPRESSED (bit 31): + The block data is not compressed and should be copied verbatim. + +CRAMFS_BLK_FLAG_DIRECT_PTR (bit 30): + The <block_pointer> stores the actual block start offset and not + its end, shifted right by 2 bits. The block must therefore be + aligned to a 4-byte boundary. The block size is blksize + if CRAMFS_BLK_FLAG_UNCOMPRESSED is also specified; otherwise + the compressed data length is included in the first 2 bytes of + the block data. This is used to allow discontiguous data layout + and specific data block alignments e.g. for XIP applications. + + The order of <file_data>'s is a depth-first descent of the directory tree, i.e. the same order as `find -size +0 \( -type f -o -type l \) -print'. <block>: The i'th <block> is the output of zlib's compress function -applied to the i'th blksize-sized chunk of the input data. +applied to the i'th blksize-sized chunk of the input data if the +corresponding CRAMFS_BLK_FLAG_UNCOMPRESSED <block_ptr> bit is not set, +otherwise it is the input data directly. (For the last <block> of the file, the input may of course be smaller.) Each <block> may be a different size. (See <block_pointer> above.) + <block>s are merely byte-aligned, not generally u32-aligned. +When CRAMFS_BLK_FLAG_DIRECT_PTR is specified then the corresponding +<block> may be located anywhere and not necessarily contiguous with +the previous/next blocks. In that case it is minimally u32-aligned. +If CRAMFS_BLK_FLAG_UNCOMPRESSED is also specified then the size is always +blksize except for the last block which is limited by the file length. +If CRAMFS_BLK_FLAG_DIRECT_PTR is set and CRAMFS_BLK_FLAG_UNCOMPRESSED +is not set then the first 2 bytes of the block contain the size of the +remaining block data as this cannot be determined from the placement of +logically adjacent blocks.
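To make the block-pointer rules above concrete, here is a small kernel-style sketch (not part of the patch: the helper name cramfs_blkptr_decode is invented for illustration, kernel types u32/bool are assumed, and the bit positions and shift come straight from the README text above) of how a reader decodes one extended <block_pointer>:

/* The two flag bits live in the top bits of each 32-bit block pointer. */
#define CRAMFS_BLK_FLAG_UNCOMPRESSED	(1U << 31)
#define CRAMFS_BLK_FLAG_DIRECT_PTR	(1U << 30)
#define CRAMFS_BLK_FLAGS		(CRAMFS_BLK_FLAG_UNCOMPRESSED | \
					 CRAMFS_BLK_FLAG_DIRECT_PTR)
#define CRAMFS_BLK_DIRECT_PTR_SHIFT	2

static u32 cramfs_blkptr_decode(u32 ptr, bool *uncompressed, bool *direct)
{
	*uncompressed = ptr & CRAMFS_BLK_FLAG_UNCOMPRESSED;
	*direct = ptr & CRAMFS_BLK_FLAG_DIRECT_PTR;
	ptr &= ~CRAMFS_BLK_FLAGS;

	if (*direct) {
		/*
		 * Direct pointer: the block's start offset, stored shifted
		 * right by 2, hence the 4-byte alignment requirement. If
		 * CRAMFS_BLK_FLAG_UNCOMPRESSED is not also set, the first
		 * 2 bytes at this offset hold the compressed data length.
		 */
		return ptr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
	}
	/*
	 * Traditional pointer: one past the end of this block; its start
	 * is the previous pointer's value, or the end of the pointer
	 * table for a file's first block.
	 */
	return ptr;
}

The same masking and shifting appears in the cramfs_get_block_range() and cramfs_readpage() hunks in fs/cramfs/inode.c below.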
+ Holes ----- diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 7919967488cb..9a2ab419ba62 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -15,10 +15,15 @@ #include <linux/module.h> #include <linux/fs.h> +#include <linux/file.h> #include <linux/pagemap.h> +#include <linux/pfn_t.h> +#include <linux/ramfs.h> #include <linux/init.h> #include <linux/string.h> #include <linux/blkdev.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/super.h> #include <linux/slab.h> #include <linux/vfs.h> #include <linux/mutex.h> @@ -36,6 +41,9 @@ struct cramfs_sb_info { unsigned long blocks; unsigned long files; unsigned long flags; + void *linear_virt_addr; + resource_size_t linear_phys_addr; + size_t mtd_point_size; }; static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb) @@ -46,6 +54,7 @@ static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb) static const struct super_operations cramfs_ops; static const struct inode_operations cramfs_dir_inode_operations; static const struct file_operations cramfs_directory_operations; +static const struct file_operations cramfs_physmem_fops; static const struct address_space_operations cramfs_aops; static DEFINE_MUTEX(read_mutex); @@ -93,6 +102,10 @@ static struct inode *get_cramfs_inode(struct super_block *sb, case S_IFREG: inode->i_fop = &generic_ro_fops; inode->i_data.a_ops = &cramfs_aops; + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && + CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS && + CRAMFS_SB(sb)->linear_phys_addr) + inode->i_fop = &cramfs_physmem_fops; break; case S_IFDIR: inode->i_op = &cramfs_dir_inode_operations; @@ -140,6 +153,9 @@ static struct inode *get_cramfs_inode(struct super_block *sb, * BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to * worry about end-of-buffer issues even when decompressing a full * page cache. + * + * Note: This is all optimized away at compile time when + * CONFIG_CRAMFS_BLOCKDEV=n. */ #define READ_BUFFERS (2) /* NEXT_BUFFER(): Loop over [0..(READ_BUFFERS-1)]. */ @@ -160,10 +176,10 @@ static struct super_block *buffer_dev[READ_BUFFERS]; static int next_buffer; /* - * Returns a pointer to a buffer containing at least LEN bytes of - * filesystem starting at byte offset OFFSET into the filesystem. + * Populate our block cache and return a pointer to it. */ -static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len) +static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, + unsigned int len) { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; struct page *pages[BLKS_PER_BUF]; @@ -239,11 +255,250 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i return read_buffers[buffer] + offset; } +/* + * Return a pointer to the linearly addressed cramfs image in memory. + */ +static void *cramfs_direct_read(struct super_block *sb, unsigned int offset, + unsigned int len) +{ + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); + + if (!len) + return NULL; + if (len > sbi->size || offset > sbi->size - len) + return page_address(ZERO_PAGE(0)); + return sbi->linear_virt_addr + offset; +} + +/* + * Returns a pointer to a buffer containing at least LEN bytes of + * filesystem starting at byte offset OFFSET into the filesystem. 
+ */ +static void *cramfs_read(struct super_block *sb, unsigned int offset, + unsigned int len) +{ + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); + + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sbi->linear_virt_addr) + return cramfs_direct_read(sb, offset, len); + else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) + return cramfs_blkdev_read(sb, offset, len); + else + return NULL; +} + +/* + * For a mapping to be possible, we need a range of uncompressed and + * contiguous blocks. Return the offset for the first block and number of + * valid blocks for which that is true, or zero otherwise. + */ +static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages) +{ + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); + int i; + u32 *blockptrs, first_block_addr; + + /* + * We can dereference memory directly here as this code may be + * reached only when there is a direct filesystem image mapping + * available in memory. + */ + blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4); + first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS; + i = 0; + do { + u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT); + u32 expect = (first_block_addr + block_off) | + CRAMFS_BLK_FLAG_DIRECT_PTR | + CRAMFS_BLK_FLAG_UNCOMPRESSED; + if (blockptrs[i] != expect) { + pr_debug("range: block %d/%d got %#x expects %#x\n", + pgoff+i, pgoff + *pages - 1, + blockptrs[i], expect); + if (i == 0) + return 0; + break; + } + } while (++i < *pages); + + *pages = i; + return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT; +} + +#ifdef CONFIG_MMU + +/* + * Return true if the last page of a file in the filesystem image contains + * some other data that doesn't belong to that file. It is assumed that the + * last block is CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED + * (verified by cramfs_get_block_range()) and directly accessible in memory. + */ +static bool cramfs_last_page_is_shared(struct inode *inode) +{ + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); + u32 partial, last_page, blockaddr, *blockptrs; + char *tail_data; + + partial = offset_in_page(inode->i_size); + if (!partial) + return false; + last_page = inode->i_size >> PAGE_SHIFT; + blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode)); + blockaddr = blockptrs[last_page] & ~CRAMFS_BLK_FLAGS; + blockaddr <<= CRAMFS_BLK_DIRECT_PTR_SHIFT; + tail_data = sbi->linear_virt_addr + blockaddr + partial; + return memchr_inv(tail_data, 0, PAGE_SIZE - partial) ? true : false; +} + +static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct inode *inode = file_inode(file); + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); + unsigned int pages, max_pages, offset; + unsigned long address, pgoff = vma->vm_pgoff; + char *bailout_reason; + int ret; + + ret = generic_file_readonly_mmap(file, vma); + if (ret) + return ret; + + /* + * Now try to pre-populate ptes for this vma with a direct + * mapping avoiding memory allocation when possible. + */ + + /* Could COW work here?
*/ + bailout_reason = "vma is writable"; + if (vma->vm_flags & VM_WRITE) + goto bailout; + + max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + bailout_reason = "beyond file limit"; + if (pgoff >= max_pages) + goto bailout; + pages = min(vma_pages(vma), max_pages - pgoff); + + offset = cramfs_get_block_range(inode, pgoff, &pages); + bailout_reason = "unsuitable block layout"; + if (!offset) + goto bailout; + address = sbi->linear_phys_addr + offset; + bailout_reason = "data is not page aligned"; + if (!PAGE_ALIGNED(address)) + goto bailout; + + /* Don't map the last page if it contains some other data */ + if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) { + pr_debug("mmap: %s: last page is shared\n", + file_dentry(file)->d_name.name); + pages--; + } + + if (!pages) { + bailout_reason = "no suitable block remaining"; + goto bailout; + } + + if (pages == vma_pages(vma)) { + /* + * The entire vma is mappable. remap_pfn_range() will + * make it distinguishable from a non-direct mapping + * in /proc/<pid>/maps by substituting the file offset + * with the actual physical address. + */ + ret = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, + pages * PAGE_SIZE, vma->vm_page_prot); + } else { + /* + * Let's create a mixed map if we can't map it all. + * The normal paging machinery will take care of the + * unpopulated ptes via cramfs_readpage(). + */ + int i; + vma->vm_flags |= VM_MIXEDMAP; + for (i = 0; i < pages && !ret; i++) { + unsigned long off = i * PAGE_SIZE; + pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV); + ret = vm_insert_mixed(vma, vma->vm_start + off, pfn); + } + } + + if (!ret) + pr_debug("mapped %s[%lu] at 0x%08lx (%u/%lu pages) " + "to vma 0x%08lx, page_prot 0x%llx\n", + file_dentry(file)->d_name.name, pgoff, + address, pages, vma_pages(vma), vma->vm_start, + (unsigned long long)pgprot_val(vma->vm_page_prot)); + return ret; + +bailout: + pr_debug("%s[%lu]: direct mmap impossible: %s\n", + file_dentry(file)->d_name.name, pgoff, bailout_reason); + /* Didn't manage any direct map, but normal paging is still possible */ + return 0; +} + +#else /* CONFIG_MMU */ + +static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 
0 : -ENOSYS; +} + +static unsigned long cramfs_physmem_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct inode *inode = file_inode(file); + struct super_block *sb = inode->i_sb; + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); + unsigned int pages, block_pages, max_pages, offset; + + pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (pgoff >= max_pages || pages > max_pages - pgoff) + return -EINVAL; + block_pages = pages; + offset = cramfs_get_block_range(inode, pgoff, &block_pages); + if (!offset || block_pages != pages) + return -ENOSYS; + addr = sbi->linear_phys_addr + offset; + pr_debug("get_unmapped for %s ofs %#lx siz %lu at 0x%08lx\n", + file_dentry(file)->d_name.name, pgoff*PAGE_SIZE, len, addr); + return addr; +} + +static unsigned int cramfs_physmem_mmap_capabilities(struct file *file) +{ + return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | + NOMMU_MAP_READ | NOMMU_MAP_EXEC; +} + +#endif /* CONFIG_MMU */ + +static const struct file_operations cramfs_physmem_fops = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .splice_read = generic_file_splice_read, + .mmap = cramfs_physmem_mmap, +#ifndef CONFIG_MMU + .get_unmapped_area = cramfs_physmem_get_unmapped_area, + .mmap_capabilities = cramfs_physmem_mmap_capabilities, +#endif +}; + static void cramfs_kill_sb(struct super_block *sb) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); - kill_block_super(sb); + if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) { + if (sbi && sbi->mtd_point_size) + mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); + kill_mtd_super(sb); + } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { + kill_block_super(sb); + } kfree(sbi); } @@ -254,34 +509,24 @@ static int cramfs_remount(struct super_block *sb, int *flags, char *data) return 0; } -static int cramfs_fill_super(struct super_block *sb, void *data, int silent) +static int cramfs_read_super(struct super_block *sb, + struct cramfs_super *super, int silent) { - int i; - struct cramfs_super super; + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); unsigned long root_offset; - struct cramfs_sb_info *sbi; - struct inode *root; - sb->s_flags |= MS_RDONLY; - - sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - sb->s_fs_info = sbi; - - /* Invalidate the read buffers on mount: think disk change.. 
*/ - mutex_lock(&read_mutex); - for (i = 0; i < READ_BUFFERS; i++) - buffer_blocknr[i] = -1; + /* We don't know the real size yet */ + sbi->size = PAGE_SIZE; /* Read the first block and get the superblock from it */ - memcpy(&super, cramfs_read(sb, 0, sizeof(super)), sizeof(super)); + mutex_lock(&read_mutex); + memcpy(super, cramfs_read(sb, 0, sizeof(*super)), sizeof(*super)); mutex_unlock(&read_mutex); /* Do sanity checks on the superblock */ - if (super.magic != CRAMFS_MAGIC) { + if (super->magic != CRAMFS_MAGIC) { /* check for wrong endianness */ - if (super.magic == CRAMFS_MAGIC_WEND) { + if (super->magic == CRAMFS_MAGIC_WEND) { if (!silent) pr_err("wrong endianness\n"); return -EINVAL; @@ -289,10 +534,12 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) /* check at 512 byte offset */ mutex_lock(&read_mutex); - memcpy(&super, cramfs_read(sb, 512, sizeof(super)), sizeof(super)); + memcpy(super, + cramfs_read(sb, 512, sizeof(*super)), + sizeof(*super)); mutex_unlock(&read_mutex); - if (super.magic != CRAMFS_MAGIC) { - if (super.magic == CRAMFS_MAGIC_WEND && !silent) + if (super->magic != CRAMFS_MAGIC) { + if (super->magic == CRAMFS_MAGIC_WEND && !silent) pr_err("wrong endianness\n"); else if (!silent) pr_err("wrong magic\n"); @@ -301,34 +548,34 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) } /* get feature flags first */ - if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) { + if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) { pr_err("unsupported filesystem features\n"); return -EINVAL; } /* Check that the root inode is in a sane state */ - if (!S_ISDIR(super.root.mode)) { + if (!S_ISDIR(super->root.mode)) { pr_err("root is not a directory\n"); return -EINVAL; } /* correct strange, hard-coded permissions of mkcramfs */ - super.root.mode |= (S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); + super->root.mode |= 0555; - root_offset = super.root.offset << 2; - if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) { - sbi->size = super.size; - sbi->blocks = super.fsid.blocks; - sbi->files = super.fsid.files; + root_offset = super->root.offset << 2; + if (super->flags & CRAMFS_FLAG_FSID_VERSION_2) { + sbi->size = super->size; + sbi->blocks = super->fsid.blocks; + sbi->files = super->fsid.files; } else { sbi->size = 1<<28; sbi->blocks = 0; sbi->files = 0; } - sbi->magic = super.magic; - sbi->flags = super.flags; + sbi->magic = super->magic; + sbi->flags = super->flags; if (root_offset == 0) pr_info("empty filesystem"); - else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && + else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && ((root_offset != sizeof(struct cramfs_super)) && (root_offset != 512 + sizeof(struct cramfs_super)))) { @@ -336,9 +583,18 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) return -EINVAL; } + return 0; +} + +static int cramfs_finalize_super(struct super_block *sb, + struct cramfs_inode *cramfs_root) +{ + struct inode *root; + /* Set it all up.. 
*/ + sb->s_flags |= MS_RDONLY; sb->s_op = &cramfs_ops; - root = get_cramfs_inode(sb, &super.root, 0); + root = get_cramfs_inode(sb, cramfs_root, 0); if (IS_ERR(root)) return PTR_ERR(root); sb->s_root = d_make_root(root); @@ -347,10 +603,79 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } +static int cramfs_blkdev_fill_super(struct super_block *sb, void *data, + int silent) +{ + struct cramfs_sb_info *sbi; + struct cramfs_super super; + int i, err; + + sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + sb->s_fs_info = sbi; + + /* Invalidate the read buffers on mount: think disk change.. */ + for (i = 0; i < READ_BUFFERS; i++) + buffer_blocknr[i] = -1; + + err = cramfs_read_super(sb, &super, silent); + if (err) + return err; + return cramfs_finalize_super(sb, &super.root); +} + +static int cramfs_mtd_fill_super(struct super_block *sb, void *data, + int silent) +{ + struct cramfs_sb_info *sbi; + struct cramfs_super super; + int err; + + sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + sb->s_fs_info = sbi; + + /* Map only one page for now. Will remap it when fs size is known. */ + err = mtd_point(sb->s_mtd, 0, PAGE_SIZE, &sbi->mtd_point_size, + &sbi->linear_virt_addr, &sbi->linear_phys_addr); + if (err || sbi->mtd_point_size != PAGE_SIZE) { + pr_err("unable to get direct memory access to mtd:%s\n", + sb->s_mtd->name); + return err ? : -ENODATA; + } + + pr_info("checking physical address %pap for linear cramfs image\n", + &sbi->linear_phys_addr); + err = cramfs_read_super(sb, &super, silent); + if (err) + return err; + + /* Remap the whole filesystem now */ + pr_info("linear cramfs image on mtd:%s appears to be %lu KB in size\n", + sb->s_mtd->name, sbi->size/1024); + mtd_unpoint(sb->s_mtd, 0, PAGE_SIZE); + err = mtd_point(sb->s_mtd, 0, sbi->size, &sbi->mtd_point_size, + &sbi->linear_virt_addr, &sbi->linear_phys_addr); + if (err || sbi->mtd_point_size != sbi->size) { + pr_err("unable to get direct memory access to mtd:%s\n", + sb->s_mtd->name); + return err ? : -ENODATA; + } + + return cramfs_finalize_super(sb, &super.root); +} + static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + u64 id = 0; + + if (sb->s_bdev) + id = huge_encode_dev(sb->s_bdev->bd_dev); + else if (sb->s_dev) + id = huge_encode_dev(sb->s_dev); buf->f_type = CRAMFS_MAGIC; buf->f_bsize = PAGE_SIZE; @@ -502,34 +827,86 @@ static int cramfs_readpage(struct file *file, struct page *page) if (page->index < maxblock) { struct super_block *sb = inode->i_sb; - u32 blkptr_offset = OFFSET(inode) + page->index*4; - u32 start_offset, compr_len; + u32 blkptr_offset = OFFSET(inode) + page->index * 4; + u32 block_ptr, block_start, block_len; + bool uncompressed, direct; - start_offset = OFFSET(inode) + maxblock*4; mutex_lock(&read_mutex); - if (page->index) - start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4, - 4); - compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) - - start_offset); - mutex_unlock(&read_mutex); + block_ptr = *(u32 *) cramfs_read(sb, blkptr_offset, 4); + uncompressed = (block_ptr & CRAMFS_BLK_FLAG_UNCOMPRESSED); + direct = (block_ptr & CRAMFS_BLK_FLAG_DIRECT_PTR); + block_ptr &= ~CRAMFS_BLK_FLAGS; + + if (direct) { + /* + * The block pointer is an absolute start pointer, + * shifted by 2 bits. 
The size is included in the + * first 2 bytes of the data block when compressed, + * or PAGE_SIZE otherwise. + */ + block_start = block_ptr << CRAMFS_BLK_DIRECT_PTR_SHIFT; + if (uncompressed) { + block_len = PAGE_SIZE; + /* if last block: cap to file length */ + if (page->index == maxblock - 1) + block_len = + offset_in_page(inode->i_size); + } else { + block_len = *(u16 *) + cramfs_read(sb, block_start, 2); + block_start += 2; + } + } else { + /* + * The block pointer indicates one past the end of + * the current block (start of next block). If this + * is the first block then it starts where the block + * pointer table ends, otherwise its start comes + * from the previous block's pointer. + */ + block_start = OFFSET(inode) + maxblock * 4; + if (page->index) + block_start = *(u32 *) + cramfs_read(sb, blkptr_offset - 4, 4); + /* Beware... previous ptr might be a direct ptr */ + if (unlikely(block_start & CRAMFS_BLK_FLAG_DIRECT_PTR)) { + /* See comments on earlier code. */ + u32 prev_start = block_start; + block_start = prev_start & ~CRAMFS_BLK_FLAGS; + block_start <<= CRAMFS_BLK_DIRECT_PTR_SHIFT; + if (prev_start & CRAMFS_BLK_FLAG_UNCOMPRESSED) { + block_start += PAGE_SIZE; + } else { + block_len = *(u16 *) + cramfs_read(sb, block_start, 2); + block_start += 2 + block_len; + } + } + block_start &= ~CRAMFS_BLK_FLAGS; + block_len = block_ptr - block_start; + } - if (compr_len == 0) + if (block_len == 0) ; /* hole */ - else if (unlikely(compr_len > (PAGE_SIZE << 1))) { - pr_err("bad compressed blocksize %u\n", - compr_len); + else if (unlikely(block_len > 2*PAGE_SIZE || + (uncompressed && block_len > PAGE_SIZE))) { + mutex_unlock(&read_mutex); + pr_err("bad data blocksize %u\n", block_len); goto err; + } else if (uncompressed) { + memcpy(pgdata, + cramfs_read(sb, block_start, block_len), + block_len); + bytes_filled = block_len; } else { - mutex_lock(&read_mutex); bytes_filled = cramfs_uncompress_block(pgdata, PAGE_SIZE, - cramfs_read(sb, start_offset, compr_len), - compr_len); - mutex_unlock(&read_mutex); - if (unlikely(bytes_filled < 0)) - goto err; + cramfs_read(sb, block_start, block_len), + block_len); } + mutex_unlock(&read_mutex); + if (unlikely(bytes_filled < 0)) + goto err; } memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled); @@ -573,10 +950,22 @@ static const struct super_operations cramfs_ops = { .statfs = cramfs_statfs, }; -static struct dentry *cramfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static struct dentry *cramfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super); + struct dentry *ret = ERR_PTR(-ENOPROTOOPT); + + if (IS_ENABLED(CONFIG_CRAMFS_MTD)) { + ret = mount_mtd(fs_type, flags, dev_name, data, + cramfs_mtd_fill_super); + if (!IS_ERR(ret)) + return ret; + } + if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) { + ret = mount_bdev(fs_type, flags, dev_name, data, + cramfs_blkdev_fill_super); + } + return ret; } static struct file_system_type cramfs_fs_type = { diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index e5e29f8c920b..846ca150d52e 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -36,27 +36,13 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <asm/unaligned.h> +#include <linux/kernel.h> #include "ecryptfs_kernel.h" #define DECRYPT 0 #define ENCRYPT 1 /** - * ecryptfs_to_hex - * @dst: Buffer to take hex character representation of contents of - * src; must be at least of 
size (src_size * 2) - * @src: Buffer to be converted to a hex string representation - * @src_size: number of bytes to convert - */ -void ecryptfs_to_hex(char *dst, char *src, size_t src_size) -{ - int x; - - for (x = 0; x < src_size; x++) - sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]); -} - -/** * ecryptfs_from_hex * @dst: Buffer to take the bytes from src hex; must be at least of * size (src_size / 2) @@ -899,8 +885,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat, u32 flags; flags = get_unaligned_be32(page_virt); - for (i = 0; i < ((sizeof(ecryptfs_flag_map) - / sizeof(struct ecryptfs_flag_map_elem))); i++) + for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++) if (flags & ecryptfs_flag_map[i].file_flag) { crypt_stat->flags |= ecryptfs_flag_map[i].local_flag; } else @@ -937,8 +922,7 @@ void ecryptfs_write_crypt_stat_flags(char *page_virt, u32 flags = 0; int i; - for (i = 0; i < ((sizeof(ecryptfs_flag_map) - / sizeof(struct ecryptfs_flag_map_elem))); i++) + for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++) if (crypt_stat->flags & ecryptfs_flag_map[i].local_flag) flags |= ecryptfs_flag_map[i].file_flag; /* Version is in top 8 bits of the 32-bit flag vector */ @@ -1434,8 +1418,6 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER); if (!page_virt) { rc = -ENOMEM; - printk(KERN_ERR "%s: Unable to allocate page_virt\n", - __func__); goto out; } rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, @@ -1522,9 +1504,6 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, filename->encrypted_filename = kmalloc(filename->encrypted_filename_size, GFP_KERNEL); if (!filename->encrypted_filename) { - printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kmalloc [%zd] bytes\n", __func__, - filename->encrypted_filename_size); rc = -ENOMEM; goto out; } @@ -1669,12 +1648,10 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, BUG_ON(!mutex_is_locked(&key_tfm_list_mutex)); tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL); - if (key_tfm != NULL) + if (key_tfm) (*key_tfm) = tmp_tfm; if (!tmp_tfm) { rc = -ENOMEM; - printk(KERN_ERR "Error attempting to allocate from " - "ecryptfs_key_tfm_cache\n"); goto out; } mutex_init(&tmp_tfm->key_tfm_mutex); @@ -1690,7 +1667,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, "cipher with name = [%s]; rc = [%d]\n", tmp_tfm->cipher_name, rc); kmem_cache_free(ecryptfs_key_tfm_cache, tmp_tfm); - if (key_tfm != NULL) + if (key_tfm) (*key_tfm) = NULL; goto out; } @@ -1881,7 +1858,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, size_t src_byte_offset = 0; size_t dst_byte_offset = 0; - if (dst == NULL) { + if (!dst) { (*dst_size) = ecryptfs_max_decoded_size(src_size); goto out; } @@ -1949,9 +1926,6 @@ int ecryptfs_encrypt_and_encode_filename( filename = kzalloc(sizeof(*filename), GFP_KERNEL); if (!filename) { - printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kzalloc [%zd] bytes\n", __func__, - sizeof(*filename)); rc = -ENOMEM; goto out; } @@ -1980,9 +1954,6 @@ int ecryptfs_encrypt_and_encode_filename( + encoded_name_no_prefix_size); (*encoded_name) = kmalloc((*encoded_name_size) + 1, GFP_KERNEL); if (!(*encoded_name)) { - printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kzalloc [%zd] bytes\n", __func__, - (*encoded_name_size)); rc = -ENOMEM; kfree(filename->encrypted_filename); kfree(filename); @@ -2064,9 +2035,6 @@ int 
ecryptfs_decode_and_decrypt_filename(char **plaintext_name, name, name_size); decoded_name = kmalloc(decoded_name_size, GFP_KERNEL); if (!decoded_name) { - printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kmalloc [%zd] bytes\n", __func__, - decoded_name_size); rc = -ENOMEM; goto out; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 3fbc0ff79699..e74cb2a0b299 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -31,6 +31,7 @@ #include <crypto/skcipher.h> #include <keys/user-type.h> #include <keys/encrypted-type.h> +#include <linux/kernel.h> #include <linux/fs.h> #include <linux/fs_stack.h> #include <linux/namei.h> @@ -51,7 +52,13 @@ #define ECRYPTFS_XATTR_NAME "user.ecryptfs" void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok); -extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size); +static inline void +ecryptfs_to_hex(char *dst, char *src, size_t src_size) +{ + char *end = bin2hex(dst, src, src_size); + *end = '\0'; +} + extern void ecryptfs_from_hex(char *dst, char *src, int dst_size); struct ecryptfs_key_record { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index efc2db42d175..847904aa63a9 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -64,7 +64,6 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque) /* i_size will be overwritten for encrypted regular files */ fsstack_copy_inode_size(inode, lower_inode); inode->i_ino = lower_inode->i_ino; - inode->i_version++; inode->i_mapping->a_ops = &ecryptfs_aops; if (S_ISLNK(inode->i_mode)) @@ -334,9 +333,6 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); if (!dentry_info) { - printk(KERN_ERR "%s: Out of memory whilst attempting " - "to allocate ecryptfs_dentry_info struct\n", - __func__); dput(lower_dentry); return ERR_PTR(-ENOMEM); } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa218cd64f74..c89a58cfc991 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -639,11 +639,9 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, int rc = 0; s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) { - printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " - "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); + if (!s) return -ENOMEM; - } + (*packet_size) = 0; rc = ecryptfs_find_auth_tok_for_sig( &auth_tok_key, @@ -687,7 +685,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, * separator, and then the filename */ s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE + s->block_aligned_filename_size); - if (dest == NULL) { + if (!dest) { (*packet_size) = s->max_packet_size; goto out_unlock; } @@ -714,9 +712,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, s->block_aligned_filename = kzalloc(s->block_aligned_filename_size, GFP_KERNEL); if (!s->block_aligned_filename) { - printk(KERN_ERR "%s: Out of kernel memory whilst attempting to " - "kzalloc [%zd] bytes\n", __func__, - s->block_aligned_filename_size); rc = -ENOMEM; goto out_unlock; } @@ -769,10 +764,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, s->hash_desc = kmalloc(sizeof(*s->hash_desc) + crypto_shash_descsize(s->hash_tfm), GFP_KERNEL); if (!s->hash_desc) { - printk(KERN_ERR "%s: Out of kernel memory whilst attempting to " - "kmalloc [%zd] bytes\n", __func__, - sizeof(*s->hash_desc) + - crypto_shash_descsize(s->hash_tfm)); rc = -ENOMEM; goto 
out_release_free_unlock; } @@ -925,11 +916,9 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, (*filename_size) = 0; (*filename) = NULL; s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) { - printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " - "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); + if (!s) return -ENOMEM; - } + if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) { printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be " "at least [%d]\n", __func__, max_packet_size, @@ -1015,9 +1004,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, s->decrypted_filename = kmalloc(s->block_aligned_filename_size, GFP_KERNEL); if (!s->decrypted_filename) { - printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc [%zd] bytes\n", __func__, - s->block_aligned_filename_size); rc = -ENOMEM; goto out_unlock; } @@ -1097,9 +1083,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, } (*filename) = kmalloc(((*filename_size) + 1), GFP_KERNEL); if (!(*filename)) { - printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc [%zd] bytes\n", __func__, - ((*filename_size) + 1)); rc = -ENOMEM; goto out_free_unlock; } @@ -1333,7 +1316,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, if ((*new_auth_tok)->session_key.encrypted_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { printk(KERN_WARNING "Tag 1 packet contains key larger " - "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); + "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n"); rc = -EINVAL; goto out; } @@ -2525,11 +2508,9 @@ int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig) struct ecryptfs_key_sig *new_key_sig; new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL); - if (!new_key_sig) { - printk(KERN_ERR - "Error allocating from ecryptfs_key_sig_cache\n"); + if (!new_key_sig) return -ENOMEM; - } + memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); new_key_sig->keysig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; /* Caller must hold keysig_list_mutex */ @@ -2545,16 +2526,12 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig, u32 global_auth_tok_flags) { struct ecryptfs_global_auth_tok *new_auth_tok; - int rc = 0; new_auth_tok = kmem_cache_zalloc(ecryptfs_global_auth_tok_cache, GFP_KERNEL); - if (!new_auth_tok) { - rc = -ENOMEM; - printk(KERN_ERR "Error allocating from " - "ecryptfs_global_auth_tok_cache\n"); - goto out; - } + if (!new_auth_tok) + return -ENOMEM; + memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX); new_auth_tok->flags = global_auth_tok_flags; new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; @@ -2562,7 +2539,6 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, list_add(&new_auth_tok->mount_crypt_stat_list, &mount_crypt_stat->global_auth_tok_list); mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); -out: - return rc; + return 0; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 25aeaa7328ba..f2677c90d96e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -426,7 +426,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, mount_crypt_stat->global_default_cipher_key_size); if (!cipher_code) { ecryptfs_printk(KERN_ERR, - "eCryptfs doesn't support cipher: %s", + "eCryptfs doesn't support cipher: %s\n", mount_crypt_stat->global_default_cipher_name); rc = -EINVAL; goto out; @@ -781,7 +781,7 @@ static struct attribute *attributes[] = { NULL, }; -static struct attribute_group 
attr_group = { +static const struct attribute_group attr_group = { .attrs = attributes, }; diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 286f10b0363b..9fdd5bcf4564 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -147,8 +147,6 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file) (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL); if (!(*daemon)) { rc = -ENOMEM; - printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " - "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); goto out; } (*daemon)->file = file; @@ -250,8 +248,6 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon, msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL); if (!msg_ctx->msg) { rc = -ENOMEM; - printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " - "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; @@ -386,7 +382,6 @@ int __init ecryptfs_init_messaging(void) GFP_KERNEL); if (!ecryptfs_daemon_hash) { rc = -ENOMEM; - printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); mutex_unlock(&ecryptfs_daemon_hash_mux); goto out; } @@ -398,7 +393,6 @@ int __init ecryptfs_init_messaging(void) GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { rc = -ENOMEM; - printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; } mutex_init(&ecryptfs_msg_ctx_lists_mux); @@ -442,15 +436,16 @@ void ecryptfs_release_messaging(void) } if (ecryptfs_daemon_hash) { struct ecryptfs_daemon *daemon; + struct hlist_node *n; int i; mutex_lock(&ecryptfs_daemon_hash_mux); for (i = 0; i < (1 << ecryptfs_hash_bits); i++) { int rc; - hlist_for_each_entry(daemon, - &ecryptfs_daemon_hash[i], - euid_chain) { + hlist_for_each_entry_safe(daemon, n, + &ecryptfs_daemon_hash[i], + euid_chain) { rc = ecryptfs_exorcise_daemon(daemon); if (rc) printk(KERN_ERR "%s: Error whilst " diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index e4141f257495..f09cacaf8c80 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -163,12 +163,8 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, struct ecryptfs_message *msg; msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); - if (!msg) { - printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kmalloc(%zd, GFP_KERNEL)\n", __func__, - (sizeof(*msg) + data_size)); + if (!msg) return -ENOMEM; - } mutex_lock(&msg_ctx->mux); msg_ctx->msg = msg; @@ -383,7 +379,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, goto memdup; } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { printk(KERN_WARNING "%s: Acceptable packet size range is " - "[%d-%zu], but amount of data written is [%zu].", + "[%d-%zu], but amount of data written is [%zu].\n", __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); return -EINVAL; } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 1f0c471b4ba3..cdf358b209d9 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -431,8 +431,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) } xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL); if (!xattr_virt) { - printk(KERN_ERR "Out of memory whilst attempting to write " - "inode size to xattr\n"); rc = -ENOMEM; goto out; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1e048144f17c..afd548ebc328 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2239,7 +2239,6 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, compat_size_t, sigsetsize) { long err; - compat_sigset_t csigmask; sigset_t ksigmask, sigsaved; /* 
@@ -2249,9 +2248,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, if (sigmask) { if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; - if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) + if (get_compat_sigset(&ksigmask, sigmask)) return -EFAULT; - sigset_from_compat(&ksigmask, &csigmask); sigsaved = current->blocked; set_current_blocked(&ksigmask); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b7558f292420..1eec25014f62 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -592,6 +592,44 @@ static int ext4_ioc_getfsmap(struct super_block *sb, return 0; } +static long ext4_ioctl_group_add(struct file *file, + struct ext4_new_group_data *input) +{ + struct super_block *sb = file_inode(file)->i_sb; + int err, err2=0; + + err = ext4_resize_begin(sb); + if (err) + return err; + + if (ext4_has_feature_bigalloc(sb)) { + ext4_msg(sb, KERN_ERR, + "Online resizing not supported with bigalloc"); + err = -EOPNOTSUPP; + goto group_add_out; + } + + err = mnt_want_write_file(file); + if (err) + goto group_add_out; + + err = ext4_group_add(sb, input); + if (EXT4_SB(sb)->s_journal) { + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } + if (err == 0) + err = err2; + mnt_drop_write_file(file); + if (!err && ext4_has_group_desc_csum(sb) && + test_opt(sb, INIT_INODE_TABLE)) + err = ext4_register_li_request(sb, input->group); +group_add_out: + ext4_resize_end(sb); + return err; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -776,44 +814,12 @@ mext_out: case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; - int err, err2=0; - - err = ext4_resize_begin(sb); - if (err) - return err; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, - sizeof(input))) { - err = -EFAULT; - goto group_add_out; - } - - if (ext4_has_feature_bigalloc(sb)) { - ext4_msg(sb, KERN_ERR, - "Online resizing not supported with bigalloc"); - err = -EOPNOTSUPP; - goto group_add_out; - } - - err = mnt_want_write_file(filp); - if (err) - goto group_add_out; + sizeof(input))) + return -EFAULT; - err = ext4_group_add(sb, &input); - if (EXT4_SB(sb)->s_journal) { - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - } - if (err == 0) - err = err2; - mnt_drop_write_file(filp); - if (!err && ext4_has_group_desc_csum(sb) && - test_opt(sb, INIT_INODE_TABLE)) - err = ext4_register_li_request(sb, input.group); -group_add_out: - ext4_resize_end(sb); - return err; + return ext4_ioctl_group_add(filp, &input); } case EXT4_IOC_MIGRATE: @@ -1078,8 +1084,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case EXT4_IOC32_GROUP_ADD: { struct compat_ext4_new_group_input __user *uinput; - struct ext4_new_group_input input; - mm_segment_t old_fs; + struct ext4_new_group_data input; int err; uinput = compat_ptr(arg); @@ -1092,12 +1097,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) &uinput->reserved_blocks); if (err) return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD, - (unsigned long) &input); - set_fs(old_fs); - return err; + return ext4_ioctl_group_add(file, &input); } case EXT4_IOC_MOVE_EXT: case EXT4_IOC_RESIZE_FS: diff --git a/fs/fcntl.c b/fs/fcntl.c index 30f47d0f74a0..0522e283a4f4 100644 --- 
a/fs/fcntl.c +++ b/fs/fcntl.c @@ -563,6 +563,9 @@ static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __u { struct compat_flock64 fl; + BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start)); + BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len)); + memset(&fl, 0, sizeof(struct compat_flock64)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64))) @@ -632,9 +635,8 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, if (err) break; err = fixup_compat_flock(&flock); - if (err) - return err; - err = put_compat_flock(&flock, compat_ptr(arg)); + if (!err) + err = put_compat_flock(&flock, compat_ptr(arg)); break; case F_GETLK64: case F_OFD_GETLK: @@ -642,12 +644,8 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, if (err) break; err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); - if (err) - break; - err = fixup_compat_flock(&flock); - if (err) - return err; - err = put_compat_flock64(&flock, compat_ptr(arg)); + if (!err) + err = put_compat_flock64(&flock, compat_ptr(arg)); break; case F_SETLK: case F_SETLKW: diff --git a/fs/fhandle.c b/fs/fhandle.c index 474adc8d2a3a..0ace128f5d23 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -213,8 +213,8 @@ out_err: return retval; } -long do_handle_open(int mountdirfd, - struct file_handle __user *ufh, int open_flag) +static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, + int open_flag) { long retval = 0; struct path path; diff --git a/fs/file.c b/fs/file.c index 4eecbf4244a5..3b080834b870 100644 --- a/fs/file.c +++ b/fs/file.c @@ -593,13 +593,16 @@ void __fd_install(struct files_struct *files, unsigned int fd, { struct fdtable *fdt; - might_sleep(); rcu_read_lock_sched(); - while (unlikely(files->resize_in_progress)) { + if (unlikely(files->resize_in_progress)) { rcu_read_unlock_sched(); - wait_event(files->resize_wait, !files->resize_in_progress); - rcu_read_lock_sched(); + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + BUG_ON(fdt->fd[fd] != NULL); + rcu_assign_pointer(fdt->fd[fd], file); + spin_unlock(&files->file_lock); + return; } /* coupled with smp_wmb() in expand_fdtable() */ smp_rmb(); @@ -632,7 +635,6 @@ int __close_fd(struct files_struct *files, unsigned fd) if (!file) goto out_unlock; rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); return filp_close(file, files); diff --git a/fs/internal.h b/fs/internal.h index 48cee21b4f14..df262f41a0ef 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -55,6 +55,7 @@ extern void __init chrdev_init(void); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); +long do_unlinkat(int dfd, struct filename *name); /* * namespace.c diff --git a/fs/iomap.c b/fs/iomap.c index b9f74803e56c..47d29ccffaef 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -856,6 +856,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, struct bio *bio; bool need_zeroout = false; int nr_pages, ret; + size_t copied = 0; if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL; @@ -867,7 +868,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, /*FALLTHRU*/ case IOMAP_UNWRITTEN: if (!(dio->flags & IOMAP_DIO_WRITE)) { - iov_iter_zero(length, dio->submit.iter); + length = iov_iter_zero(length, dio->submit.iter); dio->size += length; return 
length; } @@ -904,8 +905,11 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, } do { - if (dio->error) + size_t n; + if (dio->error) { + iov_iter_revert(dio->submit.iter, copied); return 0; + } bio = bio_alloc(GFP_KERNEL, nr_pages); bio_set_dev(bio, iomap->bdev); @@ -918,20 +922,24 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ret = bio_iov_iter_get_pages(bio, &iter); if (unlikely(ret)) { bio_put(bio); - return ret; + return copied ? copied : ret; } + n = bio->bi_iter.bi_size; if (dio->flags & IOMAP_DIO_WRITE) { bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); - task_io_account_write(bio->bi_iter.bi_size); + task_io_account_write(n); } else { bio_set_op_attrs(bio, REQ_OP_READ, 0); if (dio->flags & IOMAP_DIO_DIRTY) bio_set_pages_dirty(bio); } - dio->size += bio->bi_iter.bi_size; - pos += bio->bi_iter.bi_size; + iov_iter_advance(dio->submit.iter, n); + + dio->size += n; + pos += n; + copied += n; nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); @@ -947,9 +955,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, if (pad) iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); } - - iov_iter_advance(dio->submit.iter, length); - return length; + return copied; } ssize_t diff --git a/fs/namei.c b/fs/namei.c index 5424b10cfdc4..f0c7a7b9b6ca 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3459,7 +3459,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, goto out; child = vfs_tmpfile(path.dentry, op->mode, op->open_flag); error = PTR_ERR(child); - if (unlikely(IS_ERR(child))) + if (IS_ERR(child)) goto out2; dput(path.dentry); path.dentry = child; @@ -4010,10 +4010,9 @@ EXPORT_SYMBOL(vfs_unlink); * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. 
*/ -static long do_unlinkat(int dfd, const char __user *pathname) +long do_unlinkat(int dfd, struct filename *name) { int error; - struct filename *name; struct dentry *dentry; struct path path; struct qstr last; @@ -4022,8 +4021,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: - name = filename_parentat(dfd, getname(pathname), lookup_flags, - &path, &last, &type); + name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (IS_ERR(name)) return PTR_ERR(name); @@ -4065,12 +4063,12 @@ exit2: mnt_drop_write(path.mnt); exit1: path_put(&path); - putname(name); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; inode = NULL; goto retry; } + putname(name); return error; slashes: @@ -4091,12 +4089,12 @@ SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) if (flag & AT_REMOVEDIR) return do_rmdir(dfd, pathname); - return do_unlinkat(dfd, pathname); + return do_unlinkat(dfd, getname(pathname)); } SYSCALL_DEFINE1(unlink, const char __user *, pathname) { - return do_unlinkat(AT_FDCWD, pathname); + return do_unlinkat(AT_FDCWD, getname(pathname)); } int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index b60627bcfc62..ef6729568432 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -67,7 +67,7 @@ out: */ void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq) { - if (atomic_dec_and_test(&dreq->count)) + if (refcount_dec_and_test(&dreq->count)) kfree(dreq); } @@ -87,7 +87,7 @@ static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req) dreq = container_of(req, struct nfs_cache_defer_req, req); dreq->deferred_req.revisit = nfs_dns_cache_revisit; - atomic_inc(&dreq->count); + refcount_inc(&dreq->count); return &dreq->deferred_req; } @@ -99,7 +99,7 @@ struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void) dreq = kzalloc(sizeof(*dreq), GFP_KERNEL); if (dreq) { init_completion(&dreq->completion); - atomic_set(&dreq->count, 1); + refcount_set(&dreq->count, 1); dreq->req.defer = nfs_dns_cache_defer; } return dreq; } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 4e6236a86cf7..220ee409abc4 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -16,7 +16,7 @@ struct nfs_cache_defer_req { struct cache_req req; struct cache_deferred_req deferred_req; struct completion completion; - atomic_t count; + refcount_t count; }; extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index cd9d992feb2e..509dc5adeb8f 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -49,15 +49,15 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nn->nfs_callback_tcpport, PF_INET, net); + dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", + nn->nfs_callback_tcpport, PF_INET, net->ns.inum); ret = svc_create_xprt(serv, "tcp", net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nn->nfs_callback_tcpport6, PF_INET6, net); + dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", + nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum); } else if (ret != -EAFNOSUPPORT) goto out_err; return 0; @@ -185,7
+185,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc if (--nn->cb_users[minorversion]) return; - dprintk("NFS: destroy per-net callback data; net=%p\n", net); + dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); svc_shutdown_net(serv, net); } @@ -198,7 +198,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, if (nn->cb_users[minorversion]++) return 0; - dprintk("NFS: create per-net callback data; net=%p\n", net); + dprintk("NFS: create per-net callback data; net=%x\n", net->ns.inum); ret = svc_bind(serv, net); if (ret < 0) { @@ -223,7 +223,7 @@ err_socks: err_bind: nn->cb_users[minorversion]--; dprintk("NFS: Couldn't create callback socket: err = %d; " - "net = %p\n", ret, net); + "net = %x\n", ret, net->ns.inum); return ret; } diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 19151f6c0e97..2435af56b87e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -440,7 +440,7 @@ static bool referring_call_exists(struct nfs_client *clp, uint32_t nrclists, struct referring_call_list *rclists) { - bool status = 0; + bool status = false; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 22880ef6d8dd..0ac2fb1c6b63 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -163,7 +163,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; - atomic_set(&clp->cl_count, 1); + refcount_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); @@ -269,7 +269,7 @@ void nfs_put_client(struct nfs_client *clp) nn = net_generic(clp->cl_net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { + if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); spin_unlock(&nn->nfs_client_lock); @@ -314,7 +314,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat sap)) continue; - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); return clp; } return NULL; @@ -1006,7 +1006,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, /* Copy data from the source */ server->nfs_client = source->nfs_client; server->destroy = source->destroy; - atomic_inc(&server->nfs_client->cl_count); + refcount_inc(&server->nfs_client->cl_count); nfs_server_copy_userdata(server, source); server->fsid = fattr->fsid; @@ -1166,7 +1166,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), - atomic_read(&clp->cl_count), + refcount_read(&clp->cl_count), clp->cl_hostname); rcu_read_unlock(); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 606dd3871f66..ade44ca0c66c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1041,6 +1041,33 @@ int nfs_delegations_present(struct nfs_client *clp) } /** + * nfs4_refresh_delegation_stateid - Update delegation stateid seqid + * @dst: stateid to refresh + * @inode: inode to check + * + * Returns "true" and updates "dst->seqid" * if inode had a delegation + * that matches our delegation stateid. Otherwise "false" is returned. 
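The atomic_t to refcount_t conversions running through these NFS files (dreq->count, clp->cl_count, lsp->ls_count, mirror->ref, the lock/open context counts) are mechanical, but the API they land on is stricter: refcount_t saturates instead of wrapping and warns on increment-from-zero. A sketch of the two idioms the converted code relies on, using a simplified object rather than any NFS type:

    #include <linux/list.h>
    #include <linux/refcount.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct obj {
            refcount_t ref;
            struct list_head link;   /* assumed linked at creation */
    };

    static DEFINE_SPINLOCK(obj_lock);

    /* Lookup side: only take a reference if the object is still live. */
    static struct obj *obj_get_live(struct obj *o)
    {
            return refcount_inc_not_zero(&o->ref) ? o : NULL;
    }

    /* Release side: on the final put, unlink under the lock and free,
     * mirroring refcount_dec_and_lock() as used for clp->cl_count above. */
    static void obj_put(struct obj *o)
    {
            if (refcount_dec_and_lock(&o->ref, &obj_lock)) {
                    list_del(&o->link);
                    spin_unlock(&obj_lock);
                    kfree(o);
            }
    }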
+ */ +bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs_delegation *delegation; + bool ret = false; + if (!inode) + goto out; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && + nfs4_stateid_match_other(dst, &delegation->stateid)) { + dst->seqid = delegation->stateid.seqid; + ret = true; + } + rcu_read_unlock(); +out: + return ret; +} + +/** + * nfs4_copy_delegation_stateid - Copy inode's state ID information + * @inode: inode to check + * @flags: delegation type requirement diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index ddaf2644cf13..185a09f37a89 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -62,6 +62,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); +bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f439f1c45008..e51ae52ed14f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -118,13 +118,6 @@ nfs_opendir(struct inode *inode, struct file *filp) goto out; } filp->private_data = ctx; - if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { - /* This is a mountpoint, so d_revalidate will never - * have been called, so we need to refresh the - * inode (for close-open consistency) ourselves. - */ - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - } out: put_rpccred(cred); return res; } @@ -253,7 +246,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri desc->cache_entry_index = index; return 0; out_eof: - desc->eof = 1; + desc->eof = true; return -EBADCOOKIE; } @@ -307,7 +300,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (array->eof_index >= 0) { status = -EBADCOOKIE; if (*desc->dir_cookie == array->last_cookie) - desc->eof = 1; + desc->eof = true; } out: return status; } @@ -761,7 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) ent = &array->array[i]; if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { - desc->eof = 1; + desc->eof = true; break; } desc->ctx->pos++; @@ -773,7 +766,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) ctx->duped = 1; } if (array->eof_index >= 0) - desc->eof = 1; + desc->eof = true; kunmap(desc->page); cache_page_release(desc); @@ -873,7 +866,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res == -EBADCOOKIE) { res = 0; /* This means either end of directory */ - if (*desc->dir_cookie && desc->eof == 0) { + if (*desc->dir_cookie && !desc->eof) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc); if (res == 0) @@ -1241,8 +1234,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - if (nfs_mapping_need_revalidate_inode(inode)) - error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ?
"invalid" : "valid"); return !error; @@ -1393,6 +1385,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, @@ -2064,7 +2057,7 @@ out: * should mark the directories for revalidation. */ d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, + nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry); @@ -2369,15 +2362,15 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); -#define NFS_MAY_READ (NFS4_ACCESS_READ) -#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ - NFS4_ACCESS_EXTEND | \ - NFS4_ACCESS_DELETE) -#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ - NFS4_ACCESS_EXTEND) +#define NFS_MAY_READ (NFS_ACCESS_READ) +#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \ + NFS_ACCESS_EXTEND | \ + NFS_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \ + NFS_ACCESS_EXTEND) #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE -#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) -#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE) static int nfs_access_calc_mask(u32 access_result, umode_t umode) { @@ -2425,9 +2418,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) if (!may_block) goto out; - /* Be clever: ask server to check for all possible rights */ - cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE - | NFS_MAY_WRITE | NFS_MAY_READ; + /* + * Determine which access bits we want to ask for... + */ + cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; + if (S_ISDIR(inode->i_mode)) + cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; + else + cache.mask |= NFS_ACCESS_EXECUTE; cache.cred = cred; status = NFS_PROTO(inode)->access(inode, &cache); if (status != 0) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0214dd1e1060..81cca49a8375 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -829,23 +829,9 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; - /* - * VFS doesn't require the open mode to match a flock() lock's type. - * NFS, however, may simulate flock() locking with posix locking which - * requires the open mode to match the lock type. - */ - switch (fl->fl_type) { - case F_UNLCK: + /* We're simulating flock() locks using posix locks on the server */ + if (fl->fl_type == F_UNLCK) return do_unlk(filp, cmd, fl, is_local); - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - return -EBADF; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - } - return do_setlk(filp, cmd, fl, is_local); } EXPORT_SYMBOL_GPL(nfs_flock); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 508126eb49f9..4e54d8b5413a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -471,10 +471,10 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) return PNFS_NOT_ATTEMPTED; dprintk("%s USE DS: %s cl_count %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count)); /* No multipath support. 
Use first DS */ - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -515,10 +515,10 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count)); hdr->pgio_done_cb = filelayout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -1064,9 +1064,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) goto out_err; dprintk("%s ino %lu, how %d cl_count %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); + data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count)); data->commit_done_cb = filelayout_commit_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b0fa83a60754..c75ad982bcfc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -187,7 +187,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, continue; if (!ff_mirror_match_fh(mirror, pos)) continue; - if (atomic_inc_not_zero(&pos->ref)) { + if (refcount_inc_not_zero(&pos->ref)) { spin_unlock(&inode->i_lock); return pos; } @@ -218,7 +218,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) mirror = kzalloc(sizeof(*mirror), gfp_flags); if (mirror != NULL) { spin_lock_init(&mirror->lock); - atomic_set(&mirror->ref, 1); + refcount_set(&mirror->ref, 1); INIT_LIST_HEAD(&mirror->mirrors); } return mirror; @@ -242,7 +242,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror) { - if (mirror != NULL && atomic_dec_and_test(&mirror->ref)) + if (mirror != NULL && refcount_dec_and_test(&mirror->ref)) ff_layout_free_mirror(mirror); } @@ -1726,10 +1726,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) vers = nfs4_ff_layout_ds_version(lseg, idx); dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); + ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); hdr->pgio_done_cb = ff_layout_read_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) @@ -1785,11 +1785,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), + offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); hdr->pgio_done_cb = ff_layout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_ff_layout_select_ds_fh(lseg, idx); @@ -1863,11 +1863,11 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) vers = 
nfs4_ff_layout_ds_version(lseg, idx); dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count), + data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count), vers); data->commit_done_cb = ff_layout_commit_done_cb; data->cred = ds_cred; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) @@ -2286,7 +2286,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) continue; /* mirror refcount put in cleanup_layoutstats */ - if (!atomic_inc_not_zero(&mirror->ref)) + if (!refcount_inc_not_zero(&mirror->ref)) continue; dev = &mirror->mirror_ds->id_node; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 679cb087ef3f..411798346e48 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -14,6 +14,7 @@ #define FF_FLAGS_NO_IO_THRU_MDS 2 #define FF_FLAGS_NO_READ_IO 4 +#include <linux/refcount.h> #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom @@ -82,7 +83,7 @@ struct nfs4_ff_layout_mirror { nfs4_stateid stateid; struct rpc_cred __rcu *ro_cred; struct rpc_cred __rcu *rw_cred; - atomic_t ref; + refcount_t ref; spinlock_t lock; unsigned long flags; struct nfs4_ff_layoutstat read_stat; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1629056aa2c9..38b93d54c02e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -783,7 +783,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { - atomic_set(&l_ctx->count, 1); + refcount_set(&l_ctx->count, 1); l_ctx->lockowner = current->files; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); @@ -797,7 +797,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context do { if (pos->lockowner != current->files) continue; - atomic_inc(&pos->count); + refcount_inc(&pos->count); return pos; } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); return NULL; @@ -836,7 +836,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) struct nfs_open_context *ctx = l_ctx->open_context; struct inode *inode = d_inode(ctx->dentry); - if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) + if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; list_del(&l_ctx->list); spin_unlock(&inode->i_lock); @@ -913,7 +913,7 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->lock_context.count); + refcount_inc(&ctx->lock_context.count); return ctx; } EXPORT_SYMBOL_GPL(get_nfs_open_context); @@ -924,11 +924,11 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) struct super_block *sb = ctx->dentry->d_sb; if (!list_empty(&ctx->list)) { - if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) + if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); - } else if (!atomic_dec_and_test(&ctx->lock_context.count)) + } else if (!refcount_dec_and_test(&ctx->lock_context.count)) return; if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); @@ -2084,8 +2084,12 @@ static int nfs_net_init(struct net 
*net) static void nfs_net_exit(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_fs_proc_net_exit(net); nfs_cleanup_cb_ident_idr(net); + WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); + WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); } static struct pernet_operations nfs_net_ops = { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index bc673fb47fb3..49f848fd1f04 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -188,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs3_accessargs arg = { .fh = NFS_FH(inode), + .access = entry->mask, }; struct nfs3_accessres res; struct rpc_message msg = { @@ -196,25 +197,9 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_resp = &res, .rpc_cred = entry->cred, }; - int mode = entry->mask; int status = -ENOMEM; dprintk("NFS call access\n"); - - if (mode & MAY_READ) - arg.access |= NFS3_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE; - if (mode & MAY_EXEC) - arg.access |= NFS3_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND; - if (mode & MAY_EXEC) - arg.access |= NFS3_ACCESS_EXECUTE; - } - res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) goto out; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index dcfcf7fd7438..b374f680830c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -145,7 +145,7 @@ struct nfs4_lock_state { unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; - atomic_t ls_count; + refcount_t ls_count; fl_owner_t ls_owner; }; @@ -162,6 +162,7 @@ enum { NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */ + NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */ }; struct nfs4_state { @@ -185,6 +186,8 @@ struct nfs4_state { unsigned int n_rdwr; /* Number of read/write references */ fmode_t state; /* State on the server (R,W, or RW) */ atomic_t count; + + wait_queue_head_t waitq; }; @@ -458,6 +461,10 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, const struct nfs_lock_context *, nfs4_stateid *, struct rpc_cred **); +extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state); +extern bool nfs4_copy_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); @@ -465,7 +472,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern int nfs4_setup_sequence(const struct nfs_client *client, +extern int nfs4_setup_sequence(struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); @@ -475,6 +482,7 @@ extern int nfs4_sequence_done(struct rpc_task *task, extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); extern const nfs4_stateid zero_stateid; +extern const nfs4_stateid invalid_stateid; /* 
nfs4super.c */ struct nfs_mount_info; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e9bea90dc017..12bbab0becb4 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -483,7 +483,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new, * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ if (pos->cl_cons_state > NFS_CS_READY) { - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); nfs_put_client(*prev); @@ -559,7 +559,7 @@ int nfs40_walk_client_list(struct nfs_client *new, * way that a SETCLIENTID_CONFIRM to pos can succeed is * if new and pos point to the same server: */ - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); nfs_put_client(prev); @@ -715,7 +715,7 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; found: - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); *result = pos; status = 0; break; @@ -749,7 +749,7 @@ nfs4_find_client_ident(struct net *net, int cb_ident) spin_lock(&nn->nfs_client_lock); clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); return clp; } @@ -793,7 +793,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, minorversion) == false) + if (!nfs4_cb_match_client(addr, clp, minorversion)) continue; if (!nfs4_has_session(clp)) @@ -804,7 +804,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, sid->data, NFS4_MAX_SESSIONID_LEN) != 0) continue; - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); return clp; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f90090e8c959..56fa5a16e097 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + struct rpc_cred *cred, + struct nfs4_slot *slot, + bool is_privileged); static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, struct rpc_cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, @@ -254,15 +258,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE }; const u32 nfs4_fs_locations_bitmap[3] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE + FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID | FATTR4_WORD0_FILEID | FATTR4_WORD0_FS_LOCATIONS, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER + FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED @@ -644,13 +645,14 @@ static int nfs40_sequence_done(struct rpc_task *task, #if defined(CONFIG_NFS_V4_1) -static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +static void nfs41_release_slot(struct nfs4_slot *slot) { struct nfs4_session *session; struct nfs4_slot_table *tbl; - struct nfs4_slot *slot = res->sr_slot; bool send_new_highest_used_slotid = false; + if (!slot) + return; tbl = slot->table; session = tbl->session; @@ -676,13 +678,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) send_new_highest_used_slotid = false; out_unlock: 
spin_unlock(&tbl->slot_tbl_lock); - res->sr_slot = NULL; if (send_new_highest_used_slotid) nfs41_notify_server(session->clp); if (waitqueue_active(&tbl->slot_waitq)) wake_up_all(&tbl->slot_waitq); } +static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +{ + nfs41_release_slot(res->sr_slot); + res->sr_slot = NULL; +} + static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -710,13 +717,6 @@ static int nfs41_sequence_process(struct rpc_task *task, /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: - /* If previous op on slot was interrupted and we reused - * the seq# and got a reply from the cache, then retry - */ - if (task->tk_status == -EREMOTEIO && interrupted) { - ++slot->seq_nr; - goto retry_nowait; - } /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; @@ -750,16 +750,16 @@ static int nfs41_sequence_process(struct rpc_task *task, * The slot id we used was probably retired. Try again * using a different slot id. */ + if (slot->slot_nr < slot->table->target_highest_slotid) + goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: /* * Was the last operation on this sequence interrupted? * If so, retry after bumping the sequence number. */ - if (interrupted) { - ++slot->seq_nr; - goto retry_nowait; - } + if (interrupted) + goto retry_new_seq; /* * Could this slot have been previously retired? * If so, then the server may be expecting seq_nr = 1! @@ -768,10 +768,11 @@ static int nfs41_sequence_process(struct rpc_task *task, slot->seq_nr = 1; goto retry_nowait; } - break; + goto session_recover; case -NFS4ERR_SEQ_FALSE_RETRY: - ++slot->seq_nr; - goto retry_nowait; + if (interrupted) + goto retry_new_seq; + goto session_recover; default: /* Just update the slot sequence no. */ slot->seq_done = 1; } @@ -781,6 +782,11 @@ out: dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); out_noaction: return ret; +session_recover: + nfs4_schedule_session_recovery(session, res->sr_status); + goto retry_nowait; +retry_new_seq: + ++slot->seq_nr; retry_nowait: if (rpc_restart_call_prepare(task)) { nfs41_sequence_free_slot(res); @@ -857,6 +863,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; +static void +nfs4_sequence_process_interrupted(struct nfs_client *client, + struct nfs4_slot *slot, struct rpc_cred *cred) +{ + struct rpc_task *task; + + task = _nfs41_proc_sequence(client, cred, slot, true); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + #else /* !CONFIG_NFS_V4_1 */ static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -877,9 +894,34 @@ int nfs4_sequence_done(struct rpc_task *task, } EXPORT_SYMBOL_GPL(nfs4_sequence_done); +static void +nfs4_sequence_process_interrupted(struct nfs_client *client, + struct nfs4_slot *slot, struct rpc_cred *cred) +{ + WARN_ON_ONCE(1); + slot->interrupted = 0; +} + #endif /* !CONFIG_NFS_V4_1 */ -int nfs4_setup_sequence(const struct nfs_client *client, +static +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct nfs4_slot *slot) +{ + if (!slot) + return; + slot->privileged = args->sa_privileged ?
1 : 0; + args->sa_slot = slot; + + res->sr_slot = slot; + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; + +} + +int nfs4_setup_sequence(struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) @@ -897,29 +939,28 @@ int nfs4_setup_sequence(const struct nfs_client *client, task->tk_timeout = 0; } - spin_lock(&tbl->slot_tbl_lock); - /* The state manager will wait until the slot table is empty */ - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; + for (;;) { + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; + + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; + } + spin_unlock(&tbl->slot_tbl_lock); - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* Try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - goto out_sleep; + if (likely(!slot->interrupted)) + break; + nfs4_sequence_process_interrupted(client, + slot, task->tk_msg.rpc_cred); } - spin_unlock(&tbl->slot_tbl_lock); - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - res->sr_slot = slot; - if (session) { - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - res->sr_status = 1; - } + nfs4_sequence_attach_slot(args, res, slot); trace_nfs4_setup_sequence(session, args); out_start: @@ -1044,6 +1085,12 @@ struct nfs4_opendata { int rpc_status; }; +struct nfs4_open_createattrs { + struct nfs4_label *label; + struct iattr *sattr; + const __u32 verf[2]; +}; + static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, int err, struct nfs4_exception *exception) { @@ -1113,8 +1160,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, - const struct iattr *attrs, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, enum open_claim_type4 claim, gfp_t gfp_mask) { @@ -1122,6 +1168,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct inode *dir = d_inode(parent); struct nfs_server *server = NFS_SERVER(dir); struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); + struct nfs4_label *label = (c != NULL) ? 
c->label : NULL; struct nfs4_opendata *p; p = kzalloc(sizeof(*p), gfp_mask); @@ -1187,15 +1234,11 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_DELEG_PREV_FH: p->o_arg.fh = NFS_FH(d_inode(dentry)); } - if (attrs != NULL && attrs->ia_valid != 0) { - __u32 verf[2]; - + if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) { p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, attrs, sizeof(p->attrs)); + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); - verf[0] = jiffies; - verf[1] = current->pid; - memcpy(p->o_arg.u.verifier.data, verf, + memcpy(p->o_arg.u.verifier.data, c->verf, sizeof(p->o_arg.u.verifier.data)); } p->c_arg.fh = &p->o_res.fh; @@ -1334,6 +1377,25 @@ static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state) } #endif /* CONFIG_NFS_V4_1 */ +static void nfs_state_log_update_open_stateid(struct nfs4_state *state) +{ + if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + wake_up_all(&state->waitq); +} + +static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); + u32 stateid_seqid = be32_to_cpu(stateid->seqid); + + if (stateid_seqid == state_seqid + 1U || + (stateid_seqid == 1U && state_seqid == 0xffffffffU)) + nfs_state_log_update_open_stateid(state); + else + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); +} + static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1349,18 +1411,32 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) nfs4_state_mark_reclaim_nograce(clp, state); } +/* + * Check for whether or not the caller may update the open stateid + * to the value passed in by stateid. + * + * Note: This function relies heavily on the server implementing + * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2 + * correctly. + * i.e. The stateid seqids have to be initialised to 1, and + * are then incremented on every state transition. 
+ */ static bool nfs_need_update_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid, nfs4_stateid *freeme) + const nfs4_stateid *stateid) { - if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) - return true; - if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { - nfs4_stateid_copy(freeme, &state->open_stateid); - nfs_test_and_clear_all_open_stateid(state); + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (stateid->seqid == cpu_to_be32(1)) + nfs_state_log_update_open_stateid(state); + else + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); return true; } - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) + + if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + nfs_state_log_out_of_order_open_stateid(state, stateid); return true; + } return false; } @@ -1399,11 +1475,14 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (nfs4_stateid_match_other(stateid, &state->open_stateid) && !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { nfs_resync_open_stateid_locked(state); - return; + goto out; } if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, 0); +out: + nfs_state_log_update_open_stateid(state); } static void nfs_clear_open_stateid(struct nfs4_state *state, @@ -1420,29 +1499,60 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, } static void nfs_set_open_stateid_locked(struct nfs4_state *state, - const nfs4_stateid *stateid, fmode_t fmode, - nfs4_stateid *freeme) + const nfs4_stateid *stateid, nfs4_stateid *freeme) { - switch (fmode) { - case FMODE_READ: - set_bit(NFS_O_RDONLY_STATE, &state->flags); + DEFINE_WAIT(wait); + int status = 0; + for (;;) { + + if (!nfs_need_update_open_stateid(state, stateid)) + return; + if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) break; - case FMODE_WRITE: - set_bit(NFS_O_WRONLY_STATE, &state->flags); + if (status) break; - case FMODE_READ|FMODE_WRITE: - set_bit(NFS_O_RDWR_STATE, &state->flags); + /* Rely on seqids for serialisation with NFSv4.0 */ + if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) + break; + + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + /* + * Ensure we process the state changes in the same order + * in which the server processed them by delaying the + * update of the stateid until we are in sequence. 
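The serialisation the wait loop above implements hinges on one ordering rule: an open stateid update may be applied only when its seqid is the direct successor of the one currently held, with the 32-bit seqid wrapping from 0xffffffff back to 1 (seqid 0 is never handed out, per the RFC 7530/RFC 5661 behaviour the comment relies on). The predicate, restated as a sketch matching nfs_state_log_out_of_order_open_stateid():

    static bool open_stateid_in_sequence(u32 held, u32 incoming)
    {
            return incoming == held + 1U ||
                   (incoming == 1U && held == 0xffffffffU);
    }

Updates that fail this test set NFS_STATE_CHANGE_WAIT and sleep killably on state->waitq (with the 5*HZ timeout visible above) until the missing intermediate update arrives, so replies are applied in the order the server processed them.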
+ */ + write_sequnlock(&state->seqlock); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); + if (!signal_pending(current)) { + if (schedule_timeout(5*HZ) == 0) + status = -EAGAIN; + else + status = 0; + } else + status = -EINTR; + finish_wait(&state->waitq, &wait); + rcu_read_lock(); + spin_lock(&state->owner->so_lock); + write_seqlock(&state->seqlock); } - if (!nfs_need_update_open_stateid(state, stateid, freeme)) - return; + + if (test_bit(NFS_OPEN_STATE, &state->flags) && + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs4_stateid_copy(freeme, &state->open_stateid); + nfs_test_and_clear_all_open_stateid(state); + } + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, status); + nfs_state_log_update_open_stateid(state); } -static void __update_open_stateid(struct nfs4_state *state, +static void nfs_state_set_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, - const nfs4_stateid *deleg_stateid, fmode_t fmode, nfs4_stateid *freeme) { @@ -1450,17 +1560,34 @@ static void __update_open_stateid(struct nfs4_state *state, * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ - spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); - if (deleg_stateid != NULL) { - nfs4_stateid_copy(&state->stateid, deleg_stateid); - set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs_set_open_stateid_locked(state, open_stateid, freeme); + switch (fmode) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); } - if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme); + set_bit(NFS_OPEN_STATE, &state->flags); + write_sequnlock(&state->seqlock); +} + +static void nfs_state_set_delegation(struct nfs4_state *state, + const nfs4_stateid *deleg_stateid, + fmode_t fmode) +{ + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, deleg_stateid); + set_bit(NFS_DELEGATED_STATE, &state->flags); write_sequnlock(&state->seqlock); - update_open_stateflags(state, fmode); - spin_unlock(&state->owner->so_lock); } static int update_open_stateid(struct nfs4_state *state, @@ -1478,6 +1605,12 @@ static int update_open_stateid(struct nfs4_state *state, fmode &= (FMODE_READ|FMODE_WRITE); rcu_read_lock(); + spin_lock(&state->owner->so_lock); + if (open_stateid != NULL) { + nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme); + ret = 1; + } + deleg_cur = rcu_dereference(nfsi->delegation); if (deleg_cur == NULL) goto no_delegation; @@ -1494,18 +1627,16 @@ static int update_open_stateid(struct nfs4_state *state, goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, - fmode, &freeme); + nfs_state_set_delegation(state, &deleg_cur->stateid, fmode); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); no_delegation: + if (ret) + update_open_stateflags(state, fmode); + spin_unlock(&state->owner->so_lock); rcu_read_unlock(); - if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, fmode, 
&freeme); - ret = 1; - } if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(clp); if (freeme.type != 0) @@ -1761,7 +1892,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context struct nfs4_opendata *opendata; opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, - NULL, NULL, claim, GFP_NOFS); + NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -2518,7 +2649,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); spin_unlock(&state->state_lock); nfs4_put_lock_state(prev); @@ -2692,8 +2823,7 @@ out: static int _nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, - struct iattr *sattr, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, int *opened) { struct nfs4_state_owner *sp; @@ -2705,6 +2835,8 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; + struct iattr *sattr = c->sattr; + struct nfs4_label *label = c->label; struct nfs4_label *olabel = NULL; int status; @@ -2723,8 +2855,8 @@ static int _nfs4_do_open(struct inode *dir, status = -ENOMEM; if (d_really_is_positive(dentry)) claim = NFS4_OPEN_CLAIM_FH; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - label, claim, GFP_KERNEL); + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, + c, claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -2805,10 +2937,18 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; + struct nfs4_open_createattrs c = { + .label = label, + .sattr = sattr, + .verf = { + [0] = (__u32)jiffies, + [1] = (__u32)current->pid, + }, + }; int status; do { - status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened); + status = _nfs4_do_open(dir, ctx, flags, &c, opened); res = ctx->state; trace_nfs4_open_file(ctx, flags, status); if (status == 0) @@ -3024,18 +3164,20 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->arg.lr_args = NULL; calldata->res.lr_res = NULL; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid, + calldata->inode)) + goto lr_restart; + /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_UNKNOWN_LAYOUTTYPE: case -NFS4ERR_WRONG_CRED: calldata->arg.lr_args = NULL; calldata->res.lr_res = NULL; - calldata->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; + goto lr_restart; } } @@ -3051,39 +3193,43 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.bitmask != NULL) { calldata->arg.bitmask = NULL; calldata->res.fattr = NULL; - task->tk_status = 0; - rpc_restart_call_prepare(task); - goto out_release; + goto out_restart; } break; + case -NFS4ERR_OLD_STATEID: + /* Did we race with OPEN? 
*/ + if (nfs4_refresh_open_stateid(&calldata->arg.stateid, + state)) + goto out_restart; + goto out_release; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: nfs4_free_revoked_stateid(server, &calldata->arg.stateid, task->tk_msg.rpc_cred); - case -NFS4ERR_OLD_STATEID: + /* Fallthrough */ case -NFS4ERR_BAD_STATEID: - if (!nfs4_stateid_match(&calldata->arg.stateid, - &state->open_stateid)) { - rpc_restart_call_prepare(task); - goto out_release; - } - if (calldata->arg.fmode == 0) - break; + break; default: - if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - goto out_release; - } + if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) + goto out_restart; } nfs_clear_open_stateid(state, &calldata->arg.stateid, res_stateid, calldata->arg.fmode); out_release: + task->tk_status = 0; nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); + return; +lr_restart: + calldata->res.lr_ret = 0; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; } static void nfs4_close_prepare(struct rpc_task *task, void *data) @@ -3103,7 +3249,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); - nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid); /* Calculate the change in open mode */ calldata->arg.fmode = 0; if (state->n_rdwr == 0) { @@ -3121,7 +3266,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; if (!nfs4_valid_open_stateid(state) || - test_bit(NFS_OPEN_STATE, &state->flags) == 0) + !nfs4_refresh_open_stateid(&calldata->arg.stateid, state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -3215,6 +3360,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); + if (!nfs4_copy_open_stateid(&calldata->arg.stateid, state)) + goto out_free_calldata; /* Serialization for the sequence id */ alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); @@ -3889,6 +4036,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs4_accessargs args = { .fh = NFS_FH(inode), .bitmask = server->cache_consistency_bitmask, + .access = entry->mask, }; struct nfs4_accessres res = { .server = server, @@ -3899,26 +4047,8 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry .rpc_resp = &res, .rpc_cred = entry->cred, }; - int mode = entry->mask; int status = 0; - /* - * Determine which access bits we want to ask for... 
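nfs4_close_done() above (and nfs4_delegreturn_done() later in this file) now funnel their retry cases through shared labels: lr_restart clears the layoutreturn result before falling into out_restart, which zeroes tk_status and re-runs the RPC. Reduced to a sketch, with a hypothetical calldata type standing in for nfs4_closedata:

    struct close_calldata {            /* hypothetical stand-in */
            nfs4_stateid arg_stateid;
            struct nfs4_state *state;
    };

    static void done_sketch(struct rpc_task *task, struct close_calldata *d)
    {
            switch (task->tk_status) {
            case -NFS4ERR_OLD_STATEID:
                    /* Raced with an OPEN: refresh the seqid and retry. */
                    if (nfs4_refresh_open_stateid(&d->arg_stateid, d->state))
                            goto out_restart;
                    break;  /* stateid is gone for good; finish quietly */
            default:
                    break;
            }
            return;
    out_restart:
            task->tk_status = 0;
            rpc_restart_call_prepare(task);
    }

The net effect is that NFS4ERR_OLD_STATEID stops being fatal whenever the client can recover the current seqid from its own cached state.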
- */ - if (mode & MAY_READ) - args.access |= NFS4_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_EXECUTE; - } - res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; @@ -4843,7 +4973,7 @@ static void nfs4_renew_release(void *calldata) struct nfs4_renewdata *data = calldata; struct nfs_client *clp = data->client; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(data); @@ -4891,7 +5021,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, if (renew_flags == 0) return 0; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); if (data == NULL) { @@ -5643,18 +5773,20 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) data->args.lr_args = NULL; data->res.lr_res = NULL; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid, + data->inode)) + goto lr_restart; + /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_UNKNOWN_LAYOUTTYPE: case -NFS4ERR_WRONG_CRED: data->args.lr_args = NULL; data->res.lr_res = NULL; - data->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; + goto lr_restart; } } @@ -5668,27 +5800,36 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) nfs4_free_revoked_stateid(data->res.server, data->args.stateid, task->tk_msg.rpc_cred); + /* Fallthrough */ case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) + goto out_restart; + task->tk_status = 0; + break; case -NFS4ERR_ACCESS: if (data->args.bitmask) { data->args.bitmask = NULL; data->res.fattr = NULL; - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; + goto out_restart; } + /* Fallthrough */ default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; + goto out_restart; } } data->rpc_status = task->tk_status; + return; +lr_restart: + data->res.lr_ret = 0; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); } static void nfs4_delegreturn_release(void *calldata) @@ -5896,7 +6037,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.seqid = seqid; p->res.seqid = seqid; p->lsp = lsp; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! 
*/ p->ctx = get_nfs_open_context(ctx); p->l_ctx = nfs_get_lock_context(ctx); @@ -6112,7 +6253,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; p->server = server; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); return p; @@ -6568,6 +6709,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) return -ENOLCK; + /* + * Don't rely on the VFS having checked the file open mode, + * since it won't do this for flock() locks. + */ + switch (request->fl_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + } + status = nfs4_set_lock_state(state, request); if (status != 0) return status; @@ -6763,9 +6918,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); - u32 bitmask[3] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; + u32 bitmask[3]; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), .name = name, @@ -6784,12 +6937,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, dprintk("%s: start\n", __func__); + bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS; + bitmask[1] = nfs4_fattr_bitmap[1]; + /* Ask for the fileid of the absent filesystem if mounted_on_fileid * is not supported */ if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + bitmask[0] &= ~FATTR4_WORD0_FILEID; else - bitmask[0] |= FATTR4_WORD0_FILEID; + bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; @@ -7472,7 +7628,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, struct nfs41_exchange_id_data *calldata; int status; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return ERR_PTR(-EIO); status = -ENOMEM; @@ -8072,7 +8228,7 @@ static void nfs41_sequence_release(void *data) struct nfs4_sequence_data *calldata = data; struct nfs_client *clp = calldata->clp; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(calldata); @@ -8101,7 +8257,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) trace_nfs4_sequence(clp, task->tk_status); if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); - if (atomic_read(&clp->cl_count) == 1) + if (refcount_read(&clp->cl_count) == 1) goto out; if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { @@ -8135,6 +8291,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = { static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, + struct nfs4_slot *slot, bool is_privileged) { struct nfs4_sequence_data *calldata; @@ -8148,15 +8305,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .callback_ops = &nfs41_sequence_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; + struct rpc_task *ret; - if (!atomic_inc_not_zero(&clp->cl_count)) - return ERR_PTR(-EIO); + ret = ERR_PTR(-EIO); + if (!refcount_inc_not_zero(&clp->cl_count)) + goto out_err; + + ret = ERR_PTR(-ENOMEM); calldata = kzalloc(sizeof(*calldata), GFP_NOFS); - if 
(calldata == NULL) { - nfs_put_client(clp); - return ERR_PTR(-ENOMEM); - } + if (calldata == NULL) + goto out_put_clp; nfs4_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); if (is_privileged) nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; @@ -8164,7 +8324,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, calldata->clp = clp; task_setup_data.callback_data = calldata; - return rpc_run_task(&task_setup_data); + ret = rpc_run_task(&task_setup_data); + if (IS_ERR(ret)) + goto out_err; + return ret; +out_put_clp: + nfs_put_client(clp); +out_err: + nfs41_release_slot(slot); + return ret; } static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) @@ -8174,7 +8342,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return -EAGAIN; - task = _nfs41_proc_sequence(clp, cred, false); + task = _nfs41_proc_sequence(clp, cred, NULL, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -8188,7 +8356,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred, true); + task = _nfs41_proc_sequence(clp, cred, NULL, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; @@ -8588,18 +8756,27 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) server = NFS_SERVER(lrp->args.inode); switch (task->tk_status) { + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&lrp->args.stateid, + lrp->args.inode)) + goto out_restart; + /* Fallthrough */ default: task->tk_status = 0; + /* Fallthrough */ case 0: break; case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) break; - nfs4_sequence_free_slot(&lrp->res.seq_res); - rpc_restart_call_prepare(task); - return; + goto out_restart; } dprintk("<-- %s\n", __func__); + return; +out_restart: + task->tk_status = 0; + nfs4_sequence_free_slot(&lrp->res.seq_res); + rpc_restart_call_prepare(task); } static void nfs4_layoutreturn_release(void *calldata) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0378e2257ca7..54fd56d715a8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -69,6 +69,14 @@ const nfs4_stateid zero_stateid = { { .data = { 0 } }, .type = NFS4_SPECIAL_STATEID_TYPE, }; +const nfs4_stateid invalid_stateid = { + { + .seqid = cpu_to_be32(0xffffffffU), + .other = { 0 }, + }, + .type = NFS4_INVALID_STATEID_TYPE, +}; + static DEFINE_MUTEX(nfs_clid_init_mutex); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) @@ -645,6 +653,7 @@ nfs4_alloc_open_state(void) INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); seqlock_init(&state->seqlock); + init_waitqueue_head(&state->waitq); return state; } @@ -825,7 +834,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, ret = pos; } if (ret) - atomic_inc(&ret->ls_count); + refcount_inc(&ret->ls_count); return ret; } @@ -843,7 +852,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp == NULL) return NULL; nfs4_init_seqid_counter(&lsp->ls_seqid); - atomic_set(&lsp->ls_count, 1); + refcount_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); @@ -907,7 +916,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (lsp == 
NULL) return; state = lsp->ls_state; - if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock)) return; list_del(&lsp->ls_locks); if (list_empty(&state->lock_states)) @@ -927,7 +936,7 @@ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; dst->fl_u.nfs4_fl.owner = lsp; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); } static void nfs4_fl_release_lock(struct file_lock *fl) @@ -985,18 +994,39 @@ out: return ret; } -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +{ + bool ret; + int seq; + + do { + ret = false; + seq = read_seqbegin(&state->seqlock); + if (nfs4_state_match_open_stateid_other(state, dst)) { + dst->seqid = state->open_stateid.seqid; + ret = true; + } + } while (read_seqretry(&state->seqlock, seq)); + return ret; +} + +bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + bool ret; const nfs4_stateid *src; int seq; do { + ret = false; src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - if (test_bit(NFS_OPEN_STATE, &state->flags)) + if (test_bit(NFS_OPEN_STATE, &state->flags)) { src = &state->open_stateid; + ret = true; + } nfs4_stateid_copy(dst, src); } while (read_seqretry(&state->seqlock, seq)); + return ret; } /* @@ -1177,7 +1207,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); /* The rcu_read_lock() is not strictly necessary, as the state * manager is the only thread that ever changes the rpc_xprt @@ -1269,7 +1299,7 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); if (res) @@ -1409,6 +1439,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode, found = true; continue; } + if (nfs4_stateid_match_other(&state->open_stateid, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) { + found = true; + continue; + } if (nfs_state_lock_state_matches_stateid(state, stateid) && nfs4_state_mark_reclaim_nograce(clp, state)) found = true; @@ -2510,7 +2545,7 @@ static void nfs4_state_manager(struct nfs_client *clp) break; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) break; - } while (atomic_read(&clp->cl_count) > 1); + } while (refcount_read(&clp->cl_count) > 1); return; out_error: if (strlen(section)) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index e7c6275519b0..a275fba93170 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -202,17 +202,13 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_ARGS(clp, error), TP_STRUCT__entry( - __string(dstaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)) + __string(dstaddr, clp->cl_hostname) __field(int, error) ), TP_fast_assign( __entry->error = error; - __assign_str(dstaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)); + __assign_str(dstaddr, clp->cl_hostname); ), TP_printk( @@ -1066,6 +1062,8 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); 
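
Note: the recurring change in the hunks above is the conversion of reference counters in nfs4_lock_state, nfs_client, the pnfs structures and others from atomic_t to refcount_t. Unlike atomic_t, refcount_t saturates and WARNs on overflow and on increment-from-zero instead of silently wrapping, turning latent use-after-free bugs into loud diagnostics. A minimal sketch of the conversion pattern, using a hypothetical struct foo rather than any of the NFS structures:

    #include <linux/refcount.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo {
    	refcount_t ref;		/* was: atomic_t ref */
    	spinlock_t *table_lock;	/* hypothetical lock guarding a lookup table */
    };

    static void foo_init(struct foo *f)
    {
    	refcount_set(&f->ref, 1);	/* was: atomic_set(&f->ref, 1) */
    }

    static void foo_get(struct foo *f)
    {
    	refcount_inc(&f->ref);		/* WARNs once and saturates on overflow */
    }

    static bool foo_get_live(struct foo *f)
    {
    	/* Only succeeds while the object is still live, as with
    	 * refcount_inc_not_zero(&clp->cl_count) in the hunks above. */
    	return refcount_inc_not_zero(&f->ref);
    }

    static void foo_put(struct foo *f)
    {
    	/* Takes the lock only when the count really drops to zero,
    	 * matching refcount_dec_and_lock(&lsp->ls_count, ...). */
    	if (refcount_dec_and_lock(&f->ref, f->table_lock)) {
    		/* unlink from the table here, still under the lock */
    		spin_unlock(f->table_lock);
    		kfree(f);
    	}
    }
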
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( @@ -1133,9 +1131,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __string(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __string(dstaddr, clp ? clp->cl_hostname : "unknown") ), TP_fast_assign( @@ -1148,9 +1144,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown") ), TP_printk( @@ -1192,9 +1186,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __string(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __string(dstaddr, clp ? clp->cl_hostname : "unknown") __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1209,9 +1201,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown") __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 14ed9791ec9c..77c6729e57f0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4385,6 +4385,14 @@ static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *state return decode_stateid(xdr, stateid); } +static int decode_invalid_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + nfs4_stateid dummy; + + nfs4_stateid_copy(stateid, &invalid_stateid); + return decode_stateid(xdr, &dummy); +} + static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) { int status; @@ -4393,7 +4401,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) if (status != -EIO) nfs_increment_open_seqid(status, res->seqid); if (!status) - status = decode_open_stateid(xdr, &res->stateid); + status = decode_invalid_stateid(xdr, &res->stateid); return status; } @@ -6108,6 +6116,8 @@ static int decode_layoutreturn(struct xdr_stream *xdr, res->lrs_present = be32_to_cpup(p); if (res->lrs_present) status = decode_layout_stateid(xdr, &res->stateid); + else + nfs4_stateid_copy(&res->stateid, &invalid_stateid); return status; out_overflow: print_overflow_msg(__func__, xdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3bcd669a3152..d602fe9e1ac8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) { - atomic_inc(&lo->plh_refcount); + refcount_inc(&lo->plh_refcount); } static struct pnfs_layout_hdr * @@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) pnfs_layoutreturn_before_put_layout_hdr(lo); - if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); @@ -355,6 +355,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, } /* + * Update the seqid of a layout stateid + */ +bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + bool ret = false; + + 
spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) { + dst->seqid = lo->plh_stateid.seqid; + ret = true; + } + spin_unlock(&inode->i_lock); + return ret; +} + +/* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * * In order to continue using the pnfs_layout_hdr, a full recovery @@ -395,14 +413,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { lo->plh_retry_timestamp = jiffies; if (!test_and_set_bit(fail_bit, &lo->plh_flags)) - atomic_inc(&lo->plh_refcount); + refcount_inc(&lo->plh_refcount); } static void pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { if (test_and_clear_bit(fail_bit, &lo->plh_flags)) - atomic_dec(&lo->plh_refcount); + refcount_dec(&lo->plh_refcount); } static void @@ -450,7 +468,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, { INIT_LIST_HEAD(&lseg->pls_list); INIT_LIST_HEAD(&lseg->pls_lc_list); - atomic_set(&lseg->pls_refcount, 1); + refcount_set(&lseg->pls_refcount, 1); set_bit(NFS_LSEG_VALID, &lseg->pls_flags); lseg->pls_layout = lo; lseg->pls_range = *range; @@ -472,7 +490,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ - atomic_dec(&lo->plh_refcount); + refcount_dec(&lo->plh_refcount); if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) return; if (list_empty(&lo->plh_segs) && @@ -507,13 +525,13 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) return; dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount), + refcount_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); lo = lseg->pls_layout; inode = lo->plh_inode; - if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { spin_unlock(&inode->i_lock); return; @@ -551,7 +569,7 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) { - if (!atomic_dec_and_test(&lseg->pls_refcount)) + if (!refcount_dec_and_test(&lseg->pls_refcount)) return false; pnfs_layout_remove_lseg(lseg->pls_layout, lseg); list_add(&lseg->pls_list, tmp_list); @@ -570,7 +588,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * outstanding io is finished. */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount)); + refcount_read(&lseg->pls_refcount)); if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; } @@ -1451,7 +1469,7 @@ alloc_init_layout_hdr(struct inode *ino, lo = pnfs_alloc_layout_hdr(ino, gfp_flags); if (!lo) return NULL; - atomic_set(&lo->plh_refcount, 1); + refcount_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); INIT_LIST_HEAD(&lo->plh_return_segs); @@ -1513,7 +1531,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, if ((range->iomode == IOMODE_RW && ls_range->iomode != IOMODE_RW) || (range->iomode != ls_range->iomode && - strict_iomode == true) || + strict_iomode) || !pnfs_lseg_range_intersecting(ls_range, range)) return 0; @@ -1546,7 +1564,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, } dprintk("%s:Return lseg %p ref %d\n", - __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); + __func__, ret, ret ? 
refcount_read(&ret->pls_refcount) : 0); return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 87f144f14d1e..8d507c361d98 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -30,6 +30,7 @@ #ifndef FS_NFS_PNFS_H #define FS_NFS_PNFS_H +#include <linux/refcount.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> #include <linux/workqueue.h> @@ -54,7 +55,7 @@ struct nfs4_pnfs_ds { char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; struct nfs_client *ds_clp; - atomic_t ds_count; + refcount_t ds_count; unsigned long ds_state; #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ }; @@ -63,7 +64,7 @@ struct pnfs_layout_segment { struct list_head pls_list; struct list_head pls_lc_list; struct pnfs_layout_range pls_range; - atomic_t pls_refcount; + refcount_t pls_refcount; u32 pls_seq; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; @@ -179,7 +180,7 @@ struct pnfs_layoutdriver_type { }; struct pnfs_layout_hdr { - atomic_t plh_refcount; + refcount_t plh_refcount; atomic_t plh_outstanding; /* number of RPCs out */ struct list_head plh_layouts; /* other client layouts */ struct list_head plh_bulk_destroy; @@ -251,6 +252,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, bool is_recall); int pnfs_destroy_layouts_byclid(struct nfs_client *clp, bool is_recall); +bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode); void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -393,7 +395,7 @@ static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { - atomic_inc(&lseg->pls_refcount); + refcount_inc(&lseg->pls_refcount); smp_mb__after_atomic(); } return lseg; @@ -764,6 +766,11 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) { } +static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, + struct inode *inode) +{ + return false; +} #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 60da59be83b6..03aaa60c7768 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -338,7 +338,7 @@ print_ds(struct nfs4_pnfs_ds *ds) " client %p\n" " cl_exchange_flags %x\n", ds->ds_remotestr, - atomic_read(&ds->ds_count), ds->ds_clp, + refcount_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? 
ds->ds_clp->cl_exchange_flags : 0); } @@ -451,7 +451,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) { - if (atomic_dec_and_lock(&ds->ds_count, + if (refcount_dec_and_lock(&ds->ds_count, &nfs4_ds_cache_lock)) { list_del_init(&ds->ds_node); spin_unlock(&nfs4_ds_cache_lock); @@ -537,7 +537,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; - atomic_set(&ds->ds_count, 1); + refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); @@ -546,10 +546,10 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) } else { kfree(remotestr); kfree(ds); - atomic_inc(&tmp_ds->ds_count); + refcount_inc(&tmp_ds->ds_count); dprintk("%s data server %s found, inc'ed ds_count to %d\n", __func__, tmp_ds->ds_remotestr, - atomic_read(&tmp_ds->ds_count)); + refcount_read(&tmp_ds->ds_count)); ds = tmp_ds; } spin_unlock(&nfs4_ds_cache_lock); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c9d24bae3025..43cadb28db6e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1332,7 +1332,7 @@ static int nfs_parse_mount_options(char *raw, mnt->options |= NFS_OPTION_MIGRATION; break; case Opt_nomigration: - mnt->options &= NFS_OPTION_MIGRATION; + mnt->options &= ~NFS_OPTION_MIGRATION; break; /* @@ -1456,18 +1456,21 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; @@ -1494,11 +1497,13 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp6: mountfamily = AF_INET6; + /* fall through */ case Opt_xprt_udp: mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; + /* fall through */ case Opt_xprt_tcp: mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; break; @@ -1988,9 +1993,9 @@ static int nfs23_validate_mount_data(void *options, args->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: - data->namlen = 0; + data->namlen = 0; /* fall through */ case 2: - data->bsize = 0; + data->bsize = 0; /* fall through */ case 3: if (data->flags & NFS_MOUNT_VER3) goto out_no_v3; @@ -1998,11 +2003,14 @@ static int nfs23_validate_mount_data(void *options, memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); /* Turn off security negotiation */ extra_flags |= NFS_MOUNT_SECFLAVOUR; + /* fall through */ case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; + /* fall through */ case 5: memset(data->context, 0, sizeof(data->context)); + /* fall through */ case 6: if (data->flags & NFS_MOUNT_VER3) { if (data->root.size > NFS3_FHSIZE || data->root.size == 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index babebbccae2a..5b5f464f6f2a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -487,10 +487,8 @@ try_again: } ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } + if (ret < 0) + goto release_request; /* lock each request in the page group */ 
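
Note: the fs/nfs/write.c hunks around this point collapse three copies of "nfs_unlock_and_release_request(head); return ERR_PTR(ret);" into a single release_request label. A sketch of that single-exit unwind idiom follows; the foo_* helpers are hypothetical stand-ins, not the actual nfs_page API:

    #include <linux/err.h>
    #include <linux/types.h>

    struct foo_req;					/* hypothetical request type */
    int foo_group_lock(struct foo_req *head);
    int foo_lock_each_subrequest(struct foo_req *head);
    bool foo_group_bytes_consistent(struct foo_req *head);
    void foo_unlock_and_release(struct foo_req *head);

    static struct foo_req *foo_lock_group(struct foo_req *head)
    {
    	int ret;

    	ret = foo_group_lock(head);
    	if (ret < 0)
    		goto release_request;

    	ret = foo_lock_each_subrequest(head);
    	if (ret < 0)
    		goto release_request;

    	if (!foo_group_bytes_consistent(head)) {
    		ret = -EIO;		/* error chosen here, released in one place */
    		goto release_request;
    	}

    	return head;	/* success: caller keeps the reference and the lock */

    release_request:
    	foo_unlock_and_release(head);
    	return ERR_PTR(ret);
    }
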
total_bytes = head->wb_bytes; @@ -515,8 +513,7 @@ try_again: if (ret < 0) { nfs_unroll_locks(inode, head, subreq); nfs_release_request(subreq); - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + goto release_request; } } /* @@ -532,8 +529,8 @@ try_again: nfs_page_group_unlock(head); nfs_unroll_locks(inode, head, subreq); nfs_unlock_and_release_request(subreq); - nfs_unlock_and_release_request(head); - return ERR_PTR(-EIO); + ret = -EIO; + goto release_request; } } @@ -576,6 +573,10 @@ try_again: /* still holds ref on head from nfs_page_find_head_request * and still has lock on head from lock loop */ return head; + +release_request: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); } static void nfs_write_error_remove_page(struct nfs_page *req) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 004af348fb80..f44d5eb74fcc 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -275,12 +275,6 @@ struct orangefs_kiocb_s { /* orangefs kernel operation type */ struct orangefs_kernel_op_s *op; - /* The user space buffers from/to which I/O is being staged */ - struct iovec *iov; - - /* number of elements in the iovector */ - unsigned long nr_segs; - /* set to indicate the type of the operation */ int rw; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index c441f9387a1b..eb3b8d39fb61 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -22,7 +22,6 @@ #include <linux/ratelimit.h> #include <linux/exportfs.h> #include "overlayfs.h" -#include "ovl_entry.h" #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) @@ -486,6 +485,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) { struct inode *udir = c->destdir->d_inode; + struct inode *inode; struct dentry *newdentry = NULL; struct dentry *temp = NULL; int err; @@ -508,7 +508,11 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) if (err) goto out_cleanup; - ovl_inode_update(d_inode(c->dentry), newdentry); + inode = d_inode(c->dentry); + ovl_inode_update(inode, newdentry); + if (S_ISDIR(inode->i_mode)) + ovl_set_flag(OVL_WHITEOUTS, inode); + out: dput(temp); return err; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index cc961a3bd3bd..e13921824c70 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -181,6 +181,11 @@ static bool ovl_type_origin(struct dentry *dentry) return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); } +static bool ovl_may_have_whiteouts(struct dentry *dentry) +{ + return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -300,7 +305,6 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) { int err; struct dentry *ret = NULL; - enum ovl_path_type type = ovl_path_type(dentry); LIST_HEAD(list); err = ovl_check_empty_dir(dentry, &list); @@ -313,13 +317,13 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) * When removing an empty opaque directory, then it makes no sense to * replace it with an exact replica of itself. * - * If no upperdentry then skip clearing whiteouts. + * If upperdentry has whiteouts, clear them. * * Can race with copy-up, since we don't hold the upperdir mutex. * Doesn't matter, since copy-up can't create a non-empty directory * from an empty one. 
*/ - if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type)) + if (!list_empty(&list)) ret = ovl_clear_empty(dentry, &list); out_free: @@ -698,8 +702,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - /* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */ - if (is_dir && ovl_dentry_get_redirect(dentry)) { + /* Redirect/origin dir can be !ovl_lower_positive && not clean */ + if (is_dir && (ovl_dentry_get_redirect(dentry) || + ovl_may_have_whiteouts(dentry))) { opaquedir = ovl_check_empty_and_clear(dentry); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) @@ -946,7 +951,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_cred = ovl_override_creds(old->d_sb); - if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) { + if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) || + ovl_may_have_whiteouts(new))) { opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { @@ -1069,9 +1075,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, drop_nlink(d_inode(new)); } - ovl_dentry_version_inc(old->d_parent, - !overwrite && ovl_type_origin(new)); - ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old)); + ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) || + (!overwrite && ovl_type_origin(new))); + ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) || + (d_inode(new) && ovl_type_origin(new))); out_dput: dput(newdentry); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 321511ed8c42..00b6b294272a 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -15,6 +15,14 @@ #include <linux/ratelimit.h> #include "overlayfs.h" + +static dev_t ovl_get_pseudo_dev(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->lowerstack[0].layer->pseudo_dev; +} + int ovl_setattr(struct dentry *dentry, struct iattr *attr) { int err; @@ -66,6 +74,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, struct path realpath; const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); + bool samefs = ovl_same_sb(dentry->d_sb); int err; type = ovl_path_real(dentry, &realpath); @@ -75,16 +84,13 @@ int ovl_getattr(const struct path *path, struct kstat *stat, goto out; /* - * When all layers are on the same fs, all real inode number are - * unique, so we use the overlay st_dev, which is friendly to du -x. - * - * We also use st_ino of the copy up origin, if we know it. - * This guaranties constant st_dev/st_ino across copy up. + * For non-dir or same fs, we use st_ino of the copy up origin, if we + * know it. This guaranties constant st_dev/st_ino across copy up. * * If filesystem supports NFS export ops, this also guaranties * persistent st_ino across mount cycle. */ - if (ovl_same_sb(dentry->d_sb)) { + if (!is_dir || samefs) { if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; u32 lowermask = STATX_INO | (!is_dir ? 
STATX_NLINK : 0); @@ -95,7 +101,6 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (err) goto out; - WARN_ON_ONCE(stat->dev != lowerstat.dev); /* * Lower hardlinks may be broken on copy up to different * upper files, so we cannot use the lower origin st_ino @@ -107,17 +112,36 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (is_dir || lowerstat.nlink == 1 || ovl_test_flag(OVL_INDEX, d_inode(dentry))) stat->ino = lowerstat.ino; + + if (samefs) + WARN_ON_ONCE(stat->dev != lowerstat.dev); + else + stat->dev = ovl_get_pseudo_dev(dentry); } - stat->dev = dentry->d_sb->s_dev; - } else if (is_dir) { + if (samefs) { + /* + * When all layers are on the same fs, all real inode + * number are unique, so we use the overlay st_dev, + * which is friendly to du -x. + */ + stat->dev = dentry->d_sb->s_dev; + } else if (!OVL_TYPE_UPPER(type)) { + /* + * For non-samefs setup, to make sure that st_dev/st_ino + * pair is unique across the system, we use a unique + * anonymous st_dev for lower layer inode. + */ + stat->dev = ovl_get_pseudo_dev(dentry); + } + } else { /* - * If not all layers are on the same fs the pair {real st_ino; - * overlay st_dev} is not unique, so use the non persistent - * overlay st_ino. - * * Always use the overlay st_dev for directories, so 'find * -xdev' will scan the entire overlay mount and won't cross the * overlay mount boundaries. + * + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino for directories. */ stat->dev = dentry->d_sb->s_dev; stat->ino = dentry->d_inode->i_ino; @@ -409,6 +433,7 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) #ifdef CONFIG_LOCKDEP static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; + static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; int depth = inode->i_sb->s_stack_depth - 1; @@ -419,6 +444,8 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); else lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); + + lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); #endif } @@ -657,6 +684,16 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, if (upperdentry && ovl_is_impuredir(upperdentry)) ovl_set_flag(OVL_IMPURE, inode); + /* Check for non-merge dir that may have whiteouts */ + if (S_ISDIR(realinode->i_mode)) { + struct ovl_entry *oe = dentry->d_fsdata; + + if (((upperdentry && lowerdentry) || oe->numlower > 1) || + ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { + ovl_set_flag(OVL_WHITEOUTS, inode); + } + } + if (inode->i_state & I_NEW) unlock_new_inode(inode); out: diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index a12dc10bf726..625ed8066570 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -15,7 +15,6 @@ #include <linux/mount.h> #include <linux/exportfs.h> #include "overlayfs.h" -#include "ovl_entry.h" struct ovl_lookup_data { struct qstr name; @@ -286,16 +285,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, static int ovl_check_origin(struct dentry *upperdentry, - struct path *lowerstack, unsigned int numlower, - struct path **stackp, unsigned int *ctrp) + struct ovl_path *lower, unsigned int numlower, + struct ovl_path **stackp, unsigned int *ctrp) { struct vfsmount *mnt; struct dentry *origin = NULL; 
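
Note: the overlayfs hunks in this region replace bare struct vfsmount pointers with struct ovl_layer { mnt, pseudo_dev }, so that on a non-samefs mount each lower layer can report a private anonymous st_dev (see ovl_get_pseudo_dev() above and ovl_get_lower_layers() further down). A sketch of reserving and releasing such a pseudo device per layer; the struct here is a simplified stand-in for the one in ovl_entry.h:

    #include <linux/fs.h>	/* get_anon_bdev(), free_anon_bdev() */
    #include <linux/mount.h>

    struct layer {
    	struct vfsmount *mnt;
    	dev_t pseudo_dev;	/* anonymous dev_t reported as st_dev for lower inodes */
    };

    static int layer_init(struct layer *l, struct vfsmount *mnt)
    {
    	int err;

    	/* Drawn from the same pool as anonymous superblock devices, so
    	 * the number cannot collide with a real mounted block device. */
    	err = get_anon_bdev(&l->pseudo_dev);
    	if (err)
    		return err;
    	l->mnt = mnt;
    	return 0;
    }

    static void layer_release(struct layer *l)
    {
    	mntput(l->mnt);
    	free_anon_bdev(l->pseudo_dev);
    }
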
int i; - for (i = 0; i < numlower; i++) { - mnt = lowerstack[i].mnt; + mnt = lower[i].layer->mnt; origin = ovl_get_origin(upperdentry, mnt); if (IS_ERR(origin)) return PTR_ERR(origin); @@ -309,12 +307,12 @@ static int ovl_check_origin(struct dentry *upperdentry, BUG_ON(*ctrp); if (!*stackp) - *stackp = kmalloc(sizeof(struct path), GFP_KERNEL); + *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); if (!*stackp) { dput(origin); return -ENOMEM; } - **stackp = (struct path) { .dentry = origin, .mnt = mnt }; + **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; *ctrp = 1; return 0; @@ -350,8 +348,8 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) * * Return 0 on match, -ESTALE on mismatch, < 0 on error. */ -int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, - struct dentry *origin, bool is_upper, bool set) +int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, + bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; @@ -384,13 +382,13 @@ fail: * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. */ -int ovl_verify_index(struct dentry *index, struct path *lowerstack, +int ovl_verify_index(struct dentry *index, struct ovl_path *lower, unsigned int numlower) { struct ovl_fh *fh = NULL; size_t len; - struct path origin = { }; - struct path *stack = &origin; + struct ovl_path origin = { }; + struct ovl_path *stack = &origin; unsigned int ctr = 0; int err; @@ -429,7 +427,7 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, if (err) goto fail; - err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); + err = ovl_check_origin(index, lower, numlower, &stack, &ctr); if (!err && !ctr) err = -ESTALE; if (err) @@ -568,11 +566,24 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) idx++; } BUG_ON(idx > oe->numlower); - *path = oe->lowerstack[idx - 1]; + path->dentry = oe->lowerstack[idx - 1].dentry; + path->mnt = oe->lowerstack[idx - 1].layer->mnt; return (idx < oe->numlower) ? 
idx + 1 : -1; } +static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) +{ + int i; + + for (i = 0; i < ofs->numlower; i++) { + if (ofs->lower_layers[i].mnt == path->layer->mnt) + break; + } + + return i; +} + struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -581,7 +592,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct ovl_entry *poe = dentry->d_parent->d_fsdata; struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; - struct path *stack = NULL; + struct ovl_path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; struct dentry *index = NULL; unsigned int ctr = 0; @@ -630,7 +641,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = ovl_check_origin(upperdentry, roe->lowerstack, roe->numlower, &stack, &ctr); if (err) - goto out; + goto out_put_upper; } if (d.redirect) { @@ -646,17 +657,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!d.stop && poe->numlower) { err = -ENOMEM; - stack = kcalloc(ofs->numlower, sizeof(struct path), + stack = kcalloc(ofs->numlower, sizeof(struct ovl_path), GFP_KERNEL); if (!stack) goto out_put_upper; } for (i = 0; !d.stop && i < poe->numlower; i++) { - struct path lowerpath = poe->lowerstack[i]; + struct ovl_path lower = poe->lowerstack[i]; d.last = i == poe->numlower - 1; - err = ovl_lookup_layer(lowerpath.dentry, &d, &this); + err = ovl_lookup_layer(lower.dentry, &d, &this); if (err) goto out_put; @@ -664,7 +675,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, continue; stack[ctr].dentry = this; - stack[ctr].mnt = lowerpath.mnt; + stack[ctr].layer = lower.layer; ctr++; if (d.stop) @@ -674,10 +685,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, poe = roe; /* Find the current layer on the root dentry */ - for (i = 0; i < poe->numlower; i++) - if (poe->lowerstack[i].mnt == lowerpath.mnt) - break; - if (WARN_ON(i == poe->numlower)) + i = ovl_find_layer(ofs, &lower); + if (WARN_ON(i == ofs->numlower)) break; } } @@ -700,7 +709,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out_put; oe->opaque = upperopaque; - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); dentry->d_fsdata = oe; if (upperdentry) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d9a0edd4e57e..13eab09a6b6f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/uuid.h> +#include "ovl_entry.h" enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), @@ -28,7 +29,10 @@ enum ovl_path_type { #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" enum ovl_flag { + /* Pure upper dir that may contain non pure upper entries */ OVL_IMPURE, + /* Non-merge dir that may contain whiteout entries */ + OVL_WHITEOUTS, OVL_INDEX, }; @@ -223,6 +227,7 @@ bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry); void ovl_copy_up_end(struct dentry *dentry); +bool ovl_check_origin_xattr(struct dentry *dentry); bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, const char *name, const void *value, size_t size, @@ -244,9 +249,9 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ -int 
ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, - struct dentry *origin, bool is_upper, bool set); -int ovl_verify_index(struct dentry *index, struct path *lowerstack, +int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, + bool is_upper, bool set); +int ovl_verify_index(struct dentry *index, struct ovl_path *lower, unsigned int numlower); int ovl_get_index_name(struct dentry *origin, struct qstr *name); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); @@ -263,7 +268,7 @@ int ovl_check_d_type_supported(struct path *realpath); void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct path *lowerstack, unsigned int numlower); + struct ovl_path *lower, unsigned int numlower); /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 36b49bd09264..752bab645879 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -17,11 +17,21 @@ struct ovl_config { bool index; }; +struct ovl_layer { + struct vfsmount *mnt; + dev_t pseudo_dev; +}; + +struct ovl_path { + struct ovl_layer *layer; + struct dentry *dentry; +}; + /* private information held for overlayfs's superblock */ struct ovl_fs { struct vfsmount *upper_mnt; unsigned numlower; - struct vfsmount **lower_mnt; + struct ovl_layer *lower_layers; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; /* workdir is the 'work' directory under workbasedir */ @@ -52,7 +62,7 @@ struct ovl_entry { struct rcu_head rcu; }; unsigned numlower; - struct path lowerstack[]; + struct ovl_path lowerstack[]; }; struct ovl_entry *ovl_alloc_entry(unsigned int numlower); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index c310e3ff7f3f..0daa4354fec4 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -26,6 +26,7 @@ struct ovl_cache_entry { struct list_head l_node; struct rb_node node; struct ovl_cache_entry *next_maybe_whiteout; + bool is_upper; bool is_whiteout; char name[]; }; @@ -158,6 +159,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, /* Defer setting d_ino for upper entry to ovl_iterate() */ if (ovl_calc_d_ino(rdd, p)) p->ino = 0; + p->is_upper = rdd->is_upper; p->is_whiteout = false; if (d_type == DT_CHR) { @@ -316,21 +318,37 @@ static inline int ovl_dir_read(struct path *realpath, return err; } +/* + * Can we iterate real dir directly? + * + * Non-merge dir may contain whiteouts from a time it was a merge upper, before + * lower dir was removed under it and possibly before it was rotated from upper + * to lower layer. 
+ */ +static bool ovl_dir_is_real(struct dentry *dir) +{ + return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir)); +} + static void ovl_dir_reset(struct file *file) { struct ovl_dir_file *od = file->private_data; struct ovl_dir_cache *cache = od->cache; struct dentry *dentry = file->f_path.dentry; - enum ovl_path_type type = ovl_path_type(dentry); + bool is_real; if (cache && ovl_dentry_version_get(dentry) != cache->version) { ovl_cache_put(od, dentry); od->cache = NULL; od->cursor = NULL; } - WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); - if (od->is_real && OVL_TYPE_MERGE(type)) + is_real = ovl_dir_is_real(dentry); + if (od->is_real != is_real) { + /* is_real can only become false when dir is copied up */ + if (WARN_ON(is_real)) + return; od->is_real = false; + } } static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, @@ -816,7 +834,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file) return PTR_ERR(realfile); } od->realfile = realfile; - od->is_real = !OVL_TYPE_MERGE(type); + od->is_real = ovl_dir_is_real(file->f_path.dentry); od->is_upper = OVL_TYPE_UPPER(type); file->private_data = od; @@ -835,7 +853,7 @@ const struct file_operations ovl_dir_operations = { int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) { int err; - struct ovl_cache_entry *p; + struct ovl_cache_entry *p, *n; struct rb_root root = RB_ROOT; err = ovl_dir_read_merged(dentry, list, &root); @@ -844,18 +862,29 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) err = 0; - list_for_each_entry(p, list, l_node) { - if (p->is_whiteout) - continue; + list_for_each_entry_safe(p, n, list, l_node) { + /* + * Select whiteouts in upperdir, they should + * be cleared when deleting this directory. + */ + if (p->is_whiteout) { + if (p->is_upper) + continue; + goto del_entry; + } if (p->name[0] == '.') { if (p->len == 1) - continue; + goto del_entry; if (p->len == 2 && p->name[1] == '.') - continue; + goto del_entry; } err = -ENOTEMPTY; break; + +del_entry: + list_del(&p->l_node); + kfree(p); } return err; @@ -869,7 +898,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) list_for_each_entry(p, list, l_node) { struct dentry *dentry; - if (!p->is_whiteout) + if (WARN_ON(!p->is_whiteout || !p->is_upper)) continue; dentry = lookup_one_len(p->name, upper, p->len); @@ -985,7 +1014,7 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, } int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct path *lowerstack, unsigned int numlower) + struct ovl_path *lower, unsigned int numlower) { int err; struct dentry *index = NULL; @@ -1020,7 +1049,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, index = NULL; break; } - err = ovl_verify_index(index, lowerstack, numlower); + err = ovl_verify_index(index, lower, numlower); /* Cleanup stale and orphan index entries */ if (err && (err == -ESTALE || err == -ENOENT)) err = ovl_cleanup(dir, index); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f5738e96a052..be03578181d2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -18,7 +18,6 @@ #include <linux/seq_file.h> #include <linux/posix_acl_xattr.h> #include "overlayfs.h" -#include "ovl_entry.h" MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); @@ -39,15 +38,20 @@ module_param_named(index, ovl_index_def, bool, 0644); MODULE_PARM_DESC(ovl_index_def, "Default to on or off for the inodes index feature"); +static void 
ovl_entry_stack_free(struct ovl_entry *oe) +{ + unsigned int i; + + for (i = 0; i < oe->numlower; i++) + dput(oe->lowerstack[i].dentry); +} + static void ovl_dentry_release(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; if (oe) { - unsigned int i; - - for (i = 0; i < oe->numlower; i++) - dput(oe->lowerstack[i].dentry); + ovl_entry_stack_free(oe); kfree_rcu(oe, rcu); } } @@ -207,39 +211,48 @@ static void ovl_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, ovl_i_callback); } -static void ovl_put_super(struct super_block *sb) +static void ovl_free_fs(struct ovl_fs *ofs) { - struct ovl_fs *ufs = sb->s_fs_info; unsigned i; - dput(ufs->indexdir); - dput(ufs->workdir); - if (ufs->workdir_locked) - ovl_inuse_unlock(ufs->workbasedir); - dput(ufs->workbasedir); - if (ufs->upper_mnt && ufs->upperdir_locked) - ovl_inuse_unlock(ufs->upper_mnt->mnt_root); - mntput(ufs->upper_mnt); - for (i = 0; i < ufs->numlower; i++) - mntput(ufs->lower_mnt[i]); - kfree(ufs->lower_mnt); - - kfree(ufs->config.lowerdir); - kfree(ufs->config.upperdir); - kfree(ufs->config.workdir); - put_cred(ufs->creator_cred); - kfree(ufs); + dput(ofs->indexdir); + dput(ofs->workdir); + if (ofs->workdir_locked) + ovl_inuse_unlock(ofs->workbasedir); + dput(ofs->workbasedir); + if (ofs->upperdir_locked) + ovl_inuse_unlock(ofs->upper_mnt->mnt_root); + mntput(ofs->upper_mnt); + for (i = 0; i < ofs->numlower; i++) { + mntput(ofs->lower_layers[i].mnt); + free_anon_bdev(ofs->lower_layers[i].pseudo_dev); + } + kfree(ofs->lower_layers); + + kfree(ofs->config.lowerdir); + kfree(ofs->config.upperdir); + kfree(ofs->config.workdir); + if (ofs->creator_cred) + put_cred(ofs->creator_cred); + kfree(ofs); +} + +static void ovl_put_super(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + ovl_free_fs(ofs); } static int ovl_sync_fs(struct super_block *sb, int wait) { - struct ovl_fs *ufs = sb->s_fs_info; + struct ovl_fs *ofs = sb->s_fs_info; struct super_block *upper_sb; int ret; - if (!ufs->upper_mnt) + if (!ofs->upper_mnt) return 0; - upper_sb = ufs->upper_mnt->mnt_sb; + upper_sb = ofs->upper_mnt->mnt_sb; if (!upper_sb->s_op->sync_fs) return 0; @@ -277,9 +290,9 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) } /* Will this overlay be forced to mount/remount ro? */ -static bool ovl_force_readonly(struct ovl_fs *ufs) +static bool ovl_force_readonly(struct ovl_fs *ofs) { - return (!ufs->upper_mnt || !ufs->workdir); + return (!ofs->upper_mnt || !ofs->workdir); } /** @@ -291,29 +304,29 @@ static bool ovl_force_readonly(struct ovl_fs *ufs) static int ovl_show_options(struct seq_file *m, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; - struct ovl_fs *ufs = sb->s_fs_info; + struct ovl_fs *ofs = sb->s_fs_info; - seq_show_option(m, "lowerdir", ufs->config.lowerdir); - if (ufs->config.upperdir) { - seq_show_option(m, "upperdir", ufs->config.upperdir); - seq_show_option(m, "workdir", ufs->config.workdir); + seq_show_option(m, "lowerdir", ofs->config.lowerdir); + if (ofs->config.upperdir) { + seq_show_option(m, "upperdir", ofs->config.upperdir); + seq_show_option(m, "workdir", ofs->config.workdir); } - if (ufs->config.default_permissions) + if (ofs->config.default_permissions) seq_puts(m, ",default_permissions"); - if (ufs->config.redirect_dir != ovl_redirect_dir_def) + if (ofs->config.redirect_dir != ovl_redirect_dir_def) seq_printf(m, ",redirect_dir=%s", - ufs->config.redirect_dir ? "on" : "off"); - if (ufs->config.index != ovl_index_def) + ofs->config.redirect_dir ? 
"on" : "off"); + if (ofs->config.index != ovl_index_def) seq_printf(m, ",index=%s", - ufs->config.index ? "on" : "off"); + ofs->config.index ? "on" : "off"); return 0; } static int ovl_remount(struct super_block *sb, int *flags, char *data) { - struct ovl_fs *ufs = sb->s_fs_info; + struct ovl_fs *ofs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) + if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs)) return -EROFS; return 0; @@ -451,13 +464,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) #define OVL_WORKDIR_NAME "work" #define OVL_INDEXDIR_NAME "index" -static struct dentry *ovl_workdir_create(struct super_block *sb, - struct ovl_fs *ufs, - struct dentry *dentry, +static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, const char *name, bool persist) { - struct inode *dir = dentry->d_inode; - struct vfsmount *mnt = ufs->upper_mnt; + struct inode *dir = ofs->workbasedir->d_inode; + struct vfsmount *mnt = ofs->upper_mnt; struct dentry *work; int err; bool retried = false; @@ -471,7 +482,7 @@ static struct dentry *ovl_workdir_create(struct super_block *sb, locked = true; retry: - work = lookup_one_len(name, dentry, strlen(name)); + work = lookup_one_len(name, ofs->workbasedir, strlen(name)); if (!IS_ERR(work)) { struct iattr attr = { @@ -541,8 +552,7 @@ out_dput: dput(work); out_err: pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", - ufs->config.workdir, name, -err); - sb->s_flags |= MS_RDONLY; + ofs->config.workdir, name, -err); work = NULL; goto out_unlock; } @@ -585,7 +595,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) return 0; out_put: - path_put(path); + path_put_init(path); out: return err; } @@ -603,7 +613,7 @@ static int ovl_mount_dir(const char *name, struct path *path) if (ovl_dentry_remote(path->dentry)) { pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", tmp); - path_put(path); + path_put_init(path); err = -EINVAL; } kfree(tmp); @@ -655,7 +665,7 @@ static int ovl_lower_dir(const char *name, struct path *path, return 0; out_put: - path_put(path); + path_put_init(path); out: return err; } @@ -826,129 +836,269 @@ static const struct xattr_handler *ovl_xattr_handlers[] = { NULL }; -static int ovl_fill_super(struct super_block *sb, void *data, int silent) +static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath) { - struct path upperpath = { }; - struct path workpath = { }; - struct dentry *root_dentry; - struct ovl_entry *oe; - struct ovl_fs *ufs; - struct path *stack = NULL; - char *lowertmp; - char *lower; - unsigned int numlower; - unsigned int stacklen = 0; - unsigned int i; - bool remote = false; - struct cred *cred; + struct vfsmount *upper_mnt; int err; - err = -ENOMEM; - ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); - if (!ufs) + err = ovl_mount_dir(ofs->config.upperdir, upperpath); + if (err) goto out; - ufs->config.redirect_dir = ovl_redirect_dir_def; - ufs->config.index = ovl_index_def; - err = ovl_parse_opt((char *) data, &ufs->config); + /* Upper fs should not be r/o */ + if (sb_rdonly(upperpath->mnt->mnt_sb)) { + pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); + err = -EINVAL; + goto out; + } + + err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); if (err) - goto out_free_config; + goto out; + + err = -EBUSY; + if (ovl_inuse_trylock(upperpath->dentry)) { + ofs->upperdir_locked = true; + } else if (ofs->config.index) { + pr_err("overlayfs: upperdir is in-use by another mount, mount with 
'-o index=off' to override exclusive upperdir protection.\n"); + goto out; + } else { + pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); + } + + upper_mnt = clone_private_mount(upperpath); + err = PTR_ERR(upper_mnt); + if (IS_ERR(upper_mnt)) { + pr_err("overlayfs: failed to clone upperpath\n"); + goto out; + } + + /* Don't inherit atime flags */ + upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); + ofs->upper_mnt = upper_mnt; + err = 0; +out: + return err; +} + +static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) +{ + struct dentry *temp; + int err; + + ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); + if (!ofs->workdir) + return 0; + + /* + * Upper should support d_type, else whiteouts are visible. Given + * workdir and upper are on same fs, we can do iterate_dir() on + * workdir. This check requires successful creation of workdir in + * previous step. + */ + err = ovl_check_d_type_supported(workpath); + if (err < 0) + return err; + + /* + * We allowed this configuration and don't want to break users over + * kernel upgrade. So warn instead of erroring out. + */ + if (!err) + pr_warn("overlayfs: upper fs needs to support d_type.\n"); + + /* Check if upper/work fs supports O_TMPFILE */ + temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); + ofs->tmpfile = !IS_ERR(temp); + if (ofs->tmpfile) + dput(temp); + else + pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* + * Check if upper/work fs supports trusted.overlay.* xattr + */ + err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); + if (err) { + ofs->noxattr = true; + pr_warn("overlayfs: upper fs does not support xattr.\n"); + } else { + vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); + } + + /* Check if upper/work fs supports file handles */ + if (ofs->config.index && + !ovl_can_decode_fh(ofs->workdir->d_sb)) { + ofs->config.index = false; + pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); + } + + return 0; +} + +static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) +{ + int err; + struct path workpath = { }; + + err = ovl_mount_dir(ofs->config.workdir, &workpath); + if (err) + goto out; err = -EINVAL; - if (!ufs->config.lowerdir) { - if (!silent) - pr_err("overlayfs: missing 'lowerdir'\n"); - goto out_free_config; + if (upperpath->mnt != workpath.mnt) { + pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); + goto out; + } + if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { + pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); + goto out; } - sb->s_stack_depth = 0; - sb->s_maxbytes = MAX_LFS_FILESIZE; - if (ufs->config.upperdir) { - if (!ufs->config.workdir) { - pr_err("overlayfs: missing 'workdir'\n"); - goto out_free_config; - } + err = -EBUSY; + if (ovl_inuse_trylock(workpath.dentry)) { + ofs->workdir_locked = true; + } else if (ofs->config.index) { + pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); + goto out; + } else { + pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); + } - err = ovl_mount_dir(ufs->config.upperdir, &upperpath); - if (err) - goto out_free_config; + ofs->workbasedir = dget(workpath.dentry); + err = ovl_make_workdir(ofs, &workpath); + if (err) + goto out; - /* Upper fs should not be r/o 
*/ - if (sb_rdonly(upperpath.mnt->mnt_sb)) { - pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); - err = -EINVAL; - goto out_put_upperpath; - } + err = 0; +out: + path_put(&workpath); - err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); - if (err) - goto out_put_upperpath; - - err = -EBUSY; - if (ovl_inuse_trylock(upperpath.dentry)) { - ufs->upperdir_locked = true; - } else if (ufs->config.index) { - pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); - goto out_put_upperpath; - } else { - pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); - } + return err; +} + +static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, + struct path *upperpath) +{ + int err; - err = ovl_mount_dir(ufs->config.workdir, &workpath); + /* Verify lower root is upper root origin */ + err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, + false, true); + if (err) { + pr_err("overlayfs: failed to verify upper root origin\n"); + goto out; + } + + ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); + if (ofs->indexdir) { + /* Verify upper root is index dir origin */ + err = ovl_verify_origin(ofs->indexdir, upperpath->dentry, + true, true); if (err) - goto out_unlock_upperdentry; + pr_err("overlayfs: failed to verify index dir origin\n"); - err = -EINVAL; - if (upperpath.mnt != workpath.mnt) { - pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); - goto out_put_workpath; - } - if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { - pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); - goto out_put_workpath; + /* Cleanup bad/stale/orphan index entries */ + if (!err) + err = ovl_indexdir_cleanup(ofs->indexdir, + ofs->upper_mnt, + oe->lowerstack, + oe->numlower); + } + if (err || !ofs->indexdir) + pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); + +out: + return err; +} + +static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, + unsigned int numlower) +{ + int err; + unsigned int i; + + err = -ENOMEM; + ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), + GFP_KERNEL); + if (ofs->lower_layers == NULL) + goto out; + for (i = 0; i < numlower; i++) { + struct vfsmount *mnt; + dev_t dev; + + err = get_anon_bdev(&dev); + if (err) { + pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); + goto out; } - err = -EBUSY; - if (ovl_inuse_trylock(workpath.dentry)) { - ufs->workdir_locked = true; - } else if (ufs->config.index) { - pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); - goto out_put_workpath; - } else { - pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); + mnt = clone_private_mount(&stack[i]); + err = PTR_ERR(mnt); + if (IS_ERR(mnt)) { + pr_err("overlayfs: failed to clone lowerpath\n"); + free_anon_bdev(dev); + goto out; } + /* + * Make lower layers R/O. That way fchmod/fchown on lower file + * will fail instead of modifying lower fs. 
+ */ + mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; - ufs->workbasedir = workpath.dentry; - sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; + ofs->lower_layers[ofs->numlower].mnt = mnt; + ofs->lower_layers[ofs->numlower].pseudo_dev = dev; + ofs->numlower++; + + /* Check if all lower layers are on same sb */ + if (i == 0) + ofs->same_sb = mnt->mnt_sb; + else if (ofs->same_sb != mnt->mnt_sb) + ofs->same_sb = NULL; } + err = 0; +out: + return err; +} + +static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, + struct ovl_fs *ofs) +{ + int err; + char *lowertmp, *lower; + struct path *stack = NULL; + unsigned int stacklen, numlower = 0, i; + bool remote = false; + struct ovl_entry *oe; + err = -ENOMEM; - lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); + lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL); if (!lowertmp) - goto out_unlock_workdentry; + goto out_err; err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); if (stacklen > OVL_MAX_STACK) { pr_err("overlayfs: too many lower directories, limit is %d\n", OVL_MAX_STACK); - goto out_free_lowertmp; - } else if (!ufs->config.upperdir && stacklen == 1) { + goto out_err; + } else if (!ofs->config.upperdir && stacklen == 1) { pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); - goto out_free_lowertmp; + goto out_err; } err = -ENOMEM; stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); if (!stack) - goto out_free_lowertmp; + goto out_err; err = -EINVAL; lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { - err = ovl_lower_dir(lower, &stack[numlower], ufs, + err = ovl_lower_dir(lower, &stack[numlower], ofs, &sb->s_stack_depth, &remote); if (err) - goto out_put_lowerpath; + goto out_err; lower = strchr(lower, '\0') + 1; } @@ -957,190 +1107,144 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth++; if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { pr_err("overlayfs: maximum fs stacking depth exceeded\n"); - goto out_put_lowerpath; + goto out_err; } - if (ufs->config.upperdir) { - ufs->upper_mnt = clone_private_mount(&upperpath); - err = PTR_ERR(ufs->upper_mnt); - if (IS_ERR(ufs->upper_mnt)) { - pr_err("overlayfs: failed to clone upperpath\n"); - goto out_put_lowerpath; - } + err = ovl_get_lower_layers(ofs, stack, numlower); + if (err) + goto out_err; + + err = -ENOMEM; + oe = ovl_alloc_entry(numlower); + if (!oe) + goto out_err; + + for (i = 0; i < numlower; i++) { + oe->lowerstack[i].dentry = dget(stack[i].dentry); + oe->lowerstack[i].layer = &ofs->lower_layers[i]; + } - /* Don't inherit atime flags */ - ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); + if (remote) + sb->s_d_op = &ovl_reval_dentry_operations; + else + sb->s_d_op = &ovl_dentry_operations; - sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; +out: + for (i = 0; i < numlower; i++) + path_put(&stack[i]); + kfree(stack); + kfree(lowertmp); - ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, - OVL_WORKDIR_NAME, false); - /* - * Upper should support d_type, else whiteouts are visible. - * Given workdir and upper are on same fs, we can do - * iterate_dir() on workdir. This check requires successful - * creation of workdir in previous step. - */ - if (ufs->workdir) { - struct dentry *temp; - - err = ovl_check_d_type_supported(&workpath); - if (err < 0) - goto out_put_workdir; - - /* - * We allowed this configuration and don't want to - * break users over kernel upgrade. So warn instead - * of erroring out. 
- */ - if (!err) - pr_warn("overlayfs: upper fs needs to support d_type.\n"); - - /* Check if upper/work fs supports O_TMPFILE */ - temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); - ufs->tmpfile = !IS_ERR(temp); - if (ufs->tmpfile) - dput(temp); - else - pr_warn("overlayfs: upper fs does not support tmpfile.\n"); - - /* - * Check if upper/work fs supports trusted.overlay.* - * xattr - */ - err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, - "0", 1, 0); - if (err) { - ufs->noxattr = true; - pr_warn("overlayfs: upper fs does not support xattr.\n"); - } else { - vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); - } + return oe; - /* Check if upper/work fs supports file handles */ - if (ufs->config.index && - !ovl_can_decode_fh(ufs->workdir->d_sb)) { - ufs->config.index = false; - pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); - } - } - } +out_err: + oe = ERR_PTR(err); + goto out; +} + +static int ovl_fill_super(struct super_block *sb, void *data, int silent) +{ + struct path upperpath = { }; + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ofs; + struct cred *cred; + int err; err = -ENOMEM; - ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); - if (ufs->lower_mnt == NULL) - goto out_put_workdir; - for (i = 0; i < numlower; i++) { - struct vfsmount *mnt = clone_private_mount(&stack[i]); + ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); + if (!ofs) + goto out; - err = PTR_ERR(mnt); - if (IS_ERR(mnt)) { - pr_err("overlayfs: failed to clone lowerpath\n"); - goto out_put_lower_mnt; - } - /* - * Make lower_mnt R/O. That way fchmod/fchown on lower file - * will fail instead of modifying lower fs. - */ - mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; + ofs->creator_cred = cred = prepare_creds(); + if (!cred) + goto out_err; - ufs->lower_mnt[ufs->numlower] = mnt; - ufs->numlower++; + ofs->config.redirect_dir = ovl_redirect_dir_def; + ofs->config.index = ovl_index_def; + err = ovl_parse_opt((char *) data, &ofs->config); + if (err) + goto out_err; - /* Check if all lower layers are on same sb */ - if (i == 0) - ufs->same_sb = mnt->mnt_sb; - else if (ufs->same_sb != mnt->mnt_sb) - ufs->same_sb = NULL; + err = -EINVAL; + if (!ofs->config.lowerdir) { + if (!silent) + pr_err("overlayfs: missing 'lowerdir'\n"); + goto out_err; } - /* If the upper fs is nonexistent, we mark overlayfs r/o too */ - if (!ufs->upper_mnt) - sb->s_flags |= MS_RDONLY; - else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) - ufs->same_sb = NULL; - - if (!(ovl_force_readonly(ufs)) && ufs->config.index) { - /* Verify lower root is upper root origin */ - err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], - stack[0].dentry, false, true); - if (err) { - pr_err("overlayfs: failed to verify upper root origin\n"); - goto out_put_lower_mnt; + sb->s_stack_depth = 0; + sb->s_maxbytes = MAX_LFS_FILESIZE; + if (ofs->config.upperdir) { + if (!ofs->config.workdir) { + pr_err("overlayfs: missing 'workdir'\n"); + goto out_err; } - ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, - OVL_INDEXDIR_NAME, true); - if (ufs->indexdir) { - /* Verify upper root is index dir origin */ - err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, - upperpath.dentry, true, true); - if (err) - pr_err("overlayfs: failed to verify index dir origin\n"); + err = ovl_get_upper(ofs, &upperpath); + if (err) + goto out_err; - /* Cleanup bad/stale/orphan index entries */ - if (!err) - err = ovl_indexdir_cleanup(ufs->indexdir, - ufs->upper_mnt, - stack, numlower); - 
-		}
-		if (err || !ufs->indexdir)
-			pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+		err = ovl_get_workdir(ofs, &upperpath);
 		if (err)
-			goto out_put_indexdir;
+			goto out_err;
+
+		if (!ofs->workdir)
+			sb->s_flags |= MS_RDONLY;
+
+		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
+		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
+	}
+	oe = ovl_get_lowerstack(sb, ofs);
+	err = PTR_ERR(oe);
+	if (IS_ERR(oe))
+		goto out_err;
 
-	/* Show index=off/on in /proc/mounts for any of the reasons above */
-	if (!ufs->indexdir)
-		ufs->config.index = false;
+	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
+	if (!ofs->upper_mnt)
+		sb->s_flags |= MS_RDONLY;
+	else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
+		ofs->same_sb = NULL;
 
-	if (remote)
-		sb->s_d_op = &ovl_reval_dentry_operations;
-	else
-		sb->s_d_op = &ovl_dentry_operations;
+	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
+		err = ovl_get_indexdir(ofs, oe, &upperpath);
+		if (err)
+			goto out_free_oe;
 
-	err = -ENOMEM;
-	ufs->creator_cred = cred = prepare_creds();
-	if (!cred)
-		goto out_put_indexdir;
+		if (!ofs->indexdir)
+			sb->s_flags |= MS_RDONLY;
+	}
+
+	/* Show index=off/on in /proc/mounts for any of the reasons above */
+	if (!ofs->indexdir)
+		ofs->config.index = false;
 
 	/* Never override disk quota limits or use reserved space */
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
 
-	err = -ENOMEM;
-	oe = ovl_alloc_entry(numlower);
-	if (!oe)
-		goto out_put_cred;
-
 	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
 	sb->s_op = &ovl_super_operations;
 	sb->s_xattr = ovl_xattr_handlers;
-	sb->s_fs_info = ufs;
+	sb->s_fs_info = ofs;
 	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
 
+	err = -ENOMEM;
 	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
 	if (!root_dentry)
 		goto out_free_oe;
 
 	mntput(upperpath.mnt);
-	for (i = 0; i < numlower; i++)
-		mntput(stack[i].mnt);
-	mntput(workpath.mnt);
-	kfree(lowertmp);
-
 	if (upperpath.dentry) {
 		oe->has_upper = true;
 		if (ovl_is_impuredir(upperpath.dentry))
 			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
 	}
-	for (i = 0; i < numlower; i++) {
-		oe->lowerstack[i].dentry = stack[i].dentry;
-		oe->lowerstack[i].mnt = ufs->lower_mnt[i];
-	}
-	kfree(stack);
 
 	root_dentry->d_fsdata = oe;
 
+	/* Root is always merge -> can have whiteouts */
+	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
 	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
 		       ovl_dentry_lower(root_dentry));
@@ -1149,39 +1253,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 
 out_free_oe:
+	ovl_entry_stack_free(oe);
 	kfree(oe);
-out_put_cred:
-	put_cred(ufs->creator_cred);
-out_put_indexdir:
-	dput(ufs->indexdir);
-out_put_lower_mnt:
-	for (i = 0; i < ufs->numlower; i++)
-		mntput(ufs->lower_mnt[i]);
-	kfree(ufs->lower_mnt);
-out_put_workdir:
-	dput(ufs->workdir);
-	mntput(ufs->upper_mnt);
-out_put_lowerpath:
-	for (i = 0; i < numlower; i++)
-		path_put(&stack[i]);
-	kfree(stack);
-out_free_lowertmp:
-	kfree(lowertmp);
-out_unlock_workdentry:
-	if (ufs->workdir_locked)
-		ovl_inuse_unlock(workpath.dentry);
-out_put_workpath:
-	path_put(&workpath);
-out_unlock_upperdentry:
-	if (ufs->upperdir_locked)
-		ovl_inuse_unlock(upperpath.dentry);
-out_put_upperpath:
+out_err:
 	path_put(&upperpath);
-out_free_config:
-	kfree(ufs->config.lowerdir);
-	kfree(ufs->config.upperdir);
-	kfree(ufs->config.workdir);
-	kfree(ufs);
+	ovl_free_fs(ofs);
 out:
 	return err;
 }
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index b9b239fa5cfd..d6bb1c9f5e7a 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -17,7 +17,6 @@
 #include <linux/namei.h>
 #include <linux/ratelimit.h>
 #include "overlayfs.h"
-#include "ovl_entry.h"
 
 int ovl_want_write(struct dentry *dentry)
 {
@@ -125,7 +124,12 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
 
-	*path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
+	if (oe->numlower) {
+		path->mnt = oe->lowerstack[0].layer->mnt;
+		path->dentry = oe->lowerstack[0].dentry;
+	} else {
+		*path = (struct path) { };
+	}
 }
 
 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
@@ -329,6 +333,19 @@ void ovl_copy_up_end(struct dentry *dentry)
 	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
 }
 
+bool ovl_check_origin_xattr(struct dentry *dentry)
+{
+	int res;
+
+	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+
+	/* Zero size value means "copied up but origin unknown" */
+	if (res >= 0)
+		return true;
+
+	return false;
+}
+
 bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
 {
 	int res;
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index e0f867cd8553..96f1087e372c 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -1,12 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/cpufreq.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
+__weak void arch_freq_prepare_all(void)
+{
+}
+
 extern const struct seq_operations cpuinfo_op;
 static int cpuinfo_open(struct inode *inode, struct file *file)
 {
+	arch_freq_prepare_all();
 	return seq_open(file, &cpuinfo_op);
 }
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 086e491faf04..423159abd501 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -651,7 +651,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
 		return -EINVAL;
 
 	record->buf = memdup_user(buf, record->size);
-	if (unlikely(IS_ERR(record->buf))) {
+	if (IS_ERR(record->buf)) {
 		ret = PTR_ERR(record->buf);
 		goto out;
 	}
diff --git a/fs/read_write.c b/fs/read_write.c
index 0046d72efe94..f8547b82dfb3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -635,27 +635,6 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
 	return ret;
 }
 
-/*
- * Reduce an iovec's length in-place.  Return the resulting number of segments
- */
-unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
-{
-	unsigned long seg = 0;
-	size_t len = 0;
-
-	while (seg < nr_segs) {
-		seg++;
-		if (len + iov->iov_len >= to) {
-			iov->iov_len = to - len;
-			break;
-		}
-		len += iov->iov_len;
-		iov++;
-	}
-	return seg;
-}
-EXPORT_SYMBOL(iov_shorten);
-
 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 		loff_t *ppos, int type, rwf_t flags)
 {
diff --git a/fs/select.c b/fs/select.c
index 063067e606ca..6de493bb42a4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -292,8 +292,7 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
 				      void __user *p,
 				      int timeval, int ret)
 {
-	struct timespec64 rts64;
-	struct timespec rts;
+	struct timespec64 rts;
 	struct timeval rtv;
 
 	if (!p)
@@ -306,23 +305,22 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
 	if (!end_time->tv_sec && !end_time->tv_nsec)
 		return ret;
 
-	ktime_get_ts64(&rts64);
-	rts64 = timespec64_sub(*end_time, rts64);
-	if (rts64.tv_sec < 0)
-		rts64.tv_sec = rts64.tv_nsec = 0;
+	ktime_get_ts64(&rts);
+	rts = timespec64_sub(*end_time, rts);
+	if (rts.tv_sec < 0)
+		rts.tv_sec = rts.tv_nsec = 0;
 
-	rts = timespec64_to_timespec(rts64);
 
 	if (timeval) {
 		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
 			memset(&rtv, 0, sizeof(rtv));
-		rtv.tv_sec = rts64.tv_sec;
-		rtv.tv_usec = rts64.tv_nsec / NSEC_PER_USEC;
+		rtv.tv_sec = rts.tv_sec;
+		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
 		if (!copy_to_user(p, &rtv, sizeof(rtv)))
 			return ret;
-	} else if (!copy_to_user(p, &rts, sizeof(rts)))
+	} else if (!put_timespec64(&rts, p))
 		return ret;
 
 	/*
@@ -705,17 +703,15 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
 		       const sigset_t __user *sigmask, size_t sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
-	struct timespec ts;
-	struct timespec64 ts64, end_time, *to = NULL;
+	struct timespec64 ts, end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
-		if (copy_from_user(&ts, tsp, sizeof(ts)))
+		if (get_timespec64(&ts, tsp))
 			return -EFAULT;
-		ts64 = timespec_to_timespec64(ts);
 
 		to = &end_time;
-		if (poll_select_set_timeout(to, ts64.tv_sec, ts64.tv_nsec))
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
 			return -EINVAL;
 	}
@@ -1052,12 +1048,11 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
 		size_t, sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
-	struct timespec ts;
-	struct timespec64 end_time, *to = NULL;
+	struct timespec64 ts, end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
-		if (copy_from_user(&ts, tsp, sizeof(ts)))
+		if (get_timespec64(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
@@ -1103,10 +1098,10 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
 
 #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
 
 static
-int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
 				      int timeval, int ret)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 
 	if (!p)
 		return ret;
@@ -1118,8 +1113,8 @@ int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
 	if (!end_time->tv_sec && !end_time->tv_nsec)
 		return ret;
 
-	ktime_get_ts(&ts);
-	ts = timespec_sub(*end_time, ts);
+	ktime_get_ts64(&ts);
+	ts = timespec64_sub(*end_time, ts);
 	if (ts.tv_sec < 0)
 		ts.tv_sec = ts.tv_nsec = 0;
 
@@ -1132,12 +1127,7 @@ int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
 		if (!copy_to_user(p, &rtv, sizeof(rtv)))
 			return ret;
 	} else {
-		struct compat_timespec rts;
-
-		rts.tv_sec = ts.tv_sec;
-		rts.tv_nsec = ts.tv_nsec;
-
-		if (!copy_to_user(p, &rts, sizeof(rts)))
+		if (!compat_put_timespec64(&ts, p))
 			return ret;
 	}
 	/*
@@ -1195,7 +1185,7 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  */
 static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-	struct timespec *end_time)
+	struct timespec64 *end_time)
 {
 	fd_set_bits fds;
 	void *bits;
@@ -1268,7 +1258,7 @@ COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
 	struct compat_timeval __user *, tvp)
 {
-	struct timespec end_time, *to = NULL;
+	struct timespec64 end_time, *to = NULL;
 	struct compat_timeval tv;
 	int ret;
 
@@ -1312,14 +1302,12 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
 	compat_size_t sigsetsize)
 {
-	compat_sigset_t ss32;
 	sigset_t ksigmask, sigsaved;
-	struct compat_timespec ts;
-	struct timespec end_time, *to = NULL;
+	struct timespec64 ts, end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
-		if (copy_from_user(&ts, tsp, sizeof(ts)))
+		if (compat_get_timespec64(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
@@ -1330,9 +1318,8 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	if (sigmask) {
 		if (sigsetsize != sizeof(compat_sigset_t))
 			return -EINVAL;
-		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+		if (get_compat_sigset(&ksigmask, sigmask))
 			return -EFAULT;
-		sigset_from_compat(&ksigmask, &ss32);
 		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
@@ -1381,14 +1368,12 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
 	unsigned int,  nfds, struct compat_timespec __user *, tsp,
 	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
 {
-	compat_sigset_t ss32;
 	sigset_t ksigmask, sigsaved;
-	struct compat_timespec ts;
-	struct timespec end_time, *to = NULL;
+	struct timespec64 ts, end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
-		if (copy_from_user(&ts, tsp, sizeof(ts)))
+		if (compat_get_timespec64(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
@@ -1399,9 +1384,8 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
 	if (sigmask) {
 		if (sigsetsize != sizeof(compat_sigset_t))
 			return -EINVAL;
-		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+		if (get_compat_sigset(&ksigmask, sigmask))
 			return -EFAULT;
-		sigset_from_compat(&ksigmask, &ss32);
 		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 1c667af86da5..5f1ff8756595 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -313,15 +313,13 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
 		     compat_size_t, sigsetsize,
 		     int, flags)
 {
-	compat_sigset_t ss32;
 	sigset_t tmp;
 	sigset_t __user *ksigmask;
 
 	if (sigsetsize != sizeof(compat_sigset_t))
 		return -EINVAL;
-	if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+	if (get_compat_sigset(&tmp, sigmask))
 		return -EFAULT;
-	sigset_from_compat(&tmp, &ss32);
 	ksigmask = compat_alloc_user_space(sizeof(sigset_t));
 	if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
 		return -EFAULT;
diff --git a/fs/statfs.c b/fs/statfs.c
index c25dd9a26cc1..b072a8bab71a 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -217,7 +217,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
 	return error;
 }
 
-int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
+static int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
 {
 	struct super_block *s = user_get_super(dev);
 	int err;
diff --git a/fs/super.c b/fs/super.c
index 994db21f59bf..d4e33e8f1e6f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -155,21 +155,19 @@ static void destroy_super_rcu(struct rcu_head *head)
 	schedule_work(&s->destroy_work);
 }
 
-/**
- *	destroy_super	-	frees a superblock
- *	@s: superblock to free
- *
- *	Frees a superblock.
- */
-static void destroy_super(struct super_block *s)
+/* Free a superblock that has never been seen by anyone */
+static void destroy_unused_super(struct super_block *s)
 {
+	if (!s)
+		return;
+	up_write(&s->s_umount);
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
 	security_sb_free(s);
-	WARN_ON(!list_empty(&s->s_mounts));
 	put_user_ns(s->s_user_ns);
 	kfree(s->s_subtype);
-	call_rcu(&s->rcu, destroy_super_rcu);
+	/* no delays needed */
+	destroy_super_work(&s->destroy_work);
 }
 
@@ -257,7 +255,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	return s;
 
 fail:
-	destroy_super(s);
+	destroy_unused_super(s);
 	return NULL;
 }
 
@@ -266,11 +264,17 @@ fail:
 /*
  * Drop a superblock's refcount.  The caller must hold sb_lock.
  */
-static void __put_super(struct super_block *sb)
+static void __put_super(struct super_block *s)
 {
-	if (!--sb->s_count) {
-		list_del_init(&sb->s_list);
-		destroy_super(sb);
+	if (!--s->s_count) {
+		list_del_init(&s->s_list);
+		WARN_ON(s->s_dentry_lru.node);
+		WARN_ON(s->s_inode_lru.node);
+		WARN_ON(!list_empty(&s->s_mounts));
+		security_sb_free(s);
+		put_user_ns(s->s_user_ns);
+		kfree(s->s_subtype);
+		call_rcu(&s->rcu, destroy_super_rcu);
 	}
 }
 
@@ -485,19 +489,12 @@ retry:
 			continue;
 		if (user_ns != old->s_user_ns) {
 			spin_unlock(&sb_lock);
-			if (s) {
-				up_write(&s->s_umount);
-				destroy_super(s);
-			}
+			destroy_unused_super(s);
 			return ERR_PTR(-EBUSY);
 		}
 		if (!grab_super(old))
 			goto retry;
-		if (s) {
-			up_write(&s->s_umount);
-			destroy_super(s);
-			s = NULL;
-		}
+		destroy_unused_super(s);
 		return old;
 	}
 	}
@@ -512,8 +509,7 @@ retry:
 		err = set(s, data);
 		if (err) {
 			spin_unlock(&sb_lock);
-			up_write(&s->s_umount);
-			destroy_super(s);
+			destroy_unused_super(s);
 			return ERR_PTR(err);
 		}
 		s->s_type = type;
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 343a94246f5b..19e546a41251 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -302,7 +302,7 @@ xfs_iext_rec_cmp(
 	xfs_fileoff_t	offset)
 {
 	uint64_t	rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
-	u32		rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
+	uint32_t	rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
 
 	if (rec_offset > offset)
 		return 1;
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 996f035ee205..349d9f8edb89 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -274,7 +274,7 @@ struct xfs_inode_log_format {
 	uint64_t		ilf_ino;	/* inode number */
 	union {
 		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		u8		__pad[16];	/* unused */
+		uint8_t		__pad[16];	/* unused */
 	} ilf_u;
 	int64_t			ilf_blkno;	/* blkno of inode buffer */
 	int32_t			ilf_len;	/* len of inode buffer */
@@ -295,7 +295,7 @@ struct xfs_inode_log_format_32 {
 	uint64_t		ilf_ino;	/* inode number */
 	union {
 		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		u8		__pad[16];	/* unused */
+		uint8_t		__pad[16];	/* unused */
 	} ilf_u;
 	int64_t			ilf_blkno;	/* blkno of inode buffer */
 	int32_t			ilf_len;	/* len of inode buffer */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d8226f7a5dde..61d1cb7dc10d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2357,6 +2357,7 @@ retry:
 			 */
 			if (ip->i_ino != inum + i) {
 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
+				rcu_read_unlock();
 				continue;
 			}
 		}
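The select.c conversions above (and the aio timeout handling earlier in this series) all funnel user-supplied timeouts through struct timespec64 and the get_timespec64()/put_timespec64() helpers instead of the old struct timespec. The motivation is the y2038 limit: a signed 32-bit tv_sec cannot represent times past January 2038. A minimal userspace sketch of the failure mode follows; the struct names are invented stand-ins, not the kernel's definitions.

/* y2038 sketch: a 32-bit tv_sec wraps, a 64-bit one does not. */
#include <stdint.h>
#include <stdio.h>

struct timespec32_like {	/* stand-in for timespec with 32-bit time_t */
	int32_t tv_sec;
	int32_t tv_nsec;
};

struct timespec64_like {	/* stand-in for the kernel's timespec64 */
	int64_t tv_sec;
	long tv_nsec;
};

int main(void)
{
	/* One second past 2038-01-19T03:14:07Z, the 32-bit signed limit. */
	int64_t after_2038 = (int64_t)INT32_MAX + 1;

	struct timespec32_like old = { .tv_sec = (int32_t)after_2038 };
	struct timespec64_like new = { .tv_sec = after_2038 };

	/* On common two's-complement ABIs the 32-bit field has wrapped
	 * negative, while the 64-bit field is still correct. */
	printf("32-bit tv_sec: %d\n", (int)old.tv_sec);
	printf("64-bit tv_sec: %lld\n", (long long)new.tv_sec);
	return 0;
}

64-bit architectures already have a 64-bit time_t, which is why these hunks concentrate on the native 32-bit and compat syscall paths.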
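The do_compat_pselect(), compat ppoll and signalfd4 hunks each replace an open-coded copy_from_user() plus sigset_from_compat() pair with one get_compat_sigset() call. Roughly, such a helper copies the 32-bit-word sigset a compat task supplies and widens it into the kernel's native layout in a single place. Below is a simplified userspace model, assuming 64 signals and a little-endian-style word split; the types and the user-copy step are stand-ins, not the kernel's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint32_t sig[2]; } compat_sigset_like; /* two 32-bit words */
typedef struct { uint64_t sig[1]; } sigset_like;        /* one 64-bit word */

/* Model of the consolidation: copy once, widen once, for every caller. */
static int get_compat_sigset_like(sigset_like *set,
				  const compat_sigset_like *compat)
{
	compat_sigset_like c;

	memcpy(&c, compat, sizeof(c));	/* the kernel uses copy_from_user() */
	/* Low word fills bits 0-31, high word fills bits 32-63. */
	set->sig[0] = c.sig[0] | ((uint64_t)c.sig[1] << 32);
	return 0;
}

int main(void)
{
	/* Bit 8 set in the low word, bit 0 set in the high word. */
	compat_sigset_like user_mask = { .sig = { 0x100u, 0x1u } };
	sigset_like kmask;

	get_compat_sigset_like(&kmask, &user_mask);
	printf("native mask: 0x%016llx\n",
	       (unsigned long long)kmask.sig[0]); /* 0x0000000100000100 */
	return 0;
}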
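The overlayfs ovl_fill_super() rework and the super.c destroy_unused_super() change converge on the same error-handling shape: a single NULL-tolerant teardown function and one error label, instead of a ladder of out_put_* labels that each had to know exactly how much had been constructed. A small sketch of the pattern follows; every name in it (fs_state, fs_state_free, fill_super_like) is invented for illustration.

#include <stdlib.h>

struct fs_state {
	void *upper;
	void *work;
};

/* Tolerates NULL and partially built states, so one label fits all paths. */
static void fs_state_free(struct fs_state *s)
{
	if (!s)
		return;
	free(s->work);
	free(s->upper);
	free(s);
}

static int fill_super_like(void)
{
	struct fs_state *s = calloc(1, sizeof(*s));

	if (!s)
		goto out_err;
	if (!(s->upper = malloc(16)))
		goto out_err;
	if (!(s->work = malloc(16)))
		goto out_err;

	/* A real fill_super would hand s off to the superblock here;
	 * the sketch just tears it down again. */
	fs_state_free(s);
	return 0;

out_err:
	/* One label: the destructor copes with whatever exists so far. */
	fs_state_free(s);
	return -1;
}

int main(void)
{
	return fill_super_like();
}

The cost of the pattern is that every field needs a safe "not yet set up" state, which is one reason the ofs structure above is allocated with kzalloc().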