diff options
Diffstat (limited to 'fs')
53 files changed, 459 insertions, 267 deletions
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 1feb68ecef95..842d00048a65 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -94,25 +94,21 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, { struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); struct list_head *next; - struct dentry *p, *q; + struct dentry *q; spin_lock(&sbi->lookup_lock); + spin_lock(&root->d_lock); - if (prev == NULL) { - spin_lock(&root->d_lock); + if (prev) + next = prev->d_u.d_child.next; + else { prev = dget_dlock(root); next = prev->d_subdirs.next; - p = prev; - goto start; } - p = prev; - spin_lock(&p->d_lock); -again: - next = p->d_u.d_child.next; -start: +cont: if (next == &root->d_subdirs) { - spin_unlock(&p->d_lock); + spin_unlock(&root->d_lock); spin_unlock(&sbi->lookup_lock); dput(prev); return NULL; @@ -121,16 +117,15 @@ start: q = list_entry(next, struct dentry, d_u.d_child); spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); - /* Negative dentry - try next */ - if (!simple_positive(q)) { - spin_unlock(&p->d_lock); - lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_); - p = q; - goto again; + /* Already gone or negative dentry (under construction) - try next */ + if (q->d_count == 0 || !simple_positive(q)) { + spin_unlock(&q->d_lock); + next = q->d_u.d_child.next; + goto cont; } dget_dlock(q); spin_unlock(&q->d_lock); - spin_unlock(&p->d_lock); + spin_unlock(&root->d_lock); spin_unlock(&sbi->lookup_lock); dput(prev); @@ -404,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - /* Path walk currently on this dentry? */ - ino_count = atomic_read(&ino->count) + 2; - if (dentry->d_count > ino_count) - goto next; - /* Can we umount this guy */ if (autofs4_mount_busy(mnt, dentry)) goto next; @@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) { unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL; - struct bio_slab *bslab; + struct bio_slab *bslab, *new_bio_slabs; unsigned int i, entry = -1; mutex_lock(&bio_slab_lock); @@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (bio_slab_nr == bio_slab_max && entry == -1) { bio_slab_max <<= 1; - bio_slabs = krealloc(bio_slabs, - bio_slab_max * sizeof(struct bio_slab), - GFP_KERNEL); - if (!bio_slabs) + new_bio_slabs = krealloc(bio_slabs, + bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!new_bio_slabs) goto out_unlock; + bio_slabs = new_bio_slabs; } if (entry == -1) entry = bio_slab_nr++; diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e519195d45b..38e721b35d45 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); + blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } + blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bc2f6ffff3cf..7bb755677a22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -664,10 +664,6 @@ static noinline int btrfs_mksubvol(struct path *parent, struct dentry *dentry; int error; - error = mnt_want_write(parent->mnt); - if (error) - return error; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, parent->dentry, namelen); @@ -703,7 +699,6 @@ out_dput: dput(dentry); out_unlock: mutex_unlock(&dir->i_mutex); - mnt_drop_write(parent->mnt); return error; } diff --git a/fs/buffer.c b/fs/buffer.c index 9f6d2e41281d..58e2e7b77372 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. */ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { @@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. * - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. */ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; - +done: + ret = (block < end_block) ? 1 : -ENXIO; failed: unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -999,7 +1007,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. */ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { - int ret; - struct buffer_head *bh; - /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } -retry: - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; + for (;;) { + struct buffer_head *bh; + int ret; - ret = grow_buffers(bdev, block, size); - if (ret == 0) { - free_more_memory(); - goto retry; - } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; + + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) + free_more_memory(); } - return NULL; } /* @@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? */ diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fb962efdacee..6d59006bfa27 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); + BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9fff9f3b17e4..4b5762ef7c2b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, if (rinfo->head->is_dentry) { struct inode *dir = req->r_locked_dir; - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1, - &req->r_caps_reservation); - if (err < 0) - return err; + if (dir) { + err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + session, req->r_request_started, -1, + &req->r_caps_reservation); + if (err < 0) + return err; + } else { + WARN_ON_ONCE(1); + } } /* @@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, * will have trouble splicing in the virtual snapdir later */ if (rinfo->head->is_dentry && !req->r_aborted && + req->r_locked_dir && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8e3fb69fbe62..1396ceb46797 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc, /* validate striping parameters */ if ((l->object_size & ~PAGE_MASK) || (l->stripe_unit & ~PAGE_MASK) || - ((unsigned)l->object_size % (unsigned)l->stripe_unit)) + (l->stripe_unit != 0 && + ((unsigned)l->object_size % (unsigned)l->stripe_unit))) return -EINVAL; /* make sure it's a valid data pool */ diff --git a/fs/compat.c b/fs/compat.c index 6161255fac45..1bdb350ea5d3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1155,11 +1155,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1221,11 +1224,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } diff --git a/fs/direct-io.c b/fs/direct-io.c index 1faf4cb56f39..f86c720dba0e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, unsigned long user_addr; size_t bytes; struct buffer_head map_bh = { 0, }; + struct blk_plug plug; if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, PAGE_SIZE - user_addr / PAGE_SIZE); } + blk_start_plug(&plug); + for (seg = 0; seg < nr_segs; seg++) { user_addr = (unsigned long)iov[seg].iov_base; sdio.size += bytes = iov[seg].iov_len; @@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (sdio.bio) dio_bio_submit(dio, &sdio); + blk_finish_plug(&plug); + /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c8b55670804..eedec84c1809 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) error = PTR_ERR(file); goto out_free_fd; } - fd_install(fd, file); ep->file = file; + fd_install(fd, file); return fd; out_free_fd: diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d23b31ca9d7a..1b5089067d01 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, return desc; } -static int ext4_valid_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) +/* + * Return the block number which was discovered to be invalid, or 0 if + * the block bitmap is valid. + */ +static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) { ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; - ext4_fsblk_t bitmap_blk; + ext4_fsblk_t blk; ext4_fsblk_t group_first_block; if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { @@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb, * or it has to also read the block group where the bitmaps * are located to verify they are set. */ - return 1; + return 0; } group_first_block = ext4_group_first_block_no(sb, block_group); /* check whether block bitmap block number is set */ - bitmap_blk = ext4_block_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_block_bitmap(sb, desc); + offset = blk - group_first_block; if (!ext4_test_bit(offset, bh->b_data)) /* bad block bitmap */ - goto err_out; + return blk; /* check whether the inode bitmap block number is set */ - bitmap_blk = ext4_inode_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_inode_bitmap(sb, desc); + offset = blk - group_first_block; if (!ext4_test_bit(offset, bh->b_data)) /* bad block bitmap */ - goto err_out; + return blk; /* check whether the inode table block number is set */ - bitmap_blk = ext4_inode_table(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_inode_table(sb, desc); + offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, offset + EXT4_SB(sb)->s_itb_per_group, offset); - if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) - /* good bitmap for inode tables */ - return 1; - -err_out: - ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", - block_group, bitmap_blk); + if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) + /* bad bitmap for inode tables */ + return blk; return 0; } @@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb, unsigned int block_group, struct buffer_head *bh) { + ext4_fsblk_t blk; + if (buffer_verified(bh)) return; ext4_lock_group(sb, block_group); - if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && - ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8)) - set_buffer_verified(bh); + blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); + if (unlikely(blk != 0)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: block %llu: invalid block bitmap", + block_group, blk); + return; + } + if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, + desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); + return; + } + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); } diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index f8716eab9995..5c2d1813ebe9 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, if (provided == calculated) return 1; - ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); return 0; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cd0c7ed06772..aabbb3f53683 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2662,6 +2662,7 @@ cont: } path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); + i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e0851e4f468..c6e0cb3d1f4a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -948,6 +948,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; + ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -3108,6 +3109,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + + sbi->s_itb_per_group + 2); + first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + (grp * EXT4_BLOCKS_PER_GROUP(sb)); last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; @@ -4419,6 +4424,7 @@ static void ext4_clear_journal_err(struct super_block *sb, ext4_commit_super(sb, 1); jbd2_journal_clear_err(journal); + jbd2_journal_update_sb_errno(journal); } } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8964cf3999b2..324bc0850534 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_file *ff; + /* Userspace expects S_IFREG in create mode */ + BUG_ON((mode & S_IFMT) != S_IFREG); + forget = fuse_alloc_forget(); err = -ENOMEM; if (!forget) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 93d8d6c9494d..aba15f1b7ad2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -703,13 +703,16 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); - if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) { + /* + * In auto invalidate mode, always update attributes on read. + * Otherwise, only update if we attempt to read past EOF (to ensure + * i_size is up to date). + */ + if (fc->auto_inval_data || + (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { int err; - /* - * If trying to read past EOF, make sure the i_size - * attribute is up-to-date. - */ err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); if (err) return err; @@ -1700,7 +1703,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) size_t n; u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; - for (n = 0; n < count; n++) { + for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 771fb6322c07..e24dd74e3068 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -484,6 +484,9 @@ struct fuse_conn { /** Is fallocate not implemented by fs? */ unsigned no_fallocate:1; + /** Use enhanced/automatic page cache invalidation. */ + unsigned auto_inval_data:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1cd61652018c..ce0a2838ccd0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); loff_t oldsize; + struct timespec old_mtime; spin_lock(&fc->lock); if (attr_version != 0 && fi->attr_version > attr_version) { @@ -204,15 +205,35 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, return; } + old_mtime = inode->i_mtime; fuse_change_attributes_common(inode, attr, attr_valid); oldsize = inode->i_size; i_size_write(inode, attr->size); spin_unlock(&fc->lock); - if (S_ISREG(inode->i_mode) && oldsize != attr->size) { - truncate_pagecache(inode, oldsize, attr->size); - invalidate_inode_pages2(inode->i_mapping); + if (S_ISREG(inode->i_mode)) { + bool inval = false; + + if (oldsize != attr->size) { + truncate_pagecache(inode, oldsize, attr->size); + inval = true; + } else if (fc->auto_inval_data) { + struct timespec new_mtime = { + .tv_sec = attr->mtime, + .tv_nsec = attr->mtimensec, + }; + + /* + * Auto inval mode also checks and invalidates if mtime + * has changed. + */ + if (!timespec_equal(&old_mtime, &new_mtime)) + inval = true; + } + + if (inval) + invalidate_inode_pages2(inode->i_mapping); } } @@ -834,6 +855,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->big_writes = 1; if (arg->flags & FUSE_DONT_MASK) fc->dont_mask = 1; + if (arg->flags & FUSE_AUTO_INVAL_DATA) + fc->auto_inval_data = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -859,7 +882,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | - FUSE_FLOCK_LOCKS; + FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | + FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 09357508ec9a..a2862339323b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); spin_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + spin_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8625da27eccf..e149b99a7ffb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1377,7 +1377,7 @@ static void jbd2_mark_journal_empty(journal_t *journal) * Update a journal's errno. Write updated superblock to disk waiting for IO * to complete. */ -static void jbd2_journal_update_sb_errno(journal_t *journal) +void jbd2_journal_update_sb_errno(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; @@ -1390,6 +1390,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) jbd2_write_superblock(journal, WRITE_SYNC); } +EXPORT_SYMBOL(jbd2_journal_update_sb_errno); /* * Read the superblock for a given journal, performing initial diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde8..dd1ed1b8e98e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on + * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. @@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; /** * may_follow_link - Check symlink following for unsafe situations * @link: The path of the symlink + * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is @@ -2414,7 +2416,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto out; } - mode = op->mode & S_IALLUGO; + mode = op->mode; if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); @@ -2452,7 +2454,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (open_flag & O_CREAT) { - error = may_o_create(&nd->path, dentry, op->mode); + error = may_o_create(&nd->path, dentry, mode); if (error) { create_error = error; if (open_flag & O_EXCL) @@ -2489,6 +2491,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } + if (create_error && dentry->d_inode == NULL) { + error = create_error; + goto out; + } goto looked_up; } diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8bf3a3f6925a..b7db60897f91 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o -obj-$(CONFIG_NFS_V2) += nfs2.o -nfs2-y := nfs2super.o proc.o nfs2xdr.o +obj-$(CONFIG_NFS_V2) += nfsv2.o +nfsv2-y := nfs2super.o proc.o nfs2xdr.o -obj-$(CONFIG_NFS_V3) += nfs3.o -nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o -nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +obj-$(CONFIG_NFS_V3) += nfsv3.o +nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o -obj-$(CONFIG_NFS_V4) += nfs4.o -nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ +obj-$(CONFIG_NFS_V4) += nfsv4.o +nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs4getroot.o nfs4client.o -nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9fc0d9dfc91b..99694442b93f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version) if (IS_ERR(nfs)) { mutex_lock(&nfs_version_mutex); - request_module("nfs%d", version); + request_module("nfsv%d", version); nfs = find_nfs_version(version); mutex_unlock(&nfs_version_mutex); } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b701358c39c3..a850079467d8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -61,6 +61,12 @@ struct idmap { struct mutex idmap_mutex; }; +struct idmap_legacy_upcalldata { + struct rpc_pipe_msg pipe_msg; + struct idmap_msg idmap_msg; + struct idmap *idmap; +}; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields * @fattr: fully initialised struct nfs_fattr @@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name, namelen, type, data, data_size, idmap); + idmap->idmap_key_cons = NULL; mutex_unlock(&idmap->idmap_mutex); } return ret; @@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = { static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); +static void idmap_release_pipe(struct inode *); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, + .release_pipe = idmap_release_pipe, .destroy_msg = idmap_pipe_destroy_msg, }; @@ -616,7 +625,8 @@ void nfs_idmap_quit(void) nfs_idmap_quit_keyring(); } -static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, +static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, + struct idmap_msg *im, struct rpc_pipe_msg *msg) { substring_t substr; @@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, const char *op, void *aux) { + struct idmap_legacy_upcalldata *data; struct rpc_pipe_msg *msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; @@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - goto out0; - - im = kmalloc(sizeof(*im), GFP_KERNEL); - if (!im) + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) goto out1; - ret = nfs_idmap_prepare_message(key->description, im, msg); + msg = &data->pipe_msg; + im = &data->idmap_msg; + data->idmap = idmap; + + ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) goto out2; @@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - goto out2; + goto out3; return ret; +out3: + idmap->idmap_key_cons = NULL; out2: - kfree(im); + kfree(data); out1: - kfree(msg); -out0: complete_request_key(cons, ret); return ret; } @@ -749,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(cons, -ENOKEY); - goto out_incomplete; + ret = -ENOKEY; + goto out; } namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); @@ -768,16 +778,32 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) out: complete_request_key(cons, ret); -out_incomplete: return ret; } static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { + struct idmap_legacy_upcalldata *data = container_of(msg, + struct idmap_legacy_upcalldata, + pipe_msg); + struct idmap *idmap = data->idmap; + struct key_construction *cons; + if (msg->errno) { + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + complete_request_key(cons, msg->errno); + } /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(msg->data); - kfree(msg); + kfree(data); +} + +static void +idmap_release_pipe(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; + idmap->idmap_key_cons = NULL; } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0952c791df36..d6b3b5f2d779 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3b950dd81e81..da0618aeeadb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); +/* super.c */ +extern struct file_system_type nfs4_fs_type; + /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cbcdfaf32505..24eb663f8ed5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error: - kfree(clp); + nfs_free_client(clp); return ERR_PTR(err); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a99a8d948721..635274140b18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3737,9 +3737,10 @@ out: static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; + size_t buflen = sizeof(*acl) + acl_len; - if (pages && acl_len <= PAGE_SIZE) { - acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (pages && buflen <= PAGE_SIZE) { + acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; @@ -3819,7 +3820,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len - res.acl_data_offset; + acl_len = res.acl_len; if (acl_len > args.acl_len) nfs4_write_cached_acl(inode, NULL, 0, acl_len); else @@ -6223,11 +6224,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6239,9 +6287,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6259,12 +6308,19 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return PTR_ERR(task); + return; status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; @@ -6272,7 +6328,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) status = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return status; + return; } static void @@ -6304,12 +6360,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 12a31a9dbcdd..bd61221ad2c5 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -344,14 +336,8 @@ static int __init init_nfs_v4(void) if (err) goto out1; - err = register_filesystem(&nfs4_fs_type); - if (err < 0) - goto out2; - register_nfs_version(&nfs_v4); return 0; -out2: - nfs4_unregister_sysctl(); out1: nfs_idmap_quit(); out: @@ -361,7 +347,6 @@ out: static void __exit exit_nfs_v4(void) { unregister_nfs_version(&nfs_v4); - unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ca13483edd60..1bfbd67c556d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5045,22 +5045,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { unsigned int savep; - __be32 *bm_p; uint32_t attrlen, bitmap[3] = {0}; int status; - size_t page_len = xdr->buf->page_len; + unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; - bm_p = xdr->p; - res->acl_data_offset = be32_to_cpup(bm_p) + 2; - res->acl_data_offset <<= 2; - /* Check if the acl data starts beyond the allocated buffer */ - if (res->acl_data_offset > page_len) - return -ERANGE; + xdr_enter_page(xdr, xdr->buf->page_len); + + /* Calculate the offset of the page data */ + pg_offset = xdr->buf->head[0].iov_len; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5074,23 +5071,20 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - xdr->p = bm_p; + res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ - attrlen += res->acl_data_offset; - if (attrlen > page_len) { + res->acl_len = attrlen; + if (attrlen > (xdr->nwords << 2)) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ - res->acl_len = attrlen; goto out; } - dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", - attrlen, page_len); + dprintk("NFS: acl reply: attrlen %u > page_len %u\n", + attrlen, xdr->nwords << 2); return -EINVAL; } - xdr_read_pages(xdr, attrlen); - res->acl_len = attrlen; } else status = -EOPNOTSUPP; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index f50d3e8d6f22..ea6d111b03e9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, return false; return pgio->pg_count + req->wb_bytes <= - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + (unsigned long)pgio->pg_layout_private; +} + +void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + pnfs_generic_pg_init_read(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; +} + +static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, + unsigned long *stripe_end) +{ + u32 stripe_off; + unsigned stripe_size; + + if (layout->raid_algorithm == PNFS_OSD_RAID_0) + return true; + + stripe_size = layout->stripe_unit * + (layout->group_width - layout->parity); + + div_u64_rem(offset, stripe_size, &stripe_off); + if (!stripe_off) + return true; + + *stripe_end = stripe_size - stripe_off; + return false; +} + +void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + unsigned long stripe_end = 0; + + pnfs_generic_pg_init_write(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + if (req->wb_offset || + !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, + &OBJIO_LSEG(pgio->pg_lseg)->layout, + &stripe_end)) { + pgio->pg_layout_private = (void *)stripe_end; + } else { + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + } } static const struct nfs_pageio_ops objio_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = objio_init_read, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = objio_init_write, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1a6732ed04a4..311a79681e2b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->dreq = desc->pg_dreq; + hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_error = 0; desc->pg_lseg = NULL; desc->pg_dreq = NULL; + desc->pg_layout_private = NULL; } EXPORT_SYMBOL_GPL(nfs_pageio_init); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76875bfcf19c..2e00feacd4be 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; struct pnfs_layout_segment *lseg = NULL; - struct page **pages = NULL; - int i; - u32 max_resp_sz, max_pages; dprintk("--> %s\n", __func__); @@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (lgp == NULL) return NULL; - /* allocate pages for xdr post processing */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - - pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); - if (!pages) - goto out_err_free; - - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_err_free; - } - lgp->args.minlength = PAGE_CACHE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; @@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->args.layout.pages = pages; - lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp); + nfs4_proc_layoutget(lgp, gfp_flags); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } - /* free xdr pages */ - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - return lseg; - -out_err_free: - /* free any allocated xdr pages, lgp as it's not used */ - if (pages) { - for (i = 0; i < max_pages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - kfree(lgp); - return NULL; } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2c6c80503ba4..745aa1b39e7c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac6a3c55dce4..239aff7338eb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops); static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); + +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; +EXPORT_SYMBOL_GPL(nfs4_fs_type); + +static int __init register_nfs4_fs(void) +{ + return register_filesystem(&nfs4_fs_type); +} + +static void unregister_nfs4_fs(void) +{ + unregister_filesystem(&nfs4_fs_type); +} +#else +static int __init register_nfs4_fs(void) +{ + return 0; +} + +static void unregister_nfs4_fs(void) +{ +} #endif static struct shrinker acl_shrinker = { @@ -337,12 +365,18 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; - ret = nfs_register_sysctl(); + ret = register_nfs4_fs(); if (ret < 0) goto error_1; + + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_2; register_shrinker(&acl_shrinker); return 0; +error_2: + unregister_nfs4_fs(); error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); nfs_unregister_sysctl(); + unregister_nfs4_fs(); unregister_filesystem(&nfs_fs_type); } @@ -2645,4 +2680,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5829d0ce7cfb..e3b55372726c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void) nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) - return -ENOMEM; + goto out_destroy_write_cache; nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", sizeof(struct nfs_commit_data), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_cdata_cachep == NULL) - return -ENOMEM; + goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) - return -ENOMEM; + goto out_destroy_commit_cache; /* * NFS congestion size, scale with available memory. @@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void) nfs_congestion_kb = 256*1024; return 0; + +out_destroy_commit_cache: + kmem_cache_destroy(nfs_cdata_cachep); +out_destroy_write_mempool: + mempool_destroy(nfs_wdata_mempool); +out_destroy_write_cache: + kmem_cache_destroy(nfs_wdata_cachep); + return -ENOMEM; } void nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); + kmem_cache_destroy(nfs_cdata_cachep); mempool_destroy(nfs_wdata_mempool); kmem_cache_destroy(nfs_wdata_cachep); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cbaf4f8bb7b7..4c7bd35b1876 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog, args.protocol = XPRT_TRANSPORT_TCP; - args.authflavor = clp->cl_flavor; + args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { if (!conn->cb_xprt) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e6173147f982..22bd0a66c356 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -231,7 +231,6 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ - u32 cl_flavor; /* setclientid pseudoflavor */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ diff --git a/fs/open.c b/fs/open.c index bc132e167d2d..e1f2cdb91a4d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -852,9 +852,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; - if (!(flags & O_CREAT)) - mode = 0; - op->mode = mode; + if (flags & O_CREAT) + op->mode = (mode & S_IALLUGO) | S_IFREG; + else + op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 36a29b753c79..c495a3055e2a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 4c0c7d163d15..a98b7740a0fc 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, else if (bitmap == 0) block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - reiserfs_write_unlock(sb); bh = sb_bread(sb, block); - reiserfs_write_lock(sb); if (bh == NULL) reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " "reading failed", __func__, block); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6d4268fb6c1..855da58db145 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode) ; } out: + reiserfs_write_unlock_once(inode->i_sb, depth); clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ dquot_drop(inode); inode->i_blocks = 0; - reiserfs_write_unlock_once(inode->i_sb, depth); return; no_delete: diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8b8cc4e945f4..760de723dadb 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -167,7 +167,7 @@ struct ubifs_global_debug_info { #define ubifs_dbg_msg(type, fmt, ...) \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) -#define DBG_KEY_BUF_LEN 32 +#define DBG_KEY_BUF_LEN 48 #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ce33b2beb151..8640920766ed 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) return 0; out_err: - ubifs_lpt_free(c, 0); + if (wr) + ubifs_lpt_free(c, 1); + if (rd) + ubifs_lpt_free(c, 0); return err; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c30d976b4be8..edeec499c048 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ - ubifs_err("corruptio %d", ret); + ubifs_err("corruption %d", ret); ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eba46d4a7619..94d78fc5d4e0 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c) c->replaying = 1; lnum = c->ltail_lnum = c->lhead_lnum; - lnum = UBIFS_LOG_LNUM; do { err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) @@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; lnum = ubifs_next_log_lnum(c, lnum); - } while (lnum != UBIFS_LOG_LNUM); + } while (lnum != c->ltail_lnum); err = replay_buds(c); if (err) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c3fa6c5327a3..71a197f0f93d 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c) * * This function mounts UBIFS file system. Returns zero in case of success and * a negative error code in case of failure. - * - * Note, the function does not de-allocate resources it it fails half way - * through, and the caller has to do this instead. */ static int mount_ubifs(struct ubifs_info *c) { diff --git a/fs/udf/inode.c b/fs/udf/inode.c index fafaad795cd6..aa233469b3c1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize) if (err) return err; down_write(&iinfo->i_data_sem); - } else + } else { iinfo->i_lenAlloc = newsize; + goto set_size; + } } err = udf_extend_file(inode, newsize); if (err) { up_write(&iinfo->i_data_sem); return err; } +set_size: truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); } else { diff --git a/fs/udf/super.c b/fs/udf/super.c index dcbf98722afc..18fc038a438d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } @@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; goto out_bh; + } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { @@ -2000,6 +2003,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!silent) pr_notice("Rescanning with blocksize %d\n", UDF_DEFAULT_BLOCKSIZE); + brelse(sbi->s_lvid_bh); + sbi->s_lvid_bh = NULL; uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; ret = udf_load_vrs(sb, &uopt, silent, &fileset); } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f9c3fe304a17..69cf4fcde03e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -179,12 +179,14 @@ xfs_ioc_trim( * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. */ + if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + return -XFS_ERROR(EINVAL); + start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; minlen = BTOBB(max_t(u64, granularity, range.minlen)); - if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) - return -XFS_ERROR(EINVAL); if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 21e37b55f7e5..5aceb3f8ecd6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -962,23 +962,22 @@ xfs_dialloc( if (!pag->pagi_freecount && !okalloc) goto nextag; + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) goto out_error; - /* - * Once the AGI has been read in we have to recheck - * pagi_freecount with the AGI buffer lock held. - */ if (pag->pagi_freecount) { xfs_perag_put(pag); goto out_alloc; } - if (!okalloc) { - xfs_trans_brelse(tp, agbp); - goto nextag; - } + if (!okalloc) + goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); if (error) { @@ -1007,6 +1006,8 @@ xfs_dialloc( return 0; } +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 92d4331cd4f1..ca28a4ba4b54 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -857,7 +857,7 @@ xfs_rtbuf_get( xfs_buf_t *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; - int nmap; + int nmap = 1; int error; /* error value */ ip = issum ? mp->m_rsumip : mp->m_rbmip; |