diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/block_dev.c | 6 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 26 | ||||
-rw-r--r-- | fs/cifs/readdir.c | 1 | ||||
-rw-r--r-- | fs/fscache/cookie.c | 5 | ||||
-rw-r--r-- | fs/fscache/netfs.c | 1 | ||||
-rw-r--r-- | fs/fscache/object.c | 32 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 4 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 1 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 5 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 19 | ||||
-rw-r--r-- | fs/nfsd/state.h | 4 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 97 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag_resv.c | 70 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 48 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c | 90 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 28 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 3 |
26 files changed, 332 insertions, 154 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 5db5d1340d69..3c47614a4b32 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,7 +331,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; - bool is_read = (iov_iter_rw(iter) == READ); + bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; int ret; @@ -344,7 +344,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio_get(bio); /* extra ref for the completion handler */ dio = container_of(bio, struct blkdev_dio, bio); - dio->is_sync = is_sync_kiocb(iocb); + dio->is_sync = is_sync = is_sync_kiocb(iocb); if (dio->is_sync) dio->waiter = current; else @@ -398,7 +398,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } blk_finish_plug(&plug); - if (!dio->is_sync) + if (!is_sync) return -EIOCBQUEUED; for (;;) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4e024260ad71..1e861a063721 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3835,10 +3835,7 @@ cache_acl: break; case S_IFDIR: inode->i_fop = &btrfs_dir_file_operations; - if (root == fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; @@ -4505,8 +4502,19 @@ search_again: if (found_type > min_type) { del_item = 1; } else { - if (item_end < new_size) + if (item_end < new_size) { + /* + * With NO_HOLES mode, for the following mapping + * + * [0-4k][hole][8k-12k] + * + * if truncating isize down to 6k, it ends up + * isize being 8k. + */ + if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) + last_size = new_size; break; + } if (found_key.offset >= new_size) del_item = 1; else @@ -5710,6 +5718,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; + inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mtime = current_time(inode); @@ -7215,7 +7224,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, struct extent_map *em = NULL; int ret; - down_read(&BTRFS_I(inode)->dio_sem); if (type != BTRFS_ORDERED_NOCOW) { em = create_pinned_em(inode, start, len, orig_start, block_start, block_len, orig_block_len, @@ -7234,7 +7242,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, em = ERR_PTR(ret); } out: - up_read(&BTRFS_I(inode)->dio_sem); return em; } @@ -8692,6 +8699,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.unsubmitted_oe_range_start = (u64)offset; dio_data.unsubmitted_oe_range_end = (u64)offset; current->journal_info = &dio_data; + down_read(&BTRFS_I(inode)->dio_sem); } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_end(inode); @@ -8704,6 +8712,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) iter, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { + up_read(&BTRFS_I(inode)->dio_sem); current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { if (dio_data.reserve) @@ -9212,6 +9221,7 @@ static int btrfs_truncate(struct inode *inode) break; } + btrfs_block_rsv_release(fs_info, rsv, -1); ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, 0); BUG_ON(ret); /* shouldn't happen */ @@ -10579,8 +10589,6 @@ static const struct inode_operations btrfs_dir_inode_operations = { static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, - .get_acl = btrfs_get_acl, - .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, }; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 8f6a2a5863b9..a27fc8791551 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) rc = -ENOMEM; goto error_exit; } + spin_lock_init(&cifsFile->file_info_lock); file->private_data = cifsFile; cifsFile->tlink = cifs_get_tlink(tlink); tcon = tlink_tcon(tlink); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 4304072161aa..40d61077bead 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (invalidate) set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); } } else { @@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, TASK_UNINTERRUPTIBLE); + /* Make sure any pending writes are cancelled. */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + fscache_invalidate_writes(cookie); + /* Reset the cookie state if it wasn't relinquished */ if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { atomic_inc(&cookie->n_active); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 9b28649df3a1..a8aa00be4444 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 9e792e30f4db..7a182c87f378 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object static const struct fscache_state *fscache_object_available(struct fscache_object *, int); static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); static const struct fscache_state *fscache_update_object(struct fscache_object *, int); +static const struct fscache_state *fscache_object_dead(struct fscache_object *, int); #define __STATE_NAME(n) fscache_osm_##n #define STATE(n) (&__STATE_NAME(n)) @@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure); static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); -static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); +static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead); static WAIT_STATE(WAIT_FOR_INIT, "?INI", TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); @@ -229,6 +230,10 @@ execute_work_state: event = -1; if (new_state == NO_TRANSIT) { _debug("{OBJ%x} %s notrans", object->debug_id, state->name); + if (unlikely(state == STATE(OBJECT_DEAD))) { + _leave(" [dead]"); + return; + } fscache_enqueue_object(object); event_mask = object->oob_event_mask; goto unmask_events; @@ -239,7 +244,7 @@ execute_work_state: object->state = state = new_state; if (state->work) { - if (unlikely(state->work == ((void *)2UL))) { + if (unlikely(state == STATE(OBJECT_DEAD))) { _leave(" [dead]"); return; } @@ -645,6 +650,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob fscache_mark_object_dead(object); object->oob_event_mask = 0; + if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) { + /* Reject any new read/write ops and abort any that are pending. */ + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + } + if (list_empty(&object->dependents) && object->n_ops == 0 && object->n_children == 0) @@ -1077,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object, } } EXPORT_SYMBOL(fscache_object_mark_killed); + +/* + * The object is dead. We can get here if an object gets queued by an event + * that would lead to its death (such as EV_KILL) when the dispatcher is + * already running (and so can be requeued) but hasn't yet cleared the event + * mask. + */ +static const struct fscache_state *fscache_object_dead(struct fscache_object *object, + int event) +{ + if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD, + &object->flags)) + return NO_TRANSIT; + + WARN(true, "FS-Cache object redispatched after death"); + return NO_TRANSIT; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ecc151697fd4..0a0eaecf9676 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2700,7 +2700,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, sattr->ia_valid |= ATTR_MTIME; /* Except MODE, it seems harmless of setting twice. */ - if ((attrset[1] & FATTR4_WORD1_MODE)) + if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && + attrset[1] & FATTR4_WORD1_MODE) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) @@ -8490,6 +8491,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } + nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 90e6193ce6be..daeb94e3acd4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1091,6 +1091,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BADXDR: case -NFS4ERR_RESOURCE: case -NFS4ERR_NOFILEHANDLE: + case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 59554f3adf29..dd042498ce7c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1200,10 +1200,10 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); if (send) status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); out_put_layout_hdr: + pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 596205d939a1..1fc07a9c70e9 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, struct nfs4_layout_stateid *ls; struct nfs4_stid *stp; - stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); + stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache, + nfsd4_free_layout_stateid); if (!stp) return NULL; - stp->sc_free = nfsd4_free_layout_stateid; + get_nfs4_file(fp); stp->sc_file = fp; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b4beaaa4eaa..a0dee8ae9f97 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -633,8 +633,8 @@ out: return co; } -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab) +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; @@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, idr_preload_end(); if (new_id < 0) goto out_free; + + stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; @@ -675,15 +677,12 @@ out_free: static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; - struct nfs4_ol_stateid *stp; - stid = nfs4_alloc_stid(clp, stateid_slab); + stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; - stp = openlockstateid(stid); - stp->st_stid.sc_free = nfs4_free_ol_stateid; - return stp; + return openlockstateid(stid); } static void nfs4_free_deleg(struct nfs4_stid *stid) @@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) goto out_dec; - dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) goto out_dec; - dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; - stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, lst = find_lock_stateid(lo, fi); if (lst == NULL) { spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab); + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c9399366f9df..4516e8b7d776 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab); +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 26c6fdb4bf67..ca13236dbb1f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -332,37 +332,6 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) } } -static __be32 -nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct iattr *iap) -{ - struct inode *inode = d_inode(fhp->fh_dentry); - int host_err; - - if (iap->ia_size < inode->i_size) { - __be32 err; - - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); - if (err) - return err; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserrno; - - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) - goto out_put_write_access; - return 0; - -out_put_write_access: - put_write_access(inode); -out_nfserrno: - return nfserrno(host_err); -} - /* * Set various file attributes. After this call fhp needs an fh_put. */ @@ -377,7 +346,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; - int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -390,11 +358,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - goto out; + return err; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - return nfserrno(host_err); + goto out_host_err; } dentry = fhp->fh_dentry; @@ -405,50 +373,59 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - goto out; + return 0; nfsd_sanitize_attrs(inode, iap); + if (check_guard && guardtime != inode->i_ctime.tv_sec) + return nfserr_notsync; + /* * The size case is special, it changes the file in addition to the - * attributes. + * attributes, and file systems don't expect it to be mixed with + * "random" attribute changes. We thus split out the size change + * into a separate call for vfs_truncate, and do the rest as a + * a separate setattr call. */ if (iap->ia_valid & ATTR_SIZE) { - err = nfsd_get_write_access(rqstp, fhp, iap); - if (err) - goto out; - size_change = 1; + struct path path = { + .mnt = fhp->fh_export->ex_path.mnt, + .dentry = dentry, + }; + bool implicit_mtime = false; /* - * RFC5661, Section 18.30.4: - * Changing the size of a file with SETATTR indirectly - * changes the time_modify and change attributes. - * - * (and similar for the older RFCs) + * vfs_truncate implicity updates the mtime IFF the file size + * actually changes. Avoid the additional seattr call below if + * the only other attribute that the client sends is the mtime. */ - if (iap->ia_size != i_size_read(inode)) - iap->ia_valid |= ATTR_MTIME; - } + if (iap->ia_size != i_size_read(inode) && + ((iap->ia_valid & ~(ATTR_SIZE | ATTR_MTIME)) == 0)) + implicit_mtime = true; - iap->ia_valid |= ATTR_CTIME; + host_err = vfs_truncate(&path, iap->ia_size); + if (host_err) + goto out_host_err; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; + iap->ia_valid &= ~ATTR_SIZE; + if (implicit_mtime) + iap->ia_valid &= ~ATTR_MTIME; + if (!iap->ia_valid) + goto done; } + iap->ia_valid |= ATTR_CTIME; + fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); - err = nfserrno(host_err); + if (host_err) + goto out_host_err; -out_put_write_access: - if (size_change) - put_write_access(inode); - if (!err) - err = nfserrno(commit_metadata(fhp)); -out: - return err; +done: + host_err = commit_metadata(fhp); +out_host_err: + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index d346d42c54d1..33db69be4832 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -39,6 +39,7 @@ #include "xfs_rmap_btree.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" +#include "xfs_ialloc_btree.h" /* * Per-AG Block Reservations @@ -200,22 +201,30 @@ __xfs_ag_resv_init( struct xfs_mount *mp = pag->pag_mount; struct xfs_ag_resv *resv; int error; + xfs_extlen_t reserved; - resv = xfs_perag_resv(pag, type); if (used > ask) ask = used; - resv->ar_asked = ask; - resv->ar_reserved = resv->ar_orig_reserved = ask - used; - mp->m_ag_max_usable -= ask; + reserved = ask - used; - trace_xfs_ag_resv_init(pag, type, ask); - - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); - if (error) + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); + if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); + xfs_warn(mp, +"Per-AG reservation for AG %u failed. Filesystem may run out of space.", + pag->pag_agno); + return error; + } - return error; + mp->m_ag_max_usable -= ask; + + resv = xfs_perag_resv(pag, type); + resv->ar_asked = ask; + resv->ar_reserved = resv->ar_orig_reserved = reserved; + + trace_xfs_ag_resv_init(pag, type, ask); + return 0; } /* Create a per-AG block reservation. */ @@ -223,6 +232,8 @@ int xfs_ag_resv_init( struct xfs_perag *pag) { + struct xfs_mount *mp = pag->pag_mount; + xfs_agnumber_t agno = pag->pag_agno; xfs_extlen_t ask; xfs_extlen_t used; int error = 0; @@ -231,23 +242,45 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; - error = xfs_refcountbt_calc_reserves(pag->pag_mount, - pag->pag_agno, &ask, &used); + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, - ask, used); + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) { + /* + * Because we didn't have per-AG reservations when the + * finobt feature was added we might not be able to + * reserve all needed blocks. Warn and fall back to the + * old and potentially buggy code in that case, but + * ensure we do have the reservation for the refcountbt. + */ + ask = used = 0; + + mp->m_inotbt_nores = true; + + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, + &used); + if (error) + goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) + goto out; + } } /* Create the AGFL metadata reservation */ if (pag->pag_agfl_resv.ar_asked == 0) { ask = used = 0; - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, - &ask, &used); + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; @@ -256,9 +289,16 @@ xfs_ag_resv_init( goto out; } +#ifdef DEBUG + /* need to read in the AGF for the ASSERT below to work */ + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); + if (error) + return error; + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= pag->pagf_freeblks + pag->pagf_flcount); +#endif out: return error; } diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -131,9 +131,6 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (!xfs_inode_hasattr(ip)) - return -ENOATTR; - error = xfs_attr_args_init(&args, ip, name, flags); if (error) return error; @@ -392,9 +389,6 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - if (!xfs_inode_hasattr(dp)) - return -ENOATTR; - error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 44773c9eb957..bfc00de5c6f1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3629,7 +3629,7 @@ xfs_bmap_btalloc( align = xfs_get_cowextsz_hint(ap->ip); else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); - if (unlikely(align)) { + if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, &ap->length); @@ -3701,7 +3701,7 @@ xfs_bmap_btalloc( args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { + if (align) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); @@ -4514,8 +4514,6 @@ xfs_bmapi_write( int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4603,22 +4601,44 @@ xfs_bmapi_write( bma.firstblock = firstblock; while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + bool need_alloc = false, wasdelay = false; - /* - * Make sure we only reflink into a hole. - */ - if (flags & XFS_BMAPI_REMAP) - ASSERT(inhole); - if (flags & XFS_BMAPI_COWFORK) - ASSERT(!inhole); + /* in hole or beyoned EOF? */ + if (eof || bma.got.br_startoff > bno) { + if (flags & XFS_BMAPI_DELALLOC) { + /* + * For the COW fork we can reasonably get a + * request for converting an extent that races + * with other threads already having converted + * part of it, as there converting COW to + * regular blocks is not protected using the + * IOLOCK. + */ + ASSERT(flags & XFS_BMAPI_COWFORK); + if (!(flags & XFS_BMAPI_COWFORK)) { + error = -EIO; + goto error0; + } + + if (eof || bno >= end) + break; + } else { + need_alloc = true; + } + } else { + /* + * Make sure we only reflink into a hole. + */ + ASSERT(!(flags & XFS_BMAPI_REMAP)); + if (isnullstartblock(bma.got.br_startblock)) + wasdelay = true; + } /* * First, deal with the hole before the allocated space * that we found, if any. */ - if (inhole || wasdelay) { + if (need_alloc || wasdelay) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cecd094404cc..cdef87db5262 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -110,6 +110,9 @@ struct xfs_extent_free_item /* Map something in the CoW fork. */ #define XFS_BMAPI_COWFORK 0x200 +/* Only convert delalloc space, don't allocate entirely new extents */ +#define XFS_BMAPI_DELALLOC 0x400 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -120,7 +123,8 @@ struct xfs_extent_free_item { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ - { XFS_BMAPI_COWFORK, "COWFORK" } + { XFS_BMAPI_COWFORK, "COWFORK" }, \ + { XFS_BMAPI_DELALLOC, "DELALLOC" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 0fd086d03d41..7c471881c9a6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -82,11 +82,12 @@ xfs_finobt_set_root( } STATIC int -xfs_inobt_alloc_block( +__xfs_inobt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int *stat) + int *stat, + enum xfs_ag_resv_type resv) { xfs_alloc_arg_t args; /* block allocation args */ int error; /* error return value */ @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( args.maxlen = 1; args.prod = 1; args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.resv = resv; error = xfs_alloc_vextent(&args); if (error) { @@ -123,6 +125,27 @@ xfs_inobt_alloc_block( } STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, + XFS_AG_RESV_METADATA); +} + +STATIC int xfs_inobt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, - .alloc_block = xfs_inobt_alloc_block, + .alloc_block = xfs_finobt_alloc_block, .free_block = xfs_inobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, @@ -480,3 +503,64 @@ xfs_inobt_rec_check_count( return 0; } #endif /* DEBUG */ + +static xfs_extlen_t +xfs_inobt_max_size( + struct xfs_mount *mp) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_inobt_mxr[0] == 0) + return 0; + + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / + XFS_INODES_PER_CHUNK); +} + +static int +xfs_inobt_count_blocks( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_btnum_t btnum, + xfs_extlen_t *tree_blocks) +{ + struct xfs_buf *agbp; + struct xfs_btree_cur *cur; + int error; + + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); + if (error) + return error; + + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); + error = xfs_btree_count_blocks(cur, tree_blocks); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); + + return error; +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + */ +int +xfs_finobt_calc_reserves( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + xfs_extlen_t tree_len = 0; + int error; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); + if (error) + return error; + + *ask += xfs_inobt_max_size(mp); + *used += tree_len; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bd88453217ce..aa81e2e63f3f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #define xfs_inobt_rec_check_count(mp, rec) 0 #endif /* DEBUG */ +int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_extlen_t *ask, xfs_extlen_t *used); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2580262e4ea0..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -242,7 +242,7 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index b9abce524c33..c1417919ab0a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -528,7 +528,6 @@ xfs_getbmap( xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ - int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ @@ -686,10 +685,8 @@ xfs_getbmap( goto out_free_map; } - nexleft = nex; - do { - nmap = (nexleft > subnex) ? subnex : nexleft; + nmap = (nex> subnex) ? subnex : nex; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); @@ -697,8 +694,8 @@ xfs_getbmap( goto out_free_map; ASSERT(nmap <= subnex); - for (i = 0; i < nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count; i++) { + for (i = 0; i < nmap && bmv->bmv_length && + cur_ext < bmv->bmv_count - 1; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; @@ -760,16 +757,27 @@ xfs_getbmap( continue; } + /* + * In order to report shared extents accurately, + * we report each distinct shared/unshared part + * of a single bmbt record using multiple bmap + * extents. To make that happen, we iterate the + * same map array item multiple times, each + * time trimming out the subextent that we just + * reported. + * + * Because of this, we must check the out array + * index (cur_ext) directly against bmv_count-1 + * to avoid overflows. + */ if (inject_map.br_startblock != NULLFSBLOCK) { map[i] = inject_map; i--; - } else - nexleft--; + } bmv->bmv_entries++; cur_ext++; } - } while (nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count); + } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); out_free_map: kmem_free(map); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7f0a01f7b592..ac3b4db519df 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -422,6 +422,7 @@ retry: out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9557795eb74..de32f0fe47c8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1792,22 +1792,23 @@ xfs_inactive_ifree( int error; /* - * The ifree transaction might need to allocate blocks for record - * insertion to the finobt. We don't want to fail here at ENOSPC, so - * allow ifree to dip into the reserved block pool if necessary. - * - * Freeing large sets of inodes generally means freeing inode chunks, - * directory and file data blocks, so this should be relatively safe. - * Only under severe circumstances should it be possible to free enough - * inodes to exhaust the reserve block pool via finobt expansion while - * at the same time not creating free space in the filesystem. + * We try to use a per-AG reservation for any block needed by the finobt + * tree, but as the finobt feature predates the per-AG reservation + * support a degraded file system might not have enough space for the + * reservation at mount time. In that case try to dip into the reserved + * pool and pray. * * Send a warning if the reservation does happen to fail, as the inode * now remains allocated and sits on the unlinked list until the fs is * repaired. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, - XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); + if (unlikely(mp->m_inotbt_nores)) { + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, + XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, + &tp); + } else { + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); + } if (error) { if (error == -ENOSPC) { xfs_warn_ratelimited(mp, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0d147428971e..1aa3abd67b36 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -681,7 +681,7 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; - int flags = 0; + int flags = XFS_BMAPI_DELALLOC; int nres; if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 84f785218907..7f351f706b7a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -140,6 +140,7 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ __uint64_t m_flags; /* global mount flags */ + bool m_inotbt_nores; /* no per-AG finobt resv. */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ int m_ialloc_min_blks;/* min blocks in sparse inode diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 45e50ea90769..b669b123287b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, + &ip); if (error) { *res = BULKSTAT_RV_NOTHING; return error; |