summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent_io.c35
-rw-r--r--fs/btrfs/tree-log.c16
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/configfs/configfs_internal.h15
-rw-r--r--fs/configfs/dir.c137
-rw-r--r--fs/configfs/file.c280
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext4/inode.c3
-rw-r--r--fs/fs-writeback.c174
-rw-r--r--fs/io_uring.c531
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jfs/Kconfig2
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/orangefs/file.c2
-rw-r--r--fs/orangefs/orangefs-kernel.h2
-rw-r--r--fs/timerfd.c6
-rw-r--r--fs/ufs/Kconfig2
17 files changed, 788 insertions, 441 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1ff438fd5bc2..eeb75281894e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3628,6 +3628,13 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
TASK_UNINTERRUPTIBLE);
}
+static void end_extent_buffer_writeback(struct extent_buffer *eb)
+{
+ clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
+}
+
/*
* Lock eb pages and flush the bio if we can't the locks
*
@@ -3699,8 +3706,11 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
if (!trylock_page(p)) {
if (!flush) {
- ret = flush_write_bio(epd);
- if (ret < 0) {
+ int err;
+
+ err = flush_write_bio(epd);
+ if (err < 0) {
+ ret = err;
failed_page_nr = i;
goto err_unlock;
}
@@ -3715,16 +3725,23 @@ err_unlock:
/* Unlock already locked pages */
for (i = 0; i < failed_page_nr; i++)
unlock_page(eb->pages[i]);
+ /*
+ * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it.
+ * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can
+ * be made and undo everything done before.
+ */
+ btrfs_tree_lock(eb);
+ spin_lock(&eb->refs_lock);
+ set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+ end_extent_buffer_writeback(eb);
+ spin_unlock(&eb->refs_lock);
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
+ fs_info->dirty_metadata_batch);
+ btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+ btrfs_tree_unlock(eb);
return ret;
}
-static void end_extent_buffer_writeback(struct extent_buffer *eb)
-{
- clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_atomic();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
-}
-
static void set_btree_ioerr(struct page *page)
{
struct extent_buffer *eb = (struct extent_buffer *)page->private;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6c8297bcfeb7..1bfd7e34f31e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4985,7 +4985,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
BTRFS_I(inode),
LOG_OTHER_INODE_ALL,
0, LLONG_MAX, ctx);
- iput(inode);
+ btrfs_add_delayed_iput(inode);
}
}
continue;
@@ -5000,7 +5000,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
LOG_OTHER_INODE, 0, LLONG_MAX, ctx);
if (ret) {
- iput(inode);
+ btrfs_add_delayed_iput(inode);
continue;
}
@@ -5009,7 +5009,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
- iput(inode);
+ btrfs_add_delayed_iput(inode);
continue;
}
@@ -5056,7 +5056,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
}
path->slots[0]++;
}
- iput(inode);
+ btrfs_add_delayed_iput(inode);
}
return ret;
@@ -5689,7 +5689,7 @@ process_leaf:
}
if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) {
- iput(di_inode);
+ btrfs_add_delayed_iput(di_inode);
break;
}
@@ -5701,7 +5701,7 @@ process_leaf:
if (!ret &&
btrfs_must_commit_transaction(trans, BTRFS_I(di_inode)))
ret = 1;
- iput(di_inode);
+ btrfs_add_delayed_iput(di_inode);
if (ret)
goto next_dir_inode;
if (ctx->log_new_dentries) {
@@ -5848,7 +5848,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
if (!ret && ctx && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
BTRFS_I(dir_inode), ctx);
- iput(dir_inode);
+ btrfs_add_delayed_iput(dir_inode);
if (ret)
goto out;
}
@@ -5891,7 +5891,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
LOG_INODE_EXISTS,
0, LLONG_MAX, ctx);
- iput(inode);
+ btrfs_add_delayed_iput(inode);
if (ret)
return ret;
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index ce8b7f677c58..eb0bb8ca8e63 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
*/
/*
- * See Documentation/filesystems/nfs/Exporting
+ * See Documentation/filesystems/nfs/exporting.rst
* and examples in fs/exportfs
*
* Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index f752d83a9c44..520f1813e789 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -20,6 +20,15 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+struct configfs_fragment {
+ atomic_t frag_count;
+ struct rw_semaphore frag_sem;
+ bool frag_dead;
+};
+
+void put_fragment(struct configfs_fragment *);
+struct configfs_fragment *get_fragment(struct configfs_fragment *);
+
struct configfs_dirent {
atomic_t s_count;
int s_dependent_count;
@@ -34,6 +43,7 @@ struct configfs_dirent {
#ifdef CONFIG_LOCKDEP
int s_depth;
#endif
+ struct configfs_fragment *s_frag;
};
#define CONFIGFS_ROOT 0x0001
@@ -61,8 +71,8 @@ extern int configfs_create(struct dentry *, umode_t mode, void (*init)(struct in
extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
extern int configfs_create_bin_file(struct config_item *,
const struct configfs_bin_attribute *);
-extern int configfs_make_dirent(struct configfs_dirent *,
- struct dentry *, void *, umode_t, int);
+extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *,
+ void *, umode_t, int, struct configfs_fragment *);
extern int configfs_dirent_is_ready(struct configfs_dirent *);
extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
@@ -137,6 +147,7 @@ static inline void release_configfs_dirent(struct configfs_dirent * sd)
{
if (!(sd->s_type & CONFIGFS_ROOT)) {
kfree(sd->s_iattr);
+ put_fragment(sd->s_frag);
kmem_cache_free(configfs_dir_cachep, sd);
}
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 92112915de8e..79fc25aaa8cd 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -151,11 +151,38 @@ configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
#endif /* CONFIG_LOCKDEP */
+static struct configfs_fragment *new_fragment(void)
+{
+ struct configfs_fragment *p;
+
+ p = kmalloc(sizeof(struct configfs_fragment), GFP_KERNEL);
+ if (p) {
+ atomic_set(&p->frag_count, 1);
+ init_rwsem(&p->frag_sem);
+ p->frag_dead = false;
+ }
+ return p;
+}
+
+void put_fragment(struct configfs_fragment *frag)
+{
+ if (frag && atomic_dec_and_test(&frag->frag_count))
+ kfree(frag);
+}
+
+struct configfs_fragment *get_fragment(struct configfs_fragment *frag)
+{
+ if (likely(frag))
+ atomic_inc(&frag->frag_count);
+ return frag;
+}
+
/*
* Allocates a new configfs_dirent and links it to the parent configfs_dirent
*/
static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
- void *element, int type)
+ void *element, int type,
+ struct configfs_fragment *frag)
{
struct configfs_dirent * sd;
@@ -175,6 +202,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *paren
kmem_cache_free(configfs_dir_cachep, sd);
return ERR_PTR(-ENOENT);
}
+ sd->s_frag = get_fragment(frag);
list_add(&sd->s_sibling, &parent_sd->s_children);
spin_unlock(&configfs_dirent_lock);
@@ -209,11 +237,11 @@ static int configfs_dirent_exists(struct configfs_dirent *parent_sd,
int configfs_make_dirent(struct configfs_dirent * parent_sd,
struct dentry * dentry, void * element,
- umode_t mode, int type)
+ umode_t mode, int type, struct configfs_fragment *frag)
{
struct configfs_dirent * sd;
- sd = configfs_new_dirent(parent_sd, element, type);
+ sd = configfs_new_dirent(parent_sd, element, type, frag);
if (IS_ERR(sd))
return PTR_ERR(sd);
@@ -260,7 +288,8 @@ static void init_symlink(struct inode * inode)
* until it is validated by configfs_dir_set_ready()
*/
-static int configfs_create_dir(struct config_item *item, struct dentry *dentry)
+static int configfs_create_dir(struct config_item *item, struct dentry *dentry,
+ struct configfs_fragment *frag)
{
int error;
umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
@@ -273,7 +302,8 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry)
return error;
error = configfs_make_dirent(p->d_fsdata, dentry, item, mode,
- CONFIGFS_DIR | CONFIGFS_USET_CREATING);
+ CONFIGFS_DIR | CONFIGFS_USET_CREATING,
+ frag);
if (unlikely(error))
return error;
@@ -338,9 +368,10 @@ int configfs_create_link(struct configfs_symlink *sl,
{
int err = 0;
umode_t mode = S_IFLNK | S_IRWXUGO;
+ struct configfs_dirent *p = parent->d_fsdata;
- err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode,
- CONFIGFS_ITEM_LINK);
+ err = configfs_make_dirent(p, dentry, sl, mode,
+ CONFIGFS_ITEM_LINK, p->s_frag);
if (!err) {
err = configfs_create(dentry, mode, init_symlink);
if (err) {
@@ -599,7 +630,8 @@ static int populate_attrs(struct config_item *item)
static int configfs_attach_group(struct config_item *parent_item,
struct config_item *item,
- struct dentry *dentry);
+ struct dentry *dentry,
+ struct configfs_fragment *frag);
static void configfs_detach_group(struct config_item *item);
static void detach_groups(struct config_group *group)
@@ -647,7 +679,8 @@ static void detach_groups(struct config_group *group)
* try using vfs_mkdir. Just a thought.
*/
static int create_default_group(struct config_group *parent_group,
- struct config_group *group)
+ struct config_group *group,
+ struct configfs_fragment *frag)
{
int ret;
struct configfs_dirent *sd;
@@ -663,7 +696,7 @@ static int create_default_group(struct config_group *parent_group,
d_add(child, NULL);
ret = configfs_attach_group(&parent_group->cg_item,
- &group->cg_item, child);
+ &group->cg_item, child, frag);
if (!ret) {
sd = child->d_fsdata;
sd->s_type |= CONFIGFS_USET_DEFAULT;
@@ -677,13 +710,14 @@ static int create_default_group(struct config_group *parent_group,
return ret;
}
-static int populate_groups(struct config_group *group)
+static int populate_groups(struct config_group *group,
+ struct configfs_fragment *frag)
{
struct config_group *new_group;
int ret = 0;
list_for_each_entry(new_group, &group->default_groups, group_entry) {
- ret = create_default_group(group, new_group);
+ ret = create_default_group(group, new_group, frag);
if (ret) {
detach_groups(group);
break;
@@ -797,11 +831,12 @@ static void link_group(struct config_group *parent_group, struct config_group *g
*/
static int configfs_attach_item(struct config_item *parent_item,
struct config_item *item,
- struct dentry *dentry)
+ struct dentry *dentry,
+ struct configfs_fragment *frag)
{
int ret;
- ret = configfs_create_dir(item, dentry);
+ ret = configfs_create_dir(item, dentry, frag);
if (!ret) {
ret = populate_attrs(item);
if (ret) {
@@ -831,12 +866,13 @@ static void configfs_detach_item(struct config_item *item)
static int configfs_attach_group(struct config_item *parent_item,
struct config_item *item,
- struct dentry *dentry)
+ struct dentry *dentry,
+ struct configfs_fragment *frag)
{
int ret;
struct configfs_dirent *sd;
- ret = configfs_attach_item(parent_item, item, dentry);
+ ret = configfs_attach_item(parent_item, item, dentry, frag);
if (!ret) {
sd = dentry->d_fsdata;
sd->s_type |= CONFIGFS_USET_DIR;
@@ -852,7 +888,7 @@ static int configfs_attach_group(struct config_item *parent_item,
*/
inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
configfs_adjust_dir_dirent_depth_before_populate(sd);
- ret = populate_groups(to_config_group(item));
+ ret = populate_groups(to_config_group(item), frag);
if (ret) {
configfs_detach_item(item);
d_inode(dentry)->i_flags |= S_DEAD;
@@ -1247,6 +1283,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
struct configfs_dirent *sd;
const struct config_item_type *type;
struct module *subsys_owner = NULL, *new_item_owner = NULL;
+ struct configfs_fragment *frag;
char *name;
sd = dentry->d_parent->d_fsdata;
@@ -1265,6 +1302,12 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
goto out;
}
+ frag = new_fragment();
+ if (!frag) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
/* Get a working ref for the duration of this function */
parent_item = configfs_get_config_item(dentry->d_parent);
type = parent_item->ci_type;
@@ -1367,9 +1410,9 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
spin_unlock(&configfs_dirent_lock);
if (group)
- ret = configfs_attach_group(parent_item, item, dentry);
+ ret = configfs_attach_group(parent_item, item, dentry, frag);
else
- ret = configfs_attach_item(parent_item, item, dentry);
+ ret = configfs_attach_item(parent_item, item, dentry, frag);
spin_lock(&configfs_dirent_lock);
sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
@@ -1406,6 +1449,7 @@ out_put:
* reference.
*/
config_item_put(parent_item);
+ put_fragment(frag);
out:
return ret;
@@ -1417,6 +1461,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
struct config_item *item;
struct configfs_subsystem *subsys;
struct configfs_dirent *sd;
+ struct configfs_fragment *frag;
struct module *subsys_owner = NULL, *dead_item_owner = NULL;
int ret;
@@ -1474,6 +1519,16 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
}
} while (ret == -EAGAIN);
+ frag = sd->s_frag;
+ if (down_write_killable(&frag->frag_sem)) {
+ spin_lock(&configfs_dirent_lock);
+ configfs_detach_rollback(dentry);
+ spin_unlock(&configfs_dirent_lock);
+ return -EINTR;
+ }
+ frag->frag_dead = true;
+ up_write(&frag->frag_sem);
+
/* Get a working ref for the duration of this function */
item = configfs_get_config_item(dentry);
@@ -1574,7 +1629,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
*/
err = -ENOENT;
if (configfs_dirent_is_ready(parent_sd)) {
- file->private_data = configfs_new_dirent(parent_sd, NULL, 0);
+ file->private_data = configfs_new_dirent(parent_sd, NULL, 0, NULL);
if (IS_ERR(file->private_data))
err = PTR_ERR(file->private_data);
else
@@ -1732,8 +1787,13 @@ int configfs_register_group(struct config_group *parent_group,
{
struct configfs_subsystem *subsys = parent_group->cg_subsys;
struct dentry *parent;
+ struct configfs_fragment *frag;
int ret;
+ frag = new_fragment();
+ if (!frag)
+ return -ENOMEM;
+
mutex_lock(&subsys->su_mutex);
link_group(parent_group, group);
mutex_unlock(&subsys->su_mutex);
@@ -1741,7 +1801,7 @@ int configfs_register_group(struct config_group *parent_group,
parent = parent_group->cg_item.ci_dentry;
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
- ret = create_default_group(parent_group, group);
+ ret = create_default_group(parent_group, group, frag);
if (ret)
goto err_out;
@@ -1749,12 +1809,14 @@ int configfs_register_group(struct config_group *parent_group,
configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
spin_unlock(&configfs_dirent_lock);
inode_unlock(d_inode(parent));
+ put_fragment(frag);
return 0;
err_out:
inode_unlock(d_inode(parent));
mutex_lock(&subsys->su_mutex);
unlink_group(group);
mutex_unlock(&subsys->su_mutex);
+ put_fragment(frag);
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
@@ -1770,16 +1832,12 @@ void configfs_unregister_group(struct config_group *group)
struct configfs_subsystem *subsys = group->cg_subsys;
struct dentry *dentry = group->cg_item.ci_dentry;
struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
+ struct configfs_dirent *sd = dentry->d_fsdata;
+ struct configfs_fragment *frag = sd->s_frag;
- mutex_lock(&subsys->su_mutex);
- if (!group->cg_item.ci_parent->ci_group) {
- /*
- * The parent has already been unlinked and detached
- * due to a rmdir.
- */
- goto unlink_group;
- }
- mutex_unlock(&subsys->su_mutex);
+ down_write(&frag->frag_sem);
+ frag->frag_dead = true;
+ up_write(&frag->frag_sem);
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
spin_lock(&configfs_dirent_lock);
@@ -1796,7 +1854,6 @@ void configfs_unregister_group(struct config_group *group)
dput(dentry);
mutex_lock(&subsys->su_mutex);
-unlink_group:
unlink_group(group);
mutex_unlock(&subsys->su_mutex);
}
@@ -1853,10 +1910,17 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
struct dentry *dentry;
struct dentry *root;
struct configfs_dirent *sd;
+ struct configfs_fragment *frag;
+
+ frag = new_fragment();
+ if (!frag)
+ return -ENOMEM;
root = configfs_pin_fs();
- if (IS_ERR(root))
+ if (IS_ERR(root)) {
+ put_fragment(frag);
return PTR_ERR(root);
+ }
if (!group->cg_item.ci_name)
group->cg_item.ci_name = group->cg_item.ci_namebuf;
@@ -1872,7 +1936,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
d_add(dentry, NULL);
err = configfs_attach_group(sd->s_element, &group->cg_item,
- dentry);
+ dentry, frag);
if (err) {
BUG_ON(d_inode(dentry));
d_drop(dentry);
@@ -1890,6 +1954,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
unlink_group(group);
configfs_release_fs();
}
+ put_fragment(frag);
return err;
}
@@ -1899,12 +1964,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
struct config_group *group = &subsys->su_group;
struct dentry *dentry = group->cg_item.ci_dentry;
struct dentry *root = dentry->d_sb->s_root;
+ struct configfs_dirent *sd = dentry->d_fsdata;
+ struct configfs_fragment *frag = sd->s_frag;
if (dentry->d_parent != root) {
pr_err("Tried to unregister non-subsystem!\n");
return;
}
+ down_write(&frag->frag_sem);
+ frag->frag_dead = true;
+ up_write(&frag->frag_sem);
+
inode_lock_nested(d_inode(root),
I_MUTEX_PARENT);
inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 61e4db4390a1..fb65b706cc0d 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -39,40 +39,44 @@ struct configfs_buffer {
bool write_in_progress;
char *bin_buffer;
int bin_buffer_size;
+ int cb_max_size;
+ struct config_item *item;
+ struct module *owner;
+ union {
+ struct configfs_attribute *attr;
+ struct configfs_bin_attribute *bin_attr;
+ };
};
+static inline struct configfs_fragment *to_frag(struct file *file)
+{
+ struct configfs_dirent *sd = file->f_path.dentry->d_fsdata;
-/**
- * fill_read_buffer - allocate and fill buffer from item.
- * @dentry: dentry pointer.
- * @buffer: data buffer for file.
- *
- * Allocate @buffer->page, if it hasn't been already, then call the
- * config_item's show() method to fill the buffer with this attribute's
- * data.
- * This is called only once, on the file's first read.
- */
-static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buffer)
+ return sd->s_frag;
+}
+
+static int fill_read_buffer(struct file *file, struct configfs_buffer *buffer)
{
- struct configfs_attribute * attr = to_attr(dentry);
- struct config_item * item = to_item(dentry->d_parent);
- int ret = 0;
- ssize_t count;
+ struct configfs_fragment *frag = to_frag(file);
+ ssize_t count = -ENOENT;
if (!buffer->page)
buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
if (!buffer->page)
return -ENOMEM;
- count = attr->show(item, buffer->page);
-
- BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
- if (count >= 0) {
- buffer->needs_read_fill = 0;
- buffer->count = count;
- } else
- ret = count;
- return ret;
+ down_read(&frag->frag_sem);
+ if (!frag->frag_dead)
+ count = buffer->attr->show(buffer->item, buffer->page);
+ up_read(&frag->frag_sem);
+
+ if (count < 0)
+ return count;
+ if (WARN_ON_ONCE(count > (ssize_t)SIMPLE_ATTR_SIZE))
+ return -EIO;
+ buffer->needs_read_fill = 0;
+ buffer->count = count;
+ return 0;
}
/**
@@ -97,12 +101,13 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
static ssize_t
configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
- struct configfs_buffer * buffer = file->private_data;
+ struct configfs_buffer *buffer = file->private_data;
ssize_t retval = 0;
mutex_lock(&buffer->mutex);
if (buffer->needs_read_fill) {
- if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
+ retval = fill_read_buffer(file, buffer);
+ if (retval)
goto out;
}
pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
@@ -138,10 +143,8 @@ static ssize_t
configfs_read_bin_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct configfs_fragment *frag = to_frag(file);
struct configfs_buffer *buffer = file->private_data;
- struct dentry *dentry = file->f_path.dentry;
- struct config_item *item = to_item(dentry->d_parent);
- struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry);
ssize_t retval = 0;
ssize_t len = min_t(size_t, count, PAGE_SIZE);
@@ -156,14 +159,19 @@ configfs_read_bin_file(struct file *file, char __user *buf,
if (buffer->needs_read_fill) {
/* perform first read with buf == NULL to get extent */
- len = bin_attr->read(item, NULL, 0);
+ down_read(&frag->frag_sem);
+ if (!frag->frag_dead)
+ len = buffer->bin_attr->read(buffer->item, NULL, 0);
+ else
+ len = -ENOENT;
+ up_read(&frag->frag_sem);
if (len <= 0) {
retval = len;
goto out;
}
/* do not exceed the maximum value */
- if (bin_attr->cb_max_size && len > bin_attr->cb_max_size) {
+ if (buffer->cb_max_size && len > buffer->cb_max_size) {
retval = -EFBIG;
goto out;
}
@@ -176,7 +184,13 @@ configfs_read_bin_file(struct file *file, char __user *buf,
buffer->bin_buffer_size = len;
/* perform second read to fill buffer */
- len = bin_attr->read(item, buffer->bin_buffer, len);
+ down_read(&frag->frag_sem);
+ if (!frag->frag_dead)
+ len = buffer->bin_attr->read(buffer->item,
+ buffer->bin_buffer, len);
+ else
+ len = -ENOENT;
+ up_read(&frag->frag_sem);
if (len < 0) {
retval = len;
vfree(buffer->bin_buffer);
@@ -226,25 +240,17 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
return error ? -EFAULT : count;
}
-
-/**
- * flush_write_buffer - push buffer to config_item.
- * @dentry: dentry to the attribute
- * @buffer: data buffer for file.
- * @count: number of bytes
- *
- * Get the correct pointers for the config_item and the attribute we're
- * dealing with, then call the store() method for the attribute,
- * passing the buffer that we acquired in fill_write_buffer().
- */
-
static int
-flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size_t count)
+flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t count)
{
- struct configfs_attribute * attr = to_attr(dentry);
- struct config_item * item = to_item(dentry->d_parent);
-
- return attr->store(item, buffer->page, count);
+ struct configfs_fragment *frag = to_frag(file);
+ int res = -ENOENT;
+
+ down_read(&frag->frag_sem);
+ if (!frag->frag_dead)
+ res = buffer->attr->store(buffer->item, buffer->page, count);
+ up_read(&frag->frag_sem);
+ return res;
}
@@ -268,13 +274,13 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size
static ssize_t
configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
- struct configfs_buffer * buffer = file->private_data;
+ struct configfs_buffer *buffer = file->private_data;
ssize_t len;
mutex_lock(&buffer->mutex);
len = fill_write_buffer(buffer, buf, count);
if (len > 0)
- len = flush_write_buffer(file->f_path.dentry, buffer, len);
+ len = flush_write_buffer(file, buffer, len);
if (len > 0)
*ppos += len;
mutex_unlock(&buffer->mutex);
@@ -299,8 +305,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct configfs_buffer *buffer = file->private_data;
- struct dentry *dentry = file->f_path.dentry;
- struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry);
void *tbuf = NULL;
ssize_t len;
@@ -316,8 +320,8 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
/* buffer grows? */
if (*ppos + count > buffer->bin_buffer_size) {
- if (bin_attr->cb_max_size &&
- *ppos + count > bin_attr->cb_max_size) {
+ if (buffer->cb_max_size &&
+ *ppos + count > buffer->cb_max_size) {
len = -EFBIG;
goto out;
}
@@ -349,31 +353,51 @@ out:
return len;
}
-static int check_perm(struct inode * inode, struct file * file, int type)
+static int __configfs_open_file(struct inode *inode, struct file *file, int type)
{
- struct config_item *item = configfs_get_config_item(file->f_path.dentry->d_parent);
- struct configfs_attribute * attr = to_attr(file->f_path.dentry);
- struct configfs_bin_attribute *bin_attr = NULL;
- struct configfs_buffer * buffer;
- struct configfs_item_operations * ops = NULL;
- int error = 0;
+ struct dentry *dentry = file->f_path.dentry;
+ struct configfs_fragment *frag = to_frag(file);
+ struct configfs_attribute *attr;
+ struct configfs_buffer *buffer;
+ int error;
- if (!item || !attr)
- goto Einval;
+ error = -ENOMEM;
+ buffer = kzalloc(sizeof(struct configfs_buffer), GFP_KERNEL);
+ if (!buffer)
+ goto out;
- if (type & CONFIGFS_ITEM_BIN_ATTR)
- bin_attr = to_bin_attr(file->f_path.dentry);
+ error = -ENOENT;
+ down_read(&frag->frag_sem);
+ if (unlikely(frag->frag_dead))
+ goto out_free_buffer;
- /* Grab the module reference for this attribute if we have one */
- if (!try_module_get(attr->ca_owner)) {
- error = -ENODEV;
- goto Done;
+ error = -EINVAL;
+ buffer->item = to_item(dentry->d_parent);
+ if (!buffer->item)
+ goto out_free_buffer;
+
+ attr = to_attr(dentry);
+ if (!attr)
+ goto out_put_item;
+
+ if (type & CONFIGFS_ITEM_BIN_ATTR) {
+ buffer->bin_attr = to_bin_attr(dentry);
+ buffer->cb_max_size = buffer->bin_attr->cb_max_size;
+ } else {
+ buffer->attr = attr;
}
- if (item->ci_type)
- ops = item->ci_type->ct_item_ops;
- else
- goto Eaccess;
+ buffer->owner = attr->ca_owner;
+ /* Grab the module reference for this attribute if we have one */
+ error = -ENODEV;
+ if (!try_module_get(buffer->owner))
+ goto out_put_item;
+
+ error = -EACCES;
+ if (!buffer->item->ci_type)
+ goto out_put_module;
+
+ buffer->ops = buffer->item->ci_type->ct_item_ops;
/* File needs write support.
* The inode's perms must say it's ok,
@@ -381,13 +405,11 @@ static int check_perm(struct inode * inode, struct file * file, int type)
*/
if (file->f_mode & FMODE_WRITE) {
if (!(inode->i_mode & S_IWUGO))
- goto Eaccess;
-
+ goto out_put_module;
if ((type & CONFIGFS_ITEM_ATTR) && !attr->store)
- goto Eaccess;
-
- if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->write)
- goto Eaccess;
+ goto out_put_module;
+ if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->write)
+ goto out_put_module;
}
/* File needs read support.
@@ -396,92 +418,72 @@ static int check_perm(struct inode * inode, struct file * file, int type)
*/
if (file->f_mode & FMODE_READ) {
if (!(inode->i_mode & S_IRUGO))
- goto Eaccess;
-
+ goto out_put_module;
if ((type & CONFIGFS_ITEM_ATTR) && !attr->show)
- goto Eaccess;
-
- if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->read)
- goto Eaccess;
+ goto out_put_module;
+ if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->read)
+ goto out_put_module;
}
- /* No error? Great, allocate a buffer for the file, and store it
- * it in file->private_data for easy access.
- */
- buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
- if (!buffer) {
- error = -ENOMEM;
- goto Enomem;
- }
mutex_init(&buffer->mutex);
buffer->needs_read_fill = 1;
buffer->read_in_progress = false;
buffer->write_in_progress = false;
- buffer->ops = ops;
file->private_data = buffer;
- goto Done;
+ up_read(&frag->frag_sem);
+ return 0;
- Einval:
- error = -EINVAL;
- goto Done;
- Eaccess:
- error = -EACCES;
- Enomem:
- module_put(attr->ca_owner);
- Done:
- if (error && item)
- config_item_put(item);
+out_put_module:
+ module_put(buffer->owner);
+out_put_item:
+ config_item_put(buffer->item);
+out_free_buffer:
+ up_read(&frag->frag_sem);
+ kfree(buffer);
+out:
return error;
}
static int configfs_release(struct inode *inode, struct file *filp)
{
- struct config_item * item = to_item(filp->f_path.dentry->d_parent);
- struct configfs_attribute * attr = to_attr(filp->f_path.dentry);
- struct module * owner = attr->ca_owner;
- struct configfs_buffer * buffer = filp->private_data;
-
- if (item)
- config_item_put(item);
- /* After this point, attr should not be accessed. */
- module_put(owner);
-
- if (buffer) {
- if (buffer->page)
- free_page((unsigned long)buffer->page);
- mutex_destroy(&buffer->mutex);
- kfree(buffer);
- }
+ struct configfs_buffer *buffer = filp->private_data;
+
+ module_put(buffer->owner);
+ if (buffer->page)
+ free_page((unsigned long)buffer->page);
+ mutex_destroy(&buffer->mutex);
+ kfree(buffer);
return 0;
}
static int configfs_open_file(struct inode *inode, struct file *filp)
{
- return check_perm(inode, filp, CONFIGFS_ITEM_ATTR);
+ return __configfs_open_file(inode, filp, CONFIGFS_ITEM_ATTR);
}
static int configfs_open_bin_file(struct inode *inode, struct file *filp)
{
- return check_perm(inode, filp, CONFIGFS_ITEM_BIN_ATTR);
+ return __configfs_open_file(inode, filp, CONFIGFS_ITEM_BIN_ATTR);
}
-static int configfs_release_bin_file(struct inode *inode, struct file *filp)
+static int configfs_release_bin_file(struct inode *inode, struct file *file)
{
- struct configfs_buffer *buffer = filp->private_data;
- struct dentry *dentry = filp->f_path.dentry;
- struct config_item *item = to_item(dentry->d_parent);
- struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry);
- ssize_t len = 0;
- int ret;
+ struct configfs_buffer *buffer = file->private_data;
buffer->read_in_progress = false;
if (buffer->write_in_progress) {
+ struct configfs_fragment *frag = to_frag(file);
buffer->write_in_progress = false;
- len = bin_attr->write(item, buffer->bin_buffer,
- buffer->bin_buffer_size);
-
+ down_read(&frag->frag_sem);
+ if (!frag->frag_dead) {
+ /* result of ->release() is ignored */
+ buffer->bin_attr->write(buffer->item,
+ buffer->bin_buffer,
+ buffer->bin_buffer_size);
+ }
+ up_read(&frag->frag_sem);
/* vfree on NULL is safe */
vfree(buffer->bin_buffer);
buffer->bin_buffer = NULL;
@@ -489,10 +491,8 @@ static int configfs_release_bin_file(struct inode *inode, struct file *filp)
buffer->needs_read_fill = 1;
}
- ret = configfs_release(inode, filp);
- if (len < 0)
- return len;
- return ret;
+ configfs_release(inode, file);
+ return 0;
}
@@ -527,7 +527,7 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
- CONFIGFS_ITEM_ATTR);
+ CONFIGFS_ITEM_ATTR, parent_sd->s_frag);
inode_unlock(d_inode(dir));
return error;
@@ -549,7 +549,7 @@ int configfs_create_bin_file(struct config_item *item,
inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
- CONFIGFS_ITEM_BIN_ATTR);
+ CONFIGFS_ITEM_BIN_ATTR, parent_sd->s_frag);
inode_unlock(dir->d_inode);
return error;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index f0e549783caf..09bc68708d28 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -7,7 +7,7 @@
* and for mapping back from file handles to dentries.
*
* For details on why we do all the strange and hairy things in here
- * take a look at Documentation/filesystems/nfs/Exporting.
+ * take a look at Documentation/filesystems/nfs/exporting.rst.
*/
#include <linux/exportfs.h>
#include <linux/fs.h>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 420fe3deed39..006b7a2070bf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4586,7 +4586,6 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
- struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
@@ -4675,7 +4674,6 @@ make_io:
* If we need to do any I/O, try to pre-readahead extra
* blocks from the inode table.
*/
- blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
@@ -4706,7 +4704,6 @@ make_io:
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
- blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 542b02d170f8..8aaa7eec7b74 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -36,10 +36,6 @@
*/
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
-struct wb_completion {
- atomic_t cnt;
-};
-
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
@@ -61,19 +57,6 @@ struct wb_writeback_work {
};
/*
- * If one wants to wait for one or more wb_writeback_works, each work's
- * ->done should be set to a wb_completion defined using the following
- * macro. Once all work items are issued with wb_queue_work(), the caller
- * can wait for the completion of all using wb_wait_for_completion(). Work
- * items which are waited upon aren't freed automatically on completion.
- */
-#define DEFINE_WB_COMPLETION_ONSTACK(cmpl) \
- struct wb_completion cmpl = { \
- .cnt = ATOMIC_INIT(1), \
- }
-
-
-/*
* If an inode is constantly having its pages dirtied, but then the
* updates stop dirtytime_expire_interval seconds in the past, it's
* possible for the worst case time between when an inode has its
@@ -182,7 +165,7 @@ static void finish_writeback_work(struct bdi_writeback *wb,
if (work->auto_free)
kfree(work);
if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ wake_up_all(done->waitq);
}
static void wb_queue_work(struct bdi_writeback *wb,
@@ -206,28 +189,44 @@ static void wb_queue_work(struct bdi_writeback *wb,
/**
* wb_wait_for_completion - wait for completion of bdi_writeback_works
- * @bdi: bdi work items were issued to
* @done: target wb_completion
*
* Wait for one or more work items issued to @bdi with their ->done field
- * set to @done, which should have been defined with
- * DEFINE_WB_COMPLETION_ONSTACK(). This function returns after all such
- * work items are completed. Work items which are waited upon aren't freed
+ * set to @done, which should have been initialized with
+ * DEFINE_WB_COMPLETION(). This function returns after all such work items
+ * are completed. Work items which are waited upon aren't freed
* automatically on completion.
*/
-static void wb_wait_for_completion(struct backing_dev_info *bdi,
- struct wb_completion *done)
+void wb_wait_for_completion(struct wb_completion *done)
{
atomic_dec(&done->cnt); /* put down the initial count */
- wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
+ wait_event(*done->waitq, !atomic_read(&done->cnt));
}
#ifdef CONFIG_CGROUP_WRITEBACK
-/* parameters for foreign inode detection, see wb_detach_inode() */
+/*
+ * Parameters for foreign inode detection, see wbc_detach_inode() to see
+ * how they're used.
+ *
+ * These paramters are inherently heuristical as the detection target
+ * itself is fuzzy. All we want to do is detaching an inode from the
+ * current owner if it's being written to by some other cgroups too much.
+ *
+ * The current cgroup writeback is built on the assumption that multiple
+ * cgroups writing to the same inode concurrently is very rare and a mode
+ * of operation which isn't well supported. As such, the goal is not
+ * taking too long when a different cgroup takes over an inode while
+ * avoiding too aggressive flip-flops from occasional foreign writes.
+ *
+ * We record, very roughly, 2s worth of IO time history and if more than
+ * half of that is foreign, trigger the switch. The recording is quantized
+ * to 16 slots. To avoid tiny writes from swinging the decision too much,
+ * writes smaller than 1/8 of avg size are ignored.
+ */
#define WB_FRN_TIME_SHIFT 13 /* 1s = 2^13, upto 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT 3 /* avg = avg * 7/8 + new * 1/8 */
-#define WB_FRN_TIME_CUT_DIV 2 /* ignore rounds < avg / 2 */
+#define WB_FRN_TIME_CUT_DIV 8 /* ignore rounds < avg / 8 */
#define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT)) /* 2s */
#define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */
@@ -237,6 +236,7 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
/* if foreign slots >= 8, switch */
#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
/* one round can affect upto 5 slots */
+#define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */
static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;
@@ -389,6 +389,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
if (unlikely(inode->i_state & I_FREEING))
goto skip_switch;
+ trace_inode_switch_wbs(inode, old_wb, new_wb);
+
/*
* Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
* to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
@@ -489,18 +491,13 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
- /*
- * Avoid starting new switches while sync_inodes_sb() is in
- * progress. Otherwise, if the down_write protected issue path
- * blocks heavily, we might end up starting a large number of
- * switches which will block on the rwsem.
- */
- if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ /* avoid queueing a new switch if too many are already in flight */
+ if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
return;
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- goto out_unlock;
+ return;
/* find and pin the new wb */
rcu_read_lock();
@@ -534,15 +531,12 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
atomic_inc(&isw_nr_in_flight);
-
- goto out_unlock;
+ return;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
-out_unlock:
- up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -681,6 +675,9 @@ void wbc_detach_inode(struct writeback_control *wbc)
if (wbc->wb_id != max_id)
history |= (1U << slots) - 1;
+ if (history)
+ trace_inode_foreign_history(inode, wbc, history);
+
/*
* Switch if the current wb isn't the consistent winner.
* If there are multiple closely competing dirtiers, the
@@ -843,7 +840,7 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
restart:
rcu_read_lock();
list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
- DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
+ DEFINE_WB_COMPLETION(fallback_work_done, bdi);
struct wb_writeback_work fallback_work;
struct wb_writeback_work *work;
long nr_pages;
@@ -890,7 +887,7 @@ restart:
last_wb = wb;
rcu_read_unlock();
- wb_wait_for_completion(bdi, &fallback_work_done);
+ wb_wait_for_completion(&fallback_work_done);
goto restart;
}
rcu_read_unlock();
@@ -900,6 +897,89 @@ restart:
}
/**
+ * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
+ * @bdi_id: target bdi id
+ * @memcg_id: target memcg css id
+ * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @reason: reason why some writeback work initiated
+ * @done: target wb_completion
+ *
+ * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
+ * with the specified parameters.
+ */
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+ enum wb_reason reason, struct wb_completion *done)
+{
+ struct backing_dev_info *bdi;
+ struct cgroup_subsys_state *memcg_css;
+ struct bdi_writeback *wb;
+ struct wb_writeback_work *work;
+ int ret;
+
+ /* lookup bdi and memcg */
+ bdi = bdi_get_by_id(bdi_id);
+ if (!bdi)
+ return -ENOENT;
+
+ rcu_read_lock();
+ memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
+ if (memcg_css && !css_tryget(memcg_css))
+ memcg_css = NULL;
+ rcu_read_unlock();
+ if (!memcg_css) {
+ ret = -ENOENT;
+ goto out_bdi_put;
+ }
+
+ /*
+ * And find the associated wb. If the wb isn't there already
+ * there's nothing to flush, don't create one.
+ */
+ wb = wb_get_lookup(bdi, memcg_css);
+ if (!wb) {
+ ret = -ENOENT;
+ goto out_css_put;
+ }
+
+ /*
+ * If @nr is zero, the caller is attempting to write out most of
+ * the currently dirty pages. Let's take the current dirty page
+ * count and inflate it by 25% which should be large enough to
+ * flush out most dirty pages while avoiding getting livelocked by
+ * concurrent dirtiers.
+ */
+ if (!nr) {
+ unsigned long filepages, headroom, dirty, writeback;
+
+ mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
+ &writeback);
+ nr = dirty * 10 / 8;
+ }
+
+ /* issue the writeback work */
+ work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
+ if (work) {
+ work->nr_pages = nr;
+ work->sync_mode = WB_SYNC_NONE;
+ work->range_cyclic = 1;
+ work->reason = reason;
+ work->done = done;
+ work->auto_free = 1;
+ wb_queue_work(wb, work);
+ ret = 0;
+ } else {
+ ret = -ENOMEM;
+ }
+
+ wb_put(wb);
+out_css_put:
+ css_put(memcg_css);
+out_bdi_put:
+ bdi_put(bdi);
+ return ret;
+}
+
+/**
* cgroup_writeback_umount - flush inode wb switches for umount
*
* This function is called when a super_block is about to be destroyed and
@@ -2362,7 +2442,8 @@ static void wait_sb_inodes(struct super_block *sb)
static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
enum wb_reason reason, bool skip_if_busy)
{
- DEFINE_WB_COMPLETION_ONSTACK(done);
+ struct backing_dev_info *bdi = sb->s_bdi;
+ DEFINE_WB_COMPLETION(done, bdi);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
@@ -2371,14 +2452,13 @@ static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
.nr_pages = nr,
.reason = reason,
};
- struct backing_dev_info *bdi = sb->s_bdi;
if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
- wb_wait_for_completion(bdi, &done);
+ wb_wait_for_completion(&done);
}
/**
@@ -2440,7 +2520,8 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
*/
void sync_inodes_sb(struct super_block *sb)
{
- DEFINE_WB_COMPLETION_ONSTACK(done);
+ struct backing_dev_info *bdi = sb->s_bdi;
+ DEFINE_WB_COMPLETION(done, bdi);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
@@ -2450,7 +2531,6 @@ void sync_inodes_sb(struct super_block *sb)
.reason = WB_REASON_SYNC,
.for_sync = 1,
};
- struct backing_dev_info *bdi = sb->s_bdi;
/*
* Can't skip on !bdi_has_dirty() because we should wait for !dirty
@@ -2464,7 +2544,7 @@ void sync_inodes_sb(struct super_block *sb)
/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
- wb_wait_for_completion(bdi, &done);
+ wb_wait_for_completion(&done);
bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index cfb48bd088e1..0dadbdbead0f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -75,7 +75,7 @@
#include "internal.h"
-#define IORING_MAX_ENTRIES 4096
+#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_FIXED_FILES 1024
struct io_uring {
@@ -84,27 +84,29 @@ struct io_uring {
};
/*
- * This data is shared with the application through the mmap at offset
- * IORING_OFF_SQ_RING.
+ * This data is shared with the application through the mmap at offsets
+ * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING.
*
* The offsets to the member fields are published through struct
* io_sqring_offsets when calling io_uring_setup.
*/
-struct io_sq_ring {
+struct io_rings {
/*
* Head and tail offsets into the ring; the offsets need to be
* masked to get valid indices.
*
- * The kernel controls head and the application controls tail.
+ * The kernel controls head of the sq ring and the tail of the cq ring,
+ * and the application controls tail of the sq ring and the head of the
+ * cq ring.
*/
- struct io_uring r;
+ struct io_uring sq, cq;
/*
- * Bitmask to apply to head and tail offsets (constant, equals
+ * Bitmasks to apply to head and tail offsets (constant, equals
* ring_entries - 1)
*/
- u32 ring_mask;
- /* Ring size (constant, power of 2) */
- u32 ring_entries;
+ u32 sq_ring_mask, cq_ring_mask;
+ /* Ring sizes (constant, power of 2) */
+ u32 sq_ring_entries, cq_ring_entries;
/*
* Number of invalid entries dropped by the kernel due to
* invalid index stored in array
@@ -117,7 +119,7 @@ struct io_sq_ring {
* counter includes all submissions that were dropped reaching
* the new SQ head (and possibly more).
*/
- u32 dropped;
+ u32 sq_dropped;
/*
* Runtime flags
*
@@ -127,43 +129,7 @@ struct io_sq_ring {
* The application needs a full memory barrier before checking
* for IORING_SQ_NEED_WAKEUP after updating the sq tail.
*/
- u32 flags;
- /*
- * Ring buffer of indices into array of io_uring_sqe, which is
- * mmapped by the application using the IORING_OFF_SQES offset.
- *
- * This indirection could e.g. be used to assign fixed
- * io_uring_sqe entries to operations and only submit them to
- * the queue when needed.
- *
- * The kernel modifies neither the indices array nor the entries
- * array.
- */
- u32 array[];
-};
-
-/*
- * This data is shared with the application through the mmap at offset
- * IORING_OFF_CQ_RING.
- *
- * The offsets to the member fields are published through struct
- * io_cqring_offsets when calling io_uring_setup.
- */
-struct io_cq_ring {
- /*
- * Head and tail offsets into the ring; the offsets need to be
- * masked to get valid indices.
- *
- * The application controls head and the kernel tail.
- */
- struct io_uring r;
- /*
- * Bitmask to apply to head and tail offsets (constant, equals
- * ring_entries - 1)
- */
- u32 ring_mask;
- /* Ring size (constant, power of 2) */
- u32 ring_entries;
+ u32 sq_flags;
/*
* Number of completion events lost because the queue was full;
* this should be avoided by the application by making sure
@@ -177,7 +143,7 @@ struct io_cq_ring {
* As completion events come in out of order this counter is not
* ordered with any other data.
*/
- u32 overflow;
+ u32 cq_overflow;
/*
* Ring buffer of completion events.
*
@@ -185,7 +151,7 @@ struct io_cq_ring {
* produced, so the application is allowed to modify pending
* entries.
*/
- struct io_uring_cqe cqes[];
+ struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
};
struct io_mapped_ubuf {
@@ -201,7 +167,7 @@ struct async_list {
struct list_head list;
struct file *file;
- off_t io_end;
+ off_t io_start;
size_t io_len;
};
@@ -215,8 +181,18 @@ struct io_ring_ctx {
bool compat;
bool account_mem;
- /* SQ ring */
- struct io_sq_ring *sq_ring;
+ /*
+ * Ring buffer of indices into array of io_uring_sqe, which is
+ * mmapped by the application using the IORING_OFF_SQES offset.
+ *
+ * This indirection could e.g. be used to assign fixed
+ * io_uring_sqe entries to operations and only submit them to
+ * the queue when needed.
+ *
+ * The kernel modifies neither the indices array nor the entries
+ * array.
+ */
+ u32 *sq_array;
unsigned cached_sq_head;
unsigned sq_entries;
unsigned sq_mask;
@@ -227,15 +203,13 @@ struct io_ring_ctx {
} ____cacheline_aligned_in_smp;
/* IO offload */
- struct workqueue_struct *sqo_wq;
+ struct workqueue_struct *sqo_wq[2];
struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait;
struct completion sqo_thread_started;
struct {
- /* CQ ring */
- struct io_cq_ring *cq_ring;
unsigned cached_cq_tail;
unsigned cq_entries;
unsigned cq_mask;
@@ -244,6 +218,8 @@ struct io_ring_ctx {
struct eventfd_ctx *cq_ev_fd;
} ____cacheline_aligned_in_smp;
+ struct io_rings *rings;
+
/*
* If used, fixed file set. Writers must ensure that ->refs is dead,
* readers must ensure that ->refs is alive as long as the file* is
@@ -288,6 +264,7 @@ struct io_ring_ctx {
struct sqe_submit {
const struct io_uring_sqe *sqe;
unsigned short index;
+ u32 sequence;
bool has_user;
bool needs_lock;
bool needs_fixed_file;
@@ -335,6 +312,7 @@ struct io_kiocb {
#define REQ_F_LINK 64 /* linked sqes */
#define REQ_F_LINK_DONE 128 /* linked sqes done */
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
+#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
u64 user_data;
u32 result;
u32 sequence;
@@ -366,6 +344,7 @@ struct io_submit_state {
};
static void io_sq_wq_submit_work(struct work_struct *work);
+static void __io_free_req(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
@@ -430,7 +409,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
return false;
- return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
+ return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
}
static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
@@ -451,11 +430,11 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
static void __io_commit_cqring(struct io_ring_ctx *ctx)
{
- struct io_cq_ring *ring = ctx->cq_ring;
+ struct io_rings *rings = ctx->rings;
- if (ctx->cached_cq_tail != READ_ONCE(ring->r.tail)) {
+ if (ctx->cached_cq_tail != READ_ONCE(rings->cq.tail)) {
/* order cqe stores with ring update */
- smp_store_release(&ring->r.tail, ctx->cached_cq_tail);
+ smp_store_release(&rings->cq.tail, ctx->cached_cq_tail);
if (wq_has_sleeper(&ctx->cq_wait)) {
wake_up_interruptible(&ctx->cq_wait);
@@ -464,6 +443,24 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
}
}
+static inline void io_queue_async_work(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
+{
+ int rw;
+
+ switch (req->submit.sqe->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ rw = !(req->rw.ki_flags & IOCB_DIRECT);
+ break;
+ default:
+ rw = 0;
+ break;
+ }
+
+ queue_work(ctx->sqo_wq[rw], &req->work);
+}
+
static void io_commit_cqring(struct io_ring_ctx *ctx)
{
struct io_kiocb *req;
@@ -471,14 +468,19 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
__io_commit_cqring(ctx);
while ((req = io_get_deferred_req(ctx)) != NULL) {
+ if (req->flags & REQ_F_SHADOW_DRAIN) {
+ /* Just for drain, free it. */
+ __io_free_req(req);
+ continue;
+ }
req->flags |= REQ_F_IO_DRAINED;
- queue_work(ctx->sqo_wq, &req->work);
+ io_queue_async_work(ctx, req);
}
}
static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
{
- struct io_cq_ring *ring = ctx->cq_ring;
+ struct io_rings *rings = ctx->rings;
unsigned tail;
tail = ctx->cached_cq_tail;
@@ -487,11 +489,11 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
* control dependency is enough as we're using WRITE_ONCE to
* fill the cq entry
*/
- if (tail - READ_ONCE(ring->r.head) == ring->ring_entries)
+ if (tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries)
return NULL;
ctx->cached_cq_tail++;
- return &ring->cqes[tail & ctx->cq_mask];
+ return &rings->cqes[tail & ctx->cq_mask];
}
static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
@@ -510,9 +512,9 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, 0);
} else {
- unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
+ unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
- WRITE_ONCE(ctx->cq_ring->overflow, overflow + 1);
+ WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
}
}
@@ -635,7 +637,7 @@ static void io_req_link_next(struct io_kiocb *req)
nxt->flags |= REQ_F_LINK_DONE;
INIT_WORK(&nxt->work, io_sq_wq_submit_work);
- queue_work(req->ctx->sqo_wq, &nxt->work);
+ io_queue_async_work(req->ctx, nxt);
}
}
@@ -679,11 +681,11 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
-static unsigned io_cqring_events(struct io_cq_ring *ring)
+static unsigned io_cqring_events(struct io_rings *rings)
{
/* See comment at the top of this file */
smp_rmb();
- return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
+ return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
}
/*
@@ -836,7 +838,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
* If we do, we can potentially be spinning for commands that
* already triggered a CQE (eg in error).
*/
- if (io_cqring_events(ctx->cq_ring))
+ if (io_cqring_events(ctx->rings))
break;
/*
@@ -1187,6 +1189,28 @@ static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
}
+static inline bool io_should_merge(struct async_list *al, struct kiocb *kiocb)
+{
+ if (al->file == kiocb->ki_filp) {
+ off_t start, end;
+
+ /*
+ * Allow merging if we're anywhere in the range of the same
+ * page. Generally this happens for sub-page reads or writes,
+ * and it's beneficial to allow the first worker to bring the
+ * page in and the piggy backed work can then work on the
+ * cached page.
+ */
+ start = al->io_start & PAGE_MASK;
+ end = (al->io_start + al->io_len + PAGE_SIZE - 1) & PAGE_MASK;
+ if (kiocb->ki_pos >= start && kiocb->ki_pos <= end)
+ return true;
+ }
+
+ al->file = NULL;
+ return false;
+}
+
/*
* Make a note of the last file/offset/direction we punted to async
* context. We'll use this information to see if we can piggy back a
@@ -1198,9 +1222,8 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
struct async_list *async_list = &req->ctx->pending_async[rw];
struct kiocb *kiocb = &req->rw;
struct file *filp = kiocb->ki_filp;
- off_t io_end = kiocb->ki_pos + len;
- if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
+ if (io_should_merge(async_list, kiocb)) {
unsigned long max_bytes;
/* Use 8x RA size as a decent limiter for both reads/writes */
@@ -1213,17 +1236,16 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
req->flags |= REQ_F_SEQ_PREV;
async_list->io_len += len;
} else {
- io_end = 0;
- async_list->io_len = 0;
+ async_list->file = NULL;
}
}
/* New file? Reset state. */
if (async_list->file != filp) {
- async_list->io_len = 0;
+ async_list->io_start = kiocb->ki_pos;
+ async_list->io_len = len;
async_list->file = filp;
}
- async_list->io_end = io_end;
}
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
@@ -1535,7 +1557,7 @@ static void io_poll_remove_one(struct io_kiocb *req)
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
list_del_init(&poll->wait.entry);
- queue_work(req->ctx->sqo_wq, &req->work);
+ io_queue_async_work(req->ctx, req);
}
spin_unlock(&poll->head->lock);
@@ -1649,7 +1671,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
io_cqring_ev_posted(ctx);
io_put_req(req);
} else {
- queue_work(ctx->sqo_wq, &req->work);
+ io_queue_async_work(ctx, req);
}
return 1;
@@ -1992,7 +2014,7 @@ out:
*/
static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
{
- bool ret = false;
+ bool ret;
if (!list)
return false;
@@ -2038,10 +2060,14 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
flags = READ_ONCE(s->sqe->flags);
fd = READ_ONCE(s->sqe->fd);
- if (flags & IOSQE_IO_DRAIN) {
+ if (flags & IOSQE_IO_DRAIN)
req->flags |= REQ_F_IO_DRAIN;
- req->sequence = ctx->cached_sq_head - 1;
- }
+ /*
+ * All io need record the previous position, if LINK vs DARIN,
+ * it can be used to mark the position of the first IO in the
+ * link list.
+ */
+ req->sequence = s->sequence;
if (!io_op_needs_file(s->sqe))
return 0;
@@ -2063,21 +2089,12 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
return 0;
}
-static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s)
+static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ struct sqe_submit *s, bool force_nonblock)
{
int ret;
- ret = io_req_defer(ctx, req, s->sqe);
- if (ret) {
- if (ret != -EIOCBQUEUED) {
- io_free_req(req);
- io_cqring_add_event(ctx, s->sqe->user_data, ret);
- }
- return 0;
- }
-
- ret = __io_submit_sqe(ctx, req, s, true);
+ ret = __io_submit_sqe(ctx, req, s, force_nonblock);
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
@@ -2094,7 +2111,7 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (list)
atomic_inc(&list->cnt);
INIT_WORK(&req->work, io_sq_wq_submit_work);
- queue_work(ctx->sqo_wq, &req->work);
+ io_queue_async_work(ctx, req);
}
/*
@@ -2119,10 +2136,70 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return ret;
}
+static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ struct sqe_submit *s, bool force_nonblock)
+{
+ int ret;
+
+ ret = io_req_defer(ctx, req, s->sqe);
+ if (ret) {
+ if (ret != -EIOCBQUEUED) {
+ io_free_req(req);
+ io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ }
+ return 0;
+ }
+
+ return __io_queue_sqe(ctx, req, s, force_nonblock);
+}
+
+static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ struct sqe_submit *s, struct io_kiocb *shadow,
+ bool force_nonblock)
+{
+ int ret;
+ int need_submit = false;
+
+ if (!shadow)
+ return io_queue_sqe(ctx, req, s, force_nonblock);
+
+ /*
+ * Mark the first IO in link list as DRAIN, let all the following
+ * IOs enter the defer list. all IO needs to be completed before link
+ * list.
+ */
+ req->flags |= REQ_F_IO_DRAIN;
+ ret = io_req_defer(ctx, req, s->sqe);
+ if (ret) {
+ if (ret != -EIOCBQUEUED) {
+ io_free_req(req);
+ io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ return 0;
+ }
+ } else {
+ /*
+ * If ret == 0 means that all IOs in front of link io are
+ * running done. let's queue link head.
+ */
+ need_submit = true;
+ }
+
+ /* Insert shadow req to defer_list, blocking next IOs */
+ spin_lock_irq(&ctx->completion_lock);
+ list_add_tail(&shadow->list, &ctx->defer_list);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ if (need_submit)
+ return __io_queue_sqe(ctx, req, s, force_nonblock);
+
+ return 0;
+}
+
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
- struct io_submit_state *state, struct io_kiocb **link)
+ struct io_submit_state *state, struct io_kiocb **link,
+ bool force_nonblock)
{
struct io_uring_sqe *sqe_copy;
struct io_kiocb *req;
@@ -2175,7 +2252,7 @@ err:
INIT_LIST_HEAD(&req->link_list);
*link = req;
} else {
- io_queue_sqe(ctx, req, s);
+ io_queue_sqe(ctx, req, s, force_nonblock);
}
}
@@ -2205,15 +2282,15 @@ static void io_submit_state_start(struct io_submit_state *state,
static void io_commit_sqring(struct io_ring_ctx *ctx)
{
- struct io_sq_ring *ring = ctx->sq_ring;
+ struct io_rings *rings = ctx->rings;
- if (ctx->cached_sq_head != READ_ONCE(ring->r.head)) {
+ if (ctx->cached_sq_head != READ_ONCE(rings->sq.head)) {
/*
* Ensure any loads from the SQEs are done at this point,
* since once we write the new head, the application could
* write new data to them.
*/
- smp_store_release(&ring->r.head, ctx->cached_sq_head);
+ smp_store_release(&rings->sq.head, ctx->cached_sq_head);
}
}
@@ -2227,7 +2304,8 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
*/
static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
{
- struct io_sq_ring *ring = ctx->sq_ring;
+ struct io_rings *rings = ctx->rings;
+ u32 *sq_array = ctx->sq_array;
unsigned head;
/*
@@ -2240,20 +2318,21 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
*/
head = ctx->cached_sq_head;
/* make sure SQ entry isn't read before tail */
- if (head == smp_load_acquire(&ring->r.tail))
+ if (head == smp_load_acquire(&rings->sq.tail))
return false;
- head = READ_ONCE(ring->array[head & ctx->sq_mask]);
+ head = READ_ONCE(sq_array[head & ctx->sq_mask]);
if (head < ctx->sq_entries) {
s->index = head;
s->sqe = &ctx->sq_sqes[head];
+ s->sequence = ctx->cached_sq_head;
ctx->cached_sq_head++;
return true;
}
/* drop invalid entries */
ctx->cached_sq_head++;
- ring->dropped++;
+ rings->sq_dropped++;
return false;
}
@@ -2262,6 +2341,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
+ struct io_kiocb *shadow_req = NULL;
bool prev_was_link = false;
int i, submitted = 0;
@@ -2276,11 +2356,21 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
* that's the end of the chain. Submit the previous link.
*/
if (!prev_was_link && link) {
- io_queue_sqe(ctx, link, &link->submit);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req,
+ true);
link = NULL;
}
prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+ if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
+ if (!shadow_req) {
+ shadow_req = io_get_req(ctx, NULL);
+ shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
+ refcount_dec(&shadow_req->refs);
+ }
+ shadow_req->sequence = sqes[i].sequence;
+ }
+
if (unlikely(mm_fault)) {
io_cqring_add_event(ctx, sqes[i].sqe->user_data,
-EFAULT);
@@ -2288,13 +2378,13 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
sqes[i].has_user = has_user;
sqes[i].needs_lock = true;
sqes[i].needs_fixed_file = true;
- io_submit_sqe(ctx, &sqes[i], statep, &link);
+ io_submit_sqe(ctx, &sqes[i], statep, &link, true);
submitted++;
}
}
if (link)
- io_queue_sqe(ctx, link, &link->submit);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req, true);
if (statep)
io_submit_state_end(&state);
@@ -2366,7 +2456,7 @@ static int io_sq_thread(void *data)
TASK_INTERRUPTIBLE);
/* Tell userspace we may need a wakeup call */
- ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
+ ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
/* make sure to read SQ tail after writing flags */
smp_mb();
@@ -2380,12 +2470,12 @@ static int io_sq_thread(void *data)
schedule();
finish_wait(&ctx->sqo_wait, &wait);
- ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
+ ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
continue;
}
finish_wait(&ctx->sqo_wait, &wait);
- ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
+ ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
}
i = 0;
@@ -2426,10 +2516,12 @@ static int io_sq_thread(void *data)
return 0;
}
-static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
+static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
+ bool block_for_last)
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
+ struct io_kiocb *shadow_req = NULL;
bool prev_was_link = false;
int i, submit = 0;
@@ -2439,6 +2531,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
}
for (i = 0; i < to_submit; i++) {
+ bool force_nonblock = true;
struct sqe_submit s;
if (!io_get_sqring(ctx, &s))
@@ -2449,21 +2542,43 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
* that's the end of the chain. Submit the previous link.
*/
if (!prev_was_link && link) {
- io_queue_sqe(ctx, link, &link->submit);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req,
+ force_nonblock);
link = NULL;
}
prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
+ if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
+ if (!shadow_req) {
+ shadow_req = io_get_req(ctx, NULL);
+ shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
+ refcount_dec(&shadow_req->refs);
+ }
+ shadow_req->sequence = s.sequence;
+ }
+
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
submit++;
- io_submit_sqe(ctx, &s, statep, &link);
+
+ /*
+ * The caller will block for events after submit, submit the
+ * last IO non-blocking. This is either the only IO it's
+ * submitting, or it already submitted the previous ones. This
+ * improves performance by avoiding an async punt that we don't
+ * need to do.
+ */
+ if (block_for_last && submit == to_submit)
+ force_nonblock = false;
+
+ io_submit_sqe(ctx, &s, statep, &link, force_nonblock);
}
io_commit_sqring(ctx);
if (link)
- io_queue_sqe(ctx, link, &link->submit);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req,
+ block_for_last);
if (statep)
io_submit_state_end(statep);
@@ -2477,10 +2592,10 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz)
{
- struct io_cq_ring *ring = ctx->cq_ring;
+ struct io_rings *rings = ctx->rings;
int ret;
- if (io_cqring_events(ring) >= min_events)
+ if (io_cqring_events(rings) >= min_events)
return 0;
if (sig) {
@@ -2496,12 +2611,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
- ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
+ ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
restore_saved_sigmask_unless(ret == -ERESTARTSYS);
if (ret == -ERESTARTSYS)
ret = -EINTR;
- return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
+ return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
}
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -2551,11 +2666,15 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
static void io_finish_async(struct io_ring_ctx *ctx)
{
+ int i;
+
io_sq_thread_stop(ctx);
- if (ctx->sqo_wq) {
- destroy_workqueue(ctx->sqo_wq);
- ctx->sqo_wq = NULL;
+ for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
+ if (ctx->sqo_wq[i]) {
+ destroy_workqueue(ctx->sqo_wq[i]);
+ ctx->sqo_wq[i] = NULL;
+ }
}
}
@@ -2763,16 +2882,31 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
}
/* Do QD, or 2 * CPUS, whatever is smallest */
- ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+ ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
+ WQ_UNBOUND | WQ_FREEZABLE,
min(ctx->sq_entries - 1, 2 * num_online_cpus()));
- if (!ctx->sqo_wq) {
+ if (!ctx->sqo_wq[0]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * This is for buffered writes, where we want to limit the parallelism
+ * due to file locking in file systems. As "normal" buffered writes
+ * should parellelize on writeout quite nicely, limit us to having 2
+ * pending. This avoids massive contention on the inode when doing
+ * buffered async writes.
+ */
+ ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
+ WQ_UNBOUND | WQ_FREEZABLE, 2);
+ if (!ctx->sqo_wq[1]) {
ret = -ENOMEM;
goto err;
}
return 0;
err:
- io_sq_thread_stop(ctx);
+ io_finish_async(ctx);
mmdrop(ctx->sqo_mm);
ctx->sqo_mm = NULL;
return ret;
@@ -2821,17 +2955,45 @@ static void *io_mem_alloc(size_t size)
return (void *) __get_free_pages(gfp_flags, get_order(size));
}
+static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
+ size_t *sq_offset)
+{
+ struct io_rings *rings;
+ size_t off, sq_array_size;
+
+ off = struct_size(rings, cqes, cq_entries);
+ if (off == SIZE_MAX)
+ return SIZE_MAX;
+
+#ifdef CONFIG_SMP
+ off = ALIGN(off, SMP_CACHE_BYTES);
+ if (off == 0)
+ return SIZE_MAX;
+#endif
+
+ sq_array_size = array_size(sizeof(u32), sq_entries);
+ if (sq_array_size == SIZE_MAX)
+ return SIZE_MAX;
+
+ if (check_add_overflow(off, sq_array_size, &off))
+ return SIZE_MAX;
+
+ if (sq_offset)
+ *sq_offset = off;
+
+ return off;
+}
+
static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
{
- struct io_sq_ring *sq_ring;
- struct io_cq_ring *cq_ring;
- size_t bytes;
+ size_t pages;
- bytes = struct_size(sq_ring, array, sq_entries);
- bytes += array_size(sizeof(struct io_uring_sqe), sq_entries);
- bytes += struct_size(cq_ring, cqes, cq_entries);
+ pages = (size_t)1 << get_order(
+ rings_size(sq_entries, cq_entries, NULL));
+ pages += (size_t)1 << get_order(
+ array_size(sizeof(struct io_uring_sqe), sq_entries));
- return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+ return pages;
}
static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
@@ -2845,7 +3007,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
for (j = 0; j < imu->nr_bvecs; j++)
- put_page(imu->bvec[j].bv_page);
+ put_user_page(imu->bvec[j].bv_page);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, imu->nr_bvecs);
@@ -2989,10 +3151,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
* if we did partial map, or found file backed vmas,
* release any pages we did get
*/
- if (pret > 0) {
- for (j = 0; j < pret; j++)
- put_page(pages[j]);
- }
+ if (pret > 0)
+ put_user_pages(pages, pret);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, nr_pages);
kvfree(imu->bvec);
@@ -3078,9 +3238,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
}
#endif
- io_mem_free(ctx->sq_ring);
+ io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
- io_mem_free(ctx->cq_ring);
percpu_ref_exit(&ctx->refs);
if (ctx->account_mem)
@@ -3101,10 +3260,10 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
* io_commit_cqring
*/
smp_rmb();
- if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
- ctx->sq_ring->ring_entries)
+ if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
+ ctx->rings->sq_ring_entries)
mask |= EPOLLOUT | EPOLLWRNORM;
- if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
+ if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail)
mask |= EPOLLIN | EPOLLRDNORM;
return mask;
@@ -3149,14 +3308,12 @@ static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
switch (offset) {
case IORING_OFF_SQ_RING:
- ptr = ctx->sq_ring;
+ case IORING_OFF_CQ_RING:
+ ptr = ctx->rings;
break;
case IORING_OFF_SQES:
ptr = ctx->sq_sqes;
break;
- case IORING_OFF_CQ_RING:
- ptr = ctx->cq_ring;
- break;
default:
return -EINVAL;
}
@@ -3199,19 +3356,27 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
* Just return the requested submit count, and wake the thread if
* we were asked to.
*/
+ ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sqo_wait);
submitted = to_submit;
- goto out_ctx;
- }
+ } else if (to_submit) {
+ bool block_for_last = false;
- ret = 0;
- if (to_submit) {
to_submit = min(to_submit, ctx->sq_entries);
+ /*
+ * Allow last submission to block in a series, IFF the caller
+ * asked to wait for events and we don't currently have
+ * enough. This potentially avoids an async punt.
+ */
+ if (to_submit == min_complete &&
+ io_cqring_events(ctx->rings) < min_complete)
+ block_for_last = true;
+
mutex_lock(&ctx->uring_lock);
- submitted = io_ring_submit(ctx, to_submit);
+ submitted = io_ring_submit(ctx, to_submit, block_for_last);
mutex_unlock(&ctx->uring_lock);
}
if (flags & IORING_ENTER_GETEVENTS) {
@@ -3226,7 +3391,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
}
}
-out_ctx:
io_ring_drop_ctx_refs(ctx, 1);
out_fput:
fdput(f);
@@ -3243,19 +3407,27 @@ static const struct file_operations io_uring_fops = {
static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
- struct io_sq_ring *sq_ring;
- struct io_cq_ring *cq_ring;
- size_t size;
+ struct io_rings *rings;
+ size_t size, sq_array_offset;
- sq_ring = io_mem_alloc(struct_size(sq_ring, array, p->sq_entries));
- if (!sq_ring)
+ size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+ if (size == SIZE_MAX)
+ return -EOVERFLOW;
+
+ rings = io_mem_alloc(size);
+ if (!rings)
return -ENOMEM;
- ctx->sq_ring = sq_ring;
- sq_ring->ring_mask = p->sq_entries - 1;
- sq_ring->ring_entries = p->sq_entries;
- ctx->sq_mask = sq_ring->ring_mask;
- ctx->sq_entries = sq_ring->ring_entries;
+ ctx->rings = rings;
+ ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
+ rings->sq_ring_mask = p->sq_entries - 1;
+ rings->cq_ring_mask = p->cq_entries - 1;
+ rings->sq_ring_entries = p->sq_entries;
+ rings->cq_ring_entries = p->cq_entries;
+ ctx->sq_mask = rings->sq_ring_mask;
+ ctx->cq_mask = rings->cq_ring_mask;
+ ctx->sq_entries = rings->sq_ring_entries;
+ ctx->cq_entries = rings->cq_ring_entries;
size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
if (size == SIZE_MAX)
@@ -3265,15 +3437,6 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
if (!ctx->sq_sqes)
return -ENOMEM;
- cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries));
- if (!cq_ring)
- return -ENOMEM;
-
- ctx->cq_ring = cq_ring;
- cq_ring->ring_mask = p->cq_entries - 1;
- cq_ring->ring_entries = p->cq_entries;
- ctx->cq_mask = cq_ring->ring_mask;
- ctx->cq_entries = cq_ring->ring_entries;
return 0;
}
@@ -3377,21 +3540,23 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
goto err;
memset(&p->sq_off, 0, sizeof(p->sq_off));
- p->sq_off.head = offsetof(struct io_sq_ring, r.head);
- p->sq_off.tail = offsetof(struct io_sq_ring, r.tail);
- p->sq_off.ring_mask = offsetof(struct io_sq_ring, ring_mask);
- p->sq_off.ring_entries = offsetof(struct io_sq_ring, ring_entries);
- p->sq_off.flags = offsetof(struct io_sq_ring, flags);
- p->sq_off.dropped = offsetof(struct io_sq_ring, dropped);
- p->sq_off.array = offsetof(struct io_sq_ring, array);
+ p->sq_off.head = offsetof(struct io_rings, sq.head);
+ p->sq_off.tail = offsetof(struct io_rings, sq.tail);
+ p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
+ p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
+ p->sq_off.flags = offsetof(struct io_rings, sq_flags);
+ p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
+ p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
memset(&p->cq_off, 0, sizeof(p->cq_off));
- p->cq_off.head = offsetof(struct io_cq_ring, r.head);
- p->cq_off.tail = offsetof(struct io_cq_ring, r.tail);
- p->cq_off.ring_mask = offsetof(struct io_cq_ring, ring_mask);
- p->cq_off.ring_entries = offsetof(struct io_cq_ring, ring_entries);
- p->cq_off.overflow = offsetof(struct io_cq_ring, overflow);
- p->cq_off.cqes = offsetof(struct io_cq_ring, cqes);
+ p->cq_off.head = offsetof(struct io_rings, cq.head);
+ p->cq_off.tail = offsetof(struct io_rings, cq.tail);
+ p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
+ p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
+ p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
+ p->cq_off.cqes = offsetof(struct io_rings, cqes);
+
+ p->features = IORING_FEAT_SINGLE_MMAP;
return ret;
err:
io_ring_ctx_wait_and_kill(ctx);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 85a9093769a9..35768a63fb1d 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -10,7 +10,7 @@
*
* The following files are helpful:
*
- * Documentation/filesystems/nfs/Exporting
+ * Documentation/filesystems/nfs/exporting.rst
* fs/exportfs/expfs.c.
*/
diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig
index 22a273bd4648..05cb0e8e4382 100644
--- a/fs/jfs/Kconfig
+++ b/fs/jfs/Kconfig
@@ -5,7 +5,7 @@ config JFS_FS
select CRC32
help
This is a port of IBM's Journaled Filesystem . More information is
- available in the file <file:Documentation/filesystems/jfs.txt>.
+ available in the file <file:Documentation/admin-guide/jfs.rst>.
If you do not intend to use the JFS filesystem, say N.
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c764cfe456e5..2a03bfeec10a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1403,11 +1403,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
return 0;
- /* No fileid? Just exit */
- if (!(fattr->valid & NFS_ATTR_FATTR_FILEID))
- return 0;
+ if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
+ /* Only a mounted-on-fileid? Just exit */
+ if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
+ return 0;
/* Has the inode gone and changed behind our back? */
- if (nfsi->fileid != fattr->fileid) {
+ } else if (nfsi->fileid != fattr->fileid) {
/* Is this perhaps the mounted-on fileid? */
if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) &&
nfsi->fileid == fattr->mounted_on_fileid)
@@ -1807,11 +1808,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
- /* No fileid? Just exit */
- if (!(fattr->valid & NFS_ATTR_FATTR_FILEID))
- return 0;
+ if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
+ /* Only a mounted-on-fileid? Just exit */
+ if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
+ return 0;
/* Has the inode gone and changed behind our back? */
- if (nfsi->fileid != fattr->fileid) {
+ } else if (nfsi->fileid != fattr->fileid) {
/* Is this perhaps the mounted-on fileid? */
if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) &&
nfsi->fileid == fattr->mounted_on_fileid)
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 960f9a3c012d..a5612abc0936 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -555,7 +555,7 @@ static int orangefs_fsync(struct file *file,
* Change the file pointer position for an instance of an open file.
*
* \note If .llseek is overriden, we must acquire lock as described in
- * Documentation/filesystems/Locking.
+ * Documentation/filesystems/locking.rst.
*
* Future upgrade could support SEEK_DATA and SEEK_HOLE but would
* require much changes to the FS
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 572dd29fbd54..34a6c99fa29b 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -246,7 +246,7 @@ struct orangefs_read_options {
extern struct orangefs_stats orangefs_stats;
/*
- * NOTE: See Documentation/filesystems/porting for information
+ * NOTE: See Documentation/filesystems/porting.rst for information
* on implementing FOO_I and properly accessing fs private data
*/
static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 6a6fc8aa1de7..48305ba41e3c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,7 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
break;
}
spin_unlock_irq(&ctx->wqh.lock);
- cpu_relax();
+
+ if (isalarm(ctx))
+ hrtimer_cancel_wait_running(&ctx->t.alarm.timer);
+ else
+ hrtimer_cancel_wait_running(&ctx->t.tmr);
}
/*
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index fcb41516ea59..6d30adb6b890 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -9,7 +9,7 @@ config UFS_FS
this file system as well. Saying Y here will allow you to read from
these partitions; if you also want to write to them, say Y to the
experimental "UFS file system write support", below. Please read the
- file <file:Documentation/filesystems/ufs.txt> for more information.
+ file <file:Documentation/admin-guide/ufs.rst> for more information.
The recently released UFS2 variant (used in FreeBSD 5.x) is
READ-ONLY supported.