summaryrefslogtreecommitdiff
path: root/fs/ubifs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs')
-rw-r--r--fs/ubifs/auth.c24
-rw-r--r--fs/ubifs/commit.c13
-rw-r--r--fs/ubifs/compress.c2
-rw-r--r--fs/ubifs/crypto.c3
-rw-r--r--fs/ubifs/debug.c49
-rw-r--r--fs/ubifs/debug.h7
-rw-r--r--fs/ubifs/dir.c181
-rw-r--r--fs/ubifs/file.c488
-rw-r--r--fs/ubifs/find.c40
-rw-r--r--fs/ubifs/journal.c198
-rw-r--r--fs/ubifs/lprops.c8
-rw-r--r--fs/ubifs/lpt.c1
-rw-r--r--fs/ubifs/lpt_commit.c1
-rw-r--r--fs/ubifs/master.c5
-rw-r--r--fs/ubifs/orphan.c155
-rw-r--r--fs/ubifs/replay.c4
-rw-r--r--fs/ubifs/super.c43
-rw-r--r--fs/ubifs/sysfs.c6
-rw-r--r--fs/ubifs/tnc.c9
-rw-r--r--fs/ubifs/tnc_misc.c22
-rw-r--r--fs/ubifs/ubifs.h23
-rw-r--r--fs/ubifs/xattr.c4
22 files changed, 707 insertions, 579 deletions
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index e564d5ff8781..a4a0158f712d 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -9,17 +9,16 @@
* This file implements various helper functions for UBIFS authentication support
*/
-#include <linux/crypto.h>
#include <linux/verification.h>
#include <crypto/hash.h>
-#include <crypto/algapi.h>
+#include <crypto/utils.h>
#include <keys/user-type.h>
#include <keys/asymmetric-type.h>
#include "ubifs.h"
/**
- * ubifs_node_calc_hash - calculate the hash of a UBIFS node
+ * __ubifs_node_calc_hash - calculate the hash of a UBIFS node
* @c: UBIFS file-system description object
* @node: the node to calculate a hash for
* @hash: the returned hash
@@ -508,28 +507,13 @@ out:
*/
int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac)
{
- SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
- int err;
const char well_known_message[] = "UBIFS";
if (!ubifs_authenticated(c))
return 0;
- shash->tfm = c->hmac_tfm;
-
- err = crypto_shash_init(shash);
- if (err)
- return err;
-
- err = crypto_shash_update(shash, well_known_message,
- sizeof(well_known_message) - 1);
- if (err < 0)
- return err;
-
- err = crypto_shash_final(shash, hmac);
- if (err)
- return err;
- return 0;
+ return crypto_shash_tfm_digest(c->hmac_tfm, well_known_message,
+ sizeof(well_known_message) - 1, hmac);
}
/*
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index c4fc1047fc07..5b3a840098b0 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -70,18 +70,29 @@ static int nothing_to_commit(struct ubifs_info *c)
return 0;
/*
+ * Increasing @c->dirty_pn_cnt/@c->dirty_nn_cnt and marking
+ * nnodes/pnodes as dirty in run_gc() could race with following
+ * checking, which leads inconsistent states between @c->nroot
+ * and @c->dirty_pn_cnt/@c->dirty_nn_cnt, holding @c->lp_mutex
+ * to avoid that.
+ */
+ mutex_lock(&c->lp_mutex);
+ /*
* Even though the TNC is clean, the LPT tree may have dirty nodes. For
* example, this may happen if the budgeting subsystem invoked GC to
* make some free space, and the GC found an LEB with only dirty and
* free space. In this case GC would just change the lprops of this
* LEB (by turning all space into free space) and unmap it.
*/
- if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
+ if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) {
+ mutex_unlock(&c->lp_mutex);
return 0;
+ }
ubifs_assert(c, atomic_long_read(&c->dirty_zn_cnt) == 0);
ubifs_assert(c, c->dirty_pn_cnt == 0);
ubifs_assert(c, c->dirty_nn_cnt == 0);
+ mutex_unlock(&c->lp_mutex);
return 1;
}
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 75461777c466..0b48cbab8a3d 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -82,6 +82,7 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
/**
* ubifs_compress - compress data.
+ * @c: UBIFS file-system description object
* @in_buf: data to compress
* @in_len: length of the data to compress
* @out_buf: output buffer where compressed data should be stored
@@ -140,6 +141,7 @@ no_compr:
/**
* ubifs_decompress - decompress data.
+ * @c: UBIFS file-system description object
* @in_buf: data to decompress
* @in_len: length of the data to decompress
* @out_buf: output buffer where decompressed data should
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 3125e76376ee..921f9033d0d2 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -88,8 +88,7 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
}
const struct fscrypt_operations ubifs_crypt_operations = {
- .flags = FS_CFLG_OWN_PAGES,
- .key_prefix = "ubifs:",
+ .legacy_key_prefix = "ubifs:",
.get_context = ubifs_crypt_get_context,
.set_context = ubifs_crypt_set_context,
.empty_dir = ubifs_crypt_empty_dir,
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index eef9e527d9ff..10a86c02a8b3 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -237,14 +237,14 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode));
pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode));
pr_err("\tatime %u.%u\n",
- (unsigned int)inode->i_atime.tv_sec,
- (unsigned int)inode->i_atime.tv_nsec);
+ (unsigned int) inode_get_atime_sec(inode),
+ (unsigned int) inode_get_atime_nsec(inode));
pr_err("\tmtime %u.%u\n",
- (unsigned int)inode->i_mtime.tv_sec,
- (unsigned int)inode->i_mtime.tv_nsec);
+ (unsigned int) inode_get_mtime_sec(inode),
+ (unsigned int) inode_get_mtime_nsec(inode));
pr_err("\tctime %u.%u\n",
- (unsigned int) inode_get_ctime(inode).tv_sec,
- (unsigned int) inode_get_ctime(inode).tv_nsec);
+ (unsigned int) inode_get_ctime_sec(inode),
+ (unsigned int) inode_get_ctime_nsec(inode));
pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum);
pr_err("\txattr_size %u\n", ui->xattr_size);
pr_err("\txattr_cnt %u\n", ui->xattr_cnt);
@@ -946,16 +946,20 @@ void ubifs_dump_tnc(struct ubifs_info *c)
pr_err("\n");
pr_err("(pid %d) start dumping TNC tree\n", current->pid);
- znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
- level = znode->level;
- pr_err("== Level %d ==\n", level);
- while (znode) {
- if (level != znode->level) {
- level = znode->level;
- pr_err("== Level %d ==\n", level);
+ if (c->zroot.znode) {
+ znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
+ level = znode->level;
+ pr_err("== Level %d ==\n", level);
+ while (znode) {
+ if (level != znode->level) {
+ level = znode->level;
+ pr_err("== Level %d ==\n", level);
+ }
+ ubifs_dump_znode(c, znode);
+ znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
}
- ubifs_dump_znode(c, znode);
- znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
+ } else {
+ pr_err("empty TNC tree in memory\n");
}
pr_err("(pid %d) finish dumping TNC tree\n", current->pid);
}
@@ -1742,17 +1746,22 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
err = dbg_walk_index(c, NULL, add_size, &calc);
if (err) {
ubifs_err(c, "error %d while walking the index", err);
- return err;
+ goto out_err;
}
if (calc != idx_size) {
ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld",
calc, idx_size);
dump_stack();
- return -EINVAL;
+ err = -EINVAL;
+ goto out_err;
}
return 0;
+
+out_err:
+ ubifs_destroy_tnc_tree(c);
+ return err;
}
/**
@@ -2802,7 +2811,6 @@ static const struct file_operations dfs_fops = {
.read = dfs_file_read,
.write = dfs_file_write,
.owner = THIS_MODULE,
- .llseek = no_llseek,
};
/**
@@ -2822,9 +2830,9 @@ void dbg_debugfs_init_fs(struct ubifs_info *c)
const char *fname;
struct ubifs_debug_info *d = c->dbg;
- n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
+ n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME,
c->vi.ubi_num, c->vi.vol_id);
- if (n > UBIFS_DFS_DIR_LEN) {
+ if (n >= UBIFS_DFS_DIR_LEN) {
/* The array size is too small */
return;
}
@@ -2947,7 +2955,6 @@ static const struct file_operations dfs_global_fops = {
.read = dfs_global_file_read,
.write = dfs_global_file_write,
.owner = THIS_MODULE,
- .llseek = no_llseek,
};
/**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index ed966108da80..d425861e6b82 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -19,10 +19,11 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
/*
* The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
- * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte.
+ * + 1 for "_" and 2 for UBI device numbers and 3 for volume number and 1 for
+ * the trailing zero byte.
*/
#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
-#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
+#define UBIFS_DFS_DIR_LEN (3 + 1 + 2 + 3 + 1)
/**
* ubifs_debug_info - per-FS debugging information.
@@ -103,7 +104,7 @@ struct ubifs_debug_info {
unsigned int chk_fs:1;
unsigned int tst_rcvry:1;
- char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
+ char dfs_dir_name[UBIFS_DFS_DIR_LEN];
struct dentry *dfs_dir;
struct dentry *dfs_dump_lprops;
struct dentry *dfs_dump_budg;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index be45972ff95d..fda82f3e16e8 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -71,8 +71,13 @@ static int inherit_flags(const struct inode *dir, umode_t mode)
* @is_xattr: whether the inode is xattr inode
*
* This function finds an unused inode number, allocates new inode and
- * initializes it. Returns new inode in case of success and an error code in
- * case of failure.
+ * initializes it. Non-xattr new inode may be written with xattrs(selinux/
+ * encryption) before writing dentry, which could cause inconsistent problem
+ * when powercut happens between two operations. To deal with it, non-xattr
+ * new inode is initialized with zero-nlink and added into orphan list, caller
+ * should make sure that inode is relinked later, and make sure that orphan
+ * removing and journal writing into an committing atomic operation. Returns
+ * new inode in case of success and an error code in case of failure.
*/
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
umode_t mode, bool is_xattr)
@@ -96,7 +101,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
inode->i_flags |= S_NOCMTIME;
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
- inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
inode->i_mapping->nrpages = 0;
if (!is_xattr) {
@@ -163,9 +168,25 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
ui->creat_sqnum = ++c->max_sqnum;
spin_unlock(&c->cnt_lock);
+ if (!is_xattr) {
+ set_nlink(inode, 0);
+ err = ubifs_add_orphan(c, inode->i_ino);
+ if (err) {
+ ubifs_err(c, "ubifs_add_orphan failed: %i", err);
+ goto out_iput;
+ }
+ down_read(&c->commit_sem);
+ ui->del_cmtno = c->cmt_no;
+ up_read(&c->commit_sem);
+ }
+
if (encrypted) {
err = fscrypt_set_context(inode, NULL);
if (err) {
+ if (!is_xattr) {
+ set_nlink(inode, 1);
+ ubifs_delete_orphan(c, inode->i_ino);
+ }
ubifs_err(c, "fscrypt_set_context failed: %i", err);
goto out_iput;
}
@@ -205,7 +226,6 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
err = fscrypt_prepare_lookup(dir, dentry, &nm);
- generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return d_splice_alias(NULL, dentry);
if (err)
@@ -321,11 +341,13 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir,
if (err)
goto out_inode;
+ set_nlink(inode, 1);
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1);
if (err)
goto out_cancel;
mutex_unlock(&dir_ui->ui_mutex);
@@ -340,8 +362,8 @@ out_cancel:
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
+ set_nlink(inode, 0);
out_inode:
- make_bad_inode(inode);
iput(inode);
out_fname:
fscrypt_free_filename(&nm);
@@ -386,7 +408,6 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
return inode;
out_inode:
- make_bad_inode(inode);
iput(inode);
out_free:
ubifs_err(c, "cannot create whiteout file, error %d", err);
@@ -470,6 +491,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
if (err)
goto out_inode;
+ set_nlink(inode, 1);
mutex_lock(&ui->ui_mutex);
insert_inode_hash(inode);
d_tmpfile(file, inode);
@@ -479,7 +501,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
mutex_unlock(&ui->ui_mutex);
lock_2_inodes(dir, inode);
- err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
+ err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 1);
if (err)
goto out_cancel;
unlock_2_inodes(dir, inode);
@@ -492,7 +514,6 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
out_cancel:
unlock_2_inodes(dir, inode);
out_inode:
- make_bad_inode(inode);
if (!instantiated)
iput(inode);
out_budg:
@@ -534,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type)
return 0;
}
+struct ubifs_dir_data {
+ struct ubifs_dent_node *dent;
+ u64 cookie;
+};
+
/*
* The classical Unix view for directory is that it is a linear array of
* (name, inode number) entries. Linux/VFS assumes this model as well.
@@ -561,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
struct inode *dir = file_inode(file);
struct ubifs_info *c = dir->i_sb->s_fs_info;
bool encrypted = IS_ENCRYPTED(dir);
+ struct ubifs_dir_data *data = file->private_data;
dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos);
@@ -583,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
fstr_real_len = fstr.len;
}
- if (file->f_version == 0) {
+ if (data->cookie == 0) {
/*
- * The file was seek'ed, which means that @file->private_data
+ * The file was seek'ed, which means that @data->dent
* is now invalid. This may also be just the first
* 'ubifs_readdir()' invocation, in which case
- * @file->private_data is NULL, and the below code is
+ * @data->dent is NULL, and the below code is
* basically a no-op.
*/
- kfree(file->private_data);
- file->private_data = NULL;
+ kfree(data->dent);
+ data->dent = NULL;
}
/*
- * 'generic_file_llseek()' unconditionally sets @file->f_version to
- * zero, and we use this for detecting whether the file was seek'ed.
+ * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this
+ * for detecting whether the file was seek'ed.
*/
- file->f_version = 1;
+ data->cookie = 1;
/* File positions 0 and 1 correspond to "." and ".." */
if (ctx->pos < 2) {
- ubifs_assert(c, !file->private_data);
+ ubifs_assert(c, !data->dent);
if (!dir_emit_dots(file, ctx)) {
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@@ -620,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
}
ctx->pos = key_hash_flash(c, &dent->key);
- file->private_data = dent;
+ data->dent = dent;
}
- dent = file->private_data;
+ dent = data->dent;
if (!dent) {
/*
* The directory was seek'ed to and is now readdir'ed.
@@ -637,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
ctx->pos = key_hash_flash(c, &dent->key);
- file->private_data = dent;
+ data->dent = dent;
}
while (1) {
@@ -680,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
- kfree(file->private_data);
+ kfree(data->dent);
ctx->pos = key_hash_flash(c, &dent->key);
- file->private_data = dent;
+ data->dent = dent;
cond_resched();
}
out:
- kfree(file->private_data);
- file->private_data = NULL;
+ kfree(data->dent);
+ data->dent = NULL;
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@@ -712,7 +739,10 @@ out:
/* Free saved readdir() state when the directory is closed */
static int ubifs_dir_release(struct inode *dir, struct file *file)
{
- kfree(file->private_data);
+ struct ubifs_dir_data *data = file->private_data;
+
+ kfree(data->dent);
+ kfree(data);
file->private_data = NULL;
return 0;
}
@@ -724,7 +754,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
struct inode *inode = d_inode(old_dentry);
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_inode *dir_ui = ubifs_inode(dir);
- int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+ int err, sz_change;
struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
.dirtied_ino_d = ALIGN(ui->data_len, 8) };
struct fscrypt_name nm;
@@ -748,6 +778,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
return err;
+ sz_change = CALC_DENT_SIZE(fname_len(&nm));
+
err = dbg_check_synced_i_size(c, inode);
if (err)
goto out_fname;
@@ -758,17 +790,14 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
lock_2_inodes(dir, inode);
- /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */
- if (inode->i_nlink == 0)
- ubifs_delete_orphan(c, inode->i_ino);
-
inc_nlink(inode);
ihold(inode);
inode_set_ctime_current(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, inode->i_nlink == 1);
if (err)
goto out_cancel;
unlock_2_inodes(dir, inode);
@@ -782,8 +811,6 @@ out_cancel:
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
drop_nlink(inode);
- if (inode->i_nlink == 0)
- ubifs_add_orphan(c, inode->i_ino);
unlock_2_inodes(dir, inode);
ubifs_release_budget(c, &req);
iput(inode);
@@ -841,8 +868,9 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
drop_nlink(inode);
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0);
if (err)
goto out_cancel;
unlock_2_inodes(dir, inode);
@@ -944,8 +972,9 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
drop_nlink(dir);
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0);
if (err)
goto out_cancel;
unlock_2_inodes(dir, inode);
@@ -1012,14 +1041,16 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
if (err)
goto out_inode;
+ set_nlink(inode, 1);
mutex_lock(&dir_ui->ui_mutex);
insert_inode_hash(inode);
inc_nlink(inode);
inc_nlink(dir);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1);
if (err) {
ubifs_err(c, "cannot create directory, error %d", err);
goto out_cancel;
@@ -1036,8 +1067,8 @@ out_cancel:
dir_ui->ui_size = dir->i_size;
drop_nlink(dir);
mutex_unlock(&dir_ui->ui_mutex);
+ set_nlink(inode, 0);
out_inode:
- make_bad_inode(inode);
iput(inode);
out_fname:
fscrypt_free_filename(&nm);
@@ -1096,21 +1127,25 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
goto out_fname;
}
+ err = ubifs_init_security(dir, inode, &dentry->d_name);
+ if (err) {
+ kfree(dev);
+ goto out_inode;
+ }
+
init_special_inode(inode, inode->i_mode, rdev);
inode->i_size = ubifs_inode(inode)->ui_size = devlen;
ui = ubifs_inode(inode);
ui->data = dev;
ui->data_len = devlen;
-
- err = ubifs_init_security(dir, inode, &dentry->d_name);
- if (err)
- goto out_inode;
+ set_nlink(inode, 1);
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1);
if (err)
goto out_cancel;
mutex_unlock(&dir_ui->ui_mutex);
@@ -1125,8 +1160,8 @@ out_cancel:
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
+ set_nlink(inode, 0);
out_inode:
- make_bad_inode(inode);
iput(inode);
out_fname:
fscrypt_free_filename(&nm);
@@ -1177,6 +1212,10 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
goto out_fname;
}
+ err = ubifs_init_security(dir, inode, &dentry->d_name);
+ if (err)
+ goto out_inode;
+
ui = ubifs_inode(inode);
ui->data = kmalloc(disk_link.len, GFP_NOFS);
if (!ui->data) {
@@ -1201,16 +1240,14 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
*/
ui->data_len = disk_link.len - 1;
inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1;
-
- err = ubifs_init_security(dir, inode, &dentry->d_name);
- if (err)
- goto out_inode;
+ set_nlink(inode, 1);
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
- err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_get_ctime(inode)));
+ err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1);
if (err)
goto out_cancel;
mutex_unlock(&dir_ui->ui_mutex);
@@ -1224,10 +1261,10 @@ out_cancel:
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
+ set_nlink(inode, 0);
out_inode:
/* Free inode->i_link before inode is marked as bad. */
fscrypt_free_inode(inode);
- make_bad_inode(inode);
iput(inode);
out_fname:
fscrypt_free_filename(&nm);
@@ -1395,14 +1432,10 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
err = ubifs_budget_space(c, &wht_req);
if (err) {
- /*
- * Whiteout inode can not be written on flash by
- * ubifs_jnl_write_inode(), because it's neither
- * dirty nor zero-nlink.
- */
iput(whiteout);
goto out_release;
}
+ set_nlink(whiteout, 1);
/* Add the old_dentry size to the old_dir size. */
old_sz -= CALC_DENT_SIZE(fname_len(&old_nm));
@@ -1481,7 +1514,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
}
err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir,
- new_inode, &new_nm, whiteout, sync);
+ new_inode, &new_nm, whiteout, sync, !!whiteout);
if (err)
goto out_cancel;
@@ -1534,6 +1567,7 @@ out_cancel:
unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
if (whiteout) {
ubifs_release_budget(c, &wht_req);
+ set_nlink(whiteout, 0);
iput(whiteout);
}
out_release:
@@ -1687,6 +1721,24 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
return 0;
}
+static int ubifs_dir_open(struct inode *inode, struct file *file)
+{
+ struct ubifs_dir_data *data;
+
+ data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ file->private_data = data;
+ return 0;
+}
+
+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct ubifs_dir_data *data = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &data->cookie);
+}
+
const struct inode_operations ubifs_dir_inode_operations = {
.lookup = ubifs_lookup,
.create = ubifs_create,
@@ -1707,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = {
};
const struct file_operations ubifs_dir_operations = {
- .llseek = generic_file_llseek,
+ .open = ubifs_dir_open,
+ .llseek = ubifs_dir_llseek,
.release = ubifs_dir_release,
.read = generic_read_dir,
.iterate_shared = ubifs_readdir,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 781206d0ec84..5130123005e4 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -96,36 +96,36 @@ dump:
return -EINVAL;
}
-static int do_readpage(struct page *page)
+static int do_readpage(struct folio *folio)
{
void *addr;
int err = 0, i;
unsigned int block, beyond;
- struct ubifs_data_node *dn;
- struct inode *inode = page->mapping->host;
+ struct ubifs_data_node *dn = NULL;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
loff_t i_size = i_size_read(inode);
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
- inode->i_ino, page->index, i_size, page->flags);
- ubifs_assert(c, !PageChecked(page));
- ubifs_assert(c, !PagePrivate(page));
+ inode->i_ino, folio->index, i_size, folio->flags);
+ ubifs_assert(c, !folio_test_checked(folio));
+ ubifs_assert(c, !folio->private);
- addr = kmap(page);
+ addr = kmap_local_folio(folio, 0);
- block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
if (block >= beyond) {
/* Reading beyond inode */
- SetPageChecked(page);
- memset(addr, 0, PAGE_SIZE);
+ folio_set_checked(folio);
+ addr = folio_zero_tail(folio, 0, addr);
goto out;
}
dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
if (!dn) {
err = -ENOMEM;
- goto error;
+ goto out;
}
i = 0;
@@ -150,39 +150,35 @@ static int do_readpage(struct page *page)
memset(addr + ilen, 0, dlen - ilen);
}
}
- if (++i >= UBIFS_BLOCKS_PER_PAGE)
+ if (++i >= (UBIFS_BLOCKS_PER_PAGE << folio_order(folio)))
break;
block += 1;
addr += UBIFS_BLOCK_SIZE;
+ if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) {
+ kunmap_local(addr - UBIFS_BLOCK_SIZE);
+ addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE);
+ }
}
+
if (err) {
struct ubifs_info *c = inode->i_sb->s_fs_info;
if (err == -ENOENT) {
/* Not found, so it must be a hole */
- SetPageChecked(page);
+ folio_set_checked(folio);
dbg_gen("hole");
- goto out_free;
+ err = 0;
+ } else {
+ ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
+ folio->index, inode->i_ino, err);
}
- ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
- page->index, inode->i_ino, err);
- goto error;
}
-out_free:
- kfree(dn);
out:
- SetPageUptodate(page);
- ClearPageError(page);
- flush_dcache_page(page);
- kunmap(page);
- return 0;
-
-error:
kfree(dn);
- ClearPageUptodate(page);
- SetPageError(page);
- flush_dcache_page(page);
- kunmap(page);
+ if (!err)
+ folio_mark_uptodate(folio);
+ flush_dcache_folio(folio);
+ kunmap_local(addr);
return err;
}
@@ -215,23 +211,23 @@ static void release_existing_page_budget(struct ubifs_info *c)
}
static int write_begin_slow(struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep)
+ loff_t pos, unsigned len, struct folio **foliop)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
pgoff_t index = pos >> PAGE_SHIFT;
struct ubifs_budget_req req = { .new_page = 1 };
int err, appending = !!(pos + len > inode->i_size);
- struct page *page;
+ struct folio *folio;
dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
inode->i_ino, pos, len, inode->i_size);
/*
- * At the slow path we have to budget before locking the page, because
- * budgeting may force write-back, which would wait on locked pages and
- * deadlock if we had the page locked. At this point we do not know
- * anything about the page, so assume that this is a new page which is
+ * At the slow path we have to budget before locking the folio, because
+ * budgeting may force write-back, which would wait on locked folios and
+ * deadlock if we had the folio locked. At this point we do not know
+ * anything about the folio, so assume that this is a new folio which is
* written to a hole. This corresponds to largest budget. Later the
* budget will be amended if this is not true.
*/
@@ -243,42 +239,43 @@ static int write_begin_slow(struct address_space *mapping,
if (unlikely(err))
return err;
- page = grab_cache_page_write_begin(mapping, index);
- if (unlikely(!page)) {
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
ubifs_release_budget(c, &req);
- return -ENOMEM;
+ return PTR_ERR(folio);
}
- if (!PageUptodate(page)) {
- if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
- SetPageChecked(page);
+ if (!folio_test_uptodate(folio)) {
+ if (pos == folio_pos(folio) && len >= folio_size(folio))
+ folio_set_checked(folio);
else {
- err = do_readpage(page);
+ err = do_readpage(folio);
if (err) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
ubifs_release_budget(c, &req);
return err;
}
}
}
- if (PagePrivate(page))
+ if (folio->private)
/*
- * The page is dirty, which means it was budgeted twice:
+ * The folio is dirty, which means it was budgeted twice:
* o first time the budget was allocated by the task which
- * made the page dirty and set the PG_private flag;
+ * made the folio dirty and set the private field;
* o and then we budgeted for it for the second time at the
* very beginning of this function.
*
- * So what we have to do is to release the page budget we
+ * So what we have to do is to release the folio budget we
* allocated.
*/
release_new_page_budget(c);
- else if (!PageChecked(page))
+ else if (!folio_test_checked(folio))
/*
- * We are changing a page which already exists on the media.
- * This means that changing the page does not make the amount
+ * We are changing a folio which already exists on the media.
+ * This means that changing the folio does not make the amount
* of indexing information larger, and this part of the budget
* which we have already acquired may be released.
*/
@@ -301,32 +298,33 @@ static int write_begin_slow(struct address_space *mapping,
ubifs_release_dirty_inode_budget(c, ui);
}
- *pagep = page;
+ *foliop = folio;
return 0;
}
/**
* allocate_budget - allocate budget for 'ubifs_write_begin()'.
* @c: UBIFS file-system description object
- * @page: page to allocate budget for
+ * @folio: folio to allocate budget for
* @ui: UBIFS inode object the page belongs to
* @appending: non-zero if the page is appended
*
* This is a helper function for 'ubifs_write_begin()' which allocates budget
* for the operation. The budget is allocated differently depending on whether
* this is appending, whether the page is dirty or not, and so on. This
- * function leaves the @ui->ui_mutex locked in case of appending. Returns zero
- * in case of success and %-ENOSPC in case of failure.
+ * function leaves the @ui->ui_mutex locked in case of appending.
+ *
+ * Returns: %0 in case of success and %-ENOSPC in case of failure.
*/
-static int allocate_budget(struct ubifs_info *c, struct page *page,
+static int allocate_budget(struct ubifs_info *c, struct folio *folio,
struct ubifs_inode *ui, int appending)
{
struct ubifs_budget_req req = { .fast = 1 };
- if (PagePrivate(page)) {
+ if (folio->private) {
if (!appending)
/*
- * The page is dirty and we are not appending, which
+ * The folio is dirty and we are not appending, which
* means no budget is needed at all.
*/
return 0;
@@ -350,11 +348,11 @@ static int allocate_budget(struct ubifs_info *c, struct page *page,
*/
req.dirtied_ino = 1;
} else {
- if (PageChecked(page))
+ if (folio_test_checked(folio))
/*
* The page corresponds to a hole and does not
* exist on the media. So changing it makes
- * make the amount of indexing information
+ * the amount of indexing information
* larger, and we have to budget for a new
* page.
*/
@@ -416,7 +414,7 @@ static int allocate_budget(struct ubifs_info *c, struct page *page,
*/
static int ubifs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -424,7 +422,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
int err, appending = !!(pos + len > inode->i_size);
int skipped_read = 0;
- struct page *page;
+ struct folio *folio;
ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
ubifs_assert(c, !c->ro_media && !c->ro_mount);
@@ -433,13 +431,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return -EROFS;
/* Try out the fast-path part first */
- page = grab_cache_page_write_begin(mapping, index);
- if (unlikely(!page))
- return -ENOMEM;
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
/* The page is not loaded from the flash */
- if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
+ if (pos == folio_pos(folio) && len >= folio_size(folio)) {
/*
* We change whole page so no need to load it. But we
* do not know whether this page exists on the media or
@@ -449,19 +448,19 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* media. Thus, we are setting the @PG_checked flag
* here.
*/
- SetPageChecked(page);
+ folio_set_checked(folio);
skipped_read = 1;
} else {
- err = do_readpage(page);
+ err = do_readpage(folio);
if (err) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
}
}
- err = allocate_budget(c, page, ui, appending);
+ err = allocate_budget(c, folio, ui, appending);
if (unlikely(err)) {
ubifs_assert(c, err == -ENOSPC);
/*
@@ -469,7 +468,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* write all of it, then it is not up to date.
*/
if (skipped_read)
- ClearPageChecked(page);
+ folio_clear_checked(folio);
/*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
@@ -481,10 +480,10 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
mutex_unlock(&ui->ui_mutex);
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
- return write_begin_slow(mapping, pos, len, pagep);
+ return write_begin_slow(mapping, pos, len, foliop);
}
/*
@@ -493,22 +492,21 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
- *pagep = page;
+ *foliop = folio;
return 0;
-
}
/**
* cancel_budget - cancel budget.
* @c: UBIFS file-system description object
- * @page: page to cancel budget for
+ * @folio: folio to cancel budget for
* @ui: UBIFS inode object the page belongs to
* @appending: non-zero if the page is appended
*
* This is a helper function for a page write operation. It unlocks the
* @ui->ui_mutex in case of appending.
*/
-static void cancel_budget(struct ubifs_info *c, struct page *page,
+static void cancel_budget(struct ubifs_info *c, struct folio *folio,
struct ubifs_inode *ui, int appending)
{
if (appending) {
@@ -516,8 +514,8 @@ static void cancel_budget(struct ubifs_info *c, struct page *page,
ubifs_release_dirty_inode_budget(c, ui);
mutex_unlock(&ui->ui_mutex);
}
- if (!PagePrivate(page)) {
- if (PageChecked(page))
+ if (!folio->private) {
+ if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
@@ -526,7 +524,7 @@ static void cancel_budget(struct ubifs_info *c, struct page *page,
static int ubifs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
@@ -535,47 +533,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
int appending = !!(end_pos > inode->i_size);
dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
- inode->i_ino, pos, page->index, len, copied, inode->i_size);
+ inode->i_ino, pos, folio->index, len, copied, inode->i_size);
- if (unlikely(copied < len && len == PAGE_SIZE)) {
+ if (unlikely(copied < len && !folio_test_uptodate(folio))) {
/*
- * VFS copied less data to the page that it intended and
+ * VFS copied less data to the folio than it intended and
* declared in its '->write_begin()' call via the @len
- * argument. If the page was not up-to-date, and @len was
- * @PAGE_SIZE, the 'ubifs_write_begin()' function did
+ * argument. If the folio was not up-to-date,
+ * the 'ubifs_write_begin()' function did
* not load it from the media (for optimization reasons). This
- * means that part of the page contains garbage. So read the
- * page now.
+ * means that part of the folio contains garbage. So read the
+ * folio now.
*/
dbg_gen("copied %d instead of %d, read page and repeat",
copied, len);
- cancel_budget(c, page, ui, appending);
- ClearPageChecked(page);
+ cancel_budget(c, folio, ui, appending);
+ folio_clear_checked(folio);
/*
* Return 0 to force VFS to repeat the whole operation, or the
* error code if 'do_readpage()' fails.
*/
- copied = do_readpage(page);
+ copied = do_readpage(folio);
goto out;
}
- if (len == PAGE_SIZE)
- SetPageUptodate(page);
+ if (len == folio_size(folio))
+ folio_mark_uptodate(folio);
- if (!PagePrivate(page)) {
- attach_page_private(page, (void *)1);
+ if (!folio->private) {
+ folio_attach_private(folio, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
- __set_page_dirty_nobuffers(page);
+ filemap_dirty_folio(mapping, folio);
}
if (appending) {
i_size_write(inode, end_pos);
ui->ui_size = end_pos;
/*
- * Note, we do not set @I_DIRTY_PAGES (which means that the
- * inode has dirty pages), this has been done in
- * '__set_page_dirty_nobuffers()'.
+ * We do not set @I_DIRTY_PAGES (which means that
+ * the inode has dirty pages), this was done in
+ * filemap_dirty_folio().
*/
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
@@ -583,43 +581,43 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
}
out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return copied;
}
/**
* populate_page - copy data nodes into a page for bulk-read.
* @c: UBIFS file-system description object
- * @page: page
+ * @folio: folio
* @bu: bulk-read information
* @n: next zbranch slot
*
- * This function returns %0 on success and a negative error code on failure.
+ * Returns: %0 on success and a negative error code on failure.
*/
-static int populate_page(struct ubifs_info *c, struct page *page,
+static int populate_page(struct ubifs_info *c, struct folio *folio,
struct bu_info *bu, int *n)
{
int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
unsigned int page_block;
void *addr, *zaddr;
pgoff_t end_index;
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
- inode->i_ino, page->index, i_size, page->flags);
+ inode->i_ino, folio->index, i_size, folio->flags);
- addr = zaddr = kmap(page);
+ addr = zaddr = kmap_local_folio(folio, 0);
end_index = (i_size - 1) >> PAGE_SHIFT;
- if (!i_size || page->index > end_index) {
+ if (!i_size || folio->index > end_index) {
hole = 1;
- memset(addr, 0, PAGE_SIZE);
+ addr = folio_zero_tail(folio, 0, addr);
goto out_hole;
}
- page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ page_block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
while (1) {
int err, len, out_len, dlen;
@@ -668,9 +666,13 @@ static int populate_page(struct ubifs_info *c, struct page *page,
break;
addr += UBIFS_BLOCK_SIZE;
page_block += 1;
+ if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) {
+ kunmap_local(addr - UBIFS_BLOCK_SIZE);
+ addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE);
+ }
}
- if (end_index == page->index) {
+ if (end_index == folio->index) {
int len = i_size & (PAGE_SIZE - 1);
if (len && len < read)
@@ -679,22 +681,19 @@ static int populate_page(struct ubifs_info *c, struct page *page,
out_hole:
if (hole) {
- SetPageChecked(page);
+ folio_set_checked(folio);
dbg_gen("hole");
}
- SetPageUptodate(page);
- ClearPageError(page);
- flush_dcache_page(page);
- kunmap(page);
+ folio_mark_uptodate(folio);
+ flush_dcache_folio(folio);
+ kunmap_local(addr);
*n = nn;
return 0;
out_err:
- ClearPageUptodate(page);
- SetPageError(page);
- flush_dcache_page(page);
- kunmap(page);
+ flush_dcache_folio(folio);
+ kunmap_local(addr);
ubifs_err(c, "bad data node (block %u, inode %lu)",
page_block, inode->i_ino);
return -EINVAL;
@@ -704,15 +703,15 @@ out_err:
* ubifs_do_bulk_read - do bulk-read.
* @c: UBIFS file-system description object
* @bu: bulk-read information
- * @page1: first page to read
+ * @folio1: first folio to read
*
- * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
+ * Returns: %1 if the bulk-read is done, otherwise %0 is returned.
*/
static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
- struct page *page1)
+ struct folio *folio1)
{
- pgoff_t offset = page1->index, end_index;
- struct address_space *mapping = page1->mapping;
+ pgoff_t offset = folio1->index, end_index;
+ struct address_space *mapping = folio1->mapping;
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
int err, page_idx, page_cnt, ret = 0, n = 0;
@@ -762,11 +761,11 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
goto out_warn;
}
- err = populate_page(c, page1, bu, &n);
+ err = populate_page(c, folio1, bu, &n);
if (err)
goto out_warn;
- unlock_page(page1);
+ folio_unlock(folio1);
ret = 1;
isize = i_size_read(inode);
@@ -776,19 +775,19 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
for (page_idx = 1; page_idx < page_cnt; page_idx++) {
pgoff_t page_offset = offset + page_idx;
- struct page *page;
+ struct folio *folio;
if (page_offset > end_index)
break;
- page = pagecache_get_page(mapping, page_offset,
+ folio = __filemap_get_folio(mapping, page_offset,
FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
ra_gfp_mask);
- if (!page)
+ if (IS_ERR(folio))
break;
- if (!PageUptodate(page))
- err = populate_page(c, page, bu, &n);
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio))
+ err = populate_page(c, folio, bu, &n);
+ folio_unlock(folio);
+ folio_put(folio);
if (err)
break;
}
@@ -811,19 +810,21 @@ out_bu_off:
/**
* ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
- * @page: page from which to start bulk-read.
+ * @folio: folio from which to start bulk-read.
*
* Some flash media are capable of reading sequentially at faster rates. UBIFS
* bulk-read facility is designed to take advantage of that, by reading in one
* go consecutive data nodes that are also located consecutively in the same
- * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
+ * LEB.
+ *
+ * Returns: %1 if a bulk-read is done and %0 otherwise.
*/
-static int ubifs_bulk_read(struct page *page)
+static int ubifs_bulk_read(struct folio *folio)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
- pgoff_t index = page->index, last_page_read = ui->last_page_read;
+ pgoff_t index = folio->index, last_page_read = ui->last_page_read;
struct bu_info *bu;
int err = 0, allocated = 0;
@@ -871,8 +872,8 @@ static int ubifs_bulk_read(struct page *page)
bu->buf_len = c->max_bu_buf_len;
data_key_init(c, &bu->key, inode->i_ino,
- page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
- err = ubifs_do_bulk_read(c, bu, page);
+ folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+ err = ubifs_do_bulk_read(c, bu, folio);
if (!allocated)
mutex_unlock(&c->bu_mutex);
@@ -886,69 +887,71 @@ out_unlock:
static int ubifs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
-
- if (ubifs_bulk_read(page))
+ if (ubifs_bulk_read(folio))
return 0;
- do_readpage(page);
+ do_readpage(folio);
folio_unlock(folio);
return 0;
}
-static int do_writepage(struct page *page, int len)
+static int do_writepage(struct folio *folio, size_t len)
{
- int err = 0, i, blen;
+ int err = 0, blen;
unsigned int block;
void *addr;
+ size_t offset = 0;
union ubifs_key key;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
#ifdef UBIFS_DEBUG
struct ubifs_inode *ui = ubifs_inode(inode);
spin_lock(&ui->ui_lock);
- ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT);
+ ubifs_assert(c, folio->index <= ui->synced_i_size >> PAGE_SHIFT);
spin_unlock(&ui->ui_lock);
#endif
- /* Update radix tree tags */
- set_page_writeback(page);
+ folio_start_writeback(folio);
- addr = kmap(page);
- block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
- i = 0;
- while (len) {
- blen = min_t(int, len, UBIFS_BLOCK_SIZE);
+ addr = kmap_local_folio(folio, offset);
+ block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ for (;;) {
+ blen = min_t(size_t, len, UBIFS_BLOCK_SIZE);
data_key_init(c, &key, inode->i_ino, block);
err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
if (err)
break;
- if (++i >= UBIFS_BLOCKS_PER_PAGE)
+ len -= blen;
+ if (!len)
break;
block += 1;
addr += blen;
- len -= blen;
+ if (folio_test_highmem(folio) && !offset_in_page(addr)) {
+ kunmap_local(addr - blen);
+ offset += PAGE_SIZE;
+ addr = kmap_local_folio(folio, offset);
+ }
}
+ kunmap_local(addr);
if (err) {
- SetPageError(page);
- ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
- page->index, inode->i_ino, err);
+ mapping_set_error(folio->mapping, err);
+ ubifs_err(c, "cannot write folio %lu of inode %lu, error %d",
+ folio->index, inode->i_ino, err);
ubifs_ro_mode(c, err);
}
- ubifs_assert(c, PagePrivate(page));
- if (PageChecked(page))
+ ubifs_assert(c, folio->private != NULL);
+ if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
atomic_long_dec(&c->dirty_pg_cnt);
- detach_page_private(page);
- ClearPageChecked(page);
+ folio_detach_private(folio);
+ folio_clear_checked(folio);
- kunmap(page);
- unlock_page(page);
- end_page_writeback(page);
+ folio_unlock(folio);
+ folio_end_writeback(folio);
return err;
}
@@ -998,22 +1001,21 @@ static int do_writepage(struct page *page, int len)
* on the page lock and it would not write the truncated inode node to the
* journal before we have finished.
*/
-static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
+static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc,
+ void *data)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
loff_t i_size = i_size_read(inode), synced_i_size;
- pgoff_t end_index = i_size >> PAGE_SHIFT;
- int err, len = i_size & (PAGE_SIZE - 1);
- void *kaddr;
+ int err, len = folio_size(folio);
dbg_gen("ino %lu, pg %lu, pg flags %#lx",
- inode->i_ino, page->index, page->flags);
- ubifs_assert(c, PagePrivate(page));
+ inode->i_ino, folio->index, folio->flags);
+ ubifs_assert(c, folio->private != NULL);
- /* Is the page fully outside @i_size? (truncate in progress) */
- if (page->index > end_index || (page->index == end_index && !len)) {
+ /* Is the folio fully outside @i_size? (truncate in progress) */
+ if (folio_pos(folio) >= i_size) {
err = 0;
goto out_unlock;
}
@@ -1022,9 +1024,9 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
synced_i_size = ui->synced_i_size;
spin_unlock(&ui->ui_lock);
- /* Is the page fully inside @i_size? */
- if (page->index < end_index) {
- if (page->index >= synced_i_size >> PAGE_SHIFT) {
+ /* Is the folio fully inside i_size? */
+ if (folio_pos(folio) + len <= i_size) {
+ if (folio_pos(folio) + len > synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_redirty;
@@ -1037,20 +1039,18 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
* with this.
*/
}
- return do_writepage(page, PAGE_SIZE);
+ return do_writepage(folio, len);
}
/*
- * The page straddles @i_size. It must be zeroed out on each and every
+ * The folio straddles @i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- kaddr = kmap_atomic(page);
- memset(kaddr + len, 0, PAGE_SIZE - len);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
+ len = i_size - folio_pos(folio);
+ folio_zero_segment(folio, len, folio_size(folio));
if (i_size > synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
@@ -1058,19 +1058,25 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
goto out_redirty;
}
- return do_writepage(page, len);
+ return do_writepage(folio, len);
out_redirty:
/*
- * redirty_page_for_writepage() won't call ubifs_dirty_inode() because
+ * folio_redirty_for_writepage() won't call ubifs_dirty_inode() because
* it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so
* there is no need to do space budget for dirty inode.
*/
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
+static int ubifs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return write_cache_pages(mapping, wbc, ubifs_writepage, NULL);
+}
+
/**
* do_attr_changes - change inode attributes.
* @inode: inode to change attributes for
@@ -1083,9 +1089,9 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
if (attr->ia_valid & ATTR_GID)
inode->i_gid = attr->ia_gid;
if (attr->ia_valid & ATTR_ATIME)
- inode->i_atime = attr->ia_atime;
+ inode_set_atime_to_ts(inode, attr->ia_atime);
if (attr->ia_valid & ATTR_MTIME)
- inode->i_mtime = attr->ia_mtime;
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
if (attr->ia_valid & ATTR_CTIME)
inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (attr->ia_valid & ATTR_MODE) {
@@ -1104,7 +1110,9 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
* @attr: inode attribute changes description
*
* This function implements VFS '->setattr()' call when the inode is truncated
- * to a smaller size. Returns zero in case of success and a negative error code
+ * to a smaller size.
+ *
+ * Returns: %0 in case of success and a negative error code
* in case of failure.
*/
static int do_truncation(struct ubifs_info *c, struct inode *inode,
@@ -1145,11 +1153,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
if (offset) {
pgoff_t index = new_size >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
- page = find_lock_page(inode->i_mapping, index);
- if (page) {
- if (PageDirty(page)) {
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (!IS_ERR(folio)) {
+ if (folio_test_dirty(folio)) {
/*
* 'ubifs_jnl_truncate()' will try to truncate
* the last data node, but it contains
@@ -1158,14 +1166,14 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* 'ubifs_jnl_truncate()' will see an already
* truncated (and up to date) data node.
*/
- ubifs_assert(c, PagePrivate(page));
+ ubifs_assert(c, folio->private != NULL);
- clear_page_dirty_for_io(page);
+ folio_clear_dirty_for_io(folio);
if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
- offset = new_size &
- (PAGE_SIZE - 1);
- err = do_writepage(page, offset);
- put_page(page);
+ offset = offset_in_folio(folio,
+ new_size);
+ err = do_writepage(folio, offset);
+ folio_put(folio);
if (err)
goto out_budg;
/*
@@ -1178,8 +1186,8 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* to 'ubifs_jnl_truncate()' to save it from
* having to read it.
*/
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
}
}
@@ -1187,7 +1195,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
mutex_lock(&ui->ui_mutex);
ui->ui_size = inode->i_size;
/* Truncation changes inode [mc]time */
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
/* Other attributes may be changed at the same time as well */
do_attr_changes(inode, attr);
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
@@ -1210,7 +1218,9 @@ out_budg:
* @attr: inode attribute changes description
*
* This function implements VFS '->setattr()' call for all cases except
- * truncations to smaller size. Returns zero in case of success and a negative
+ * truncations to smaller size.
+ *
+ * Returns: %0 in case of success and a negative
* error code in case of failure.
*/
static int do_setattr(struct ubifs_info *c, struct inode *inode,
@@ -1234,7 +1244,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
mutex_lock(&ui->ui_mutex);
if (attr->ia_valid & ATTR_SIZE) {
/* Truncation changes inode [mc]time */
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
/* 'truncate_setsize()' changed @i_size, update @ui_size */
ui->ui_size = inode->i_size;
}
@@ -1355,14 +1365,16 @@ out:
* This helper function checks if the inode mtime/ctime should be updated or
* not. If current values of the time-stamps are within the UBIFS inode time
* granularity, they are not updated. This is an optimization.
+ *
+ * Returns: %1 if time update is needed, %0 if not
*/
static inline int mctime_update_needed(const struct inode *inode,
const struct timespec64 *now)
{
struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
- if (!timespec64_equal(&inode->i_mtime, now) ||
- !timespec64_equal(&ctime, now))
+ if (!timespec64_equal(&mtime, now) || !timespec64_equal(&ctime, now))
return 1;
return 0;
}
@@ -1370,8 +1382,12 @@ static inline int mctime_update_needed(const struct inode *inode,
/**
* ubifs_update_time - update time of inode.
* @inode: inode to update
+ * @flags: time updating control flag determines updating
+ * which time fields of @inode
*
* This function updates time of the inode.
+ *
+ * Returns: %0 for success or a negative error code otherwise.
*/
int ubifs_update_time(struct inode *inode, int flags)
{
@@ -1405,7 +1421,9 @@ int ubifs_update_time(struct inode *inode, int flags)
* @inode: inode to update
*
* This function updates mtime and ctime of the inode if it is not equivalent to
- * current time. Returns zero in case of success and a negative error code in
+ * current time.
+ *
+ * Returns: %0 in case of success and a negative error code in
* case of failure.
*/
static int update_mctime(struct inode *inode)
@@ -1424,7 +1442,7 @@ static int update_mctime(struct inode *inode)
return err;
mutex_lock(&ui->ui_mutex);
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
release = ui->dirty;
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
@@ -1492,14 +1510,14 @@ static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags)
*/
static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec64 now = current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
int err, update_time;
- dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
+ dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, folio->index,
i_size_read(inode));
ubifs_assert(c, !c->ro_media && !c->ro_mount);
@@ -1507,17 +1525,17 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_SIGBUS; /* -EROFS */
/*
- * We have not locked @page so far so we may budget for changing the
- * page. Note, we cannot do this after we locked the page, because
+ * We have not locked @folio so far so we may budget for changing the
+ * folio. Note, we cannot do this after we locked the folio, because
* budgeting may cause write-back which would cause deadlock.
*
- * At the moment we do not know whether the page is dirty or not, so we
- * assume that it is not and budget for a new page. We could look at
+ * At the moment we do not know whether the folio is dirty or not, so we
+ * assume that it is not and budget for a new folio. We could look at
* the @PG_private flag and figure this out, but we may race with write
- * back and the page state may change by the time we lock it, so this
+ * back and the folio state may change by the time we lock it, so this
* would need additional care. We do not bother with this at the
* moment, although it might be good idea to do. Instead, we allocate
- * budget for a new page and amend it later on if the page was in fact
+ * budget for a new folio and amend it later on if the folio was in fact
* dirty.
*
* The budgeting-related logic of this function is similar to what we
@@ -1540,21 +1558,21 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
- lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping ||
- page_offset(page) > i_size_read(inode))) {
- /* Page got truncated out from underneath us */
+ folio_lock(folio);
+ if (unlikely(folio->mapping != inode->i_mapping ||
+ folio_pos(folio) >= i_size_read(inode))) {
+ /* Folio got truncated out from underneath us */
goto sigbus;
}
- if (PagePrivate(page))
+ if (folio->private)
release_new_page_budget(c);
else {
- if (!PageChecked(page))
+ if (!folio_test_checked(folio))
ubifs_convert_page_budget(c);
- attach_page_private(page, (void *)1);
+ folio_attach_private(folio, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
- __set_page_dirty_nobuffers(page);
+ filemap_dirty_folio(folio->mapping, folio);
}
if (update_time) {
@@ -1562,7 +1580,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
struct ubifs_inode *ui = ubifs_inode(inode);
mutex_lock(&ui->ui_mutex);
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
release = ui->dirty;
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
@@ -1570,11 +1588,11 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
ubifs_release_dirty_inode_budget(c, ui);
}
- wait_for_stable_page(page);
+ folio_wait_stable(folio);
return VM_FAULT_LOCKED;
sigbus:
- unlock_page(page);
+ folio_unlock(folio);
ubifs_release_budget(c, &req);
return VM_FAULT_SIGBUS;
}
@@ -1628,7 +1646,7 @@ static int ubifs_symlink_getattr(struct mnt_idmap *idmap,
const struct address_space_operations ubifs_file_address_operations = {
.read_folio = ubifs_read_folio,
- .writepage = ubifs_writepage,
+ .writepages = ubifs_writepages,
.write_begin = ubifs_write_begin,
.write_end = ubifs_write_end,
.invalidate_folio = ubifs_invalidate_folio,
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 873e6e1c92b5..643718906b9f 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -73,7 +73,7 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @arg: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -82,8 +82,9 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
*/
static int scan_for_dirty_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -166,8 +167,7 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
data.pick_free = pick_free;
data.lnum = -1;
data.exclude_index = exclude_index;
- err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_for_dirty_cb,
+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_dirty_cb,
&data);
if (err)
return ERR_PTR(err);
@@ -340,7 +340,7 @@ out:
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @arg: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -349,8 +349,9 @@ out:
*/
static int scan_for_free_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -446,7 +447,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
data.pick_free = pick_free;
data.lnum = -1;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_for_free_cb,
+ scan_for_free_cb,
&data);
if (err)
return ERR_PTR(err);
@@ -580,7 +581,7 @@ out:
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @arg: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -589,8 +590,9 @@ out:
*/
static int scan_for_idx_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -625,8 +627,7 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
int err;
data.lnum = -1;
- err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_for_idx_cb,
+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_idx_cb,
&data);
if (err)
return ERR_PTR(err);
@@ -726,11 +727,10 @@ out:
return err;
}
-static int cmp_dirty_idx(const struct ubifs_lprops **a,
- const struct ubifs_lprops **b)
+static int cmp_dirty_idx(const void *a, const void *b)
{
- const struct ubifs_lprops *lpa = *a;
- const struct ubifs_lprops *lpb = *b;
+ const struct ubifs_lprops *lpa = *(const struct ubifs_lprops **)a;
+ const struct ubifs_lprops *lpb = *(const struct ubifs_lprops **)b;
return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
}
@@ -754,7 +754,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
sizeof(void *) * c->dirty_idx.cnt);
/* Sort it so that the dirtiest is now at the end */
sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
- (int (*)(const void *, const void *))cmp_dirty_idx, NULL);
+ cmp_dirty_idx, NULL);
dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
if (c->dirty_idx.cnt)
dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
@@ -773,7 +773,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @arg: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -782,8 +782,9 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
*/
static int scan_dirty_idx_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -842,8 +843,7 @@ static int find_dirty_idx_leb(struct ubifs_info *c)
if (c->pnodes_have >= c->pnode_cnt)
/* All pnodes are in memory, so skip scan */
return -ENOSPC;
- err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_dirty_idx_cb,
+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_dirty_idx_cb,
&data);
if (err)
return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index ffc9beee7be6..4a35f9e8f668 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -293,6 +293,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
}
/**
+ * __queue_and_wait - queue a task and wait until the task is waked up.
+ * @c: UBIFS file-system description object
+ *
+ * This function adds current task in queue and waits until the task is waked
+ * up. This function should be called with @c->reserve_space_wq locked.
+ */
+static void __queue_and_wait(struct ubifs_info *c)
+{
+ DEFINE_WAIT(wait);
+
+ __add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&c->reserve_space_wq.lock);
+
+ schedule();
+ finish_wait(&c->reserve_space_wq, &wait);
+}
+
+/**
+ * wait_for_reservation - try queuing current task to wait until waked up.
+ * @c: UBIFS file-system description object
+ *
+ * This function queues current task to wait until waked up, if queuing is
+ * started(@c->need_wait_space is not %0). Returns %true if current task is
+ * added in queue, otherwise %false is returned.
+ */
+static bool wait_for_reservation(struct ubifs_info *c)
+{
+ if (likely(atomic_read(&c->need_wait_space) == 0))
+ /* Quick path to check whether queuing is started. */
+ return false;
+
+ spin_lock(&c->reserve_space_wq.lock);
+ if (atomic_read(&c->need_wait_space) == 0) {
+ /* Queuing is not started, don't queue current task. */
+ spin_unlock(&c->reserve_space_wq.lock);
+ return false;
+ }
+
+ __queue_and_wait(c);
+ return true;
+}
+
+/**
+ * wake_up_reservation - wake up first task in queue or stop queuing.
+ * @c: UBIFS file-system description object
+ *
+ * This function wakes up the first task in queue if it exists, or stops
+ * queuing if no tasks in queue.
+ */
+static void wake_up_reservation(struct ubifs_info *c)
+{
+ spin_lock(&c->reserve_space_wq.lock);
+ if (waitqueue_active(&c->reserve_space_wq))
+ wake_up_locked(&c->reserve_space_wq);
+ else
+ /*
+ * Compared with wait_for_reservation(), set @c->need_wait_space
+ * under the protection of wait queue lock, which can avoid that
+ * @c->need_wait_space is set to 0 after new task queued.
+ */
+ atomic_set(&c->need_wait_space, 0);
+ spin_unlock(&c->reserve_space_wq.lock);
+}
+
+/**
+ * add_or_start_queue - add current task in queue or start queuing.
+ * @c: UBIFS file-system description object
+ *
+ * This function starts queuing if queuing is not started, otherwise adds
+ * current task in queue.
+ */
+static void add_or_start_queue(struct ubifs_info *c)
+{
+ spin_lock(&c->reserve_space_wq.lock);
+ if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) {
+ /* Starts queuing, task can go on directly. */
+ spin_unlock(&c->reserve_space_wq.lock);
+ return;
+ }
+
+ /*
+ * There are at least two tasks have retried more than 32 times
+ * at certain point, first task has started queuing, just queue
+ * the left tasks.
+ */
+ __queue_and_wait(c);
+}
+
+/**
* make_reservation - reserve journal space.
* @c: UBIFS file-system description object
* @jhead: journal head
@@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
static int make_reservation(struct ubifs_info *c, int jhead, int len)
{
int err, cmt_retries = 0, nospc_retries = 0;
+ bool blocked = wait_for_reservation(c);
again:
down_read(&c->commit_sem);
err = reserve_space(c, jhead, len);
- if (!err)
+ if (!err) {
/* c->commit_sem will get released via finish_reservation(). */
- return 0;
+ goto out_wake_up;
+ }
up_read(&c->commit_sem);
if (err == -ENOSPC) {
/*
* GC could not make any progress. We should try to commit
- * once because it could make some dirty space and GC would
- * make progress, so make the error -EAGAIN so that the below
+ * because it could make some dirty space and GC would make
+ * progress, so make the error -EAGAIN so that the below
* will commit and re-try.
*/
- if (nospc_retries++ < 2) {
- dbg_jnl("no space, retry");
- err = -EAGAIN;
- }
-
- /*
- * This means that the budgeting is incorrect. We always have
- * to be able to write to the media, because all operations are
- * budgeted. Deletions are not budgeted, though, but we reserve
- * an extra LEB for them.
- */
+ nospc_retries++;
+ dbg_jnl("no space, retry");
+ err = -EAGAIN;
}
if (err != -EAGAIN)
@@ -349,15 +433,37 @@ again:
*/
if (cmt_retries > 128) {
/*
- * This should not happen unless the journal size limitations
- * are too tough.
+ * This should not happen unless:
+ * 1. The journal size limitations are too tough.
+ * 2. The budgeting is incorrect. We always have to be able to
+ * write to the media, because all operations are budgeted.
+ * Deletions are not budgeted, though, but we reserve an
+ * extra LEB for them.
*/
- ubifs_err(c, "stuck in space allocation");
+ ubifs_err(c, "stuck in space allocation, nospc_retries %d",
+ nospc_retries);
err = -ENOSPC;
goto out;
- } else if (cmt_retries > 32)
- ubifs_warn(c, "too many space allocation re-tries (%d)",
- cmt_retries);
+ } else if (cmt_retries > 32) {
+ /*
+ * It's almost impossible to happen, unless there are many tasks
+ * making reservation concurrently and someone task has retried
+ * gc + commit for many times, generated available space during
+ * this period are grabbed by other tasks.
+ * But if it happens, start queuing up all tasks that will make
+ * space reservation, then there is only one task making space
+ * reservation at any time, and it can always make success under
+ * the premise of correct budgeting.
+ */
+ ubifs_warn(c, "too many space allocation cmt_retries (%d) "
+ "nospc_retries (%d), start queuing tasks",
+ cmt_retries, nospc_retries);
+
+ if (!blocked) {
+ blocked = true;
+ add_or_start_queue(c);
+ }
+ }
dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
cmt_retries);
@@ -365,7 +471,7 @@ again:
err = ubifs_run_commit(c);
if (err)
- return err;
+ goto out_wake_up;
goto again;
out:
@@ -380,6 +486,27 @@ out:
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
}
+out_wake_up:
+ if (blocked) {
+ /*
+ * Only tasks that have ever started queuing or ever been queued
+ * can wake up other queued tasks, which can make sure that
+ * there is only one task waked up to make space reservation.
+ * For example:
+ * task A task B task C
+ * make_reservation make_reservation
+ * reserve_space // 0
+ * wake_up_reservation
+ * atomic_cmpxchg // 0, start queuing
+ * reserve_space
+ * wait_for_reservation
+ * __queue_and_wait
+ * add_wait_queue
+ * if (blocked) // false
+ * // So that task C won't be waked up to race with task B
+ */
+ wake_up_reservation(c);
+ }
return err;
}
@@ -452,12 +579,12 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
ino->ch.node_type = UBIFS_INO_NODE;
ino_key_init_flash(c, &ino->key, inode->i_ino);
ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
- ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec);
- ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
- ino->ctime_sec = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- ino->ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
- ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
- ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ ino->atime_sec = cpu_to_le64(inode_get_atime_sec(inode));
+ ino->atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode));
+ ino->ctime_sec = cpu_to_le64(inode_get_ctime_sec(inode));
+ ino->ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ ino->mtime_sec = cpu_to_le64(inode_get_mtime_sec(inode));
+ ino->mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
ino->uid = cpu_to_le32(i_uid_read(inode));
ino->gid = cpu_to_le32(i_gid_read(inode));
ino->mode = cpu_to_le32(inode->i_mode);
@@ -516,6 +643,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent)
* @inode: inode to update
* @deletion: indicates a directory entry deletion i.e unlink or rmdir
* @xent: non-zero if the directory entry is an extended attribute entry
+ * @in_orphan: indicates whether the @inode is in orphan list
*
* This function updates an inode by writing a directory entry (or extended
* attribute entry), the inode itself, and the parent directory inode (or the
@@ -537,7 +665,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent)
*/
int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
const struct fscrypt_name *nm, const struct inode *inode,
- int deletion, int xent)
+ int deletion, int xent, int in_orphan)
{
int err, dlen, ilen, len, lnum, ino_offs, dent_offs, orphan_added = 0;
int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir);
@@ -623,7 +751,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
if (err)
goto out_release;
- if (last_reference) {
+ if (last_reference && !in_orphan) {
err = ubifs_add_orphan(c, inode->i_ino);
if (err) {
release_head(c, BASEHD);
@@ -679,6 +807,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
if (err)
goto out_ro;
+ if (in_orphan && inode->i_nlink)
+ ubifs_delete_orphan(c, inode->i_ino);
+
finish_reservation(c);
spin_lock(&ui->ui_lock);
ui->synced_i_size = ui->ui_size;
@@ -1209,6 +1340,7 @@ out_free:
* @new_nm: new name of the new directory entry
* @whiteout: whiteout inode
* @sync: non-zero if the write-buffer has to be synchronized
+ * @delete_orphan: indicates an orphan entry deletion for @whiteout
*
* This function implements the re-name operation which may involve writing up
* to 4 inodes(new inode, whiteout inode, old and new parent directory inodes)
@@ -1221,7 +1353,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
const struct inode *new_dir,
const struct inode *new_inode,
const struct fscrypt_name *new_nm,
- const struct inode *whiteout, int sync)
+ const struct inode *whiteout, int sync, int delete_orphan)
{
void *p;
union ubifs_key key;
@@ -1438,6 +1570,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
goto out_ro;
}
+ if (delete_orphan)
+ ubifs_delete_orphan(c, whiteout->i_ino);
+
finish_reservation(c);
if (new_inode) {
mark_inode_clean(c, new_ui);
@@ -1607,6 +1742,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
ubifs_err(c, "bad data node (block %u, inode %lu)",
blk, inode->i_ino);
ubifs_dump_node(c, dn, dn_size);
+ err = -EUCLEAN;
goto out_free;
}
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 6d6cd85c2b4c..8788740ec57f 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1005,7 +1005,7 @@ out:
* @c: the UBIFS file-system description object
* @lp: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @lst: lprops statistics to update
+ * @arg: lprops statistics to update
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1014,8 +1014,9 @@ out:
*/
static int scan_check_cb(struct ubifs_info *c,
const struct ubifs_lprops *lp, int in_tree,
- struct ubifs_lp_stats *lst)
+ void *arg)
{
+ struct ubifs_lp_stats *lst = arg;
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
@@ -1269,8 +1270,7 @@ int dbg_check_lprops(struct ubifs_info *c)
memset(&lst, 0, sizeof(struct ubifs_lp_stats));
err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
- (ubifs_lpt_scan_callback)scan_check_cb,
- &lst);
+ scan_check_cb, &lst);
if (err && err != -ENOSPC)
goto out;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 778a22bf9a92..441d0beca4cf 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1918,6 +1918,7 @@ out_err:
* @pnode: where to keep a pnode
* @cnode: where to keep a cnode
* @in_tree: is the node in the tree in memory
+ * @ptr: union of node pointers
* @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in
* the tree
* @ptr.pnode: ditto for pnode
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index c4d079328b92..07351fdce722 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1646,7 +1646,6 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
len -= node_len;
}
- err = 0;
out:
vfree(buf);
return err;
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 7adc37c10b6a..a148760fa49e 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -67,10 +67,13 @@ static int mst_node_check_hash(const struct ubifs_info *c,
{
u8 calc[UBIFS_MAX_HASH_LEN];
const void *node = mst;
+ int ret;
- crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch),
+ ret = crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch),
UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch),
calc);
+ if (ret)
+ return ret;
if (ubifs_check_hash(c, expected, calc))
return -EPERM;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 4909321d84cf..fb957d963ba6 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -42,24 +42,30 @@
static int dbg_check_orphans(struct ubifs_info *c);
-static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum,
- struct ubifs_orphan *parent_orphan)
+/**
+ * ubifs_add_orphan - add an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * Add an orphan. This function is called when an inodes link count drops to
+ * zero.
+ */
+int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
{
struct ubifs_orphan *orphan, *o;
struct rb_node **p, *parent = NULL;
orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
if (!orphan)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
orphan->inum = inum;
orphan->new = 1;
- INIT_LIST_HEAD(&orphan->child_list);
spin_lock(&c->orphan_lock);
if (c->tot_orphans >= c->max_orphans) {
spin_unlock(&c->orphan_lock);
kfree(orphan);
- return ERR_PTR(-ENFILE);
+ return -ENFILE;
}
p = &c->orph_tree.rb_node;
while (*p) {
@@ -73,7 +79,7 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum,
ubifs_err(c, "orphaned twice");
spin_unlock(&c->orphan_lock);
kfree(orphan);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
}
c->tot_orphans += 1;
@@ -83,14 +89,9 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum,
list_add_tail(&orphan->list, &c->orph_list);
list_add_tail(&orphan->new_list, &c->orph_new);
- if (parent_orphan) {
- list_add_tail(&orphan->child_list,
- &parent_orphan->child_list);
- }
-
spin_unlock(&c->orphan_lock);
dbg_gen("ino %lu", (unsigned long)inum);
- return orphan;
+ return 0;
}
static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum)
@@ -135,6 +136,7 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph)
if (orph->cmt) {
orph->del = 1;
+ rb_erase(&orph->rb, &c->orph_tree);
orph->dnext = c->orph_dnext;
c->orph_dnext = orph;
dbg_gen("delete later ino %lu", (unsigned long)orph->inum);
@@ -145,59 +147,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph)
}
/**
- * ubifs_add_orphan - add an orphan.
- * @c: UBIFS file-system description object
- * @inum: orphan inode number
- *
- * Add an orphan. This function is called when an inodes link count drops to
- * zero.
- */
-int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
-{
- int err = 0;
- ino_t xattr_inum;
- union ubifs_key key;
- struct ubifs_dent_node *xent, *pxent = NULL;
- struct fscrypt_name nm = {0};
- struct ubifs_orphan *xattr_orphan;
- struct ubifs_orphan *orphan;
-
- orphan = orphan_add(c, inum, NULL);
- if (IS_ERR(orphan))
- return PTR_ERR(orphan);
-
- lowest_xent_key(c, &key, inum);
- while (1) {
- xent = ubifs_tnc_next_ent(c, &key, &nm);
- if (IS_ERR(xent)) {
- err = PTR_ERR(xent);
- if (err == -ENOENT)
- break;
- kfree(pxent);
- return err;
- }
-
- fname_name(&nm) = xent->name;
- fname_len(&nm) = le16_to_cpu(xent->nlen);
- xattr_inum = le64_to_cpu(xent->inum);
-
- xattr_orphan = orphan_add(c, xattr_inum, orphan);
- if (IS_ERR(xattr_orphan)) {
- kfree(pxent);
- kfree(xent);
- return PTR_ERR(xattr_orphan);
- }
-
- kfree(pxent);
- pxent = xent;
- key_read(c, &xent->key, &key);
- }
- kfree(pxent);
-
- return 0;
-}
-
-/**
* ubifs_delete_orphan - delete an orphan.
* @c: UBIFS file-system description object
* @inum: orphan inode number
@@ -206,7 +155,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
*/
void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
{
- struct ubifs_orphan *orph, *child_orph, *tmp_o;
+ struct ubifs_orphan *orph;
spin_lock(&c->orphan_lock);
@@ -219,11 +168,6 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
return;
}
- list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) {
- list_del(&child_orph->child_list);
- orphan_delete(c, child_orph);
- }
-
orphan_delete(c, orph);
spin_unlock(&c->orphan_lock);
@@ -518,7 +462,6 @@ static void erase_deleted(struct ubifs_info *c)
dnext = orphan->dnext;
ubifs_assert(c, !orphan->new);
ubifs_assert(c, orphan->del);
- rb_erase(&orphan->rb, &c->orph_tree);
list_del(&orphan->list);
c->tot_orphans -= 1;
dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum);
@@ -571,51 +514,6 @@ int ubifs_clear_orphans(struct ubifs_info *c)
}
/**
- * insert_dead_orphan - insert an orphan.
- * @c: UBIFS file-system description object
- * @inum: orphan inode number
- *
- * This function is a helper to the 'do_kill_orphans()' function. The orphan
- * must be kept until the next commit, so it is added to the rb-tree and the
- * deletion list.
- */
-static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
-{
- struct ubifs_orphan *orphan, *o;
- struct rb_node **p, *parent = NULL;
-
- orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL);
- if (!orphan)
- return -ENOMEM;
- orphan->inum = inum;
-
- p = &c->orph_tree.rb_node;
- while (*p) {
- parent = *p;
- o = rb_entry(parent, struct ubifs_orphan, rb);
- if (inum < o->inum)
- p = &(*p)->rb_left;
- else if (inum > o->inum)
- p = &(*p)->rb_right;
- else {
- /* Already added - no problem */
- kfree(orphan);
- return 0;
- }
- }
- c->tot_orphans += 1;
- rb_link_node(&orphan->rb, parent, p);
- rb_insert_color(&orphan->rb, &c->orph_tree);
- list_add_tail(&orphan->list, &c->orph_list);
- orphan->del = 1;
- orphan->dnext = c->orph_dnext;
- c->orph_dnext = orphan;
- dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
- c->new_orphans, c->tot_orphans);
- return 0;
-}
-
-/**
* do_kill_orphans - remove orphan inodes from the index.
* @c: UBIFS file-system description object
* @sleb: scanned LEB
@@ -691,12 +589,12 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
for (i = 0; i < n; i++) {
- union ubifs_key key1, key2;
+ union ubifs_key key;
inum = le64_to_cpu(orph->inos[i]);
- ino_key_init(c, &key1, inum);
- err = ubifs_tnc_lookup(c, &key1, ino);
+ ino_key_init(c, &key, inum);
+ err = ubifs_tnc_lookup(c, &key, ino);
if (err && err != -ENOENT)
goto out_free;
@@ -708,17 +606,10 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
dbg_rcvry("deleting orphaned inode %lu",
(unsigned long)inum);
- lowest_ino_key(c, &key1, inum);
- highest_ino_key(c, &key2, inum);
-
- err = ubifs_tnc_remove_range(c, &key1, &key2);
+ err = ubifs_tnc_remove_ino(c, inum);
if (err)
goto out_ro;
}
-
- err = insert_dead_orphan(c, inum);
- if (err)
- goto out_free;
}
*last_cmt_no = cmt_no;
@@ -925,8 +816,12 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
inum = key_inum(c, &zbr->key);
if (inum != ci->last_ino) {
- /* Lowest node type is the inode node, so it comes first */
- if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
+ /*
+ * Lowest node type is the inode node or xattr entry(when
+ * selinux/encryption is enabled), so it comes first
+ */
+ if (key_type(c, &zbr->key) != UBIFS_INO_KEY &&
+ key_type(c, &zbr->key) != UBIFS_XENT_KEY)
ubifs_err(c, "found orphan node ino %lu, type %d",
(unsigned long)inum, key_type(c, &zbr->key));
ci->last_ino = inum;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 4211e4456b1e..a950c5f2560e 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -23,13 +23,13 @@
#include "ubifs.h"
#include <linux/list_sort.h>
#include <crypto/hash.h>
-#include <crypto/algapi.h>
/**
* struct replay_entry - replay list entry.
* @lnum: logical eraseblock number of the node
* @offs: node offset
* @len: node length
+ * @hash: node hash
* @deletion: non-zero if this entry corresponds to a node deletion
* @sqnum: node sequence number
* @list: links the replay list
@@ -366,6 +366,7 @@ static void destroy_replay_list(struct ubifs_info *c)
* @lnum: node logical eraseblock number
* @offs: node offset
* @len: node length
+ * @hash: node hash
* @key: node key
* @sqnum: sequence number
* @deletion: non-zero if this is a deletion
@@ -418,6 +419,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
* @lnum: node logical eraseblock number
* @offs: node offset
* @len: node length
+ * @hash: node hash
* @key: node key
* @name: directory entry name
* @nlen: directory entry name length
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3409488d39ba..245a10cc1eeb 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -54,11 +54,7 @@ module_param_cb(default_version, &ubifs_default_version_ops, &ubifs_default_vers
static struct kmem_cache *ubifs_inode_slab;
/* UBIFS TNC shrinker description */
-static struct shrinker ubifs_shrinker_info = {
- .scan_objects = ubifs_shrink_scan,
- .count_objects = ubifs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *ubifs_shrinker_info;
/**
* validate_inode - validate inode.
@@ -142,10 +138,10 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
set_nlink(inode, le32_to_cpu(ino->nlink));
i_uid_write(inode, le32_to_cpu(ino->uid));
i_gid_write(inode, le32_to_cpu(ino->gid));
- inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
- inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
- inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec);
- inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
+ inode_set_atime(inode, (int64_t)le64_to_cpu(ino->atime_sec),
+ le32_to_cpu(ino->atime_nsec));
+ inode_set_mtime(inode, (int64_t)le64_to_cpu(ino->mtime_sec),
+ le32_to_cpu(ino->mtime_nsec));
inode_set_ctime(inode, (int64_t)le64_to_cpu(ino->ctime_sec),
le32_to_cpu(ino->ctime_nsec));
inode->i_mode = le32_to_cpu(ino->mode);
@@ -923,8 +919,10 @@ static void free_buds(struct ubifs_info *c)
{
struct ubifs_bud *bud, *n;
- rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb)
+ rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) {
+ kfree(bud->log_hash);
kfree(bud);
+ }
}
/**
@@ -1193,6 +1191,7 @@ static void destroy_journal(struct ubifs_info *c)
bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
list_del(&bud->list);
+ kfree(bud->log_hash);
kfree(bud);
}
ubifs_destroy_idx_gc(c);
@@ -2152,6 +2151,8 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
init_waitqueue_head(&c->cmt_wq);
+ init_waitqueue_head(&c->reserve_space_wq);
+ atomic_set(&c->need_wait_space, 0);
c->buds = RB_ROOT;
c->old_idx = RB_ROOT;
c->size_tree = RB_ROOT;
@@ -2240,13 +2241,14 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
goto out_umount;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
err = -ENOMEM;
goto out_umount;
}
- import_uuid(&sb->s_uuid, c->uuid);
+ super_set_uuid(sb, c->uuid, sizeof(c->uuid));
mutex_unlock(&c->umount_mutex);
return 0;
@@ -2373,7 +2375,7 @@ static void inode_slab_ctor(void *obj)
static int __init ubifs_init(void)
{
- int err;
+ int err = -ENOMEM;
BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24);
@@ -2434,15 +2436,20 @@ static int __init ubifs_init(void)
ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
sizeof(struct ubifs_inode), 0,
- SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT |
- SLAB_ACCOUNT, &inode_slab_ctor);
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ &inode_slab_ctor);
if (!ubifs_inode_slab)
return -ENOMEM;
- err = register_shrinker(&ubifs_shrinker_info, "ubifs-slab");
- if (err)
+ ubifs_shrinker_info = shrinker_alloc(0, "ubifs-slab");
+ if (!ubifs_shrinker_info)
goto out_slab;
+ ubifs_shrinker_info->count_objects = ubifs_shrink_count;
+ ubifs_shrinker_info->scan_objects = ubifs_shrink_scan;
+
+ shrinker_register(ubifs_shrinker_info);
+
err = ubifs_compressors_init();
if (err)
goto out_shrinker;
@@ -2467,7 +2474,7 @@ out_dbg:
dbg_debugfs_exit();
ubifs_compressors_exit();
out_shrinker:
- unregister_shrinker(&ubifs_shrinker_info);
+ shrinker_free(ubifs_shrinker_info);
out_slab:
kmem_cache_destroy(ubifs_inode_slab);
return err;
@@ -2483,7 +2490,7 @@ static void __exit ubifs_exit(void)
dbg_debugfs_exit();
ubifs_sysfs_exit();
ubifs_compressors_exit();
- unregister_shrinker(&ubifs_shrinker_info);
+ shrinker_free(ubifs_shrinker_info);
/*
* Make sure all delayed rcu free inodes are flushed before we
diff --git a/fs/ubifs/sysfs.c b/fs/ubifs/sysfs.c
index 1c958148bb87..aae32222f11b 100644
--- a/fs/ubifs/sysfs.c
+++ b/fs/ubifs/sysfs.c
@@ -91,17 +91,17 @@ static struct kset ubifs_kset = {
int ubifs_sysfs_register(struct ubifs_info *c)
{
int ret, n;
- char dfs_dir_name[UBIFS_DFS_DIR_LEN+1];
+ char dfs_dir_name[UBIFS_DFS_DIR_LEN];
c->stats = kzalloc(sizeof(struct ubifs_stats_info), GFP_KERNEL);
if (!c->stats) {
ret = -ENOMEM;
goto out_last;
}
- n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
+ n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME,
c->vi.ubi_num, c->vi.vol_id);
- if (n > UBIFS_DFS_DIR_LEN) {
+ if (n >= UBIFS_DFS_DIR_LEN) {
/* The array size is too small */
ret = -EINVAL;
goto out_free;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f4728e65d1bd..45cacdcd4746 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -3116,14 +3116,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
void ubifs_tnc_close(struct ubifs_info *c)
{
tnc_destroy_cnext(c);
- if (c->zroot.znode) {
- long n, freed;
-
- n = atomic_long_read(&c->clean_zn_cnt);
- freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
- ubifs_assert(c, freed == n);
- atomic_long_sub(n, &ubifs_clean_zn_cnt);
- }
+ ubifs_destroy_tnc_tree(c);
kfree(c->gap_lebs);
kfree(c->ilebs);
destroy_old_idx(c);
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index 4d686e34e64d..d3f8a6aa1f49 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -251,6 +251,28 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
}
/**
+ * ubifs_destroy_tnc_tree - destroy all znodes connected to the TNC tree.
+ * @c: UBIFS file-system description object
+ *
+ * This function destroys the whole TNC tree and updates clean global znode
+ * count.
+ */
+void ubifs_destroy_tnc_tree(struct ubifs_info *c)
+{
+ long n, freed;
+
+ if (!c->zroot.znode)
+ return;
+
+ n = atomic_long_read(&c->clean_zn_cnt);
+ freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
+ ubifs_assert(c, freed == n);
+ atomic_long_sub(n, &ubifs_clean_zn_cnt);
+
+ c->zroot.znode = NULL;
+}
+
+/**
* read_znode - read an indexing node from flash and fill znode.
* @c: UBIFS file-system description object
* @zzbr: the zbranch describing the node to read
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index ebb3ad6b5e7e..d69a5a42d693 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -31,7 +31,7 @@
#include <linux/completion.h>
#include <crypto/hash_info.h>
#include <crypto/hash.h>
-#include <crypto/algapi.h>
+#include <crypto/utils.h>
#include <linux/fscrypt.h>
@@ -158,13 +158,6 @@
#endif
/*
- * The UBIFS sysfs directory name pattern and maximum name length (3 for "ubi"
- * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte.
- */
-#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
-#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
-
-/*
* Lockdep classes for UBIFS inode @ui_mutex.
*/
enum {
@@ -923,8 +916,6 @@ struct ubifs_budget_req {
* @rb: rb-tree node of rb-tree of orphans sorted by inode number
* @list: list head of list of orphans in order added
* @new_list: list head of list of orphans added since the last commit
- * @child_list: list of xattr children if this orphan hosts xattrs, list head
- * if this orphan is a xattr, not used otherwise.
* @cnext: next orphan to commit
* @dnext: next orphan to delete
* @inum: inode number
@@ -936,7 +927,6 @@ struct ubifs_orphan {
struct rb_node rb;
struct list_head list;
struct list_head new_list;
- struct list_head child_list;
struct ubifs_orphan *cnext;
struct ubifs_orphan *dnext;
ino_t inum;
@@ -1047,6 +1037,8 @@ struct ubifs_debug_info;
* @bg_bud_bytes: number of bud bytes when background commit is initiated
* @old_buds: buds to be released after commit ends
* @max_bud_cnt: maximum number of buds
+ * @need_wait_space: Non %0 means space reservation tasks need to wait in queue
+ * @reserve_space_wq: wait queue to sleep on if @need_wait_space is not %0
*
* @commit_sem: synchronizes committer with other processes
* @cmt_state: commit state
@@ -1305,6 +1297,8 @@ struct ubifs_info {
long long bg_bud_bytes;
struct list_head old_buds;
int max_bud_cnt;
+ atomic_t need_wait_space;
+ wait_queue_head_t reserve_space_wq;
struct rw_semaphore commit_sem;
int cmt_state;
@@ -1799,7 +1793,7 @@ int ubifs_consolidate_log(struct ubifs_info *c);
/* journal.c */
int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
const struct fscrypt_name *nm, const struct inode *inode,
- int deletion, int xent);
+ int deletion, int xent, int in_orphan);
int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
const union ubifs_key *key, const void *buf, int len);
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
@@ -1816,7 +1810,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
const struct inode *new_dir,
const struct inode *new_inode,
const struct fscrypt_name *new_nm,
- const struct inode *whiteout, int sync);
+ const struct inode *whiteout, int sync, int delete_orphan);
int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
loff_t old_size, loff_t new_size);
int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
@@ -1903,6 +1897,7 @@ struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
struct ubifs_znode *znode);
long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
struct ubifs_znode *zr);
+void ubifs_destroy_tnc_tree(struct ubifs_info *c);
struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
struct ubifs_zbranch *zbr,
struct ubifs_znode *parent, int iip);
@@ -2043,7 +2038,7 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
size_t size);
#ifdef CONFIG_UBIFS_FS_XATTR
-extern const struct xattr_handler *ubifs_xattr_handlers[];
+extern const struct xattr_handler * const ubifs_xattr_handlers[];
ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
int ubifs_purge_xattrs(struct inode *host);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 406c82eab513..f734588b224a 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -149,7 +149,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
if (strcmp(fname_name(nm), UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT) == 0)
host_ui->flags |= UBIFS_CRYPT_FL;
- err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
+ err = ubifs_jnl_update(c, host, nm, inode, 0, 1, 0);
if (err)
goto out_cancel;
ubifs_set_inode_flags(host);
@@ -735,7 +735,7 @@ static const struct xattr_handler ubifs_security_xattr_handler = {
};
#endif
-const struct xattr_handler *ubifs_xattr_handlers[] = {
+const struct xattr_handler * const ubifs_xattr_handlers[] = {
&ubifs_user_xattr_handler,
&ubifs_trusted_xattr_handler,
#ifdef CONFIG_UBIFS_FS_SECURITY