summary | refs | log | tree | commit | diff
path: root/fs/f2fs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/super.c')
-rw-r--r-- fs/f2fs/super.c 508
1 files changed, 327 insertions, 181 deletions
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b72fa103b963..875aef2fc520 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -11,7 +11,6 @@
#include <linux/fs_context.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
-#include <linux/buffer_head.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
@@ -44,24 +43,26 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
+ [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
+ [FAULT_NO_SEGMENT] = "no free segment",
};
int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate,
@@ -93,11 +94,26 @@ int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate,
#endif
/* f2fs-wide shrinker description */
-static struct shrinker f2fs_shrinker_info = {
- .scan_objects = f2fs_shrink_scan,
- .count_objects = f2fs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *f2fs_shrinker_info;
+
+static int __init f2fs_init_shrinker(void)
+{
+ f2fs_shrinker_info = shrinker_alloc(0, "f2fs-shrinker");
+ if (!f2fs_shrinker_info)
+ return -ENOMEM;
+
+ f2fs_shrinker_info->count_objects = f2fs_shrink_count;
+ f2fs_shrinker_info->scan_objects = f2fs_shrink_scan;
+
+ shrinker_register(f2fs_shrinker_info);
+
+ return 0;
+}
+
+static void f2fs_exit_shrinker(void)
+{
+ shrinker_free(f2fs_shrinker_info);
+}
enum {
Opt_gc_background,
@@ -304,7 +320,7 @@ struct kmem_cache *f2fs_cf_name_slab;
static int __init f2fs_create_casefold_cache(void)
{
f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name",
- F2FS_NAME_LEN);
+ F2FS_NAME_LEN);
return f2fs_cf_name_slab ? 0 : -ENOMEM;
}
@@ -694,6 +710,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!strcmp(name, "on")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
} else if (!strcmp(name, "off")) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "zoned devices need bggc");
+ kfree(name);
+ return -EINVAL;
+ }
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
} else if (!strcmp(name, "sync")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
@@ -1309,13 +1330,13 @@ default_check:
return -EINVAL;
}
#endif
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (f2fs_sb_has_casefold(sbi)) {
+
+ if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
-#endif
+
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
@@ -1371,11 +1392,6 @@ default_check:
}
}
- if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) {
- f2fs_err(sbi, "LFS is not compatible with checkpoint=disable");
- return -EINVAL;
- }
-
if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) {
f2fs_err(sbi, "LFS is not compatible with ATGC");
return -EINVAL;
@@ -1499,6 +1515,12 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync)
inc_page_count(sbi, F2FS_DIRTY_IMETA);
}
spin_unlock(&sbi->inode_lock[DIRTY_META]);
+
+ /* if the atomic write is not committed yet, flag the inode as atomic-dirtied */
+ if (!ret && f2fs_is_atomic_file(inode) &&
+ !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
+ set_inode_flag(inode, FI_ATOMIC_DIRTIED);
+
return ret;
}
@@ -1559,7 +1581,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- blkdev_put(FDEV(i).bdev, sbi->sb);
+ bdev_fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
@@ -1666,12 +1688,10 @@ static void f2fs_put_super(struct super_block *sb)
kvfree(sbi->ckpt);
- sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
- destroy_device_list(sbi);
f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
#ifdef CONFIG_QUOTA
@@ -1686,7 +1706,6 @@ static void f2fs_put_super(struct super_block *sb)
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
- kfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1768,26 +1787,32 @@ static int f2fs_statfs_project(struct super_block *sb,
limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
dquot->dq_dqb.dqb_bhardlimit);
- if (limit)
- limit >>= sb->s_blocksize_bits;
+ limit >>= sb->s_blocksize_bits;
+
+ if (limit) {
+ uint64_t remaining = 0;
- if (limit && buf->f_blocks > limit) {
curblock = (dquot->dq_dqb.dqb_curspace +
dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
- buf->f_blocks = limit;
- buf->f_bfree = buf->f_bavail =
- (buf->f_blocks > curblock) ?
- (buf->f_blocks - curblock) : 0;
+ if (limit > curblock)
+ remaining = limit - curblock;
+
+ buf->f_blocks = min(buf->f_blocks, limit);
+ buf->f_bfree = min(buf->f_bfree, remaining);
+ buf->f_bavail = min(buf->f_bavail, remaining);
}
limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
dquot->dq_dqb.dqb_ihardlimit);
- if (limit && buf->f_files > limit) {
- buf->f_files = limit;
- buf->f_ffree =
- (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
- (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dquot->dq_dqb.dqb_curinodes)
+ remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+ buf->f_files = min(buf->f_files, limit);
+ buf->f_ffree = min(buf->f_ffree, remaining);
}
spin_unlock(&dquot->dq_dqb_lock);
@@ -1845,9 +1870,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid = u64_to_fsid(id);
#ifdef CONFIG_QUOTA
- if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
+ if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) &&
sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
- f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf);
+ f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf);
}
#endif
return 0;
@@ -2205,6 +2230,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
.err_gc_skipped = true,
+ .no_bg_gc = true,
.nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
@@ -2284,9 +2310,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
unsigned long old_sb_flags;
int err;
bool need_restart_gc = false, need_stop_gc = false;
- bool need_restart_ckpt = false, need_stop_ckpt = false;
bool need_restart_flush = false, need_stop_flush = false;
bool need_restart_discard = false, need_stop_discard = false;
+ bool need_enable_checkpoint = false, need_disable_checkpoint = false;
bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
@@ -2339,6 +2365,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto restore_opts;
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) &&
+ sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ err = -EINVAL;
+ goto restore_opts;
+ }
+#endif
+
/* flush outstanding errors before changing fs state */
flush_work(&sbi->s_error_work);
@@ -2443,24 +2480,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_IS_CLOSE);
}
- if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
- !test_opt(sbi, MERGE_CHECKPOINT)) {
- f2fs_stop_ckpt_thread(sbi);
- need_restart_ckpt = true;
- } else {
- /* Flush if the prevous checkpoint, if exists. */
- f2fs_flush_ckpt_thread(sbi);
-
- err = f2fs_start_ckpt_thread(sbi);
- if (err) {
- f2fs_err(sbi,
- "Failed to start F2FS issue_checkpoint_thread (%d)",
- err);
- goto restore_gc;
- }
- need_stop_ckpt = true;
- }
-
/*
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
@@ -2472,7 +2491,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
} else {
err = f2fs_create_flush_cmd_control(sbi);
if (err)
- goto restore_ckpt;
+ goto restore_gc;
need_stop_flush = true;
}
@@ -2494,8 +2513,31 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
err = f2fs_disable_checkpoint(sbi);
if (err)
goto restore_discard;
+ need_enable_checkpoint = true;
} else {
f2fs_enable_checkpoint(sbi);
+ need_disable_checkpoint = true;
+ }
+ }
+
+ /*
+ * Place this routine at the end, since a new checkpoint would be
+ * triggered during remount and we need to take care of it before
+ * returning from remount.
+ */
+ if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+ !test_opt(sbi, MERGE_CHECKPOINT)) {
+ f2fs_stop_ckpt_thread(sbi);
+ } else {
+ /* Flush the previous checkpoint, if it exists. */
+ f2fs_flush_ckpt_thread(sbi);
+
+ err = f2fs_start_ckpt_thread(sbi);
+ if (err) {
+ f2fs_err(sbi,
+ "Failed to start F2FS issue_checkpoint_thread (%d)",
+ err);
+ goto restore_checkpoint;
}
}
@@ -2513,6 +2555,13 @@ skip:
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
+restore_checkpoint:
+ if (need_enable_checkpoint) {
+ f2fs_enable_checkpoint(sbi);
+ } else if (need_disable_checkpoint) {
+ if (f2fs_disable_checkpoint(sbi))
+ f2fs_warn(sbi, "checkpoint has not been disabled");
+ }
restore_discard:
if (need_restart_discard) {
if (f2fs_start_discard_thread(sbi))
@@ -2528,13 +2577,6 @@ restore_flush:
clear_opt(sbi, FLUSH_MERGE);
f2fs_destroy_flush_cmd_control(sbi, false);
}
-restore_ckpt:
- if (need_restart_ckpt) {
- if (f2fs_start_ckpt_thread(sbi))
- f2fs_warn(sbi, "background ckpt thread has stopped");
- } else if (need_stop_ckpt) {
- f2fs_stop_ckpt_thread(sbi);
- }
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
@@ -2673,7 +2715,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
const struct address_space_operations *a_ops = mapping->a_ops;
int offset = off & (sb->s_blocksize - 1);
size_t towrite = len;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int err = 0;
int tocopy;
@@ -2683,7 +2725,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
towrite);
retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy,
- &page, &fsdata);
+ &folio, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
@@ -2693,10 +2735,10 @@ retry:
break;
}
- memcpy_to_page(page, offset, data, tocopy);
+ memcpy_to_folio(folio, offset_in_folio(folio, off), data, tocopy);
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
- page, fsdata);
+ folio, fsdata);
offset = 0;
towrite -= tocopy;
off += tocopy;
@@ -2706,7 +2748,7 @@ retry:
if (len == towrite)
return err;
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
f2fs_mark_inode_dirty_sync(inode, false);
return len - towrite;
}
@@ -3200,13 +3242,6 @@ static bool f2fs_has_stable_inodes(struct super_block *sb)
return true;
}
-static void f2fs_get_ino_and_lblk_bits(struct super_block *sb,
- int *ino_bits_ret, int *lblk_bits_ret)
-{
- *ino_bits_ret = 8 * sizeof(nid_t);
- *lblk_bits_ret = 8 * sizeof(block_t);
-}
-
static struct block_device **f2fs_get_devices(struct super_block *sb,
unsigned int *num_devs)
{
@@ -3228,13 +3263,15 @@ static struct block_device **f2fs_get_devices(struct super_block *sb,
}
static const struct fscrypt_operations f2fs_cryptops = {
- .key_prefix = "f2fs:",
+ .needs_bounce_pages = 1,
+ .has_32bit_inodes = 1,
+ .supports_subblock_data_units = 1,
+ .legacy_key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
.get_dummy_policy = f2fs_get_dummy_policy,
.empty_dir = f2fs_empty_dir,
.has_stable_inodes = f2fs_has_stable_inodes,
- .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
.get_devices = f2fs_get_devices,
};
#endif
@@ -3279,6 +3316,7 @@ static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
}
static const struct export_operations f2fs_export_ops = {
+ .encode_fh = generic_encode_ino32_fh,
.fh_to_dentry = f2fs_fh_to_dentry,
.fh_to_parent = f2fs_fh_to_parent,
.get_parent = f2fs_get_parent,
@@ -3312,27 +3350,54 @@ loff_t max_file_blocks(struct inode *inode)
leaf_count *= NIDS_PER_BLOCK;
result += leaf_count;
+ /*
+ * For compatibility with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{64,32} with
+ * a 4K crypto data unit, we must restrict the max filesize to what can
+ * fit within U32_MAX + 1 data units.
+ */
+
+ result = umin(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096));
+
return result;
}
-static int __f2fs_commit_super(struct buffer_head *bh,
- struct f2fs_super_block *super)
+static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio,
+ pgoff_t index, bool update)
{
- lock_buffer(bh);
- if (super)
- memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
- set_buffer_dirty(bh);
- unlock_buffer(bh);
-
+ struct bio *bio;
/* it's rare case, we can do fua all the time */
- return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
+ blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA;
+ int ret;
+
+ folio_lock(folio);
+ folio_wait_writeback(folio);
+ if (update)
+ memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi),
+ sizeof(struct f2fs_super_block));
+ folio_mark_dirty(folio);
+ folio_clear_dirty_for_io(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+
+ bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS);
+
+ /* it doesn't need to set crypto context for superblock update */
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio));
+
+ if (!bio_add_folio(bio, folio, folio_size(folio), 0))
+ f2fs_bug_on(sbi, 1);
+
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ folio_end_writeback(folio);
+
+ return ret;
}
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
- struct buffer_head *bh)
+ struct folio *folio, pgoff_t index)
{
- struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
- (bh->b_data + F2FS_SUPER_OFFSET);
+ struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index);
struct super_block *sb = sbi->sb;
u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
@@ -3407,7 +3472,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
res = "internally";
} else {
- err = __f2fs_commit_super(bh, NULL);
+ err = __f2fs_commit_super(sbi, folio, index, false);
res = err ? "failed" : "done";
}
f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)",
@@ -3420,12 +3485,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
}
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
- struct buffer_head *bh)
+ struct folio *folio, pgoff_t index)
{
block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main;
block_t total_sections, blocks_per_seg;
- struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
- (bh->b_data + F2FS_SUPER_OFFSET);
+ struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index);
size_t crc_offset = 0;
__u32 crc = 0;
@@ -3451,7 +3515,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
}
- /* Currently, support only 4KB block size */
+ /* only a block size equal to PAGE_SIZE is supported */
if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) {
f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u",
le32_to_cpu(raw_super->log_blocksize),
@@ -3466,7 +3530,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- /* Currently, support 512/1024/2048/4096 bytes sector size */
+ /* Currently, support 512/1024/2048/4096/16K bytes sector size */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
@@ -3583,7 +3647,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
- if (sanity_check_area_boundary(sbi, bh))
+ if (sanity_check_area_boundary(sbi, folio, index))
return -EFSCORRUPTED;
return 0;
@@ -3604,6 +3668,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
block_t user_block_count, valid_user_blocks;
block_t avail_node_count, valid_node_count;
unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
+ unsigned int sit_blk_cnt;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -3715,6 +3780,13 @@ skip_cross:
return 1;
}
+ sit_blk_cnt = DIV_ROUND_UP(main_segs, SIT_ENTRY_PER_BLOCK);
+ if (sit_bitmap_size * 8 < sit_blk_cnt) {
+ f2fs_err(sbi, "Wrong bitmap size: sit: %u, sit_blk_cnt:%u",
+ sit_bitmap_size, sit_blk_cnt);
+ return 1;
+ }
+
cp_pack_start_sum = __start_sum_addr(sbi);
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
@@ -3766,9 +3838,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
sbi->total_sections = le32_to_cpu(raw_super->section_count);
- sbi->total_node_count =
- ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
- NAT_ENTRY_PER_BLOCK) << sbi->log_blocks_per_seg;
+ sbi->total_node_count = SEGS_TO_BLKS(sbi,
+ ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
+ NAT_ENTRY_PER_BLOCK));
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -3778,6 +3850,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = SEGS_PER_SEC(sbi);
+ sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ?
+ DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi);
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
@@ -3872,11 +3946,24 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
sector_t nr_sectors = bdev_nr_sectors(bdev);
struct f2fs_report_zones_args rep_zone_arg;
u64 zone_sectors;
+ unsigned int max_open_zones;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
+ if (bdev_is_zoned(FDEV(devi).bdev)) {
+ max_open_zones = bdev_max_open_zones(bdev);
+ if (max_open_zones && (max_open_zones < sbi->max_open_zones))
+ sbi->max_open_zones = max_open_zones;
+ if (sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ return -EINVAL;
+ }
+ }
+
zone_sectors = bdev_zone_sectors(bdev);
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(zone_sectors))
@@ -3917,7 +4004,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
{
struct super_block *sb = sbi->sb;
int block;
- struct buffer_head *bh;
+ struct folio *folio;
struct f2fs_super_block *super;
int err = 0;
@@ -3926,32 +4013,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
return -ENOMEM;
for (block = 0; block < 2; block++) {
- bh = sb_bread(sb, block);
- if (!bh) {
+ folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL);
+ if (IS_ERR(folio)) {
f2fs_err(sbi, "Unable to read %dth superblock",
block + 1);
- err = -EIO;
+ err = PTR_ERR(folio);
*recovery = 1;
continue;
}
/* sanity checking of raw super */
- err = sanity_check_raw_super(sbi, bh);
+ err = sanity_check_raw_super(sbi, folio, block);
if (err) {
f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
block + 1);
- brelse(bh);
+ folio_put(folio);
*recovery = 1;
continue;
}
if (!*raw_super) {
- memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
+ memcpy(super, F2FS_SUPER_BLOCK(folio, block),
sizeof(*super));
*valid_super_block = block;
*raw_super = super;
}
- brelse(bh);
+ folio_put(folio);
}
/* No valid superblock */
@@ -3965,7 +4052,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
- struct buffer_head *bh;
+ struct folio *folio;
+ pgoff_t index;
__u32 crc = 0;
int err;
@@ -3983,22 +4071,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
}
/* write back-up superblock first */
- bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
- if (!bh)
- return -EIO;
- err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
- brelse(bh);
+ index = sbi->valid_super_block ? 0 : 1;
+ folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = __f2fs_commit_super(sbi, folio, index, true);
+ folio_put(folio);
/* if we are in recovery path, skip writing valid superblock */
if (recover || err)
return err;
/* write current valid superblock */
- bh = sb_bread(sbi->sb, sbi->valid_super_block);
- if (!bh)
- return -EIO;
- err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
- brelse(bh);
+ index = sbi->valid_super_block;
+ folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = __f2fs_commit_super(sbi, folio, index, true);
+ folio_put(folio);
return err;
}
@@ -4033,7 +4123,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
f2fs_up_write(&sbi->sb_lock);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record stop_reason, err:%d",
+ err);
}
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
@@ -4076,8 +4168,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error)
err = f2fs_commit_super(sbi, false);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
- error, err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record errors:%u, err:%d",
+ error, err);
out_unlock:
f2fs_up_write(&sbi->sb_lock);
}
@@ -4168,6 +4261,37 @@ static void f2fs_record_error_work(struct work_struct *work)
f2fs_record_stop_reason(sbi);
}
+static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+ unsigned int zoneno, total_zones;
+ int devi;
+
+ if (!f2fs_sb_has_blkzoned(sbi))
+ return NULL_SEGNO;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++) {
+ if (!bdev_is_zoned(FDEV(devi).bdev))
+ continue;
+
+ total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments);
+
+ for (zoneno = 0; zoneno < total_zones; zoneno++) {
+ unsigned int segs, blks;
+
+ if (!f2fs_zone_is_seq(sbi, devi, zoneno))
+ continue;
+
+ segs = GET_SEG_FROM_SEC(sbi,
+ zoneno * sbi->secs_per_zone);
+ blks = SEGS_TO_BLKS(sbi, segs);
+ return GET_SEGNO(sbi, FDEV(devi).start_blk + blks);
+ }
+ }
+#endif
+ return NULL_SEGNO;
+}
+
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4196,10 +4320,22 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
+#ifdef CONFIG_BLK_DEV_ZONED
+ sbi->max_open_zones = UINT_MAX;
+ sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ;
+#endif
for (i = 0; i < max_devices; i++) {
+ if (max_devices == 1) {
+ FDEV(i).total_segments =
+ le32_to_cpu(raw_super->segment_count_main);
+ FDEV(i).start_blk = 0;
+ FDEV(i).end_blk = FDEV(i).total_segments *
+ BLKS_PER_SEG(sbi);
+ }
+
if (i == 0)
- FDEV(0).bdev = sbi->sb->s_bdev;
+ FDEV(0).bdev_file = sbi->sb->s_bdev_file;
else if (!RDEV(i).path[0])
break;
@@ -4211,21 +4347,22 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (i == 0) {
FDEV(i).start_blk = 0;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1 +
- le32_to_cpu(raw_super->segment0_blkaddr);
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1 +
+ le32_to_cpu(raw_super->segment0_blkaddr);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1;
- FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
- mode, sbi->sb, NULL);
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1;
+ FDEV(i).bdev_file = bdev_file_open_by_path(
+ FDEV(i).path, mode, sbi->sb, NULL);
}
}
- if (IS_ERR(FDEV(i).bdev))
- return PTR_ERR(FDEV(i).bdev);
+ if (IS_ERR(FDEV(i).bdev_file))
+ return PTR_ERR(FDEV(i).bdev_file);
+ FDEV(i).bdev = file_bdev(FDEV(i).bdev_file);
/* to release errored devices */
sbi->s_ndevs = i + 1;
@@ -4233,24 +4370,21 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
sbi->aligned_blksize = false;
#ifdef CONFIG_BLK_DEV_ZONED
- if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_has_blkzoned(sbi)) {
- f2fs_err(sbi, "Zoned block device feature not enabled");
- return -EINVAL;
- }
- if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
+ if (bdev_is_zoned(FDEV(i).bdev)) {
+ if (!f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device feature not enabled");
+ return -EINVAL;
+ }
if (init_blkz_info(sbi, i)) {
f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
- f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: Host-managed)",
i, FDEV(i).path,
FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk,
- bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
- "Host-aware" : "Host-managed");
+ FDEV(i).start_blk, FDEV(i).end_blk);
continue;
}
#endif
@@ -4448,7 +4582,8 @@ try_onemore:
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
- memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_sysfs_name_bdev(sb);
sb->s_iflags |= SB_I_CGROUPWB;
/* init f2fs-specific super block info */
@@ -4565,6 +4700,9 @@ try_onemore:
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
+ /* get segno of first zoned block device */
+ sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi);
+
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
if (__exist_node_summaries(sbi))
@@ -4599,6 +4737,7 @@ try_onemore:
goto free_node_inode;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
@@ -4682,19 +4821,25 @@ try_onemore:
#ifdef CONFIG_QUOTA
f2fs_recover_quota_end(sbi, quota_enabled);
#endif
-
+reset_checkpoint:
/*
* If the f2fs is not readonly and fsync data recovery succeeds,
* check zoned block devices' write pointer consistency.
*/
- if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
- err = f2fs_check_write_pointer(sbi);
- if (err)
- goto free_meta;
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) {
+ int err2;
+
+ f2fs_notice(sbi, "Checking entire write pointers");
+ err2 = f2fs_check_write_pointer(sbi);
+ if (err2)
+ err = err2;
}
+ if (err)
+ goto free_meta;
-reset_checkpoint:
- f2fs_init_inmem_curseg(sbi);
+ err = f2fs_init_inmem_curseg(sbi);
+ if (err)
+ goto sync_free_meta;
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -4818,6 +4963,7 @@ free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi);
+ sb->s_fs_info = NULL;
/* give only one another chance */
if (retry_cnt > 0 && skip_recovery) {
@@ -4836,9 +4982,9 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
- if (sb->s_root) {
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ if (sb->s_root) {
set_sbi_flag(sbi, SBI_IS_CLOSE);
f2fs_stop_gc_thread(sbi);
f2fs_stop_discard_thread(sbi);
@@ -4865,6 +5011,12 @@ static void kill_f2fs_super(struct super_block *sb)
sb->s_flags &= ~SB_RDONLY;
}
kill_block_super(sb);
+ /* Release block devices last, after fscrypt_destroy_keyring(). */
+ if (sbi) {
+ destroy_device_list(sbi);
+ kfree(sbi);
+ sb->s_fs_info = NULL;
+ }
}
static struct file_system_type f2fs_fs_type = {
@@ -4898,12 +5050,6 @@ static int __init init_f2fs_fs(void)
{
int err;
- if (PAGE_SIZE != F2FS_BLKSIZE) {
- printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n",
- PAGE_SIZE, F2FS_BLKSIZE);
- return -EINVAL;
- }
-
err = init_inodecache();
if (err)
goto fail;
@@ -4928,12 +5074,9 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_sysfs();
if (err)
goto free_garbage_collection_cache;
- err = register_shrinker(&f2fs_shrinker_info, "f2fs-shrinker");
+ err = f2fs_init_shrinker();
if (err)
goto free_sysfs;
- err = register_filesystem(&f2fs_fs_type);
- if (err)
- goto free_shrinker;
f2fs_create_root_stats();
err = f2fs_init_post_read_processing();
if (err)
@@ -4956,7 +5099,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_create_casefold_cache();
if (err)
goto free_compress_cache;
+ err = register_filesystem(&f2fs_fs_type);
+ if (err)
+ goto free_casefold_cache;
return 0;
+free_casefold_cache:
+ f2fs_destroy_casefold_cache();
free_compress_cache:
f2fs_destroy_compress_cache();
free_compress_mempool:
@@ -4971,9 +5119,7 @@ free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
f2fs_destroy_root_stats();
- unregister_filesystem(&f2fs_fs_type);
-free_shrinker:
- unregister_shrinker(&f2fs_shrinker_info);
+ f2fs_exit_shrinker();
free_sysfs:
f2fs_exit_sysfs();
free_garbage_collection_cache:
@@ -4996,6 +5142,7 @@ fail:
static void __exit exit_f2fs_fs(void)
{
+ unregister_filesystem(&f2fs_fs_type);
f2fs_destroy_casefold_cache();
f2fs_destroy_compress_cache();
f2fs_destroy_compress_mempool();
@@ -5004,8 +5151,7 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
- unregister_filesystem(&f2fs_fs_type);
- unregister_shrinker(&f2fs_shrinker_info);
+ f2fs_exit_shrinker();
f2fs_exit_sysfs();
f2fs_destroy_garbage_collection_cache();
f2fs_destroy_extent_cache();