diff options
Diffstat (limited to 'fs/f2fs/f2fs.h')
-rw-r--r-- | fs/f2fs/f2fs.h | 173 |
1 files changed, 145 insertions, 28 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68b44015514f..cd1e65bcf0b0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -123,6 +123,20 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +/* + * An implementation of an rwsem that is explicitly unfair to readers. This + * prevents priority inversion when a low-priority reader acquires the read lock + * while sleeping on the write lock but the write lock is needed by + * higher-priority clients. + */ + +struct f2fs_rwsem { + struct rw_semaphore internal_rwsem; +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wait_queue_head_t read_waiters; +#endif +}; + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -386,6 +400,10 @@ struct discard_cmd_control { struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int max_discard_request; /* max. discard request per round */ + unsigned int min_discard_issue_time; /* min. interval between discard issue */ + unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ + unsigned int max_discard_issue_time; /* max. interval between discard issue */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ @@ -561,6 +579,9 @@ enum { /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 +/* maximum retry of EIO'ed meta page */ +#define MAX_RETRY_META_PAGE_EIO 100 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ @@ -574,6 +595,9 @@ enum { /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS +#define RECOVERY_MIN_RA_BLOCKS 1 + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -721,7 +745,8 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ + FI_SKIP_WRITES, /* should skip data page writeback */ + FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ FI_HOT_DATA, /* indicate file is hot */ @@ -752,7 +777,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ - struct rw_semaphore i_sem; /* protect fi info */ + struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ @@ -777,8 +802,8 @@ struct f2fs_inode_info { struct extent_tree *extent_tree; /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ - struct rw_semaphore i_gc_rwsem[2]; - struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ + struct f2fs_rwsem i_gc_rwsem[2]; + struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */ int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ @@ -897,6 +922,7 @@ struct f2fs_nm_info { nid_t max_nid; /* maximum possible node ids */ nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ + nid_t max_rf_node_blocks; /* max # of nodes for recovery */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */ @@ -904,7 +930,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat entry tree */ + struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ @@ -1017,7 +1043,7 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ - struct rw_semaphore curseg_lock; /* for preventing curseg change */ + struct f2fs_rwsem curseg_lock; /* for preventing curseg change */ block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ @@ -1201,11 +1227,11 @@ struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ - struct rw_semaphore io_rwsem; /* blocking op for bio */ + struct f2fs_rwsem io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ struct list_head bio_list; /* bio entry list head */ - struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ + struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -1267,6 +1293,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ }; enum { @@ -1286,6 +1313,7 @@ enum { GC_IDLE_AT, GC_URGENT_HIGH, GC_URGENT_LOW, + GC_URGENT_MID, MAX_GC_MODE, }; @@ -1571,7 +1599,7 @@ struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - struct rw_semaphore sb_lock; /* lock for raw super block */ + struct f2fs_rwsem sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ @@ -1591,18 +1619,20 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ - struct rw_semaphore io_order_lock; + struct f2fs_rwsem io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ + pgoff_t metapage_eio_ofs; /* EIO page offset */ + int metapage_eio_cnt; /* EIO count */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ - struct rw_semaphore node_write; /* locking node writes */ - struct rw_semaphore node_change; /* locking node change */ + struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */ + struct f2fs_rwsem cp_rwsem; /* blocking FS operations */ + struct f2fs_rwsem node_write; /* locking node writes */ + struct f2fs_rwsem node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -1662,12 +1692,14 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ - struct rw_semaphore quota_sem; /* blocking cp for flags */ + struct f2fs_rwsem quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* # of node block writes as roll forward recovery */ + struct percpu_counter rf_node_block_count; /* writeback control */ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ @@ -1678,7 +1710,7 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct rw_semaphore gc_lock; /* + struct f2fs_rwsem gc_lock; /* * semaphore for GC, avoid * race between GC and GC or CP */ @@ -1698,7 +1730,7 @@ struct f2fs_sb_info { /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; - struct rw_semaphore pin_sem; + struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -2092,9 +2124,81 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +#define init_f2fs_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_f2fs_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, + const char *sem_name, struct lock_class_key *key) +{ + __init_rwsem(&sem->internal_rwsem, sem_name, key); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + init_waitqueue_head(&sem->read_waiters); +#endif +} + +static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem) +{ + return rwsem_is_locked(&sem->internal_rwsem); +} + +static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem) +{ + return rwsem_is_contended(&sem->internal_rwsem); +} + +static inline void f2fs_down_read(struct f2fs_rwsem *sem) +{ +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem)); +#else + down_read(&sem->internal_rwsem); +#endif +} + +static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) +{ + return down_read_trylock(&sem->internal_rwsem); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#endif + +static inline void f2fs_up_read(struct f2fs_rwsem *sem) +{ + up_read(&sem->internal_rwsem); +} + +static inline void f2fs_down_write(struct f2fs_rwsem *sem) +{ + down_write(&sem->internal_rwsem); +} + +static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) +{ + return down_write_trylock(&sem->internal_rwsem); +} + +static inline void f2fs_up_write(struct f2fs_rwsem *sem) +{ + up_write(&sem->internal_rwsem); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wake_up_all(&sem->read_waiters); +#endif +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + f2fs_down_read(&sbi->cp_rwsem); } static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) @@ -2103,22 +2207,22 @@ static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) f2fs_show_injection_info(sbi, FAULT_LOCK_OP); return 0; } - return down_read_trylock(&sbi->cp_rwsem); + return f2fs_down_read_trylock(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + f2fs_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + f2fs_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + f2fs_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) @@ -2681,6 +2785,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) if (is_inflight_io(sbi, type)) return false; + if (sbi->gc_mode == GC_URGENT_MID) + return true; + if (sbi->gc_mode == GC_URGENT_LOW && (type == DISCARD_TIME || type == GC_TIME)) return true; @@ -3579,7 +3686,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks); long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type); void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); @@ -3597,7 +3705,7 @@ void f2fs_add_orphan_inode(struct inode *inode); void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi); int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); -void f2fs_update_dirty_page(struct inode *inode, struct page *page); +void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); @@ -3631,7 +3739,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, - block_t blk_addr, struct bio *bio); + block_t blk_addr, sector_t *sector); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); void f2fs_set_data_blkaddr(struct dnode_of_data *dn); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); @@ -3661,8 +3769,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, enum iostat_type io_type, int compr_blocks, bool allow_balance); void f2fs_write_failed(struct inode *inode, loff_t to); -void f2fs_invalidate_page(struct page *page, unsigned int offset, - unsigned int length); +void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); int f2fs_release_page(struct page *page, gfp_t wait); #ifdef CONFIG_MIGRATION int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, @@ -4371,7 +4478,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - if (f2fs_post_read_required(inode)) + if (!fscrypt_dio_supported(iocb, iter)) + return true; + if (fsverity_active(inode)) + return true; + if (f2fs_compressed_file(inode)) return true; /* disallow direct IO if any of devices has unaligned blksize */ @@ -4426,6 +4537,12 @@ static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK; } +static inline void f2fs_io_schedule_timeout(long timeout) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + io_schedule_timeout(timeout); +} + #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ |