From 343a3e8bc635bd4c58d45a4fe67f9c3a78fbd191 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:20:50 +0100 Subject: bpf: Fix -Wshadow warnings There are thousands of warnings about one macro in a W=2 build: include/linux/filter.h:561:6: warning: declaration of 'ret' shadows a previous local [-Wshadow] Prefix all the locals in that macro with __ to avoid most of these warnings. Fixes: 492ecee892c2 ("bpf: enable program stats") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201026162110.3710415-1-arnd@kernel.org --- include/linux/filter.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..1b62397bd124 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -558,21 +558,21 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ - u32 ret; \ + u32 __ret; \ cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ + struct bpf_prog_stats *__stats; \ + u64 __start = sched_clock(); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&__stats->syncp); \ + __stats->cnt++; \ + __stats->nsecs += sched_clock() - __start; \ + u64_stats_update_end(&__stats->syncp); \ } else { \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ } \ - ret; }) + __ret; }) #define BPF_PROG_RUN(prog, ctx) \ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) -- cgit v1.2.3 From 080b6f40763565f65ebb9540219c71ce885cf568 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 18:15:05 +0100 Subject: bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a function scope __attribute__((optimize("-fno-gcse"))), to disable a GCC specific optimization that was causing trouble on x86 builds, and was not expected to have any positive effect in the first place. However, as the GCC manual documents, __attribute__((optimize)) is not for production use, and results in all other optimization options to be forgotten for the function in question. This can cause all kinds of trouble, but in one particular reported case, it causes -fno-asynchronous-unwind-tables to be disregarded, resulting in .eh_frame info to be emitted for the function. This reverts commit 3193c0836, and instead, it disables the -fgcse optimization for the entire source file, but only when building for X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the original commit states that CONFIG_RETPOLINE=n triggers the issue, whereas CONFIG_RETPOLINE=y performs better without the optimization, so it is kept disabled in both cases. Fixes: 3193c0836f20 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") Signed-off-by: Ard Biesheuvel Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20201028171506.15682-2-ardb@kernel.org --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler_types.h | 4 ---- kernel/bpf/Makefile | 6 +++++- kernel/bpf/core.c | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d1e3c6896b71..5deb37024574 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -175,5 +175,3 @@ #else #define __diag_GCC_8(s) #endif - -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..ac3fa37a84f9 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -247,10 +247,6 @@ struct ftrace_likely_data { #define asm_inline asm #endif -#ifndef __no_fgcse -# define __no_fgcse -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index bdc8cd1b6767..c1b9f71ee6aa 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := core.o -CFLAGS_core.o += $(call cc-disable-warning, override-init) +ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) +# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details +cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse +endif +CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9268d77898b7..55454d2278b1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * * Decode and execute eBPF instructions. */ -static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) +static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z -- cgit v1.2.3 From fc0021aa340af65a0a37d77be39e22aa886a6132 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Oct 2020 08:33:09 +0200 Subject: swiotlb: remove the tbl_dma_addr argument to swiotlb_tbl_map_single The tbl_dma_addr argument is used to check the DMA boundary for the allocations, and thus needs to be a dma_addr_t. swiotlb-xen instead passed a physical address, which could lead to incorrect results for strange offsets. Fix this by removing the parameter entirely and hard code the DMA address for io_tlb_start instead. Fixes: 91ffe4ad534a ("swiotlb-xen: introduce phys_to_dma/dma_to_phys translations") Signed-off-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- drivers/iommu/intel/iommu.c | 5 ++--- drivers/xen/swiotlb-xen.c | 3 +-- include/linux/swiotlb.h | 10 +++------- kernel/dma/swiotlb.c | 16 ++++++---------- 4 files changed, 12 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8651f6d4dfa0..6b560e6f1930 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3815,9 +3815,8 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, * page aligned, we don't need to use a bounce page. */ if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { - tlb_addr = swiotlb_tbl_map_single(dev, - phys_to_dma_unencrypted(dev, io_tlb_start), - paddr, size, aligned_size, dir, attrs); + tlb_addr = swiotlb_tbl_map_single(dev, paddr, size, + aligned_size, dir, attrs); if (tlb_addr == DMA_MAPPING_ERROR) { goto swiotlb_error; } else { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 71ce1b7a23d1..2b385c1b4a99 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -395,8 +395,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, */ trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - map = swiotlb_tbl_map_single(dev, virt_to_phys(xen_io_tlb_start), - phys, size, size, dir, attrs); + map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs); if (map == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 513913ff7486..3bb72266a75a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,13 +45,9 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t phys, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs); +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 54078f0d4c87..781b9dca197c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -445,14 +445,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, } } -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs) +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs) { + dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start); unsigned long flags; phys_addr_t tlb_addr; unsigned int nslots, stride, index, wrap; @@ -671,9 +668,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, swiotlb_force); - swiotlb_addr = swiotlb_tbl_map_single(dev, - phys_to_dma_unencrypted(dev, io_tlb_start), - paddr, size, size, dir, attrs); + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, + attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; -- cgit v1.2.3 From ede7dc7fa0af619afc08995776eadb9ff3b0a711 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:54 -0800 Subject: jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs The on-disk superblock field sb->s_maxlen represents the total size of the journal including the fast commit area and is no more the max number of blocks available for a transaction. The maximum number of blocks available to a transaction is reduced by the number of fast commit blocks. So, this patch renames j_maxlen to j_total_len to better represent its intent. Also, it adds a function to calculate max number of bufs available for a transaction. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 2 +- fs/ext4/super.c | 2 +- fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 12 ++++++------ fs/jbd2/recovery.c | 6 +++--- fs/ocfs2/journal.c | 2 +- include/linux/jbd2.h | 9 +++++++-- 7 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index b232c2767534..4c2a9fe30067 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys, /* Fabricate an rmap entry for the external log device. */ irec.fmr_physical = journal->j_blk_offset; - irec.fmr_length = journal->j_maxlen; + irec.fmr_length = journal->j_total_len; irec.fmr_owner = EXT4_FMR_OWN_LOG; irec.fmr_flags = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 738a6dd4957d..8a6dd433bb70 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3976,7 +3976,7 @@ int ext4_calculate_overhead(struct super_block *sb) * loaded or not */ if (sbi->s_journal && !sbi->s_journal_bdev) - overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); + overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len); else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index fa688e163a80..ec516490cb35 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -801,7 +801,7 @@ start_journal_io: if (first_block < journal->j_tail) freed += journal->j_last - journal->j_first; /* Update tail only if we free significant amount of space */ - if (freed < journal->j_maxlen / 4) + if (freed < jbd2_journal_get_max_txn_bufs(journal)) update_tail = 0; } J_ASSERT(commit_transaction->t_state == T_COMMIT); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0c7c42bd530f..c3c768248527 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1348,7 +1348,7 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_dev = bdev; journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; - journal->j_maxlen = len; + journal->j_total_len = len; /* We need enough buffers to write out full descriptor block. */ n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; @@ -1531,7 +1531,7 @@ static int journal_reset(journal_t *journal) journal->j_commit_sequence = journal->j_transaction_sequence - 1; journal->j_commit_request = journal->j_commit_sequence; - journal->j_max_transaction_buffers = journal->j_maxlen / 4; + journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); /* * As a special case, if the on-disk copy is already marked as needing @@ -1792,15 +1792,15 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = be32_to_cpu(sb->s_maxlen); - else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { + if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) + journal->j_total_len = be32_to_cpu(sb->s_maxlen); + else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { printk(KERN_WARNING "JBD2: journal file too short\n"); goto out; } if (be32_to_cpu(sb->s_first) == 0 || - be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + be32_to_cpu(sb->s_first) >= journal->j_total_len) { printk(KERN_WARNING "JBD2: Invalid start block of journal: %u\n", be32_to_cpu(sb->s_first)); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index eb2606133cd8..dc0694fcfcd1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start) /* Do up to 128K of readahead */ max = start + (128 * 1024 / journal->j_blocksize); - if (max > journal->j_maxlen) - max = journal->j_maxlen; + if (max > journal->j_total_len) + max = journal->j_total_len; /* Do the readahead itself. We'll submit MAXBUF buffer_heads at * a time to the block device IO layer. */ @@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, *bhp = NULL; - if (offset >= journal->j_maxlen) { + if (offset >= journal->j_total_len) { printk(KERN_ERR "JBD2: corrupted journal superblock\n"); return -EFSCORRUPTED; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b9a9d69dde7e..db52e843002a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) goto done; } - trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen); + trace_ocfs2_journal_init_maxlen(j_journal->j_total_len); *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1d5566af48ac..e0b6b53eae64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -988,9 +988,9 @@ struct journal_s struct block_device *j_fs_dev; /** - * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_total_len: Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; + unsigned int j_total_len; /** * @j_reserved_credits: @@ -1624,6 +1624,11 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); +static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 4; +} + /* * is_journal_abort * -- cgit v1.2.3 From a1e5e465b31d6015fccb359d99053b39e5180466 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:55 -0800 Subject: ext4: clean up the JBD2 API that initializes fast commits This patch removes jbd2_fc_init() API and its related functions to simplify enabling fast commits. With this change, the number of fast commit blocks to use is solely determined by the JBD2 layer. So, we move the default value for minimum number of fast commit blocks from ext4/fast_commit.h to include/linux/jbd2.h. However, whether or not to use fast commits is determined by the file system. The file system just sets the fast commit feature using jbd2_journal_set_features(). JBD2 layer then determines how many blocks to use for fast commits (based on the value found in the JBD2 superblock). Note that the JBD2 feature flag of fast commits is just an indication that there are fast commit blocks present on disk. It doesn't tell JBD2 layer about the intent of the file system of whether to it wants to use fast commit or not. That's why, we blindly clear the fast commit flag in journal_reset() after the recovery is done. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 14 -------- fs/ext4/fast_commit.h | 3 -- fs/ext4/super.c | 8 +++++ fs/jbd2/journal.c | 96 ++++++++++++++++++++++++++++++--------------------- include/linux/jbd2.h | 2 +- 5 files changed, 65 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 9399e9cccb7e..bab60c5d5095 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -2091,8 +2091,6 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, void ext4_fc_init(struct super_block *sb, journal_t *journal) { - int num_fc_blocks; - /* * We set replay callback even if fast commit disabled because we may * could still have fast commit blocks that need to be replayed even if @@ -2102,18 +2100,6 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal) if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) return; journal->j_fc_cleanup_callback = ext4_fc_cleanup; - if (!buffer_uptodate(journal->j_sb_buffer) - && ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, - true)) { - ext4_msg(sb, KERN_ERR, "I/O error on journal"); - return; - } - num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks); - if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks : - EXT4_NUM_FC_BLKS)) { - pr_warn("Error while enabling fast commits, turning off."); - ext4_clear_feature_fast_commit(sb); - } } const char *fc_ineligible_reasons[] = { diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 140fbb6af19e..1d96e0ac8138 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -3,9 +3,6 @@ #ifndef __FAST_COMMIT_H__ #define __FAST_COMMIT_H__ -/* Number of blocks in journal area to allocate for fast commits */ -#define EXT4_NUM_FC_BLKS 256 - /* Fast commit tags */ #define EXT4_FC_TAG_ADD_RANGE 0x0001 #define EXT4_FC_TAG_DEL_RANGE 0x0002 diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8a6dd433bb70..ba02d7c86fb3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4857,6 +4857,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { + ext4_msg(sb, KERN_ERR, + "Failed to set fast commit journal feature"); + goto failed_mount_wq; + } + /* We have now updated the journal if required, so we can * validate the data journaling mode. */ switch (test_opt(sb, DATA_FLAGS)) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c3c768248527..500152f0421a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1352,19 +1352,12 @@ static journal_t *journal_init_common(struct block_device *bdev, /* We need enough buffers to write out full descriptor block. */ n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; + journal->j_fc_wbuf = NULL; journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), GFP_KERNEL); if (!journal->j_wbuf) goto err_cleanup; - if (journal->j_fc_wbufsize > 0) { - journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize, - sizeof(struct buffer_head *), - GFP_KERNEL); - if (!journal->j_fc_wbuf) - goto err_cleanup; - } - bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); if (!bh) { pr_err("%s: Cannot get buffer for journal superblock\n", @@ -1378,23 +1371,11 @@ static journal_t *journal_init_common(struct block_device *bdev, err_cleanup: kfree(journal->j_wbuf); - kfree(journal->j_fc_wbuf); jbd2_journal_destroy_revoke(journal); kfree(journal); return NULL; } -int jbd2_fc_init(journal_t *journal, int num_fc_blks) -{ - journal->j_fc_wbufsize = num_fc_blks; - journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize, - sizeof(struct buffer_head *), GFP_KERNEL); - if (!journal->j_fc_wbuf) - return -ENOMEM; - return 0; -} -EXPORT_SYMBOL(jbd2_fc_init); - /* jbd2_journal_init_dev and jbd2_journal_init_inode: * * Create a journal structure assigned some fixed set of disk blocks to @@ -1512,16 +1493,7 @@ static int journal_reset(journal_t *journal) } journal->j_first = first; - - if (jbd2_has_feature_fast_commit(journal) && - journal->j_fc_wbufsize > 0) { - journal->j_fc_last = last; - journal->j_last = last - journal->j_fc_wbufsize; - journal->j_fc_first = journal->j_last + 1; - journal->j_fc_off = 0; - } else { - journal->j_last = last; - } + journal->j_last = last; journal->j_head = journal->j_first; journal->j_tail = journal->j_first; @@ -1533,6 +1505,13 @@ static int journal_reset(journal_t *journal) journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); + /* + * Now that journal recovery is done, turn fast commits off here. This + * way, if fast commit was enabled before the crash but if now FS has + * disabled it, we don't enable fast commits. + */ + jbd2_clear_feature_fast_commit(journal); + /* * As a special case, if the on-disk copy is already marked as needing * no recovery (s_start == 0), then we can safely defer the superblock @@ -1872,6 +1851,7 @@ static int load_superblock(journal_t *journal) { int err; journal_superblock_t *sb; + int num_fc_blocks; err = journal_get_superblock(journal); if (err) @@ -1883,15 +1863,17 @@ static int load_superblock(journal_t *journal) journal->j_tail = be32_to_cpu(sb->s_start); journal->j_first = be32_to_cpu(sb->s_first); journal->j_errno = be32_to_cpu(sb->s_errno); + journal->j_last = be32_to_cpu(sb->s_maxlen); - if (jbd2_has_feature_fast_commit(journal) && - journal->j_fc_wbufsize > 0) { + if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); - journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize; + num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks); + if (!num_fc_blocks) + num_fc_blocks = JBD2_MIN_FC_BLOCKS; + if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) + journal->j_last = journal->j_fc_last - num_fc_blocks; journal->j_fc_first = journal->j_last + 1; journal->j_fc_off = 0; - } else { - journal->j_last = be32_to_cpu(sb->s_maxlen); } return 0; @@ -1954,9 +1936,6 @@ int jbd2_journal_load(journal_t *journal) */ journal->j_flags &= ~JBD2_ABORT; - if (journal->j_fc_wbufsize > 0) - jbd2_journal_set_features(journal, 0, 0, - JBD2_FEATURE_INCOMPAT_FAST_COMMIT); /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory * and reset them on disk. */ @@ -2040,8 +2019,7 @@ int jbd2_journal_destroy(journal_t *journal) jbd2_journal_destroy_revoke(journal); if (journal->j_chksum_driver) crypto_free_shash(journal->j_chksum_driver); - if (journal->j_fc_wbufsize > 0) - kfree(journal->j_fc_wbuf); + kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); @@ -2116,6 +2094,37 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp return 0; } +static int +jbd2_journal_initialize_fast_commit(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + unsigned long long num_fc_blks; + + num_fc_blks = be32_to_cpu(sb->s_num_fc_blks); + if (num_fc_blks == 0) + num_fc_blks = JBD2_MIN_FC_BLOCKS; + if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) + return -ENOSPC; + + /* Are we called twice? */ + WARN_ON(journal->j_fc_wbuf != NULL); + journal->j_fc_wbuf = kmalloc_array(num_fc_blks, + sizeof(struct buffer_head *), GFP_KERNEL); + if (!journal->j_fc_wbuf) + return -ENOMEM; + + journal->j_fc_wbufsize = num_fc_blks; + journal->j_fc_last = journal->j_last; + journal->j_last = journal->j_fc_last - num_fc_blks; + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + journal->j_free = journal->j_last - journal->j_first; + journal->j_max_transaction_buffers = + jbd2_journal_get_max_txn_bufs(journal); + + return 0; +} + /** * int jbd2_journal_set_features() - Mark a given journal feature in the superblock * @journal: Journal to act on. @@ -2159,6 +2168,13 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, sb = journal->j_superblock; + if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) { + if (jbd2_journal_initialize_fast_commit(journal)) { + pr_err("JBD2: Cannot enable fast commits.\n"); + return 0; + } + } + /* Load the checksum driver if necessary */ if ((journal->j_chksum_driver == NULL) && INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e0b6b53eae64..b2caf7bbd8e5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_MIN_FC_BLOCKS 256 #ifdef __KERNEL__ @@ -1614,7 +1615,6 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ -int jbd2_fc_init(journal_t *journal, int num_fc_blks); int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); -- cgit v1.2.3 From c460e5edc85a063ec9cb60addff93d00ed378701 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:57 -0800 Subject: jbd2: don't use state lock during commit path Variables journal->j_fc_off, journal->j_fc_wbuf are accessed during commit path. Since today we allow only one process to perform a fast commit, there is no need take state lock before accessing these variables. This patch removes these locks and adds comments to describe this. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-9-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 6 ------ include/linux/jbd2.h | 10 ++++++---- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 500152f0421a..778ea50fc8d1 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -865,7 +865,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) int fc_off; *bh_out = NULL; - write_lock(&journal->j_state_lock); if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { fc_off = journal->j_fc_off; @@ -874,7 +873,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) } else { ret = -EINVAL; } - write_unlock(&journal->j_state_lock); if (ret) return ret; @@ -909,9 +907,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) struct buffer_head *bh; int i, j_fc_off; - read_lock(&journal->j_state_lock); j_fc_off = journal->j_fc_off; - read_unlock(&journal->j_state_lock); /* * Wait in reverse order to minimize chances of us being woken up before @@ -939,9 +935,7 @@ int jbd2_fc_release_bufs(journal_t *journal) struct buffer_head *bh; int i, j_fc_off; - read_lock(&journal->j_state_lock); j_fc_off = journal->j_fc_off; - read_unlock(&journal->j_state_lock); /* * Wait in reverse order to minimize chances of us being woken up before diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b2caf7bbd8e5..5f0ef6380b0c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -945,8 +945,9 @@ struct journal_s /** * @j_fc_off: * - * Number of fast commit blocks currently allocated. - * [j_state_lock]. + * Number of fast commit blocks currently allocated. Accessed only + * during fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ unsigned long j_fc_off; @@ -1109,8 +1110,9 @@ struct journal_s struct buffer_head **j_wbuf; /** - * @j_fc_wbuf: Array of fast commit bhs for - * jbd2_journal_commit_transaction. + * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only + * during a fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; -- cgit v1.2.3 From 0bce577bf9cae13ae32d391432d0030e3f67fc1d Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:58 -0800 Subject: jbd2: don't pass tid to jbd2_fc_end_commit_fallback() In jbd2_fc_end_commit_fallback(), we know which tid to commit. There's no need for caller to pass it. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-10-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 2 +- fs/jbd2/journal.c | 12 +++++++++--- include/linux/jbd2.h | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index bab60c5d5095..e69c580fa91e 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1143,7 +1143,7 @@ out: "Fast commit ended with blks = %d, reason = %d, subtid - %d", nblks, reason, subtid); if (reason == EXT4_FC_REASON_FC_FAILED) - return jbd2_fc_end_commit_fallback(journal, commit_tid); + return jbd2_fc_end_commit_fallback(journal); if (reason == EXT4_FC_REASON_FC_START_FAILED || reason == EXT4_FC_REASON_INELIGIBLE) return jbd2_complete_transaction(journal, commit_tid); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 778ea50fc8d1..59166e299cde 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -777,13 +777,19 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) int jbd2_fc_end_commit(journal_t *journal) { - return __jbd2_fc_end_commit(journal, 0, 0); + return __jbd2_fc_end_commit(journal, 0, false); } EXPORT_SYMBOL(jbd2_fc_end_commit); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid) +int jbd2_fc_end_commit_fallback(journal_t *journal) { - return __jbd2_fc_end_commit(journal, tid, 1); + tid_t tid; + + read_lock(&journal->j_state_lock); + tid = journal->j_running_transaction ? + journal->j_running_transaction->t_tid : 0; + read_unlock(&journal->j_state_lock); + return __jbd2_fc_end_commit(journal, tid, true); } EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5f0ef6380b0c..1c49fd62ff2e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1619,7 +1619,7 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); -- cgit v1.2.3 From 267fb27352b6fc9fdbad753127a239f75618ecbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 15:50:32 +0100 Subject: perf: Reduce stack usage of perf_output_begin() __perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every call to perf_output_begin() must already have a perf_sample_data on-stack. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org --- arch/powerpc/perf/imc-pmu.c | 2 +- arch/s390/kernel/perf_cpum_sf.c | 2 +- arch/x86/events/intel/ds.c | 4 ++-- include/linux/perf_event.h | 7 +++++-- kernel/events/core.c | 32 +++++++++++++++++--------------- kernel/events/ring_buffer.c | 20 +++++++++++--------- 6 files changed, 37 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ed4fcccf8a9..7b25548ec42b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event) /* If this is a valid record, create the sample */ struct perf_output_handle handle; - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, &data, event, header.size)) return; perf_output_sample(&handle, &header, &data, event); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..00255ae3979d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, data, event, header.size)) goto out; /* Update the process ID (see also kernel/events/core.c) */ diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 404315df1e16..cd2ae14a0a98 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void) rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..b775ae0a8c87 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1400,11 +1400,14 @@ perf_event_addr_filters(struct perf_event *event) extern void perf_event_addr_filters_sync(struct perf_event *event); extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, - unsigned int size); + struct perf_sample_data *data, + struct perf_event *event, + unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); diff --git a/kernel/events/core.c b/kernel/events/core.c index 5a29ab09e72d..fc681c7c1e03 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7186,6 +7186,7 @@ __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, int (*output_begin)(struct perf_output_handle *, + struct perf_sample_data *, struct perf_event *, unsigned int)) { @@ -7198,7 +7199,7 @@ __perf_event_output(struct perf_event *event, perf_prepare_sample(&header, data, event, regs); - err = output_begin(&handle, event, header.size); + err = output_begin(&handle, data, event, header.size); if (err) goto exit; @@ -7264,7 +7265,7 @@ perf_event_read_event(struct perf_event *event, int ret; perf_event_header__init_id(&read_event.header, &sample, event); - ret = perf_output_begin(&handle, event, read_event.header.size); + ret = perf_output_begin(&handle, &sample, event, read_event.header.size); if (ret) return; @@ -7533,7 +7534,7 @@ static void perf_event_task_output(struct perf_event *event, perf_event_header__init_id(&task_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, task_event->event_id.header.size); if (ret) goto out; @@ -7636,7 +7637,7 @@ static void perf_event_comm_output(struct perf_event *event, return; perf_event_header__init_id(&comm_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, comm_event->event_id.header.size); if (ret) @@ -7736,7 +7737,7 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event_header__init_id(&namespaces_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, namespaces_event->event_id.header.size); if (ret) goto out; @@ -7863,7 +7864,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data) perf_event_header__init_id(&cgroup_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, cgroup_event->event_id.header.size); if (ret) goto out; @@ -7989,7 +7990,7 @@ static void perf_event_mmap_output(struct perf_event *event, } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, mmap_event->event_id.header.size); if (ret) goto out; @@ -8299,7 +8300,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head, int ret; perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; @@ -8333,7 +8334,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost) perf_event_header__init_id(&lost_samples_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, lost_samples_event.header.size); if (ret) return; @@ -8388,7 +8389,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data) perf_event_header__init_id(&se->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, se->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size); if (ret) return; @@ -8463,7 +8464,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_event_header__init_id(&throttle_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, throttle_event.header.size); if (ret) return; @@ -8506,7 +8507,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data) perf_event_header__init_id(&ksymbol_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, ksymbol_event->event_id.header.size); if (ret) return; @@ -8596,7 +8597,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data) perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, data, event, bpf_event->event_id.header.size); if (ret) return; @@ -8705,7 +8706,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data) perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, + text_poke_event->event_id.header.size); if (ret) return; @@ -8786,7 +8788,7 @@ static void perf_log_itrace_start(struct perf_event *event) rec.tid = perf_event_tid(event, current); perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 192b8abc6330..ef91ae75ca56 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail, static __always_inline int __perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size, bool backward) { @@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle, handle->size = (1UL << page_shift) - offset; if (unlikely(have_lost)) { - struct perf_sample_data sample_data; - lost_event.header.size = sizeof(lost_event); lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.id = event->id; lost_event.lost = local_xchg(&rb->lost, 0); - perf_event_header__init_id(&lost_event.header, - &sample_data, event); + /* XXX mostly redundant; @data is already fully initializes */ + perf_event_header__init_id(&lost_event.header, data, event); perf_output_put(handle, lost_event); - perf_event__output_id_sample(event, handle, &sample_data); + perf_event__output_id_sample(event, handle, data); } return 0; @@ -263,22 +262,25 @@ out: } int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size) + struct perf_sample_data *data, + struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, false); + return __perf_output_begin(handle, data, event, size, false); } int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, true); + return __perf_output_begin(handle, data, event, size, true); } int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, + return __perf_output_begin(handle, data, event, size, unlikely(is_write_backward(event))); } -- cgit v1.2.3 From 76a4efa80900fc40e0fdf243b42aec9fb8c35d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:14:21 +0100 Subject: perf/arch: Remove perf_sample_data::regs_user_copy struct perf_sample_data lives on-stack, we should be careful about it's size. Furthermore, the pt_regs copy in there is only because x86_64 is a trainwreck, solve it differently. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20201030151955.258178461@infradead.org --- arch/arm/kernel/perf_regs.c | 3 +-- arch/arm64/kernel/perf_regs.c | 3 +-- arch/csky/kernel/perf_regs.c | 3 +-- arch/powerpc/perf/perf_regs.c | 3 +-- arch/riscv/kernel/perf_regs.c | 3 +-- arch/s390/kernel/perf_regs.c | 3 +-- arch/x86/kernel/perf_regs.c | 15 +++++++++++---- include/linux/perf_event.h | 6 ------ include/linux/perf_regs.h | 6 ++---- kernel/events/core.c | 8 +++----- 10 files changed, 22 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 05fe92aa7d98..0529f90395c9 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 94e8718e7229..f6f58e6265df 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c index eb32838b8210..09b7f88a2d6a 100644 --- a/arch/csky/kernel/perf_regs.c +++ b/arch/csky/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 8e53f2fc3fe0..6f681b105eec 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -144,8 +144,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c index 04a38fbeb9c7..fd304a248de6 100644 --- a/arch/riscv/kernel/perf_regs.c +++ b/arch/riscv/kernel/perf_regs.c @@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 4352a504f235..6e9e5d5e927e 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { /* * Use the regs from the first interruption and let diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index bb7e1132290b..f9e5352b3bef 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); @@ -129,12 +128,20 @@ u64 perf_reg_abi(struct task_struct *task) return PERF_SAMPLE_REGS_ABI_64; } +static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs); + void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { + struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs); struct pt_regs *user_regs = task_pt_regs(current); + if (!in_nmi()) { + regs_user->regs = user_regs; + regs_user->abi = perf_reg_abi(current); + return; + } + /* * If we're in an NMI that interrupted task_pt_regs setup, then * we can't sample user regs at all. This check isn't really diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b775ae0a8c87..96450f6fb1de 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1022,13 +1022,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; u64 aux_size; - /* - * regs_user may point to task_pt_regs or to regs_user_copy, depending - * on arch details. - */ struct perf_regs regs_user; - struct pt_regs regs_user_copy; - struct perf_regs regs_intr; u64 stack_user_size; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 2d12e97d5e7b..f632c5725f16 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -20,8 +20,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy); + struct pt_regs *regs); #else #define PERF_REG_EXTENDED_MASK 0 @@ -42,8 +41,7 @@ static inline u64 perf_reg_abi(struct task_struct *task) } static inline void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/kernel/events/core.c b/kernel/events/core.c index fc681c7c1e03..d67c9cbb0f6a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6374,14 +6374,13 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; } else if (!(current->flags & PF_KTHREAD)) { - perf_get_regs_user(regs_user, regs, regs_user_copy); + perf_get_regs_user(regs_user, regs); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -7083,8 +7082,7 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs, - &data->regs_user_copy); + perf_sample_regs_user(&data->regs_user, regs); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ -- cgit v1.2.3 From 9a2a9ebc0a758d887ee06e067e9f7f0b36ff7574 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:25:57 +0100 Subject: cpufreq: Introduce governor flags A new cpufreq governor flag will be added subsequently, so replace the bool dynamic_switching fleid in struct cpufreq_governor with a flags field and introduce CPUFREQ_GOV_DYNAMIC_SWITCHING to set for the "dynamic switching" governors instead of it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 2 +- drivers/cpufreq/cpufreq_governor.h | 2 +- include/linux/cpufreq.h | 9 +++++++-- kernel/sched/cpufreq_schedutil.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 336b5e94cbc8..0252903f1b43 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2254,7 +2254,7 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) return -EINVAL; /* Platform doesn't want dynamic frequency switching ? */ - if (policy->governor->dynamic_switching && + if (policy->governor->flags & CPUFREQ_GOV_DYNAMIC_SWITCHING && cpufreq_driver->flags & CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING) { struct cpufreq_governor *gov = cpufreq_fallback_governor(); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c56773c25757..bab8e6140377 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -156,7 +156,7 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy); #define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \ { \ .name = _name_, \ - .dynamic_switching = true, \ + .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING, \ .owner = THIS_MODULE, \ .init = cpufreq_dbs_governor_init, \ .exit = cpufreq_dbs_governor_exit, \ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9bdfcf3c4748 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -570,12 +570,17 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - /* For governors which change frequency dynamically by themselves */ - bool dynamic_switching; struct list_head governor_list; struct module *owner; + u8 flags; }; +/* Governor flags */ + +/* For governors which change frequency dynamically by themselves */ +#define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) + + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index d73bccde2720..97d318b0cd0c 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy) struct cpufreq_governor schedutil_gov = { .name = "schedutil", .owner = THIS_MODULE, - .dynamic_switching = true, + .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING, .init = sugov_init, .exit = sugov_exit, .start = sugov_start, -- cgit v1.2.3 From 218f66870181bec7aaa6e3c72f346039c590c3c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:10 +0100 Subject: cpufreq: Introduce CPUFREQ_GOV_STRICT_TARGET Introduce a new governor flag, CPUFREQ_GOV_STRICT_TARGET, for the governors that want the target frequency to be set exactly to the given value without leaving any room for adjustments on the hardware side and set this flag for the powersave and performance governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_performance.c | 1 + drivers/cpufreq/cpufreq_powersave.c | 1 + include/linux/cpufreq.h | 3 +++ 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index 71c1d9aba772..addd93f2a420 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -20,6 +20,7 @@ static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .owner = THIS_MODULE, + .flags = CPUFREQ_GOV_STRICT_TARGET, .limits = cpufreq_gov_performance_limits, }; diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 7749522355b5..8d830d860e91 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -21,6 +21,7 @@ static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, + .flags = CPUFREQ_GOV_STRICT_TARGET, }; MODULE_AUTHOR("Dominik Brodowski "); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9bdfcf3c4748..6eb9a3b8ec7b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -580,6 +580,9 @@ struct cpufreq_governor { /* For governors which change frequency dynamically by themselves */ #define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) +/* For governors wanting the target frequency to be set exactly */ +#define CPUFREQ_GOV_STRICT_TARGET BIT(1) + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, -- cgit v1.2.3 From ea9364bbadf11f0c55802cf11387d74f524cee84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:37 +0100 Subject: cpufreq: Add strict_target to struct cpufreq_policy Add a new field to be set when the CPUFREQ_GOV_STRICT_TARGET flag is set for the current governor to struct cpufreq_policy, so that the drivers needing to check CPUFREQ_GOV_STRICT_TARGET do not have to access the governor object during every frequency transition. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 2 ++ include/linux/cpufreq.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0252903f1b43..1e7e3f2ff09f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2280,6 +2280,8 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) } } + policy->strict_target = !!(policy->governor->flags & CPUFREQ_GOV_STRICT_TARGET); + return 0; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6eb9a3b8ec7b..acbad3b36322 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -109,6 +109,12 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Set if the CPUFREQ_GOV_STRICT_TARGET flag is set for the current + * governor. + */ + bool strict_target; + /* * Preferred average time interval between consecutive invocations of * the driver to set the frequency for this policy. To be set by the -- cgit v1.2.3 From 8a3c84b649b033024d2349f96234b26cbd6083a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: separate __sb_start_write into blocking and non-blocking helpers Break this function into two helpers so that it's obvious that the trylock versions return a value that must be checked, and the blocking versions don't require that. While we're at it, clean up the return type mismatch. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- fs/aio.c | 2 +- fs/io_uring.c | 3 +-- fs/super.c | 18 ++++++++++++------ include/linux/fs.h | 21 +++++++++++---------- 4 files changed, 25 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index c45c20d87538..6a21d8919409 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, * we return to userspace. */ if (S_ISREG(file_inode(file)->i_mode)) { - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } req->ki_flags |= IOCB_WRITE; diff --git a/fs/io_uring.c b/fs/io_uring.c index b42dfa0243bf..4cbaddfe3d80 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3532,8 +3532,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, * we return to userspace. */ if (req->flags & REQ_F_ISREG) { - __sb_start_write(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE, true); + sb_start_write(file_inode(req->file)->i_sb); __sb_writers_release(file_inode(req->file)->i_sb, SB_FREEZE_WRITE); } diff --git a/fs/super.c b/fs/super.c index e1fd667454d4..59aa59279133 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1645,16 +1645,22 @@ EXPORT_SYMBOL(__sb_end_write); * This is an internal function, please use sb_start_{write,pagefault,intwrite} * instead. */ -int __sb_start_write(struct super_block *sb, int level, bool wait) +void __sb_start_write(struct super_block *sb, int level) { - if (!wait) - return percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - percpu_down_read(sb->s_writers.rw_sem + level-1); - return 1; + percpu_down_read(sb->s_writers.rw_sem + level - 1); } EXPORT_SYMBOL(__sb_start_write); +/* + * This is an internal function, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} +EXPORT_SYMBOL_GPL(__sb_start_write_trylock); + /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..305989afd49c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1581,7 +1581,8 @@ extern struct timespec64 current_time(struct inode *inode); */ void __sb_end_write(struct super_block *sb, int level); -int __sb_start_write(struct super_block *sb, int level, bool wait); +void __sb_start_write(struct super_block *sb, int level); +bool __sb_start_write_trylock(struct super_block *sb, int level); #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) @@ -1645,12 +1646,12 @@ static inline void sb_end_intwrite(struct super_block *sb) */ static inline void sb_start_write(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_WRITE, true); + __sb_start_write(sb, SB_FREEZE_WRITE); } -static inline int sb_start_write_trylock(struct super_block *sb) +static inline bool sb_start_write_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_WRITE, false); + return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** @@ -1674,7 +1675,7 @@ static inline int sb_start_write_trylock(struct super_block *sb) */ static inline void sb_start_pagefault(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); + __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /* @@ -1692,12 +1693,12 @@ static inline void sb_start_pagefault(struct super_block *sb) */ static inline void sb_start_intwrite(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_FS, true); + __sb_start_write(sb, SB_FREEZE_FS); } -static inline int sb_start_intwrite_trylock(struct super_block *sb) +static inline bool sb_start_intwrite_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_FS, false); + return __sb_start_write_trylock(sb, SB_FREEZE_FS); } @@ -2756,14 +2757,14 @@ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; - return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); + return sb_start_write_trylock(file_inode(file)->i_sb); } static inline void file_end_write(struct file *file) -- cgit v1.2.3 From 9b8523423b23ee3dfd88e32f5b7207be56a4e782 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: move __sb_{start,end}_write* to fs.h Now that we've straightened out the callers, move these three functions to fs.h since they're fairly trivial. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- fs/super.c | 30 ------------------------------ include/linux/fs.h | 21 ++++++++++++++++++--- 2 files changed, 18 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 59aa59279133..98bb0629ee10 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1631,36 +1631,6 @@ int super_setup_bdi(struct super_block *sb) } EXPORT_SYMBOL(super_setup_bdi); -/* - * This is an internal function, please use sb_end_{write,pagefault,intwrite} - * instead. - */ -void __sb_end_write(struct super_block *sb, int level) -{ - percpu_up_read(sb->s_writers.rw_sem + level-1); -} -EXPORT_SYMBOL(__sb_end_write); - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -void __sb_start_write(struct super_block *sb, int level) -{ - percpu_down_read(sb->s_writers.rw_sem + level - 1); -} -EXPORT_SYMBOL(__sb_start_write); - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -bool __sb_start_write_trylock(struct super_block *sb, int level) -{ - return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); -} -EXPORT_SYMBOL_GPL(__sb_start_write_trylock); - /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait diff --git a/include/linux/fs.h b/include/linux/fs.h index 305989afd49c..6dabd019cab0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,9 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode); * Snapshotting support. */ -void __sb_end_write(struct super_block *sb, int level); -void __sb_start_write(struct super_block *sb, int level); -bool __sb_start_write_trylock(struct super_block *sb, int level); +/* + * These are internal functions, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +static inline void __sb_end_write(struct super_block *sb, int level) +{ + percpu_up_read(sb->s_writers.rw_sem + level-1); +} + +static inline void __sb_start_write(struct super_block *sb, int level) +{ + percpu_down_read(sb->s_writers.rw_sem + level - 1); +} + +static inline bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) -- cgit v1.2.3 From 7e890c37c25c7cbca37ff0ab292873d8146e713b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Nov 2020 17:50:04 +0100 Subject: block: add a return value to set_capacity_revalidate_and_notify Return if the function ended up sending an uevent or not. Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Christoph Hellwig Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- block/genhd.c | 5 ++++- include/linux/genhd.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 0a273211fec2..9387f050c248 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -49,7 +49,7 @@ static void disk_release_events(struct gendisk *disk); * Set disk capacity and notify if the size is not currently * zero and will not be set to zero */ -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev) { sector_t capacity = get_capacity(disk); @@ -62,7 +62,10 @@ void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, char *envp[] = { "RESIZE=1", NULL }; kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } + + return false; } EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 38f23d757013..03da3f603d30 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); /* drivers/char/random.c */ -- cgit v1.2.3 From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 13 Nov 2020 22:51:59 -0800 Subject: compiler.h: fix barrier_data() on clang Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") neglected to copy barrier_data() from compiler-gcc.h into compiler-clang.h. The definition in compiler-gcc.h was really to work around clang's more aggressive optimization, so this broke barrier_data() on clang, and consequently memzero_explicit() as well. For example, this results in at least the memzero_explicit() call in lib/crypto/sha256.c:sha256_transform() being optimized away by clang. Fix this by moving the definition of barrier_data() into compiler.h. Also move the gcc/clang definition of barrier() into compiler.h, __memory_barrier() is icc-specific (and barrier() is already defined using it in compiler-intel.h) and doesn't belong in compiler.h. [rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled] Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arvind Sankar Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/asm-generic/barrier.h | 1 + include/linux/compiler-clang.h | 6 ------ include/linux/compiler-gcc.h | 19 ------------------- include/linux/compiler.h | 18 ++++++++++++++++-- 4 files changed, 17 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 798027bb89be..640f09479bdf 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ +#include #include #ifndef nop diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 230604e7f057..dd7233c48bf3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -60,12 +60,6 @@ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like ICC. - */ -#define barrier() __asm__ __volatile__("" : : : "memory") - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5deb37024574..74c6c0486eed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,25 +15,6 @@ # error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -/* - * This version is i.e. to prevent dead stores elimination on @ptr - * where gcc and llvm may behave differently when otherwise using - * normal barrier(): while gcc behavior gets along with a normal - * barrier(), llvm needs an explicit input variable to be assumed - * clobbered. The issue is as follows: while the inline asm might - * access any memory it wants, the compiler could have fit all of - * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proved that the inline asm wasn't touching any of - * it. This version works well with both compilers, i.e. we're telling - * the compiler that the inline asm absolutely may see the contents - * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 - */ -#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e512f5505dad..b8fe0c23cfff 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Optimization barrier */ #ifndef barrier -# define barrier() __memory_barrier() +/* The "volatile" is due to gcc bugs */ +# define barrier() __asm__ __volatile__("": : :"memory") #endif #ifndef barrier_data -# define barrier_data(ptr) barrier() +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proved that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") #endif /* workaround for GCC PR82365 if needed */ -- cgit v1.2.3 From 8b21ca0218d29cc6bb7028125c7e5a10dfb4730c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 13 Nov 2020 22:52:13 -0800 Subject: mm: memcontrol: fix missing wakeup polling thread When we poll the swap.events, we can miss being woken up when the swap event occurs. Because we didn't notify. Fixes: f3a53a3a1e5b ("mm, memcontrol: implement memory.swap.events") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Roman Gushchin Cc: Michal Hocko Cc: Yafang Shao Cc: Chris Down Cc: Tejun Heo Link: https://lkml.kernel.org/r/20201105161936.98312-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..a80c59af2c60 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,12 +900,19 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + atomic_long_inc(&memcg->memory_events_local[event]); - cgroup_file_notify(&memcg->events_local_file); + if (!swap_event) + cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - cgroup_file_notify(&memcg->events_file); + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; -- cgit v1.2.3 From acfdd18591eaac25446e976a0c0d190f8b3dbfb1 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 23 Nov 2020 21:52:41 -0800 Subject: firmware: xilinx: Use hash-table for api feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently array of fix length PM_API_MAX is used to cache the pm_api version (valid or invalid). However ATF based PM APIs values are much higher then PM_API_MAX. So to include ATF based PM APIs also, use hash-table to store the pm_api version status. Signed-off-by: Amit Sunil Dhamne Reported-by: Arnd Bergmann  Signed-off-by: Ravi Patel Signed-off-by: Rajan Vaja Reviewed-by: Arnd Bergmann Tested-by: Michal Simek Fixes: f3217d6f2f7a ("firmware: xilinx: fix out-of-bounds access") Cc: stable Link: https://lore.kernel.org/r/1606197161-25976-1-git-send-email-rajan.vaja@xilinx.com Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 63 ++++++++++++++++++++++++++++-------- include/linux/firmware/xlnx-zynqmp.h | 4 --- 2 files changed, 49 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 5828302c7c5c..d08ac824c993 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -20,12 +20,28 @@ #include #include #include +#include #include #include "zynqmp-debug.h" +/* Max HashMap Order for PM API feature check (1<<7 = 128) */ +#define PM_API_FEATURE_CHECK_MAX_ORDER 7 + static bool feature_check_enabled; -static u32 zynqmp_pm_features[PM_API_MAX]; +DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER); + +/** + * struct pm_api_feature_data - PM API Feature data + * @pm_api_id: PM API Id, used as key to index into hashmap + * @feature_status: status of PM API feature: valid, invalid + * @hentry: hlist_node that hooks this entry into hashtable + */ +struct pm_api_feature_data { + u32 pm_api_id; + int feature_status; + struct hlist_node hentry; +}; static const struct mfd_cell firmware_devs[] = { { @@ -142,29 +158,37 @@ static int zynqmp_pm_feature(u32 api_id) int ret; u32 ret_payload[PAYLOAD_ARG_CNT]; u64 smc_arg[2]; + struct pm_api_feature_data *feature_data; if (!feature_check_enabled) return 0; - /* Return value if feature is already checked */ - if (api_id > ARRAY_SIZE(zynqmp_pm_features)) - return PM_FEATURE_INVALID; + /* Check for existing entry in hash table for given api */ + hash_for_each_possible(pm_api_features_map, feature_data, hentry, + api_id) { + if (feature_data->pm_api_id == api_id) + return feature_data->feature_status; + } - if (zynqmp_pm_features[api_id] != PM_FEATURE_UNCHECKED) - return zynqmp_pm_features[api_id]; + /* Add new entry if not present */ + feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL); + if (!feature_data) + return -ENOMEM; + feature_data->pm_api_id = api_id; smc_arg[0] = PM_SIP_SVC | PM_FEATURE_CHECK; smc_arg[1] = api_id; ret = do_fw_call(smc_arg[0], smc_arg[1], 0, ret_payload); - if (ret) { - zynqmp_pm_features[api_id] = PM_FEATURE_INVALID; - return PM_FEATURE_INVALID; - } + if (ret) + ret = -EOPNOTSUPP; + else + ret = ret_payload[1]; - zynqmp_pm_features[api_id] = ret_payload[1]; + feature_data->feature_status = ret; + hash_add(pm_api_features_map, &feature_data->hentry, api_id); - return zynqmp_pm_features[api_id]; + return ret; } /** @@ -200,9 +224,12 @@ int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, * Make sure to stay in x0 register */ u64 smc_arg[4]; + int ret; - if (zynqmp_pm_feature(pm_api_id) == PM_FEATURE_INVALID) - return -ENOTSUPP; + /* Check if feature is supported or not */ + ret = zynqmp_pm_feature(pm_api_id); + if (ret < 0) + return ret; smc_arg[0] = PM_SIP_SVC | pm_api_id; smc_arg[1] = ((u64)arg1 << 32) | arg0; @@ -1252,9 +1279,17 @@ static int zynqmp_firmware_probe(struct platform_device *pdev) static int zynqmp_firmware_remove(struct platform_device *pdev) { + struct pm_api_feature_data *feature_data; + int i; + mfd_remove_devices(&pdev->dev); zynqmp_pm_api_debugfs_exit(); + hash_for_each(pm_api_features_map, i, feature_data, hentry) { + hash_del(&feature_data->hentry); + kfree(feature_data); + } + return 0; } diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5968df82b991..41a1bab98b7e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -50,10 +50,6 @@ #define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U #define ZYNQMP_PM_CAPABILITY_UNUSABLE 0x8U -/* Feature check status */ -#define PM_FEATURE_INVALID -1 -#define PM_FEATURE_UNCHECKED 0 - /* * Firmware FPGA Manager flags * XILINX_ZYNQMP_PM_FPGA_FULL: FPGA full reconfiguration -- cgit v1.2.3