summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt22
-rw-r--r--Documentation/arch/riscv/uabi.rst4
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml11
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml18
-rw-r--r--Documentation/netlink/specs/netdev.yaml4
-rw-r--r--Documentation/process/maintainer-netdev.rst2
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/arc/net/bpf_jit.h2
-rw-r--r--arch/arc/net/bpf_jit_arcv2.c10
-rw-r--r--arch/arc/net/bpf_jit_core.c22
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/riscv/include/asm/cmpxchg.h22
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c2
-rw-r--r--arch/riscv/kernel/cpu_ops_spinwait.c3
-rw-r--r--drivers/char/hw_random/core.c47
-rw-r--r--drivers/char/tpm/Kconfig2
-rw-r--r--drivers/char/tpm/tpm-buf.c26
-rw-r--r--drivers/char/tpm/tpm2-cmd.c10
-rw-r--r--drivers/char/tpm/tpm2-sessions.c21
-rw-r--r--drivers/char/tpm/tpm_tis_spi_main.c3
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c11
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c12
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c10
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c22
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c258
-rw-r--r--drivers/net/ethernet/intel/ice/devlink/devlink.c31
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c11
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c1
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c12
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c12
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_classifier.c2
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c4
-rw-r--r--drivers/net/netkit.c30
-rw-r--r--drivers/net/phy/micrel.c11
-rw-r--r--drivers/net/usb/smsc95xx.c11
-rw-r--r--drivers/nfc/virtual_ncidev.c4
-rw-r--r--drivers/platform/x86/Kconfig1
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c2
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c115
-rw-r--r--drivers/platform/x86/x86-android-tablets/Kconfig2
-rw-r--r--drivers/pmdomain/imx/gpcv2.c11
-rw-r--r--fs/9p/vfs_dentry.c9
-rw-r--r--fs/9p/vfs_inode.c1
-rw-r--r--fs/afs/inode.c1
-rw-r--r--fs/afs/mntpt.c5
-rw-r--r--fs/bcachefs/backpointers.c2
-rw-r--r--fs/bcachefs/bcachefs.h44
-rw-r--r--fs/bcachefs/bcachefs_format.h195
-rw-r--r--fs/bcachefs/btree_gc.c18
-rw-r--r--fs/bcachefs/btree_gc.h44
-rw-r--r--fs/bcachefs/btree_gc_types.h29
-rw-r--r--fs/bcachefs/btree_io.c85
-rw-r--r--fs/bcachefs/btree_key_cache.c10
-rw-r--r--fs/bcachefs/buckets.c2
-rw-r--r--fs/bcachefs/disk_groups_format.h21
-rw-r--r--fs/bcachefs/ec.c2
-rw-r--r--fs/bcachefs/fs-io-buffered.c6
-rw-r--r--fs/bcachefs/fs-io-direct.c4
-rw-r--r--fs/bcachefs/fs.c12
-rw-r--r--fs/bcachefs/fsck.c51
-rw-r--r--fs/bcachefs/journal_seq_blacklist_format.h15
-rw-r--r--fs/bcachefs/mean_and_variance_test.c1
-rw-r--r--fs/bcachefs/replicas_format.h31
-rw-r--r--fs/bcachefs/sb-downgrade.c13
-rw-r--r--fs/bcachefs/sb-downgrade_format.h17
-rw-r--r--fs/bcachefs/sb-errors_format.h296
-rw-r--r--fs/bcachefs/sb-errors_types.h281
-rw-r--r--fs/bcachefs/sb-members_format.h110
-rw-r--r--fs/bcachefs/snapshot.c88
-rw-r--r--fs/bcachefs/snapshot.h1
-rw-r--r--fs/bcachefs/super-io.c12
-rw-r--r--fs/bcachefs/super.c2
-rw-r--r--fs/dcache.c15
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/netfs/buffered_write.c2
-rw-r--r--fs/netfs/direct_write.c2
-rw-r--r--fs/netfs/objects.c5
-rw-r--r--fs/netfs/write_collect.c7
-rw-r--r--fs/netfs/write_issue.c9
-rw-r--r--fs/signalfd.c6
-rw-r--r--fs/smb/client/cifsfs.c1
-rw-r--r--include/linux/etherdevice.h8
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
-rw-r--r--include/linux/netfs.h18
-rw-r--r--include/linux/pagemap.h34
-rw-r--r--include/linux/tpm.h17
-rw-r--r--include/net/dst_ops.h2
-rw-r--r--include/net/page_pool/types.h5
-rw-r--r--include/net/request_sock.h12
-rw-r--r--include/net/sock.h13
-rw-r--r--include/uapi/linux/cn_proc.h3
-rw-r--r--include/uapi/linux/netdev.h1
-rw-r--r--kernel/bpf/verifier.c10
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/trace/bpf_trace.c10
-rw-r--r--kernel/trace/trace_probe.c4
-rw-r--r--kernel/trace/trace_uprobe.c14
-rw-r--r--net/9p/client.c2
-rw-r--r--net/core/sock_map.c22
-rw-r--r--net/ethernet/eth.c4
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/devinet.c7
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c2
-rw-r--r--net/ipv4/route.c22
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/route.c29
-rw-r--r--net/ipv6/tcp_ipv6.c7
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c3
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_fib.c8
-rw-r--r--net/netfilter/nft_payload.c95
-rw-r--r--net/sched/sch_taprio.c14
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/xfrm/xfrm_policy.c11
-rw-r--r--tools/bpf/resolve_btfids/main.c2
-rw-r--r--tools/include/uapi/linux/netdev.h1
-rw-r--r--tools/lib/bpf/features.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c94
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c134
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c35
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi.c50
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c187
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh10
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh30
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh6
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json44
142 files changed, 2185 insertions, 1218 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 500cfa776225..b600df82669d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1921,6 +1921,28 @@
Format:
<bus_id>,<clkrate>
+ i2c_touchscreen_props= [HW,ACPI,X86]
+ Set device-properties for ACPI-enumerated I2C-attached
+ touchscreen, to e.g. fix coordinates of upside-down
+ mounted touchscreens. If you need this option please
+ submit a drivers/platform/x86/touchscreen_dmi.c patch
+ adding a DMI quirk for this.
+
+ Format:
+ <ACPI_HW_ID>:<prop_name>=<val>[:prop_name=val][:...]
+ Where <val> is one of:
+ Omit "=<val>" entirely Set a boolean device-property
+ Unsigned number Set a u32 device-property
+ Anything else Set a string device-property
+
+ Examples (split over multiple lines):
+ i2c_touchscreen_props=GDIX1001:touchscreen-inverted-x:
+ touchscreen-inverted-y
+
+ i2c_touchscreen_props=MSSL1680:touchscreen-size-x=1920:
+ touchscreen-size-y=1080:touchscreen-inverted-y:
+ firmware-name=gsl1680-vendor-model.fw:silead,home-button
+
i8042.debug [HW] Toggle i8042 debug mode
i8042.unmask_kbd_data
[HW] Enable printing of interrupt data from the KBD port
diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst
index 54d199dce78b..2b420bab0527 100644
--- a/Documentation/arch/riscv/uabi.rst
+++ b/Documentation/arch/riscv/uabi.rst
@@ -65,4 +65,6 @@ the extension, or may have deliberately removed it from the listing.
Misaligned accesses
-------------------
-Misaligned accesses are supported in userspace, but they may perform poorly.
+Misaligned scalar accesses are supported in userspace, but they may perform
+poorly. Misaligned vector accesses are only supported if the Zicclsm extension
+is supported.
diff --git a/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml b/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
index 828439398fdf..fd4244fceced 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
@@ -24,6 +24,7 @@ properties:
managers:
type: object
+ additionalProperties: false
description:
List of the PD69208T4/PD69204T4/PD69208M PSE managers. Each manager
have 4 or 8 physical ports according to the chip version. No need to
@@ -47,8 +48,9 @@ properties:
- "#size-cells"
patternProperties:
- "^manager@0[0-9a-b]$":
+ "^manager@[0-9a-b]$":
type: object
+ additionalProperties: false
description:
PD69208T4/PD69204T4/PD69208M PSE manager exposing 4 or 8 physical
ports.
@@ -69,9 +71,14 @@ properties:
patternProperties:
'^port@[0-7]$':
type: object
+ additionalProperties: false
+
+ properties:
+ reg:
+ maxItems: 1
+
required:
- reg
- additionalProperties: false
required:
- reg
diff --git a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
index 4147adb11e10..6992d56832bf 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
@@ -29,13 +29,31 @@ properties:
of the ports conversion matrix that establishes relationship between
the logical ports and the physical channels.
type: object
+ additionalProperties: false
+
+ properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
patternProperties:
'^channel@[0-7]$':
type: object
+ additionalProperties: false
+
+ properties:
+ reg:
+ maxItems: 1
+
required:
- reg
+ required:
+ - "#address-cells"
+ - "#size-cells"
+
unevaluatedProperties: false
required:
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 11a32373365a..959755be4d7f 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -350,6 +350,10 @@ attribute-sets:
buffer space, host descriptors etc.
type: uint
-
+ name: rx-csum-complete
+ doc: Number of packets that were marked as CHECKSUM_COMPLETE.
+ type: uint
+ -
name: rx-csum-unnecessary
doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
type: uint
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index fd96e4a3cef9..5e1fcfad1c4c 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -227,7 +227,7 @@ preferably including links to previous postings, for example::
The amount of mooing will depend on packet rate so should match
the diurnal cycle quite well.
- Signed-of-by: Joe Defarmer <joe@barn.org>
+ Signed-off-by: Joe Defarmer <joe@barn.org>
---
v3:
- add a note about time-of-day mooing fluctuation to the commit message
diff --git a/MAINTAINERS b/MAINTAINERS
index a6a011792167..8754ac2c259d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3854,6 +3854,7 @@ BPF JIT for ARM64
M: Daniel Borkmann <daniel@iogearbox.net>
M: Alexei Starovoitov <ast@kernel.org>
M: Puranjay Mohan <puranjay@kernel.org>
+R: Xu Kuohai <xukuohai@huaweicloud.com>
L: bpf@vger.kernel.org
S: Supported
F: arch/arm64/net/
@@ -21315,7 +21316,7 @@ F: arch/riscv/boot/dts/starfive/
STARFIVE DWMAC GLUE LAYER
M: Emil Renner Berthing <kernel@esmil.dk>
-M: Samin Guo <samin.guo@starfivetech.com>
+M: Minda Chen <minda.chen@starfivetech.com>
S: Maintained
F: Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
F: drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
index ec44873c42d1..495f3023e4c1 100644
--- a/arch/arc/net/bpf_jit.h
+++ b/arch/arc/net/bpf_jit.h
@@ -39,7 +39,7 @@
/************** Functions that the back-end must provide **************/
/* Extension for 32-bit operations. */
-inline u8 zext(u8 *buf, u8 rd);
+u8 zext(u8 *buf, u8 rd);
/***** Moves *****/
u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm);
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index 31bfb6e9ce00..4458e409ca0a 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -62,7 +62,7 @@ enum {
* If/when we decide to add ARCv2 instructions that do use register pairs,
* the mapping, hopefully, doesn't need to be revisited.
*/
-const u8 bpf2arc[][2] = {
+static const u8 bpf2arc[][2] = {
/* Return value from in-kernel function, and exit value from eBPF */
[BPF_REG_0] = {ARC_R_8, ARC_R_9},
/* Arguments from eBPF program to in-kernel function */
@@ -1302,7 +1302,7 @@ static u8 arc_b(u8 *buf, s32 offset)
/************* Packers (Deal with BPF_REGs) **************/
-inline u8 zext(u8 *buf, u8 rd)
+u8 zext(u8 *buf, u8 rd)
{
if (rd != BPF_REG_FP)
return arc_movi_r(buf, REG_HI(rd), 0);
@@ -2235,6 +2235,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
break;
default:
/* The caller must have handled this. */
+ break;
}
} else {
/*
@@ -2253,6 +2254,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
break;
default:
/* The caller must have handled this. */
+ break;
}
}
@@ -2517,7 +2519,7 @@ u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size)
#define JCC64_NR_OF_JMPS 3 /* Number of jumps in jcc64 template. */
#define JCC64_INSNS_TO_END 3 /* Number of insn. inclusive the 2nd jmp to end. */
#define JCC64_SKIP_JMP 1 /* Index of the "skip" jump to "end". */
-const struct {
+static const struct {
/*
* "jit_off" is common between all "jmp[]" and is coupled with
* "cond" of each "jmp[]" instance. e.g.:
@@ -2883,7 +2885,7 @@ u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
* The "ARC_CC_SET" becomes "CC_unequal" because of the "tst"
* instruction that precedes the conditional branch.
*/
-const u8 arcv2_32_jmps[ARC_CC_LAST] = {
+static const u8 arcv2_32_jmps[ARC_CC_LAST] = {
[ARC_CC_UGT] = CC_great_u,
[ARC_CC_UGE] = CC_great_eq_u,
[ARC_CC_ULT] = CC_less_u,
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index 6f6b4ffccf2c..e3628922c24a 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -159,7 +159,7 @@ static void jit_dump(const struct jit_context *ctx)
/* Initialise the context so there's no garbage. */
static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
{
- memset(ctx, 0, sizeof(ctx));
+ memset(ctx, 0, sizeof(*ctx));
ctx->orig_prog = prog;
@@ -167,7 +167,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
ctx->prog = bpf_jit_blind_constants(prog);
if (IS_ERR(ctx->prog))
return PTR_ERR(ctx->prog);
- ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true);
+ ctx->blinded = (ctx->prog != ctx->orig_prog);
/* If the verifier doesn't zero-extend, then we have to do it. */
ctx->do_zext = !ctx->prog->aux->verifier_zext;
@@ -1182,12 +1182,12 @@ static int jit_prepare(struct jit_context *ctx)
}
/*
- * All the "handle_*()" functions have been called before by the
- * "jit_prepare()". If there was an error, we would know by now.
- * Therefore, no extra error checking at this point, other than
- * a sanity check at the end that expects the calculated length
- * (jit.len) to be equal to the length of generated instructions
- * (jit.index).
+ * jit_compile() is the real compilation phase. jit_prepare() is
+ * invoked before jit_compile() as a dry-run to make sure everything
+ * will go OK and allocate the necessary memory.
+ *
+ * In the end, jit_compile() checks if it has produced the same number
+ * of instructions as jit_prepare() would.
*/
static int jit_compile(struct jit_context *ctx)
{
@@ -1407,9 +1407,9 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
/*
* This function may be invoked twice for the same stream of BPF
- * instructions. The "extra pass" happens, when there are "call"s
- * involved that their addresses are not known during the first
- * invocation.
+ * instructions. The "extra pass" happens, when there are
+ * (re)locations involved that their addresses are not known
+ * during the first run.
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b94176e25be1..0525ee2d63c7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -106,7 +106,7 @@ config RISCV
select HAS_IOPORT if MMU
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
- select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
+ select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
select HAVE_ARCH_KASAN if MMU && 64BIT
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ddb002ed89de..808b4c78462e 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -10,7 +10,7 @@
#include <asm/fence.h>
-#define __arch_xchg_masked(prepend, append, r, p, n) \
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
({ \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
@@ -25,7 +25,7 @@
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
- " sc.w %1, %1, %2\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
@@ -46,7 +46,8 @@
: "memory"); \
})
-#define _arch_xchg(ptr, new, sfx, prepend, append) \
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
+ sc_append, swap_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __new = (new); \
@@ -55,15 +56,15 @@
switch (sizeof(*__ptr)) { \
case 1: \
case 2: \
- __arch_xchg_masked(prepend, append, \
+ __arch_xchg_masked(sc_sfx, prepend, sc_append, \
__ret, __ptr, __new); \
break; \
case 4: \
- __arch_xchg(".w" sfx, prepend, append, \
+ __arch_xchg(".w" swap_sfx, prepend, swap_append, \
__ret, __ptr, __new); \
break; \
case 8: \
- __arch_xchg(".d" sfx, prepend, append, \
+ __arch_xchg(".d" swap_sfx, prepend, swap_append, \
__ret, __ptr, __new); \
break; \
default: \
@@ -73,16 +74,17 @@
})
#define arch_xchg_relaxed(ptr, x) \
- _arch_xchg(ptr, x, "", "", "")
+ _arch_xchg(ptr, x, "", "", "", "", "")
#define arch_xchg_acquire(ptr, x) \
- _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
+ _arch_xchg(ptr, x, "", "", "", \
+ RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
#define arch_xchg_release(ptr, x) \
- _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
+ _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
#define arch_xchg(ptr, x) \
- _arch_xchg(ptr, x, ".aqrl", "", "")
+ _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
#define xchg32(ptr, x) \
({ \
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 1cc7df740edd..e6fbaaf54956 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
/* Make sure tidle is updated */
smp_mb();
bdata->task_ptr = tidle;
- bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ bdata->stack_ptr = task_pt_regs(tidle);
/* Make sure boot data is updated */
smp_mb();
hsm_data = __pa(bdata);
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index 613872b0a21a..24869eb88908 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
/* Make sure tidle is updated */
smp_mb();
- WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
}
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index f5c71a617a99..4084df65c9fa 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -64,19 +64,6 @@ static size_t rng_buffer_size(void)
return RNG_BUFFER_SIZE;
}
-static void add_early_randomness(struct hwrng *rng)
-{
- int bytes_read;
-
- mutex_lock(&reading_mutex);
- bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0);
- mutex_unlock(&reading_mutex);
- if (bytes_read > 0) {
- size_t entropy = bytes_read * 8 * rng->quality / 1024;
- add_hwgenerator_randomness(rng_fillbuf, bytes_read, entropy, false);
- }
-}
-
static inline void cleanup_rng(struct kref *kref)
{
struct hwrng *rng = container_of(kref, struct hwrng, ref);
@@ -340,13 +327,12 @@ static ssize_t rng_current_store(struct device *dev,
const char *buf, size_t len)
{
int err;
- struct hwrng *rng, *old_rng, *new_rng;
+ struct hwrng *rng, *new_rng;
err = mutex_lock_interruptible(&rng_mutex);
if (err)
return -ERESTARTSYS;
- old_rng = current_rng;
if (sysfs_streq(buf, "")) {
err = enable_best_rng();
} else {
@@ -362,11 +348,8 @@ static ssize_t rng_current_store(struct device *dev,
new_rng = get_current_rng_nolock();
mutex_unlock(&rng_mutex);
- if (new_rng) {
- if (new_rng != old_rng)
- add_early_randomness(new_rng);
+ if (new_rng)
put_rng(new_rng);
- }
return err ? : len;
}
@@ -544,7 +527,6 @@ int hwrng_register(struct hwrng *rng)
{
int err = -EINVAL;
struct hwrng *tmp;
- bool is_new_current = false;
if (!rng->name || (!rng->data_read && !rng->read))
goto out;
@@ -573,25 +555,8 @@ int hwrng_register(struct hwrng *rng)
err = set_current_rng(rng);
if (err)
goto out_unlock;
- /* to use current_rng in add_early_randomness() we need
- * to take a ref
- */
- is_new_current = true;
- kref_get(&rng->ref);
}
mutex_unlock(&rng_mutex);
- if (is_new_current || !rng->init) {
- /*
- * Use a new device's input to add some randomness to
- * the system. If this rng device isn't going to be
- * used right away, its init function hasn't been
- * called yet by set_current_rng(); so only use the
- * randomness from devices that don't need an init callback
- */
- add_early_randomness(rng);
- }
- if (is_new_current)
- put_rng(rng);
return 0;
out_unlock:
mutex_unlock(&rng_mutex);
@@ -602,12 +567,11 @@ EXPORT_SYMBOL_GPL(hwrng_register);
void hwrng_unregister(struct hwrng *rng)
{
- struct hwrng *old_rng, *new_rng;
+ struct hwrng *new_rng;
int err;
mutex_lock(&rng_mutex);
- old_rng = current_rng;
list_del(&rng->list);
complete_all(&rng->dying);
if (current_rng == rng) {
@@ -626,11 +590,8 @@ void hwrng_unregister(struct hwrng *rng)
} else
mutex_unlock(&rng_mutex);
- if (new_rng) {
- if (old_rng != new_rng)
- add_early_randomness(new_rng);
+ if (new_rng)
put_rng(new_rng);
- }
wait_for_completion(&rng->cleanup_done);
}
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index e63a6a17793c..cf0be8a7939d 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -29,7 +29,7 @@ if TCG_TPM
config TCG_TPM2_HMAC
bool "Use HMAC and encrypted transactions on the TPM bus"
- default y
+ default X86_64
select CRYPTO_ECDH
select CRYPTO_LIB_AESCFB
select CRYPTO_LIB_SHA256
diff --git a/drivers/char/tpm/tpm-buf.c b/drivers/char/tpm/tpm-buf.c
index 647c6ca92ac3..cad0048bcc3c 100644
--- a/drivers/char/tpm/tpm-buf.c
+++ b/drivers/char/tpm/tpm-buf.c
@@ -223,30 +223,4 @@ u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset)
}
EXPORT_SYMBOL_GPL(tpm_buf_read_u32);
-static u16 tpm_buf_tag(struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
-
- return be16_to_cpu(head->tag);
-}
-
-/**
- * tpm_buf_parameters - return the TPM response parameters area of the tpm_buf
- * @buf: tpm_buf to use
- *
- * Where the parameters are located depends on the tag of a TPM
- * command (it's immediately after the header for TPM_ST_NO_SESSIONS
- * or 4 bytes after for TPM_ST_SESSIONS). Evaluate this and return a
- * pointer to the first byte of the parameters area.
- *
- * @return: pointer to parameters area
- */
-u8 *tpm_buf_parameters(struct tpm_buf *buf)
-{
- int offset = TPM_HEADER_SIZE;
-
- if (tpm_buf_tag(buf) == TPM2_ST_SESSIONS)
- offset += 4;
- return &buf->data[offset];
-}
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 0cdf892ec2a7..1e856259219e 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -281,6 +281,7 @@ struct tpm2_get_random_out {
int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
{
struct tpm2_get_random_out *out;
+ struct tpm_header *head;
struct tpm_buf buf;
u32 recd;
u32 num_bytes = max;
@@ -288,6 +289,7 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
int total = 0;
int retries = 5;
u8 *dest_ptr = dest;
+ off_t offset;
if (!num_bytes || max > TPM_MAX_RNG_DATA)
return -EINVAL;
@@ -320,7 +322,13 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
goto out;
}
- out = (struct tpm2_get_random_out *)tpm_buf_parameters(&buf);
+ head = (struct tpm_header *)buf.data;
+ offset = TPM_HEADER_SIZE;
+ /* Skip the parameter size field: */
+ if (be16_to_cpu(head->tag) == TPM2_ST_SESSIONS)
+ offset += 4;
+
+ out = (struct tpm2_get_random_out *)&buf.data[offset];
recd = min_t(u32, be16_to_cpu(out->size), num_bytes);
if (tpm_buf_length(&buf) <
TPM_HEADER_SIZE +
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index ea8860661876..907ac9956a78 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -80,6 +80,9 @@
/* maximum number of names the TPM must remember for authorization */
#define AUTH_MAX_NAMES 3
+#define AES_KEY_BYTES AES_KEYSIZE_128
+#define AES_KEY_BITS (AES_KEY_BYTES*8)
+
static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
u32 *handle, u8 *name);
@@ -954,6 +957,20 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
}
EXPORT_SYMBOL(tpm2_start_auth_session);
+/*
+ * A mask containing the object attributes for the kernel held null primary key
+ * used in HMAC encryption. For more information on specific attributes look up
+ * to "8.3 TPMA_OBJECT (Object Attributes)".
+ */
+#define TPM2_OA_NULL_KEY ( \
+ TPM2_OA_NO_DA | \
+ TPM2_OA_FIXED_TPM | \
+ TPM2_OA_FIXED_PARENT | \
+ TPM2_OA_SENSITIVE_DATA_ORIGIN | \
+ TPM2_OA_USER_WITH_AUTH | \
+ TPM2_OA_DECRYPT | \
+ TPM2_OA_RESTRICTED)
+
/**
* tpm2_parse_create_primary() - parse the data returned from TPM_CC_CREATE_PRIMARY
*
@@ -1018,7 +1035,7 @@ static int tpm2_parse_create_primary(struct tpm_chip *chip, struct tpm_buf *buf,
val = tpm_buf_read_u32(buf, &offset_t);
/* object properties */
- if (val != TPM2_OA_TMPL)
+ if (val != TPM2_OA_NULL_KEY)
return -EINVAL;
/* auth policy (empty) */
@@ -1178,7 +1195,7 @@ static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
tpm_buf_append_u16(&template, TPM_ALG_SHA256);
/* object properties */
- tpm_buf_append_u32(&template, TPM2_OA_TMPL);
+ tpm_buf_append_u32(&template, TPM2_OA_NULL_KEY);
/* sauth policy (empty) */
tpm_buf_append_u16(&template, 0);
diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index 3f9eaf27b41b..c9eca24bbad4 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -37,6 +37,7 @@
#include "tpm_tis_spi.h"
#define MAX_SPI_FRAMESIZE 64
+#define SPI_HDRSIZE 4
/*
* TCG SPI flow control is documented in section 6.4 of the spec[1]. In short,
@@ -247,7 +248,7 @@ static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr,
int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
int irq, const struct tpm_tis_phy_ops *phy_ops)
{
- phy->iobuf = devm_kmalloc(&spi->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL);
+ phy->iobuf = devm_kmalloc(&spi->dev, SPI_HDRSIZE + MAX_SPI_FRAMESIZE, GFP_KERNEL);
if (!phy->iobuf)
return -ENOMEM;
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 1e0085cd9a9a..2818e24e2a51 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -3142,7 +3142,7 @@ phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit)
else
interface = PHY_INTERFACE_MODE_MII;
} else if (val == bitval[P_RMII_SEL]) {
- interface = PHY_INTERFACE_MODE_RGMII;
+ interface = PHY_INTERFACE_MODE_RMII;
} else {
interface = PHY_INTERFACE_MODE_RGMII;
if (data8 & P_RGMII_ID_EG_ENABLE)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 2d8a66ea82fa..713a595370bf 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -312,7 +312,6 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
struct ena_com_io_sq *io_sq)
{
size_t size;
- int dev_node = 0;
memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
@@ -325,12 +324,9 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
size = io_sq->desc_entry_size * io_sq->q_depth;
if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
- dev_node = dev_to_node(ena_dev->dmadev);
- set_dev_node(ena_dev->dmadev, ctx->numa_node);
io_sq->desc_addr.virt_addr =
dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr,
GFP_KERNEL);
- set_dev_node(ena_dev->dmadev, dev_node);
if (!io_sq->desc_addr.virt_addr) {
io_sq->desc_addr.virt_addr =
dma_alloc_coherent(ena_dev->dmadev, size,
@@ -354,10 +350,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
io_sq->bounce_buf_ctrl.buffers_num;
- dev_node = dev_to_node(ena_dev->dmadev);
- set_dev_node(ena_dev->dmadev, ctx->numa_node);
io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
- set_dev_node(ena_dev->dmadev, dev_node);
if (!io_sq->bounce_buf_ctrl.base_buffer)
io_sq->bounce_buf_ctrl.base_buffer =
devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
@@ -397,7 +390,6 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
struct ena_com_io_cq *io_cq)
{
size_t size;
- int prev_node = 0;
memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr));
@@ -409,11 +401,8 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
- prev_node = dev_to_node(ena_dev->dmadev);
- set_dev_node(ena_dev->dmadev, ctx->numa_node);
io_cq->cdesc_addr.virt_addr =
dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
- set_dev_node(ena_dev->dmadev, prev_node);
if (!io_cq->cdesc_addr.virt_addr) {
io_cq->cdesc_addr.virt_addr =
dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr,
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f604119efc80..5f26fc3ad655 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1117,18 +1117,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf,
pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);
if (port[IFLA_PORT_PROFILE]) {
+ if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) {
+ memcpy(pp, &prev_pp, sizeof(*pp));
+ return -EINVAL;
+ }
pp->set |= ENIC_SET_NAME;
memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
PORT_PROFILE_MAX);
}
if (port[IFLA_PORT_INSTANCE_UUID]) {
+ if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) {
+ memcpy(pp, &prev_pp, sizeof(*pp));
+ return -EINVAL;
+ }
pp->set |= ENIC_SET_INSTANCE;
memcpy(pp->instance_uuid,
nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
}
if (port[IFLA_PORT_HOST_UUID]) {
+ if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) {
+ memcpy(pp, &prev_pp, sizeof(*pp));
+ return -EINVAL;
+ }
pp->set |= ENIC_SET_HOST;
memcpy(pp->host_uuid,
nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a72d8a2eb0b3..881ece735dcf 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -4130,6 +4130,14 @@ free_queue_mem:
return ret;
}
+static void fec_enet_deinit(struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+
+ netif_napi_del(&fep->napi);
+ fec_enet_free_queue(ndev);
+}
+
#ifdef CONFIG_OF
static int fec_reset_phy(struct platform_device *pdev)
{
@@ -4524,6 +4532,7 @@ failed_register:
fec_enet_mii_remove(fep);
failed_mii_init:
failed_irq:
+ fec_enet_deinit(ndev);
failed_init:
fec_ptp_stop(pdev);
failed_reset:
@@ -4587,6 +4596,7 @@ fec_drv_remove(struct platform_device *pdev)
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ fec_enet_deinit(ndev);
free_netdev(ndev);
}
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index f9e94be36e97..2e98a2a0bead 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1225,6 +1225,28 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
}
release:
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ /* Force SMBus mode in PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+ if (ret_val) {
+ e1000e_enable_phy_retry(hw);
+ hw->phy.ops.release(hw);
+ goto out;
+ }
+ phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_reg);
+
hw->phy.ops.release(hw);
out:
if (ret_val)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 220d62fca55d..da5c59daf8ba 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6623,7 +6623,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, ctrl_ext, rctl, status, wufc;
int retval = 0;
- u16 smb_ctrl;
/* Runtime suspend should only enable wakeup for link changes */
if (runtime)
@@ -6697,23 +6696,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
if (retval)
return retval;
}
-
- /* Force SMBUS to allow WOL */
- /* Switching PHY interface always returns MDI error
- * so disable retry mechanism to avoid wasting time
- */
- e1000e_disable_phy_retry(hw);
-
- e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
- smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
- e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
-
- e1000e_enable_phy_retry(hw);
-
- /* Force SMBus mode in MAC */
- ctrl_ext = er32(CTRL_EXT);
- ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, ctrl_ext);
}
/* Ensure that the appropriate bits are set in LPI_CTRL
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1f188c052828..284c3fad5a6e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11171,6 +11171,8 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
ret = i40e_reset(pf);
if (!ret)
i40e_rebuild(pf, reinit, lock_acquired);
+ else
+ dev_err(&pf->pdev->dev, "%s: i40e_reset() FAILED", __func__);
}
/**
@@ -16335,6 +16337,139 @@ unmap:
}
/**
+ * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
+ * using the mac_address_write admin q function
+ * @pf: pointer to i40e_pf struct
+ **/
+static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
+{
+ struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
+ struct i40e_hw *hw = &pf->hw;
+ u8 mac_addr[6];
+ u16 flags = 0;
+ int ret;
+
+ /* Get current MAC address in case it's an LAA */
+ if (main_vsi && main_vsi->netdev) {
+ ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr);
+ } else {
+ dev_err(&pf->pdev->dev,
+ "Failed to retrieve MAC address; using default\n");
+ ether_addr_copy(mac_addr, hw->mac.addr);
+ }
+
+ /* The FW expects the mac address write cmd to first be called with
+ * one of these flags before calling it again with the multicast
+ * enable flags.
+ */
+ flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
+
+ if (hw->func_caps.flex10_enable && hw->partition_id != 1)
+ flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
+
+ ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
+ return;
+ }
+
+ flags = I40E_AQC_MC_MAG_EN
+ | I40E_AQC_WOL_PRESERVE_ON_PFR
+ | I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
+ ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "Failed to enable Multicast Magic Packet wake up\n");
+}
+
+/**
+ * i40e_io_suspend - suspend all IO operations
+ * @pf: pointer to i40e_pf struct
+ *
+ **/
+static int i40e_io_suspend(struct i40e_pf *pf)
+{
+ struct i40e_hw *hw = &pf->hw;
+
+ set_bit(__I40E_DOWN, pf->state);
+
+ /* Ensure service task will not be running */
+ del_timer_sync(&pf->service_timer);
+ cancel_work_sync(&pf->service_task);
+
+ /* Client close must be called explicitly here because the timer
+ * has been stopped.
+ */
+ i40e_notify_client_of_netdev_close(pf, false);
+
+ if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
+ pf->wol_en)
+ i40e_enable_mc_magic_wake(pf);
+
+ /* Since we're going to destroy queues during the
+ * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
+ * whole section
+ */
+ rtnl_lock();
+
+ i40e_prep_for_reset(pf);
+
+ wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
+ wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
+
+ /* Clear the interrupt scheme and release our IRQs so that the system
+ * can safely hibernate even when there are a large number of CPUs.
+ * Otherwise hibernation might fail when mapping all the vectors back
+ * to CPU0.
+ */
+ i40e_clear_interrupt_scheme(pf);
+
+ rtnl_unlock();
+
+ return 0;
+}
+
+/**
+ * i40e_io_resume - resume IO operations
+ * @pf: pointer to i40e_pf struct
+ *
+ **/
+static int i40e_io_resume(struct i40e_pf *pf)
+{
+ struct device *dev = &pf->pdev->dev;
+ int err;
+
+ /* We need to hold the RTNL lock prior to restoring interrupt schemes,
+ * since we're going to be restoring queues
+ */
+ rtnl_lock();
+
+ /* We cleared the interrupt scheme when we suspended, so we need to
+ * restore it now to resume device functionality.
+ */
+ err = i40e_restore_interrupt_scheme(pf);
+ if (err) {
+ dev_err(dev, "Cannot restore interrupt scheme: %d\n",
+ err);
+ }
+
+ clear_bit(__I40E_DOWN, pf->state);
+ i40e_reset_and_rebuild(pf, false, true);
+
+ rtnl_unlock();
+
+ /* Clear suspended state last after everything is recovered */
+ clear_bit(__I40E_SUSPENDED, pf->state);
+
+ /* Restart the service task */
+ mod_timer(&pf->service_timer,
+ round_jiffies(jiffies + pf->service_timer_period));
+
+ return 0;
+}
+
+/**
* i40e_pci_error_detected - warning that something funky happened in PCI land
* @pdev: PCI device information struct
* @error: the type of PCI error
@@ -16358,7 +16493,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
/* shutdown all operations */
if (!test_bit(__I40E_SUSPENDED, pf->state))
- i40e_prep_for_reset(pf);
+ i40e_io_suspend(pf);
/* Request a slot reset */
return PCI_ERS_RESULT_NEED_RESET;
@@ -16380,7 +16515,8 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
u32 reg;
dev_dbg(&pdev->dev, "%s\n", __func__);
- if (pci_enable_device_mem(pdev)) {
+ /* enable I/O and memory of the device */
+ if (pci_enable_device(pdev)) {
dev_info(&pdev->dev,
"Cannot re-enable PCI device after reset.\n");
result = PCI_ERS_RESULT_DISCONNECT;
@@ -16443,54 +16579,7 @@ static void i40e_pci_error_resume(struct pci_dev *pdev)
if (test_bit(__I40E_SUSPENDED, pf->state))
return;
- i40e_handle_reset_warning(pf, false);
-}
-
-/**
- * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
- * using the mac_address_write admin q function
- * @pf: pointer to i40e_pf struct
- **/
-static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
-{
- struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
- struct i40e_hw *hw = &pf->hw;
- u8 mac_addr[6];
- u16 flags = 0;
- int ret;
-
- /* Get current MAC address in case it's an LAA */
- if (main_vsi && main_vsi->netdev) {
- ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr);
- } else {
- dev_err(&pf->pdev->dev,
- "Failed to retrieve MAC address; using default\n");
- ether_addr_copy(mac_addr, hw->mac.addr);
- }
-
- /* The FW expects the mac address write cmd to first be called with
- * one of these flags before calling it again with the multicast
- * enable flags.
- */
- flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
-
- if (hw->func_caps.flex10_enable && hw->partition_id != 1)
- flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
-
- ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
- if (ret) {
- dev_err(&pf->pdev->dev,
- "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
- return;
- }
-
- flags = I40E_AQC_MC_MAG_EN
- | I40E_AQC_WOL_PRESERVE_ON_PFR
- | I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
- ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
- if (ret)
- dev_err(&pf->pdev->dev,
- "Failed to enable Multicast Magic Packet wake up\n");
+ i40e_io_resume(pf);
}
/**
@@ -16552,48 +16641,11 @@ static void i40e_shutdown(struct pci_dev *pdev)
static int i40e_suspend(struct device *dev)
{
struct i40e_pf *pf = dev_get_drvdata(dev);
- struct i40e_hw *hw = &pf->hw;
/* If we're already suspended, then there is nothing to do */
if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
return 0;
-
- set_bit(__I40E_DOWN, pf->state);
-
- /* Ensure service task will not be running */
- del_timer_sync(&pf->service_timer);
- cancel_work_sync(&pf->service_task);
-
- /* Client close must be called explicitly here because the timer
- * has been stopped.
- */
- i40e_notify_client_of_netdev_close(pf, false);
-
- if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
- pf->wol_en)
- i40e_enable_mc_magic_wake(pf);
-
- /* Since we're going to destroy queues during the
- * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
- * whole section
- */
- rtnl_lock();
-
- i40e_prep_for_reset(pf);
-
- wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
- wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
-
- /* Clear the interrupt scheme and release our IRQs so that the system
- * can safely hibernate even when there are a large number of CPUs.
- * Otherwise hibernation might fail when mapping all the vectors back
- * to CPU0.
- */
- i40e_clear_interrupt_scheme(pf);
-
- rtnl_unlock();
-
- return 0;
+ return i40e_io_suspend(pf);
}
/**
@@ -16603,39 +16655,11 @@ static int i40e_suspend(struct device *dev)
static int i40e_resume(struct device *dev)
{
struct i40e_pf *pf = dev_get_drvdata(dev);
- int err;
/* If we're not suspended, then there is nothing to do */
if (!test_bit(__I40E_SUSPENDED, pf->state))
return 0;
-
- /* We need to hold the RTNL lock prior to restoring interrupt schemes,
- * since we're going to be restoring queues
- */
- rtnl_lock();
-
- /* We cleared the interrupt scheme when we suspended, so we need to
- * restore it now to resume device functionality.
- */
- err = i40e_restore_interrupt_scheme(pf);
- if (err) {
- dev_err(dev, "Cannot restore interrupt scheme: %d\n",
- err);
- }
-
- clear_bit(__I40E_DOWN, pf->state);
- i40e_reset_and_rebuild(pf, false, true);
-
- rtnl_unlock();
-
- /* Clear suspended state last after everything is recovered */
- clear_bit(__I40E_SUSPENDED, pf->state);
-
- /* Restart the service task */
- mod_timer(&pf->service_timer,
- round_jiffies(jiffies + pf->service_timer_period));
-
- return 0;
+ return i40e_io_resume(pf);
}
static const struct pci_error_handlers i40e_err_handler = {
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
index c4b69655cdf5..704e9ad5144e 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
@@ -1388,7 +1388,7 @@ enum ice_param_id {
ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
};
-static const struct devlink_param ice_devlink_params[] = {
+static const struct devlink_param ice_dvl_rdma_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
ice_devlink_enable_roce_get,
ice_devlink_enable_roce_set,
@@ -1397,6 +1397,9 @@ static const struct devlink_param ice_devlink_params[] = {
ice_devlink_enable_iw_get,
ice_devlink_enable_iw_set,
ice_devlink_enable_iw_validate),
+};
+
+static const struct devlink_param ice_dvl_sched_params[] = {
DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
"tx_scheduling_layers",
DEVLINK_PARAM_TYPE_U8,
@@ -1464,21 +1467,31 @@ int ice_devlink_register_params(struct ice_pf *pf)
{
struct devlink *devlink = priv_to_devlink(pf);
struct ice_hw *hw = &pf->hw;
- size_t params_size;
+ int status;
- params_size = ARRAY_SIZE(ice_devlink_params);
+ status = devl_params_register(devlink, ice_dvl_rdma_params,
+ ARRAY_SIZE(ice_dvl_rdma_params));
+ if (status)
+ return status;
- if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
- params_size--;
+ if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
+ status = devl_params_register(devlink, ice_dvl_sched_params,
+ ARRAY_SIZE(ice_dvl_sched_params));
- return devl_params_register(devlink, ice_devlink_params,
- params_size);
+ return status;
}
void ice_devlink_unregister_params(struct ice_pf *pf)
{
- devl_params_unregister(priv_to_devlink(pf), ice_devlink_params,
- ARRAY_SIZE(ice_devlink_params));
+ struct devlink *devlink = priv_to_devlink(pf);
+ struct ice_hw *hw = &pf->hw;
+
+ devl_params_unregister(devlink, ice_dvl_rdma_params,
+ ARRAY_SIZE(ice_dvl_rdma_params));
+
+ if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
+ devl_params_unregister(devlink, ice_dvl_sched_params,
+ ARRAY_SIZE(ice_dvl_sched_params));
}
#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 5649b257e631..24716a3b494c 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -3148,6 +3148,16 @@ ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
case ICE_PHY_TYPE_HIGH_100G_AUI2:
speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
break;
+ case ICE_PHY_TYPE_HIGH_200G_CR4_PAM4:
+ case ICE_PHY_TYPE_HIGH_200G_SR4:
+ case ICE_PHY_TYPE_HIGH_200G_FR4:
+ case ICE_PHY_TYPE_HIGH_200G_LR4:
+ case ICE_PHY_TYPE_HIGH_200G_DR4:
+ case ICE_PHY_TYPE_HIGH_200G_KR4_PAM4:
+ case ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC:
+ case ICE_PHY_TYPE_HIGH_200G_AUI4:
+ speed_phy_type_high = ICE_AQ_LINK_SPEED_200GB;
+ break;
default:
speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
break;
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
index 2e9ad27cb9d1..6e8f2aab6080 100644
--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
@@ -45,14 +45,15 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
return -EINVAL;
err = ice_fltr_add_vlan(vsi, vlan);
- if (err && err != -EEXIST) {
+ if (!err)
+ vsi->num_vlan++;
+ else if (err == -EEXIST)
+ err = 0;
+ else
dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n",
vlan->vid, vsi->vsi_num, err);
- return err;
- }
- vsi->num_vlan++;
- return 0;
+ return err;
}
/**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 52ceda6306a3..f1ee5584e8fa 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1394,6 +1394,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res)
}
idpf_rx_init_buf_tail(vport);
+ idpf_vport_intr_ena(vport);
err = idpf_send_config_queues_msg(vport);
if (err) {
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 285da2177ee4..b023704bbbda 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3746,9 +3746,9 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport)
*/
void idpf_vport_intr_deinit(struct idpf_vport *vport)
{
+ idpf_vport_intr_dis_irq_all(vport);
idpf_vport_intr_napi_dis_all(vport);
idpf_vport_intr_napi_del_all(vport);
- idpf_vport_intr_dis_irq_all(vport);
idpf_vport_intr_rel_irq(vport);
}
@@ -4179,7 +4179,6 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
idpf_vport_intr_map_vector_to_qs(vport);
idpf_vport_intr_napi_add_all(vport);
- idpf_vport_intr_napi_ena_all(vport);
err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport);
if (err)
@@ -4193,17 +4192,20 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
if (err)
goto unroll_vectors_alloc;
- idpf_vport_intr_ena_irq_all(vport);
-
return 0;
unroll_vectors_alloc:
- idpf_vport_intr_napi_dis_all(vport);
idpf_vport_intr_napi_del_all(vport);
return err;
}
+void idpf_vport_intr_ena(struct idpf_vport *vport)
+{
+ idpf_vport_intr_napi_ena_all(vport);
+ idpf_vport_intr_ena_irq_all(vport);
+}
+
/**
* idpf_config_rss - Send virtchnl messages to configure RSS
* @vport: virtual port
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 3d046b81e507..551391e20464 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -990,6 +990,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport);
void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
void idpf_vport_intr_deinit(struct idpf_vport *vport);
int idpf_vport_intr_init(struct idpf_vport *vport);
+void idpf_vport_intr_ena(struct idpf_vport *vport);
enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded);
int idpf_config_rss(struct idpf_vport *vport);
int idpf_init_rss(struct idpf_vport *vport);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 070711df612e..edac008099c0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -1422,7 +1422,10 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
otx2_qos_read_txschq_cfg(pfvf, node, old_cfg);
/* delete the txschq nodes allocated for this node */
+ otx2_qos_disable_sq(pfvf, qid);
+ otx2_qos_free_hw_node_schq(pfvf, node);
otx2_qos_free_sw_node_schq(pfvf, node);
+ pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
/* mark this node as htb inner node */
WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER);
@@ -1632,6 +1635,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force
dwrr_del_node = true;
/* destroy the leaf node */
+ otx2_qos_disable_sq(pfvf, qid);
otx2_qos_destroy_node(pfvf, node);
pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index caa34b9c161e..33e32584b07f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -102,8 +102,14 @@ static inline void
mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
{
int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
+ struct udphdr *udphdr;
- udp_hdr(skb)->len = htons(payload_len);
+ if (skb->encapsulation)
+ udphdr = (struct udphdr *)skb_inner_transport_header(skb);
+ else
+ udphdr = udp_hdr(skb);
+
+ udphdr->len = htons(payload_len);
}
struct mlx5e_accel_tx_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 41a2543a52cd..e51b03d4c717 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -750,8 +750,7 @@ err_fs:
err_fs_ft:
if (rx->allow_tunnel_mode)
mlx5_eswitch_unblock_encap(mdev);
- mlx5_del_flow_rules(rx->status.rule);
- mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
+ mlx5_ipsec_rx_status_destroy(ipsec, rx);
err_add:
mlx5_destroy_flow_table(rx->ft.status);
err_fs_ft_status:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 82064614846f..359050f0b54d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -97,18 +97,11 @@ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
if (!x || !x->xso.offload_handle)
goto out_disable;
- if (xo->inner_ipproto) {
- /* Cannot support tunnel packet over IPsec tunnel mode
- * because we cannot offload three IP header csum
- */
- if (x->props.mode == XFRM_MODE_TUNNEL)
- goto out_disable;
-
- /* Only support UDP or TCP L4 checksum */
- if (xo->inner_ipproto != IPPROTO_UDP &&
- xo->inner_ipproto != IPPROTO_TCP)
- goto out_disable;
- }
+ /* Only support UDP or TCP L4 checksum */
+ if (xo->inner_ipproto &&
+ xo->inner_ipproto != IPPROTO_UDP &&
+ xo->inner_ipproto != IPPROTO_TCP)
+ goto out_disable;
return features;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b758bc72ac36..c53c99dde558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3886,7 +3886,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
mlx5e_fold_sw_stats64(priv, stats);
}
- stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
+ stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer;
stats->rx_length_errors =
PPORT_802_3_GET(pstats, a_in_range_length_errors) +
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index e211c41cec06..e1ed214e8651 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -1186,6 +1186,9 @@ void mlx5e_stats_ts_get(struct mlx5e_priv *priv,
ts_stats->err = 0;
ts_stats->lost = 0;
+ if (!ptp)
+ goto out;
+
/* Aggregate stats across all TCs */
for (i = 0; i < ptp->num_tc; i++) {
struct mlx5e_ptp_cq_stats *stats =
@@ -1214,6 +1217,7 @@ void mlx5e_stats_ts_get(struct mlx5e_priv *priv,
}
}
+out:
mutex_unlock(&priv->state_lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 099bf1078889..b09e9abd39f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -153,7 +153,11 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
*hopbyhop = 0;
if (skb->encapsulation) {
- ihs = skb_inner_tcp_all_headers(skb);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ ihs = skb_inner_transport_offset(skb) +
+ sizeof(struct udphdr);
+ else
+ ihs = skb_inner_tcp_all_headers(skb);
stats->tso_inner_packets++;
stats->tso_inner_bytes += skb->len - ihs;
} else {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index f7f0476a4a58..d0871c46b8c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -719,6 +719,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
struct mlx5_core_dev *dev;
u8 mode;
#endif
+ bool roce_support;
int i;
for (i = 0; i < ldev->ports; i++)
@@ -743,6 +744,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
return false;
#endif
+ roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev);
+ for (i = 1; i < ldev->ports; i++)
+ if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
+ return false;
+
return true;
}
@@ -910,8 +916,10 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
} else if (roce_lag) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
- for (i = 1; i < ldev->ports; i++)
- mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+ for (i = 1; i < ldev->ports; i++) {
+ if (mlx5_get_roce_state(ldev->pf[i].dev))
+ mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+ }
} else if (shared_fdb) {
int i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index dd5d186dc614..f6deb5a3f820 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -100,10 +100,6 @@ static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
{
- /* Feature is currently implemented for PFs only */
- if (!mlx5_core_is_pf(dev))
- return false;
-
/* Honor the SW implementation limit */
if (host_buses > MLX5_SD_MAX_GROUP_SZ)
return false;
@@ -162,6 +158,14 @@ static int sd_init(struct mlx5_core_dev *dev)
bool sdm;
int err;
+ /* Feature is currently implemented for PFs only */
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ /* Block on embedded CPU PFs */
+ if (mlx5_core_is_ecpf(dev))
+ return 0;
+
if (!MLX5_CAP_MCAM_REG(dev, mpir))
return 0;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c
index 79ba47bb3602..f7d21da1a0fb 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c
@@ -455,7 +455,7 @@ void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr)
{
const u8 mask_addr[] = { 0, 0, 0, 0, 0, 0, };
- rx_class_ft1_set_start_len(miig_rt, slice, 0, 6);
+ rx_class_ft1_set_start_len(miig_rt, slice, ETH_ALEN, ETH_ALEN);
rx_class_ft1_set_da(miig_rt, slice, 0, mac_addr);
rx_class_ft1_set_da_mask(miig_rt, slice, 0, mask_addr);
rx_class_ft1_cfg_set_type(miig_rt, slice, 0, FT1_CFG_TYPE_EQ);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 2d5b021b4ea6..fef4eff7753a 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -439,7 +439,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- err = ip_local_out(net, skb->sk, skb);
+ err = ip_local_out(net, NULL, skb);
if (unlikely(net_xmit_eval(err)))
DEV_STATS_INC(dev, tx_errors);
else
@@ -494,7 +494,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- err = ip6_local_out(dev_net(dev), skb->sk, skb);
+ err = ip6_local_out(dev_net(dev), NULL, skb);
if (unlikely(net_xmit_eval(err)))
DEV_STATS_INC(dev, tx_errors);
else
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index a4d2e76a8d58..16789cd446e9 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -55,6 +55,7 @@ static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
skb_scrub_packet(skb, xnet);
skb->priority = 0;
nf_skip_egress(skb, true);
+ skb_reset_mac_header(skb);
}
static struct netkit *netkit_priv(const struct net_device *dev)
@@ -78,6 +79,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
+ eth_skb_pkt_type(skb, peer);
skb->dev = peer;
entry = rcu_dereference(nk->active);
if (entry)
@@ -85,7 +87,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
switch (ret) {
case NETKIT_NEXT:
case NETKIT_PASS:
- skb->protocol = eth_type_trans(skb, skb->dev);
+ eth_skb_pull_mac(skb);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
dev_sw_netstats_tx_add(dev, 1, len);
@@ -155,6 +157,16 @@ static void netkit_set_multicast(struct net_device *dev)
/* Nothing to do, we receive whatever gets pushed to us! */
}
+static int netkit_set_macaddr(struct net_device *dev, void *sa)
+{
+ struct netkit *nk = netkit_priv(dev);
+
+ if (nk->mode != NETKIT_L2)
+ return -EOPNOTSUPP;
+
+ return eth_mac_addr(dev, sa);
+}
+
static void netkit_set_headroom(struct net_device *dev, int headroom)
{
struct netkit *nk = netkit_priv(dev), *nk2;
@@ -198,6 +210,7 @@ static const struct net_device_ops netkit_netdev_ops = {
.ndo_start_xmit = netkit_xmit,
.ndo_set_rx_mode = netkit_set_multicast,
.ndo_set_rx_headroom = netkit_set_headroom,
+ .ndo_set_mac_address = netkit_set_macaddr,
.ndo_get_iflink = netkit_get_iflink,
.ndo_get_peer_dev = netkit_peer_dev,
.ndo_get_stats64 = netkit_get_stats,
@@ -300,9 +313,11 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
if (!attr)
return 0;
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "Setting Ethernet address is not supported");
- return -EOPNOTSUPP;
+ if (nla_len(attr) != ETH_ALEN)
+ return -EINVAL;
+ if (!is_valid_ether_addr(nla_data(attr)))
+ return -EADDRNOTAVAIL;
+ return 0;
}
static struct rtnl_link_ops netkit_link_ops;
@@ -365,6 +380,9 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
strscpy(ifname, "nk%d", IFNAMSIZ);
ifname_assign_type = NET_NAME_ENUM;
}
+ if (mode != NETKIT_L2 &&
+ (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
+ return -EOPNOTSUPP;
net = rtnl_link_get_net(src_net, tbp);
if (IS_ERR(net))
@@ -379,7 +397,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
netif_inherit_tso_max(peer, dev);
- if (mode == NETKIT_L2)
+ if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
eth_hw_addr_random(peer);
if (ifmp && dev->ifindex)
peer->ifindex = ifmp->ifi_index;
@@ -402,7 +420,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
if (err < 0)
goto err_configure_peer;
- if (mode == NETKIT_L2)
+ if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
if (tb[IFLA_IFNAME])
nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 13e30ea7eec5..2b8f8b7f1517 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -4029,7 +4029,7 @@ static int lan8841_config_intr(struct phy_device *phydev)
if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
err = phy_read(phydev, LAN8814_INTS);
- if (err)
+ if (err < 0)
return err;
/* Enable / disable interrupts. It is OK to enable PTP interrupt
@@ -4045,6 +4045,14 @@ static int lan8841_config_intr(struct phy_device *phydev)
return err;
err = phy_read(phydev, LAN8814_INTS);
+ if (err < 0)
+ return err;
+
+ /* Getting a positive value doesn't mean that is an error, it
+ * just indicates what was the status. Therefore make sure to
+ * clear the value and say that there is no error.
+ */
+ err = 0;
}
return err;
@@ -5327,6 +5335,7 @@ static struct phy_driver ksphy_driver[] = {
/* PHY_BASIC_FEATURES */
.probe = kszphy_probe,
.config_init = ksz8061_config_init,
+ .soft_reset = genphy_soft_reset,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
.suspend = kszphy_suspend,
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index cbea24666479..8e82184be5e7 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -879,7 +879,7 @@ static int smsc95xx_start_rx_path(struct usbnet *dev)
static int smsc95xx_reset(struct usbnet *dev)
{
struct smsc95xx_priv *pdata = dev->driver_priv;
- u32 read_buf, write_buf, burst_cap;
+ u32 read_buf, burst_cap;
int ret = 0, timeout;
netif_dbg(dev, ifup, dev->net, "entering smsc95xx_reset\n");
@@ -1003,10 +1003,13 @@ static int smsc95xx_reset(struct usbnet *dev)
return ret;
netif_dbg(dev, ifup, dev->net, "ID_REV = 0x%08x\n", read_buf);
+ ret = smsc95xx_read_reg(dev, LED_GPIO_CFG, &read_buf);
+ if (ret < 0)
+ return ret;
/* Configure GPIO pins as LED outputs */
- write_buf = LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
- LED_GPIO_CFG_FDX_LED;
- ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, write_buf);
+ read_buf |= LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
+ LED_GPIO_CFG_FDX_LED;
+ ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, read_buf);
if (ret < 0)
return ret;
diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c
index 590b038e449e..6b89d596ba9a 100644
--- a/drivers/nfc/virtual_ncidev.c
+++ b/drivers/nfc/virtual_ncidev.c
@@ -125,6 +125,10 @@ static ssize_t virtual_ncidev_write(struct file *file,
kfree_skb(skb);
return -EFAULT;
}
+ if (strnlen(skb->data, count) != count) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
nci_recv_frame(vdev->ndev, skb);
return count;
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 0ec952b5d03e..1953317541ea 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -515,6 +515,7 @@ config THINKPAD_ACPI
select NVRAM
select NEW_LEDS
select LEDS_CLASS
+ select INPUT_SPARSEKMAP
help
This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
support for Fn-Fx key combinations, Bluetooth control, video
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
index 7bac7841ff0a..7fa360073f6e 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
@@ -1610,8 +1610,8 @@ void tpmi_sst_dev_remove(struct auxiliary_device *auxdev)
tpmi_sst->partition_mask_current &= ~BIT(plat_info->partition);
/* Free the package instance when the all partitions are removed */
if (!tpmi_sst->partition_mask_current) {
- kfree(tpmi_sst);
isst_common.sst_inst[tpmi_sst->package_id] = NULL;
+ kfree(tpmi_sst);
}
mutex_unlock(&isst_tpmi_dev_lock);
}
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index c6a10ec2c83f..2d9ca2292ea1 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -9,10 +9,13 @@
*/
#include <linux/acpi.h>
+#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/efi_embedded_fw.h>
#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kstrtox.h>
#include <linux/notifier.h>
#include <linux/property.h>
#include <linux/string.h>
@@ -897,6 +900,22 @@ static const struct ts_dmi_data schneider_sct101ctm_data = {
.properties = schneider_sct101ctm_props,
};
+static const struct property_entry globalspace_solt_ivw116_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 7),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 22),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1723),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1077),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-globalspace-solt-ivw116.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data globalspace_solt_ivw116_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = globalspace_solt_ivw116_props,
+};
+
static const struct property_entry techbite_arc_11_6_props[] = {
PROPERTY_ENTRY_U32("touchscreen-min-x", 5),
PROPERTY_ENTRY_U32("touchscreen-min-y", 7),
@@ -1386,6 +1405,17 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Jumper EZpad 6s Pro */
+ .driver_data = (void *)&jumper_ezpad_6_pro_b_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Jumper"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Ezpad"),
+ /* Above matches are too generic, add bios match */
+ DMI_MATCH(DMI_BIOS_VERSION, "E.WSA116_8.E1.042.bin"),
+ DMI_MATCH(DMI_BIOS_DATE, "01/08/2020"),
+ },
+ },
+ {
/* Jumper EZpad 6 m4 */
.driver_data = (void *)&jumper_ezpad_6_m4_data,
.matches = {
@@ -1625,6 +1655,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* GlobalSpace SoLT IVW 11.6" */
+ .driver_data = (void *)&globalspace_solt_ivw116_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Globalspace Tech Pvt Ltd"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "SolTIVW"),
+ DMI_MATCH(DMI_PRODUCT_SKU, "PN20170413488"),
+ },
+ },
+ {
/* Techbite Arc 11.6 */
.driver_data = (void *)&techbite_arc_11_6_data,
.matches = {
@@ -1817,7 +1856,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
{ }
};
-static const struct ts_dmi_data *ts_data;
+static struct ts_dmi_data *ts_data;
static void ts_dmi_add_props(struct i2c_client *client)
{
@@ -1852,6 +1891,64 @@ static int ts_dmi_notifier_call(struct notifier_block *nb,
return 0;
}
+#define MAX_CMDLINE_PROPS 16
+
+static struct property_entry ts_cmdline_props[MAX_CMDLINE_PROPS + 1];
+
+static struct ts_dmi_data ts_cmdline_data = {
+ .properties = ts_cmdline_props,
+};
+
+static int __init ts_parse_props(char *str)
+{
+ /* Save the original str to show it on syntax errors */
+ char orig_str[256];
+ char *name, *value;
+ u32 u32val;
+ int i, ret;
+
+ strscpy(orig_str, str, sizeof(orig_str));
+
+ /*
+ * str is part of the static_command_line from init/main.c and poking
+ * holes in that by writing 0 to it is allowed, as is taking long
+ * lasting references to it.
+ */
+ ts_cmdline_data.acpi_name = strsep(&str, ":");
+
+ for (i = 0; i < MAX_CMDLINE_PROPS; i++) {
+ name = strsep(&str, ":");
+ if (!name || !name[0])
+ break;
+
+ /* Replace '=' with 0 and make value point past '=' or NULL */
+ value = name;
+ strsep(&value, "=");
+ if (!value) {
+ ts_cmdline_props[i] = PROPERTY_ENTRY_BOOL(name);
+ } else if (isdigit(value[0])) {
+ ret = kstrtou32(value, 0, &u32val);
+ if (ret)
+ goto syntax_error;
+
+ ts_cmdline_props[i] = PROPERTY_ENTRY_U32(name, u32val);
+ } else {
+ ts_cmdline_props[i] = PROPERTY_ENTRY_STRING(name, value);
+ }
+ }
+
+ if (!i || str)
+ goto syntax_error;
+
+ ts_data = &ts_cmdline_data;
+ return 1;
+
+syntax_error:
+ pr_err("Invalid '%s' value for 'i2c_touchscreen_props='\n", orig_str);
+ return 1; /* "i2c_touchscreen_props=" is still a known parameter */
+}
+__setup("i2c_touchscreen_props=", ts_parse_props);
+
static struct notifier_block ts_dmi_notifier = {
.notifier_call = ts_dmi_notifier_call,
};
@@ -1859,13 +1956,25 @@ static struct notifier_block ts_dmi_notifier = {
static int __init ts_dmi_init(void)
{
const struct dmi_system_id *dmi_id;
+ struct ts_dmi_data *ts_data_dmi;
int error;
dmi_id = dmi_first_match(touchscreen_dmi_table);
- if (!dmi_id)
+ ts_data_dmi = dmi_id ? dmi_id->driver_data : NULL;
+
+ if (ts_data) {
+ /*
+ * Kernel cmdline provided data takes precedence, copy over
+ * DMI efi_embedded_fw info if available.
+ */
+ if (ts_data_dmi)
+ ts_data->embedded_fw = ts_data_dmi->embedded_fw;
+ } else if (ts_data_dmi) {
+ ts_data = ts_data_dmi;
+ } else {
return 0; /* Not an error */
+ }
- ts_data = dmi_id->driver_data;
/* Some dmi table entries only provide an efi_embedded_fw_desc */
if (!ts_data->properties)
return 0;
diff --git a/drivers/platform/x86/x86-android-tablets/Kconfig b/drivers/platform/x86/x86-android-tablets/Kconfig
index 6603461d4273..b591419de80c 100644
--- a/drivers/platform/x86/x86-android-tablets/Kconfig
+++ b/drivers/platform/x86/x86-android-tablets/Kconfig
@@ -6,6 +6,8 @@
config X86_ANDROID_TABLETS
tristate "X86 Android tablet support"
depends on I2C && SPI && SERIAL_DEV_BUS && ACPI && EFI && GPIOLIB && PMIC_OPREGION
+ select NEW_LEDS
+ select LEDS_CLASS
help
X86 tablets which ship with Android as (part of) the factory image
typically have various problems with their DSDTs. The factory kernels
diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c
index 4b828d74a606..856eaac0ec14 100644
--- a/drivers/pmdomain/imx/gpcv2.c
+++ b/drivers/pmdomain/imx/gpcv2.c
@@ -393,6 +393,17 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
* automatically there. Just add a delay and suppose the handshake finish
* after that.
*/
+
+ /*
+ * For some BLK-CTL module (eg. AudioMix on i.MX8MP) doesn't have BUS
+ * clk-en bit, it is better to add delay here, as the BLK-CTL module
+ * doesn't need to care about how it is powered up.
+ *
+ * regmap_read_bypassed() is to make sure the above write IO transaction
+ * already reaches target before udelay()
+ */
+ regmap_read_bypassed(domain->regmap, domain->regs->hsk, &reg_val);
+ udelay(5);
}
/* Disable reset clocks for all devices in the domain */
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f16f73581634..01338d4c2d9e 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -48,12 +48,17 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
static void v9fs_dentry_release(struct dentry *dentry)
{
struct hlist_node *p, *n;
+ struct hlist_head head;
p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
dentry, dentry);
- hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
+
+ spin_lock(&dentry->d_lock);
+ hlist_move_list((struct hlist_head *)&dentry->d_fsdata, &head);
+ spin_unlock(&dentry->d_lock);
+
+ hlist_for_each_safe(p, n, &head)
p9_fid_put(hlist_entry(p, struct p9_fid, dlist));
- dentry->d_fsdata = NULL;
}
static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7a3308d77606..fd72fc38c8f5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -348,6 +348,7 @@ void v9fs_evict_inode(struct inode *inode)
__le32 __maybe_unused version;
if (!is_bad_inode(inode)) {
+ netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
version = cpu_to_le32(v9inode->qid.version);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 94fc049aff58..15bb7989c387 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -648,6 +648,7 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+ netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
afs_set_cache_aux(vnode, &aux);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 97f50e9fd9eb..297487ee8323 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -140,6 +140,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
put_page(page);
if (ret < 0)
return ret;
+
+ /* Don't cross a backup volume mountpoint from a backup volume */
+ if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+ ctx->type == AFSVL_BACKVOL)
+ return -ENODEV;
}
return 0;
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 692b1c7d5018..4321f9fb73bd 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -690,7 +690,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bpos bucket_pos;
+ struct bpos bucket_pos = POS_MIN;
struct bch_backpointer bp;
if (p.ptr.cached)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index bc0ea2c4efef..2a538eb2af11 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -457,6 +457,7 @@ enum bch_time_stats {
};
#include "alloc_types.h"
+#include "btree_gc_types.h"
#include "btree_types.h"
#include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h"
@@ -488,49 +489,6 @@ enum bch_time_stats {
struct btree;
-enum gc_phase {
- GC_PHASE_NOT_RUNNING,
- GC_PHASE_START,
- GC_PHASE_SB,
-
- GC_PHASE_BTREE_stripes,
- GC_PHASE_BTREE_extents,
- GC_PHASE_BTREE_inodes,
- GC_PHASE_BTREE_dirents,
- GC_PHASE_BTREE_xattrs,
- GC_PHASE_BTREE_alloc,
- GC_PHASE_BTREE_quotas,
- GC_PHASE_BTREE_reflink,
- GC_PHASE_BTREE_subvolumes,
- GC_PHASE_BTREE_snapshots,
- GC_PHASE_BTREE_lru,
- GC_PHASE_BTREE_freespace,
- GC_PHASE_BTREE_need_discard,
- GC_PHASE_BTREE_backpointers,
- GC_PHASE_BTREE_bucket_gens,
- GC_PHASE_BTREE_snapshot_trees,
- GC_PHASE_BTREE_deleted_inodes,
- GC_PHASE_BTREE_logged_ops,
- GC_PHASE_BTREE_rebalance_work,
- GC_PHASE_BTREE_subvolume_children,
-
- GC_PHASE_PENDING_DELETE,
-};
-
-struct gc_pos {
- enum gc_phase phase;
- u16 level;
- struct bpos pos;
-};
-
-struct reflink_gc {
- u64 offset;
- u32 size;
- u32 refcount;
-};
-
-typedef GENRADIX(struct reflink_gc) reflink_gc_table;
-
struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index d801e19cb489..90c12fe2a2cd 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -503,16 +503,22 @@ struct bch_sb_field {
#include "alloc_background_format.h"
#include "extents_format.h"
-#include "reflink_format.h"
#include "ec_format.h"
-#include "inode_format.h"
#include "dirent_format.h"
-#include "xattr_format.h"
-#include "quota_format.h"
+#include "disk_groups_format.h"
+#include "inode_format.h"
+#include "journal_seq_blacklist_format.h"
#include "logged_ops_format.h"
+#include "quota_format.h"
+#include "reflink_format.h"
+#include "replicas_format.h"
#include "snapshot_format.h"
#include "subvolume_format.h"
#include "sb-counters_format.h"
+#include "sb-downgrade_format.h"
+#include "sb-errors_format.h"
+#include "sb-members_format.h"
+#include "xattr_format.h"
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@@ -545,107 +551,6 @@ struct bch_sb_field_journal_v2 {
} d[];
};
-/* BCH_SB_FIELD_members_v1: */
-
-#define BCH_MIN_NR_NBUCKETS (1 << 6)
-
-#define BCH_IOPS_MEASUREMENTS() \
- x(seqread, 0) \
- x(seqwrite, 1) \
- x(randread, 2) \
- x(randwrite, 3)
-
-enum bch_iops_measurement {
-#define x(t, n) BCH_IOPS_##t = n,
- BCH_IOPS_MEASUREMENTS()
-#undef x
- BCH_IOPS_NR
-};
-
-#define BCH_MEMBER_ERROR_TYPES() \
- x(read, 0) \
- x(write, 1) \
- x(checksum, 2)
-
-enum bch_member_error_type {
-#define x(t, n) BCH_MEMBER_ERROR_##t = n,
- BCH_MEMBER_ERROR_TYPES()
-#undef x
- BCH_MEMBER_ERROR_NR
-};
-
-struct bch_member {
- __uuid_t uuid;
- __le64 nbuckets; /* device size */
- __le16 first_bucket; /* index of first bucket used */
- __le16 bucket_size; /* sectors */
- __u8 btree_bitmap_shift;
- __u8 pad[3];
- __le64 last_mount; /* time_t */
-
- __le64 flags;
- __le32 iops[4];
- __le64 errors[BCH_MEMBER_ERROR_NR];
- __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
- __le64 errors_reset_time;
- __le64 seq;
- __le64 btree_allocated_bitmap;
- /*
- * On recovery from a clean shutdown we don't normally read the journal,
- * but we still want to resume writing from where we left off so we
- * don't overwrite more than is necessary, for list journal debugging:
- */
- __le32 last_journal_bucket;
- __le32 last_journal_bucket_offset;
-};
-
-/*
- * This limit comes from the bucket_gens array - it's a single allocation, and
- * kernel allocation are limited to INT_MAX
- */
-#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64)
-
-#define BCH_MEMBER_V1_BYTES 56
-
-LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
-/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
-LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)
-LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20)
-LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
-LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
-LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
- struct bch_member, flags, 30, 31)
-
-#if 0
-LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
-LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
-#endif
-
-#define BCH_MEMBER_STATES() \
- x(rw, 0) \
- x(ro, 1) \
- x(failed, 2) \
- x(spare, 3)
-
-enum bch_member_state {
-#define x(t, n) BCH_MEMBER_STATE_##t = n,
- BCH_MEMBER_STATES()
-#undef x
- BCH_MEMBER_STATE_NR
-};
-
-struct bch_sb_field_members_v1 {
- struct bch_sb_field field;
- struct bch_member _members[]; //Members are now variable size
-};
-
-struct bch_sb_field_members_v2 {
- struct bch_sb_field field;
- __le16 member_bytes; //size of single member entry
- u8 pad[6];
- struct bch_member _members[];
-};
-
/* BCH_SB_FIELD_crypt: */
struct nonce {
@@ -694,8 +599,6 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
-/* BCH_SB_FIELD_replicas: */
-
#define BCH_DATA_TYPES() \
x(free, 0) \
x(sb, 1) \
@@ -738,50 +641,6 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
}
}
-struct bch_replicas_entry_v0 {
- __u8 data_type;
- __u8 nr_devs;
- __u8 devs[];
-} __packed;
-
-struct bch_sb_field_replicas_v0 {
- struct bch_sb_field field;
- struct bch_replicas_entry_v0 entries[];
-} __packed __aligned(8);
-
-struct bch_replicas_entry_v1 {
- __u8 data_type;
- __u8 nr_devs;
- __u8 nr_required;
- __u8 devs[];
-} __packed;
-
-#define replicas_entry_bytes(_i) \
- (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
-
-struct bch_sb_field_replicas {
- struct bch_sb_field field;
- struct bch_replicas_entry_v1 entries[];
-} __packed __aligned(8);
-
-/* BCH_SB_FIELD_disk_groups: */
-
-#define BCH_SB_LABEL_SIZE 32
-
-struct bch_disk_group {
- __u8 label[BCH_SB_LABEL_SIZE];
- __le64 flags[2];
-} __packed __aligned(8);
-
-LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
-LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
-LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
-
-struct bch_sb_field_disk_groups {
- struct bch_sb_field field;
- struct bch_disk_group entries[];
-} __packed __aligned(8);
-
/*
* On clean shutdown, store btree roots and current journal sequence number in
* the superblock:
@@ -809,27 +668,6 @@ struct bch_sb_field_clean {
__u64 _data[];
};
-struct journal_seq_blacklist_entry {
- __le64 start;
- __le64 end;
-};
-
-struct bch_sb_field_journal_seq_blacklist {
- struct bch_sb_field field;
- struct journal_seq_blacklist_entry start[];
-};
-
-struct bch_sb_field_errors {
- struct bch_sb_field field;
- struct bch_sb_field_error_entry {
- __le64 v;
- __le64 last_error_time;
- } entries[];
-};
-
-LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
-LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
-
struct bch_sb_field_ext {
struct bch_sb_field field;
__le64 recovery_passes_required[2];
@@ -837,18 +675,6 @@ struct bch_sb_field_ext {
__le64 btrees_lost_data;
};
-struct bch_sb_field_downgrade_entry {
- __le16 version;
- __le64 recovery_passes[2];
- __le16 nr_errors;
- __le16 errors[] __counted_by(nr_errors);
-} __packed __aligned(2);
-
-struct bch_sb_field_downgrade {
- struct bch_sb_field field;
- struct bch_sb_field_downgrade_entry entries[];
-};
-
/* Superblock: */
/*
@@ -909,7 +735,6 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
#define BCH_SB_SECTOR 8
-#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
#define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 8035c8b797ab..dc97991bcd6a 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -585,16 +585,17 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
bkey_version_in_future,
- "key version number higher than recorded: %llu > %llu",
- k.k->version.lo,
- atomic64_read(&c->key_version)))
+ "key version number higher than recorded %llu\n %s",
+ atomic64_read(&c->key_version),
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
atomic64_set(&c->key_version, k.k->version.lo);
}
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
c, btree_bitmap_not_marked,
"btree ptr not marked in member info btree allocated bitmap\n %s",
- (bch2_bkey_val_to_text(&buf, c, k),
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
mutex_lock(&c->sb_lock);
bch2_dev_btree_bitmap_mark(c, k);
@@ -673,8 +674,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{
- return (int) btree_id_to_gc_phase(l) -
- (int) btree_id_to_gc_phase(r);
+ return cmp_int(gc_btree_order(l), gc_btree_order(r));
}
static int bch2_gc_btrees(struct bch_fs *c)
@@ -711,7 +711,7 @@ fsck_err:
static int bch2_mark_superblocks(struct bch_fs *c)
{
mutex_lock(&c->sb_lock);
- gc_pos_set(c, gc_phase(GC_PHASE_SB));
+ gc_pos_set(c, gc_phase(GC_PHASE_sb));
int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc);
mutex_unlock(&c->sb_lock);
@@ -1209,7 +1209,7 @@ int bch2_check_allocations(struct bch_fs *c)
if (ret)
goto out;
- gc_pos_set(c, gc_phase(GC_PHASE_START));
+ gc_pos_set(c, gc_phase(GC_PHASE_start));
ret = bch2_mark_superblocks(c);
BUG_ON(ret);
@@ -1231,7 +1231,7 @@ out:
percpu_down_write(&c->mark_lock);
/* Indicates that gc is no longer in progress: */
- __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+ __gc_pos_set(c, gc_phase(GC_PHASE_not_running));
bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 1b6489d8e0f4..876d81e2017d 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -3,6 +3,7 @@
#define _BCACHEFS_BTREE_GC_H
#include "bkey.h"
+#include "btree_gc_types.h"
#include "btree_types.h"
int bch2_check_topology(struct bch_fs *);
@@ -32,36 +33,15 @@ int bch2_check_allocations(struct bch_fs *);
/* Position of (the start of) a gc phase: */
static inline struct gc_pos gc_phase(enum gc_phase phase)
{
- return (struct gc_pos) {
- .phase = phase,
- .level = 0,
- .pos = POS_MIN,
- };
-}
-
-static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
-{
- return cmp_int(l.phase, r.phase) ?:
- -cmp_int(l.level, r.level) ?:
- bpos_cmp(l.pos, r.pos);
-}
-
-static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
-{
- switch (id) {
-#define x(name, v, ...) case BTREE_ID_##name: return GC_PHASE_BTREE_##name;
- BCH_BTREE_IDS()
-#undef x
- default:
- BUG();
- }
+ return (struct gc_pos) { .phase = phase, };
}
static inline struct gc_pos gc_pos_btree(enum btree_id btree, unsigned level,
struct bpos pos)
{
return (struct gc_pos) {
- .phase = btree_id_to_gc_phase(btree),
+ .phase = GC_PHASE_btree,
+ .btree = btree,
.level = level,
.pos = pos,
};
@@ -76,6 +56,22 @@ static inline struct gc_pos gc_pos_btree_node(struct btree *b)
return gc_pos_btree(b->c.btree_id, b->c.level, b->key.k.p);
}
+static inline int gc_btree_order(enum btree_id btree)
+{
+ if (btree == BTREE_ID_stripes)
+ return -1;
+ return btree;
+}
+
+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
+{
+ return cmp_int(l.phase, r.phase) ?:
+ cmp_int(gc_btree_order(l.btree),
+ gc_btree_order(r.btree)) ?:
+ -cmp_int(l.level, r.level) ?:
+ bpos_cmp(l.pos, r.pos);
+}
+
static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
{
unsigned seq;
diff --git a/fs/bcachefs/btree_gc_types.h b/fs/bcachefs/btree_gc_types.h
new file mode 100644
index 000000000000..b82c24bcc088
--- /dev/null
+++ b/fs/bcachefs/btree_gc_types.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_GC_TYPES_H
+#define _BCACHEFS_BTREE_GC_TYPES_H
+
+#include <linux/generic-radix-tree.h>
+
+enum gc_phase {
+ GC_PHASE_not_running,
+ GC_PHASE_start,
+ GC_PHASE_sb,
+ GC_PHASE_btree,
+};
+
+struct gc_pos {
+ enum gc_phase phase:8;
+ enum btree_id btree:8;
+ u16 level;
+ struct bpos pos;
+};
+
+struct reflink_gc {
+ u64 offset;
+ u32 size;
+ u32 refcount;
+};
+
+typedef GENRADIX(struct reflink_gc) reflink_gc_table;
+
+#endif /* _BCACHEFS_BTREE_GC_TYPES_H */
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index cbf8f5d90602..829c1b91477d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -519,7 +519,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca,
- struct btree *b, struct bset *i,
+ struct btree *b, struct bset *i, struct bkey_packed *k,
unsigned offset, int write)
{
prt_printf(out, bch2_log_msg(c, "%s"),
@@ -537,15 +537,20 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
b->written, btree_ptr_sectors_written(&b->key));
if (i)
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
+ if (k)
+ prt_printf(out, " bset byte offset %lu",
+ (unsigned long)(void *)k -
+ ((unsigned long)(void *)i & ~511UL));
prt_str(out, ": ");
}
-__printf(9, 10)
+__printf(10, 11)
static int __btree_err(int ret,
struct bch_fs *c,
struct bch_dev *ca,
struct btree *b,
struct bset *i,
+ struct bkey_packed *k,
int write,
bool have_retry,
enum bch_sb_error_id err_type,
@@ -555,7 +560,7 @@ static int __btree_err(int ret,
bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
va_list args;
- btree_err_msg(&out, c, ca, b, i, b->written, write);
+ btree_err_msg(&out, c, ca, b, i, k, b->written, write);
va_start(args, fmt);
prt_vprintf(&out, fmt, args);
@@ -611,9 +616,9 @@ fsck_err:
return ret;
}
-#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \
+#define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \
({ \
- int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \
+ int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \
BCH_FSCK_ERR_##_err_type, \
msg, ##__VA_ARGS__); \
\
@@ -690,7 +695,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bch2_version_compatible(version),
-BCH_ERR_btree_node_read_err_incompatible,
- c, ca, b, i,
+ c, ca, b, i, NULL,
btree_node_unsupported_version,
"unsupported bset version %u.%u",
BCH_VERSION_MAJOR(version),
@@ -698,7 +703,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
if (btree_err_on(version < c->sb.version_min,
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, NULL,
btree_node_bset_older_than_sb_min,
"bset version %u older than superblock version_min %u",
version, c->sb.version_min)) {
@@ -711,7 +716,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
if (btree_err_on(BCH_VERSION_MAJOR(version) >
BCH_VERSION_MAJOR(c->sb.version),
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, NULL,
btree_node_bset_newer_than_sb,
"bset version %u newer than superblock version %u",
version, c->sb.version)) {
@@ -723,13 +728,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
-BCH_ERR_btree_node_read_err_incompatible,
- c, ca, b, i,
+ c, ca, b, i, NULL,
btree_node_unsupported_version,
"BSET_SEPARATE_WHITEOUTS no longer supported");
if (btree_err_on(offset + sectors > btree_sectors(c),
-BCH_ERR_btree_node_read_err_fixable,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_past_end_of_btree_node,
"bset past end of btree node")) {
i->u64s = 0;
@@ -739,13 +744,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(offset && !i->u64s,
-BCH_ERR_btree_node_read_err_fixable,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_empty,
"empty bset");
btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset,
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_wrong_sector_offset,
"bset at wrong sector offset");
@@ -761,20 +766,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
/* XXX endianness */
btree_err_on(bp->seq != bn->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
bset_bad_seq,
"incorrect sequence number (wrong btree node)");
}
btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
btree_node_bad_btree,
"incorrect btree id");
btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
btree_node_bad_level,
"incorrect level");
@@ -793,7 +798,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bpos_eq(b->data->min_key, bp->min_key),
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_bad_min_key,
"incorrect min_key: got %s should be %s",
(printbuf_reset(&buf1),
@@ -804,7 +809,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bpos_eq(bn->max_key, b->key.k.p),
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
btree_node_bad_max_key,
"incorrect max key %s",
(printbuf_reset(&buf1),
@@ -816,7 +821,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1),
-BCH_ERR_btree_node_read_err_bad_node,
- c, ca, b, i,
+ c, ca, b, i, NULL,
btree_node_bad_format,
"invalid bkey format: %s\n %s", buf1.buf,
(printbuf_reset(&buf2),
@@ -883,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, k,
btree_node_bkey_past_bset_end,
"key extends past end of bset")) {
i->u64s = cpu_to_le16((u64 *) k - i->_data);
@@ -892,14 +897,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, k,
btree_node_bkey_bad_format,
"invalid bkey format %u", k->format))
goto drop_this_key;
if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, k,
btree_node_bkey_bad_u64s,
"bad k->u64s %u (min %u max %zu)", k->u64s,
bkeyp_key_u64s(&b->format, k),
@@ -921,7 +926,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
bch2_bkey_val_to_text(&buf, c, u.s_c);
btree_err(-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, k,
btree_node_bad_bkey,
"invalid bkey: %s", buf.buf);
goto drop_this_key;
@@ -942,7 +947,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
bch2_bkey_to_text(&buf, u.k);
if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, k,
btree_node_bkey_out_of_order,
"%s", buf.buf))
goto drop_this_key;
@@ -1011,13 +1016,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (bch2_meta_read_fault("btree"))
btree_err(-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_fault_injected,
"dynamic fault");
btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_bad_magic,
"bad magic: want %llx, got %llx",
bset_magic(c), le64_to_cpu(b->data->magic));
@@ -1032,7 +1037,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(b->data->keys.seq != bp->seq,
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_bad_seq,
"got wrong btree node: got\n%s",
(printbuf_reset(&buf),
@@ -1041,7 +1046,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
} else {
btree_err_on(!b->data->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_bad_seq,
"bad btree header: seq 0\n%s",
(printbuf_reset(&buf),
@@ -1060,7 +1065,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_unknown_csum,
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
@@ -1073,7 +1078,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_bad_csum,
"%s",
(printbuf_reset(&buf),
@@ -1088,7 +1093,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
-BCH_ERR_btree_node_read_err_incompatible,
- c, NULL, b, NULL,
+ c, NULL, b, NULL, NULL,
btree_node_unsupported_version,
"btree node does not have NEW_EXTENT_OVERWRITE set");
@@ -1102,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_unknown_csum,
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
@@ -1114,7 +1119,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_bad_csum,
"%s",
(printbuf_reset(&buf),
@@ -1152,14 +1157,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(blacklisted && first,
-BCH_ERR_btree_node_read_err_fixable,
- c, ca, b, i,
+ c, ca, b, i, NULL,
bset_blacklisted_journal_seq,
"first btree node bset has blacklisted journal seq (%llu)",
le64_to_cpu(i->journal_seq));
btree_err_on(blacklisted && ptr_written,
-BCH_ERR_btree_node_read_err_fixable,
- c, ca, b, i,
+ c, ca, b, i, NULL,
first_bset_blacklisted_journal_seq,
"found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
le64_to_cpu(i->journal_seq),
@@ -1178,7 +1183,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (ptr_written) {
btree_err_on(b->written < ptr_written,
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_data_missing,
"btree node data missing: expected %u sectors, found %u",
ptr_written, b->written);
@@ -1191,7 +1196,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
le64_to_cpu(bne->keys.journal_seq),
true),
-BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, NULL,
+ c, ca, b, NULL, NULL,
btree_node_bset_after_end,
"found bset signature after last bset");
}
@@ -1235,7 +1240,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bch2_bkey_val_to_text(&buf, c, u.s_c);
btree_err(-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, i,
+ c, NULL, b, i, k,
btree_node_bad_bkey,
"%s", buf.buf);
@@ -1471,18 +1476,18 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
written2 = btree_node_sectors_written(c, ra->buf[i]);
if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, NULL,
+ c, NULL, b, NULL, NULL,
btree_node_replicas_sectors_written_mismatch,
"btree node sectors written mismatch: %u != %u",
written, written2) ||
btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, NULL,
+ c, NULL, b, NULL, NULL,
btree_node_bset_after_end,
"found bset signature after last bset") ||
btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9),
-BCH_ERR_btree_node_read_err_fixable,
- c, NULL, b, NULL,
+ c, NULL, b, NULL, NULL,
btree_node_replicas_data_mismatch,
"btree node replicas content mismatch"))
dump_bset_maps = true;
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 75f5e6fe4634..34056aaece00 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -424,16 +424,16 @@ static int btree_key_cache_fill(struct btree_trans *trans,
goto err;
}
- if (!bch2_btree_node_relock(trans, ck_path, 0)) {
+ ret = bch2_trans_relock(trans);
+ if (ret) {
kfree(new_k);
- trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err;
}
- ret = bch2_trans_relock(trans);
- if (ret) {
+ if (!bch2_btree_node_relock(trans, ck_path, 0)) {
kfree(new_k);
+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err;
}
}
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index b469586517a8..ed97712d0db1 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1134,7 +1134,7 @@ static int __trigger_extent(struct btree_trans *trans,
r.e.nr_required = 1;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- s64 disk_sectors;
+ s64 disk_sectors = 0;
ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
if (ret < 0)
return ret;
diff --git a/fs/bcachefs/disk_groups_format.h b/fs/bcachefs/disk_groups_format.h
new file mode 100644
index 000000000000..698990bbf1d2
--- /dev/null
+++ b/fs/bcachefs/disk_groups_format.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DISK_GROUPS_FORMAT_H
+#define _BCACHEFS_DISK_GROUPS_FORMAT_H
+
+#define BCH_SB_LABEL_SIZE 32
+
+struct bch_disk_group {
+ __u8 label[BCH_SB_LABEL_SIZE];
+ __le64 flags[2];
+} __packed __aligned(8);
+
+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
+
+struct bch_sb_field_disk_groups {
+ struct bch_sb_field field;
+ struct bch_disk_group entries[];
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_DISK_GROUPS_FORMAT_H */
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index b26dc7424662..d8b9beca3776 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -908,7 +908,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
- if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
+ if (c->gc_pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 6b69e5cd68dd..54873ecc635c 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -437,8 +437,8 @@ static void bch2_writepage_io_done(struct bch_write_op *op)
*/
/*
- * PageWriteback is effectively our ref on the inode - fixup i_blocks
- * before calling end_page_writeback:
+ * The writeback flag is effectively our ref on the inode -
+ * fixup i_blocks before calling folio_end_writeback:
*/
bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
@@ -898,7 +898,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
darray_for_each(fs, fi) {
f = *fi;
f_len = min(end, folio_end_pos(f)) - f_pos;
- f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
+ f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter);
if (!f_copied) {
folios_trunc(&fs, fi);
break;
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 09d21aef879a..049b61bc9a5b 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -609,8 +609,10 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
if (unlikely(ret))
goto err_put_write_ref;
- if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
+ if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) {
+ ret = -EINVAL;
goto err_put_write_ref;
+ }
inode_dio_begin(&inode->v);
bch2_pagecache_block_get(inode);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 96040a95cf46..cd388f1702dc 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1939,8 +1939,7 @@ got_sb:
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
- ret = bch2_err_class(ret);
- return ERR_PTR(ret);
+ goto err;
}
c = sb->s_fs_info;
@@ -2016,6 +2015,15 @@ out:
err_put_super:
__bch2_fs_stop(c);
deactivate_locked_super(sb);
+err:
+ /*
+ * On an inconsistency error in recovery we might see an -EROFS derived
+ * errorcode (from the journal), but we don't want to return that to
+ * userspace as that causes util-linux to retry the mount RO - which is
+ * confusing:
+ */
+ if (bch2_err_matches(ret, EROFS) && ret != -EROFS)
+ ret = -EIO;
return ERR_PTR(bch2_err_class(ret));
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index c8f57465131c..fd277bd58ed3 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -77,21 +77,17 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
struct bkey_s_c k;
int ret;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
- POS(0, inode_nr),
- BTREE_ITER_all_snapshots);
- k = bch2_btree_iter_peek(&iter);
- ret = bkey_err(k);
- if (ret)
- goto err;
-
- if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
- ret = -BCH_ERR_ENOENT_inode;
- goto err;
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (k.k->p.offset != inode_nr)
+ break;
+ if (!bkey_is_inode(k.k))
+ continue;
+ ret = bch2_inode_unpack(k, inode);
+ goto found;
}
-
- ret = bch2_inode_unpack(k, inode);
-err:
+ ret = -BCH_ERR_ENOENT_inode;
+found:
bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -770,25 +766,6 @@ static int get_visible_inodes(struct btree_trans *trans,
return ret;
}
-static int check_key_has_snapshot(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k)
-{
- struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
- int ret = 0;
-
- if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
- bkey_in_missing_snapshot,
- "key in missing snapshot: %s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- ret = bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_internal_snapshot_node) ?: 1;
-fsck_err:
- printbuf_exit(&buf);
- return ret;
-}
-
static int hash_redo_key(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
@@ -983,7 +960,7 @@ static int check_inode(struct btree_trans *trans,
bool do_update = false;
int ret;
- ret = check_key_has_snapshot(trans, iter, k);
+ ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret < 0)
goto err;
if (ret)
@@ -1487,7 +1464,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
struct printbuf buf = PRINTBUF;
int ret = 0;
- ret = check_key_has_snapshot(trans, iter, k);
+ ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret) {
ret = ret < 0 ? ret : 0;
goto out;
@@ -2010,7 +1987,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct printbuf buf = PRINTBUF;
int ret = 0;
- ret = check_key_has_snapshot(trans, iter, k);
+ ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret) {
ret = ret < 0 ? ret : 0;
goto out;
@@ -2165,7 +2142,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
struct inode_walker_entry *i;
int ret;
- ret = check_key_has_snapshot(trans, iter, k);
+ ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret < 0)
return ret;
if (ret)
diff --git a/fs/bcachefs/journal_seq_blacklist_format.h b/fs/bcachefs/journal_seq_blacklist_format.h
new file mode 100644
index 000000000000..2566b12dbc04
--- /dev/null
+++ b/fs/bcachefs/journal_seq_blacklist_format.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H
+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H
+
+struct journal_seq_blacklist_entry {
+ __le64 start;
+ __le64 end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+ struct bch_sb_field field;
+ struct journal_seq_blacklist_entry start[];
+};
+
+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H */
diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c
index 4c298e74723d..e9d9c0212e44 100644
--- a/fs/bcachefs/mean_and_variance_test.c
+++ b/fs/bcachefs/mean_and_variance_test.c
@@ -217,4 +217,5 @@ static struct kunit_suite mean_and_variance_test_suite = {
kunit_test_suite(mean_and_variance_test_suite);
MODULE_AUTHOR("Daniel B. Hill");
+MODULE_DESCRIPTION("bcachefs filesystem mean and variance unit tests");
MODULE_LICENSE("GPL");
diff --git a/fs/bcachefs/replicas_format.h b/fs/bcachefs/replicas_format.h
new file mode 100644
index 000000000000..b97208195d06
--- /dev/null
+++ b/fs/bcachefs/replicas_format.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REPLICAS_FORMAT_H
+#define _BCACHEFS_REPLICAS_FORMAT_H
+
+struct bch_replicas_entry_v0 {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 devs[];
+} __packed;
+
+struct bch_sb_field_replicas_v0 {
+ struct bch_sb_field field;
+ struct bch_replicas_entry_v0 entries[];
+} __packed __aligned(8);
+
+struct bch_replicas_entry_v1 {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 nr_required;
+ __u8 devs[];
+} __packed;
+
+struct bch_sb_field_replicas {
+ struct bch_sb_field field;
+ struct bch_replicas_entry_v1 entries[];
+} __packed __aligned(8);
+
+#define replicas_entry_bytes(_i) \
+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
+
+#endif /* _BCACHEFS_REPLICAS_FORMAT_H */
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 390a1bbd2567..3fb23e399ffb 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -146,10 +146,17 @@ static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
for (const struct bch_sb_field_downgrade_entry *i = e->entries;
(void *) i < vstruct_end(&e->field);
i = downgrade_entry_next_c(i)) {
+ /*
+ * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
+ * section sizes are 8 byte aligned - an empty entry spanning
+ * the end of the section is allowed (and ignored):
+ */
+ if ((void *) &i->errors[0] > vstruct_end(&e->field))
+ break;
+
if (flags & BCH_VALIDATE_write &&
- ((void *) &i->errors[0] > vstruct_end(&e->field) ||
- (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field))) {
- prt_printf(err, "downgrade entry overruns end of superblock section)");
+ (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
+ prt_printf(err, "downgrade entry overruns end of superblock section");
return -BCH_ERR_invalid_sb_downgrade;
}
diff --git a/fs/bcachefs/sb-downgrade_format.h b/fs/bcachefs/sb-downgrade_format.h
new file mode 100644
index 000000000000..cffd932be3ec
--- /dev/null
+++ b/fs/bcachefs/sb-downgrade_format.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_FORMAT_H
+#define _BCACHEFS_SB_DOWNGRADE_FORMAT_H
+
+struct bch_sb_field_downgrade_entry {
+ __le16 version;
+ __le64 recovery_passes[2];
+ __le16 nr_errors;
+ __le16 errors[] __counted_by(nr_errors);
+} __packed __aligned(2);
+
+struct bch_sb_field_downgrade {
+ struct bch_sb_field field;
+ struct bch_sb_field_downgrade_entry entries[];
+};
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_FORMAT_H */
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
new file mode 100644
index 000000000000..84d2763bd597
--- /dev/null
+++ b/fs/bcachefs/sb-errors_format.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
+#define _BCACHEFS_SB_ERRORS_FORMAT_H
+
+#define BCH_SB_ERRS() \
+ x(clean_but_journal_not_empty, 0) \
+ x(dirty_but_no_journal_entries, 1) \
+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
+ x(sb_clean_journal_seq_mismatch, 3) \
+ x(sb_clean_btree_root_mismatch, 4) \
+ x(sb_clean_missing, 5) \
+ x(jset_unsupported_version, 6) \
+ x(jset_unknown_csum, 7) \
+ x(jset_last_seq_newer_than_seq, 8) \
+ x(jset_past_bucket_end, 9) \
+ x(jset_seq_blacklisted, 10) \
+ x(journal_entries_missing, 11) \
+ x(journal_entry_replicas_not_marked, 12) \
+ x(journal_entry_past_jset_end, 13) \
+ x(journal_entry_replicas_data_mismatch, 14) \
+ x(journal_entry_bkey_u64s_0, 15) \
+ x(journal_entry_bkey_past_end, 16) \
+ x(journal_entry_bkey_bad_format, 17) \
+ x(journal_entry_bkey_invalid, 18) \
+ x(journal_entry_btree_root_bad_size, 19) \
+ x(journal_entry_blacklist_bad_size, 20) \
+ x(journal_entry_blacklist_v2_bad_size, 21) \
+ x(journal_entry_blacklist_v2_start_past_end, 22) \
+ x(journal_entry_usage_bad_size, 23) \
+ x(journal_entry_data_usage_bad_size, 24) \
+ x(journal_entry_clock_bad_size, 25) \
+ x(journal_entry_clock_bad_rw, 26) \
+ x(journal_entry_dev_usage_bad_size, 27) \
+ x(journal_entry_dev_usage_bad_dev, 28) \
+ x(journal_entry_dev_usage_bad_pad, 29) \
+ x(btree_node_unreadable, 30) \
+ x(btree_node_fault_injected, 31) \
+ x(btree_node_bad_magic, 32) \
+ x(btree_node_bad_seq, 33) \
+ x(btree_node_unsupported_version, 34) \
+ x(btree_node_bset_older_than_sb_min, 35) \
+ x(btree_node_bset_newer_than_sb, 36) \
+ x(btree_node_data_missing, 37) \
+ x(btree_node_bset_after_end, 38) \
+ x(btree_node_replicas_sectors_written_mismatch, 39) \
+ x(btree_node_replicas_data_mismatch, 40) \
+ x(bset_unknown_csum, 41) \
+ x(bset_bad_csum, 42) \
+ x(bset_past_end_of_btree_node, 43) \
+ x(bset_wrong_sector_offset, 44) \
+ x(bset_empty, 45) \
+ x(bset_bad_seq, 46) \
+ x(bset_blacklisted_journal_seq, 47) \
+ x(first_bset_blacklisted_journal_seq, 48) \
+ x(btree_node_bad_btree, 49) \
+ x(btree_node_bad_level, 50) \
+ x(btree_node_bad_min_key, 51) \
+ x(btree_node_bad_max_key, 52) \
+ x(btree_node_bad_format, 53) \
+ x(btree_node_bkey_past_bset_end, 54) \
+ x(btree_node_bkey_bad_format, 55) \
+ x(btree_node_bad_bkey, 56) \
+ x(btree_node_bkey_out_of_order, 57) \
+ x(btree_root_bkey_invalid, 58) \
+ x(btree_root_read_error, 59) \
+ x(btree_root_bad_min_key, 60) \
+ x(btree_root_bad_max_key, 61) \
+ x(btree_node_read_error, 62) \
+ x(btree_node_topology_bad_min_key, 63) \
+ x(btree_node_topology_bad_max_key, 64) \
+ x(btree_node_topology_overwritten_by_prev_node, 65) \
+ x(btree_node_topology_overwritten_by_next_node, 66) \
+ x(btree_node_topology_interior_node_empty, 67) \
+ x(fs_usage_hidden_wrong, 68) \
+ x(fs_usage_btree_wrong, 69) \
+ x(fs_usage_data_wrong, 70) \
+ x(fs_usage_cached_wrong, 71) \
+ x(fs_usage_reserved_wrong, 72) \
+ x(fs_usage_persistent_reserved_wrong, 73) \
+ x(fs_usage_nr_inodes_wrong, 74) \
+ x(fs_usage_replicas_wrong, 75) \
+ x(dev_usage_buckets_wrong, 76) \
+ x(dev_usage_sectors_wrong, 77) \
+ x(dev_usage_fragmented_wrong, 78) \
+ x(dev_usage_buckets_ec_wrong, 79) \
+ x(bkey_version_in_future, 80) \
+ x(bkey_u64s_too_small, 81) \
+ x(bkey_invalid_type_for_btree, 82) \
+ x(bkey_extent_size_zero, 83) \
+ x(bkey_extent_size_greater_than_offset, 84) \
+ x(bkey_size_nonzero, 85) \
+ x(bkey_snapshot_nonzero, 86) \
+ x(bkey_snapshot_zero, 87) \
+ x(bkey_at_pos_max, 88) \
+ x(bkey_before_start_of_btree_node, 89) \
+ x(bkey_after_end_of_btree_node, 90) \
+ x(bkey_val_size_nonzero, 91) \
+ x(bkey_val_size_too_small, 92) \
+ x(alloc_v1_val_size_bad, 93) \
+ x(alloc_v2_unpack_error, 94) \
+ x(alloc_v3_unpack_error, 95) \
+ x(alloc_v4_val_size_bad, 96) \
+ x(alloc_v4_backpointers_start_bad, 97) \
+ x(alloc_key_data_type_bad, 98) \
+ x(alloc_key_empty_but_have_data, 99) \
+ x(alloc_key_dirty_sectors_0, 100) \
+ x(alloc_key_data_type_inconsistency, 101) \
+ x(alloc_key_to_missing_dev_bucket, 102) \
+ x(alloc_key_cached_inconsistency, 103) \
+ x(alloc_key_cached_but_read_time_zero, 104) \
+ x(alloc_key_to_missing_lru_entry, 105) \
+ x(alloc_key_data_type_wrong, 106) \
+ x(alloc_key_gen_wrong, 107) \
+ x(alloc_key_dirty_sectors_wrong, 108) \
+ x(alloc_key_cached_sectors_wrong, 109) \
+ x(alloc_key_stripe_wrong, 110) \
+ x(alloc_key_stripe_redundancy_wrong, 111) \
+ x(bucket_sector_count_overflow, 112) \
+ x(bucket_metadata_type_mismatch, 113) \
+ x(need_discard_key_wrong, 114) \
+ x(freespace_key_wrong, 115) \
+ x(freespace_hole_missing, 116) \
+ x(bucket_gens_val_size_bad, 117) \
+ x(bucket_gens_key_wrong, 118) \
+ x(bucket_gens_hole_wrong, 119) \
+ x(bucket_gens_to_invalid_dev, 120) \
+ x(bucket_gens_to_invalid_buckets, 121) \
+ x(bucket_gens_nonzero_for_invalid_buckets, 122) \
+ x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
+ x(need_discard_freespace_key_bad, 124) \
+ x(backpointer_bucket_offset_wrong, 125) \
+ x(backpointer_to_missing_device, 126) \
+ x(backpointer_to_missing_alloc, 127) \
+ x(backpointer_to_missing_ptr, 128) \
+ x(lru_entry_at_time_0, 129) \
+ x(lru_entry_to_invalid_bucket, 130) \
+ x(lru_entry_bad, 131) \
+ x(btree_ptr_val_too_big, 132) \
+ x(btree_ptr_v2_val_too_big, 133) \
+ x(btree_ptr_has_non_ptr, 134) \
+ x(extent_ptrs_invalid_entry, 135) \
+ x(extent_ptrs_no_ptrs, 136) \
+ x(extent_ptrs_too_many_ptrs, 137) \
+ x(extent_ptrs_redundant_crc, 138) \
+ x(extent_ptrs_redundant_stripe, 139) \
+ x(extent_ptrs_unwritten, 140) \
+ x(extent_ptrs_written_and_unwritten, 141) \
+ x(ptr_to_invalid_device, 142) \
+ x(ptr_to_duplicate_device, 143) \
+ x(ptr_after_last_bucket, 144) \
+ x(ptr_before_first_bucket, 145) \
+ x(ptr_spans_multiple_buckets, 146) \
+ x(ptr_to_missing_backpointer, 147) \
+ x(ptr_to_missing_alloc_key, 148) \
+ x(ptr_to_missing_replicas_entry, 149) \
+ x(ptr_to_missing_stripe, 150) \
+ x(ptr_to_incorrect_stripe, 151) \
+ x(ptr_gen_newer_than_bucket_gen, 152) \
+ x(ptr_too_stale, 153) \
+ x(stale_dirty_ptr, 154) \
+ x(ptr_bucket_data_type_mismatch, 155) \
+ x(ptr_cached_and_erasure_coded, 156) \
+ x(ptr_crc_uncompressed_size_too_small, 157) \
+ x(ptr_crc_csum_type_unknown, 158) \
+ x(ptr_crc_compression_type_unknown, 159) \
+ x(ptr_crc_redundant, 160) \
+ x(ptr_crc_uncompressed_size_too_big, 161) \
+ x(ptr_crc_nonce_mismatch, 162) \
+ x(ptr_stripe_redundant, 163) \
+ x(reservation_key_nr_replicas_invalid, 164) \
+ x(reflink_v_refcount_wrong, 165) \
+ x(reflink_p_to_missing_reflink_v, 166) \
+ x(stripe_pos_bad, 167) \
+ x(stripe_val_size_bad, 168) \
+ x(stripe_sector_count_wrong, 169) \
+ x(snapshot_tree_pos_bad, 170) \
+ x(snapshot_tree_to_missing_snapshot, 171) \
+ x(snapshot_tree_to_missing_subvol, 172) \
+ x(snapshot_tree_to_wrong_subvol, 173) \
+ x(snapshot_tree_to_snapshot_subvol, 174) \
+ x(snapshot_pos_bad, 175) \
+ x(snapshot_parent_bad, 176) \
+ x(snapshot_children_not_normalized, 177) \
+ x(snapshot_child_duplicate, 178) \
+ x(snapshot_child_bad, 179) \
+ x(snapshot_skiplist_not_normalized, 180) \
+ x(snapshot_skiplist_bad, 181) \
+ x(snapshot_should_not_have_subvol, 182) \
+ x(snapshot_to_bad_snapshot_tree, 183) \
+ x(snapshot_bad_depth, 184) \
+ x(snapshot_bad_skiplist, 185) \
+ x(subvol_pos_bad, 186) \
+ x(subvol_not_master_and_not_snapshot, 187) \
+ x(subvol_to_missing_root, 188) \
+ x(subvol_root_wrong_bi_subvol, 189) \
+ x(bkey_in_missing_snapshot, 190) \
+ x(inode_pos_inode_nonzero, 191) \
+ x(inode_pos_blockdev_range, 192) \
+ x(inode_unpack_error, 193) \
+ x(inode_str_hash_invalid, 194) \
+ x(inode_v3_fields_start_bad, 195) \
+ x(inode_snapshot_mismatch, 196) \
+ x(inode_unlinked_but_clean, 197) \
+ x(inode_unlinked_but_nlink_nonzero, 198) \
+ x(inode_checksum_type_invalid, 199) \
+ x(inode_compression_type_invalid, 200) \
+ x(inode_subvol_root_but_not_dir, 201) \
+ x(inode_i_size_dirty_but_clean, 202) \
+ x(inode_i_sectors_dirty_but_clean, 203) \
+ x(inode_i_sectors_wrong, 204) \
+ x(inode_dir_wrong_nlink, 205) \
+ x(inode_dir_multiple_links, 206) \
+ x(inode_multiple_links_but_nlink_0, 207) \
+ x(inode_wrong_backpointer, 208) \
+ x(inode_wrong_nlink, 209) \
+ x(inode_unreachable, 210) \
+ x(deleted_inode_but_clean, 211) \
+ x(deleted_inode_missing, 212) \
+ x(deleted_inode_is_dir, 213) \
+ x(deleted_inode_not_unlinked, 214) \
+ x(extent_overlapping, 215) \
+ x(extent_in_missing_inode, 216) \
+ x(extent_in_non_reg_inode, 217) \
+ x(extent_past_end_of_inode, 218) \
+ x(dirent_empty_name, 219) \
+ x(dirent_val_too_big, 220) \
+ x(dirent_name_too_long, 221) \
+ x(dirent_name_embedded_nul, 222) \
+ x(dirent_name_dot_or_dotdot, 223) \
+ x(dirent_name_has_slash, 224) \
+ x(dirent_d_type_wrong, 225) \
+ x(inode_bi_parent_wrong, 226) \
+ x(dirent_in_missing_dir_inode, 227) \
+ x(dirent_in_non_dir_inode, 228) \
+ x(dirent_to_missing_inode, 229) \
+ x(dirent_to_missing_subvol, 230) \
+ x(dirent_to_itself, 231) \
+ x(quota_type_invalid, 232) \
+ x(xattr_val_size_too_small, 233) \
+ x(xattr_val_size_too_big, 234) \
+ x(xattr_invalid_type, 235) \
+ x(xattr_name_invalid_chars, 236) \
+ x(xattr_in_missing_inode, 237) \
+ x(root_subvol_missing, 238) \
+ x(root_dir_missing, 239) \
+ x(root_inode_not_dir, 240) \
+ x(dir_loop, 241) \
+ x(hash_table_key_duplicate, 242) \
+ x(hash_table_key_wrong_offset, 243) \
+ x(unlinked_inode_not_on_deleted_list, 244) \
+ x(reflink_p_front_pad_bad, 245) \
+ x(journal_entry_dup_same_device, 246) \
+ x(inode_bi_subvol_missing, 247) \
+ x(inode_bi_subvol_wrong, 248) \
+ x(inode_points_to_missing_dirent, 249) \
+ x(inode_points_to_wrong_dirent, 250) \
+ x(inode_bi_parent_nonzero, 251) \
+ x(dirent_to_missing_parent_subvol, 252) \
+ x(dirent_not_visible_in_parent_subvol, 253) \
+ x(subvol_fs_path_parent_wrong, 254) \
+ x(subvol_root_fs_path_parent_nonzero, 255) \
+ x(subvol_children_not_set, 256) \
+ x(subvol_children_bad, 257) \
+ x(subvol_loop, 258) \
+ x(subvol_unreachable, 259) \
+ x(btree_node_bkey_bad_u64s, 260) \
+ x(btree_node_topology_empty_interior_node, 261) \
+ x(btree_ptr_v2_min_key_bad, 262) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263) \
+ x(snapshot_node_missing, 264) \
+ x(dup_backpointer_to_bad_csum_extent, 265) \
+ x(btree_bitmap_not_marked, 266) \
+ x(sb_clean_entry_overrun, 267) \
+ x(btree_ptr_v2_written_0, 268) \
+ x(subvol_snapshot_bad, 269) \
+ x(subvol_inode_bad, 270)
+
+enum bch_sb_error_id {
+#define x(t, n) BCH_FSCK_ERR_##t = n,
+ BCH_SB_ERRS()
+#undef x
+ BCH_SB_ERR_MAX
+};
+
+struct bch_sb_field_errors {
+ struct bch_sb_field field;
+ struct bch_sb_field_error_entry {
+ __le64 v;
+ __le64 last_error_time;
+ } entries[];
+};
+
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
+
+#endif /* _BCACHEFS_SB_ERRORS_FORMAT_H */
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index 666599d3fb9d..40325239c3b0 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -4,286 +4,6 @@
#include "darray.h"
-#define BCH_SB_ERRS() \
- x(clean_but_journal_not_empty, 0) \
- x(dirty_but_no_journal_entries, 1) \
- x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
- x(sb_clean_journal_seq_mismatch, 3) \
- x(sb_clean_btree_root_mismatch, 4) \
- x(sb_clean_missing, 5) \
- x(jset_unsupported_version, 6) \
- x(jset_unknown_csum, 7) \
- x(jset_last_seq_newer_than_seq, 8) \
- x(jset_past_bucket_end, 9) \
- x(jset_seq_blacklisted, 10) \
- x(journal_entries_missing, 11) \
- x(journal_entry_replicas_not_marked, 12) \
- x(journal_entry_past_jset_end, 13) \
- x(journal_entry_replicas_data_mismatch, 14) \
- x(journal_entry_bkey_u64s_0, 15) \
- x(journal_entry_bkey_past_end, 16) \
- x(journal_entry_bkey_bad_format, 17) \
- x(journal_entry_bkey_invalid, 18) \
- x(journal_entry_btree_root_bad_size, 19) \
- x(journal_entry_blacklist_bad_size, 20) \
- x(journal_entry_blacklist_v2_bad_size, 21) \
- x(journal_entry_blacklist_v2_start_past_end, 22) \
- x(journal_entry_usage_bad_size, 23) \
- x(journal_entry_data_usage_bad_size, 24) \
- x(journal_entry_clock_bad_size, 25) \
- x(journal_entry_clock_bad_rw, 26) \
- x(journal_entry_dev_usage_bad_size, 27) \
- x(journal_entry_dev_usage_bad_dev, 28) \
- x(journal_entry_dev_usage_bad_pad, 29) \
- x(btree_node_unreadable, 30) \
- x(btree_node_fault_injected, 31) \
- x(btree_node_bad_magic, 32) \
- x(btree_node_bad_seq, 33) \
- x(btree_node_unsupported_version, 34) \
- x(btree_node_bset_older_than_sb_min, 35) \
- x(btree_node_bset_newer_than_sb, 36) \
- x(btree_node_data_missing, 37) \
- x(btree_node_bset_after_end, 38) \
- x(btree_node_replicas_sectors_written_mismatch, 39) \
- x(btree_node_replicas_data_mismatch, 40) \
- x(bset_unknown_csum, 41) \
- x(bset_bad_csum, 42) \
- x(bset_past_end_of_btree_node, 43) \
- x(bset_wrong_sector_offset, 44) \
- x(bset_empty, 45) \
- x(bset_bad_seq, 46) \
- x(bset_blacklisted_journal_seq, 47) \
- x(first_bset_blacklisted_journal_seq, 48) \
- x(btree_node_bad_btree, 49) \
- x(btree_node_bad_level, 50) \
- x(btree_node_bad_min_key, 51) \
- x(btree_node_bad_max_key, 52) \
- x(btree_node_bad_format, 53) \
- x(btree_node_bkey_past_bset_end, 54) \
- x(btree_node_bkey_bad_format, 55) \
- x(btree_node_bad_bkey, 56) \
- x(btree_node_bkey_out_of_order, 57) \
- x(btree_root_bkey_invalid, 58) \
- x(btree_root_read_error, 59) \
- x(btree_root_bad_min_key, 60) \
- x(btree_root_bad_max_key, 61) \
- x(btree_node_read_error, 62) \
- x(btree_node_topology_bad_min_key, 63) \
- x(btree_node_topology_bad_max_key, 64) \
- x(btree_node_topology_overwritten_by_prev_node, 65) \
- x(btree_node_topology_overwritten_by_next_node, 66) \
- x(btree_node_topology_interior_node_empty, 67) \
- x(fs_usage_hidden_wrong, 68) \
- x(fs_usage_btree_wrong, 69) \
- x(fs_usage_data_wrong, 70) \
- x(fs_usage_cached_wrong, 71) \
- x(fs_usage_reserved_wrong, 72) \
- x(fs_usage_persistent_reserved_wrong, 73) \
- x(fs_usage_nr_inodes_wrong, 74) \
- x(fs_usage_replicas_wrong, 75) \
- x(dev_usage_buckets_wrong, 76) \
- x(dev_usage_sectors_wrong, 77) \
- x(dev_usage_fragmented_wrong, 78) \
- x(dev_usage_buckets_ec_wrong, 79) \
- x(bkey_version_in_future, 80) \
- x(bkey_u64s_too_small, 81) \
- x(bkey_invalid_type_for_btree, 82) \
- x(bkey_extent_size_zero, 83) \
- x(bkey_extent_size_greater_than_offset, 84) \
- x(bkey_size_nonzero, 85) \
- x(bkey_snapshot_nonzero, 86) \
- x(bkey_snapshot_zero, 87) \
- x(bkey_at_pos_max, 88) \
- x(bkey_before_start_of_btree_node, 89) \
- x(bkey_after_end_of_btree_node, 90) \
- x(bkey_val_size_nonzero, 91) \
- x(bkey_val_size_too_small, 92) \
- x(alloc_v1_val_size_bad, 93) \
- x(alloc_v2_unpack_error, 94) \
- x(alloc_v3_unpack_error, 95) \
- x(alloc_v4_val_size_bad, 96) \
- x(alloc_v4_backpointers_start_bad, 97) \
- x(alloc_key_data_type_bad, 98) \
- x(alloc_key_empty_but_have_data, 99) \
- x(alloc_key_dirty_sectors_0, 100) \
- x(alloc_key_data_type_inconsistency, 101) \
- x(alloc_key_to_missing_dev_bucket, 102) \
- x(alloc_key_cached_inconsistency, 103) \
- x(alloc_key_cached_but_read_time_zero, 104) \
- x(alloc_key_to_missing_lru_entry, 105) \
- x(alloc_key_data_type_wrong, 106) \
- x(alloc_key_gen_wrong, 107) \
- x(alloc_key_dirty_sectors_wrong, 108) \
- x(alloc_key_cached_sectors_wrong, 109) \
- x(alloc_key_stripe_wrong, 110) \
- x(alloc_key_stripe_redundancy_wrong, 111) \
- x(bucket_sector_count_overflow, 112) \
- x(bucket_metadata_type_mismatch, 113) \
- x(need_discard_key_wrong, 114) \
- x(freespace_key_wrong, 115) \
- x(freespace_hole_missing, 116) \
- x(bucket_gens_val_size_bad, 117) \
- x(bucket_gens_key_wrong, 118) \
- x(bucket_gens_hole_wrong, 119) \
- x(bucket_gens_to_invalid_dev, 120) \
- x(bucket_gens_to_invalid_buckets, 121) \
- x(bucket_gens_nonzero_for_invalid_buckets, 122) \
- x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
- x(need_discard_freespace_key_bad, 124) \
- x(backpointer_bucket_offset_wrong, 125) \
- x(backpointer_to_missing_device, 126) \
- x(backpointer_to_missing_alloc, 127) \
- x(backpointer_to_missing_ptr, 128) \
- x(lru_entry_at_time_0, 129) \
- x(lru_entry_to_invalid_bucket, 130) \
- x(lru_entry_bad, 131) \
- x(btree_ptr_val_too_big, 132) \
- x(btree_ptr_v2_val_too_big, 133) \
- x(btree_ptr_has_non_ptr, 134) \
- x(extent_ptrs_invalid_entry, 135) \
- x(extent_ptrs_no_ptrs, 136) \
- x(extent_ptrs_too_many_ptrs, 137) \
- x(extent_ptrs_redundant_crc, 138) \
- x(extent_ptrs_redundant_stripe, 139) \
- x(extent_ptrs_unwritten, 140) \
- x(extent_ptrs_written_and_unwritten, 141) \
- x(ptr_to_invalid_device, 142) \
- x(ptr_to_duplicate_device, 143) \
- x(ptr_after_last_bucket, 144) \
- x(ptr_before_first_bucket, 145) \
- x(ptr_spans_multiple_buckets, 146) \
- x(ptr_to_missing_backpointer, 147) \
- x(ptr_to_missing_alloc_key, 148) \
- x(ptr_to_missing_replicas_entry, 149) \
- x(ptr_to_missing_stripe, 150) \
- x(ptr_to_incorrect_stripe, 151) \
- x(ptr_gen_newer_than_bucket_gen, 152) \
- x(ptr_too_stale, 153) \
- x(stale_dirty_ptr, 154) \
- x(ptr_bucket_data_type_mismatch, 155) \
- x(ptr_cached_and_erasure_coded, 156) \
- x(ptr_crc_uncompressed_size_too_small, 157) \
- x(ptr_crc_csum_type_unknown, 158) \
- x(ptr_crc_compression_type_unknown, 159) \
- x(ptr_crc_redundant, 160) \
- x(ptr_crc_uncompressed_size_too_big, 161) \
- x(ptr_crc_nonce_mismatch, 162) \
- x(ptr_stripe_redundant, 163) \
- x(reservation_key_nr_replicas_invalid, 164) \
- x(reflink_v_refcount_wrong, 165) \
- x(reflink_p_to_missing_reflink_v, 166) \
- x(stripe_pos_bad, 167) \
- x(stripe_val_size_bad, 168) \
- x(stripe_sector_count_wrong, 169) \
- x(snapshot_tree_pos_bad, 170) \
- x(snapshot_tree_to_missing_snapshot, 171) \
- x(snapshot_tree_to_missing_subvol, 172) \
- x(snapshot_tree_to_wrong_subvol, 173) \
- x(snapshot_tree_to_snapshot_subvol, 174) \
- x(snapshot_pos_bad, 175) \
- x(snapshot_parent_bad, 176) \
- x(snapshot_children_not_normalized, 177) \
- x(snapshot_child_duplicate, 178) \
- x(snapshot_child_bad, 179) \
- x(snapshot_skiplist_not_normalized, 180) \
- x(snapshot_skiplist_bad, 181) \
- x(snapshot_should_not_have_subvol, 182) \
- x(snapshot_to_bad_snapshot_tree, 183) \
- x(snapshot_bad_depth, 184) \
- x(snapshot_bad_skiplist, 185) \
- x(subvol_pos_bad, 186) \
- x(subvol_not_master_and_not_snapshot, 187) \
- x(subvol_to_missing_root, 188) \
- x(subvol_root_wrong_bi_subvol, 189) \
- x(bkey_in_missing_snapshot, 190) \
- x(inode_pos_inode_nonzero, 191) \
- x(inode_pos_blockdev_range, 192) \
- x(inode_unpack_error, 193) \
- x(inode_str_hash_invalid, 194) \
- x(inode_v3_fields_start_bad, 195) \
- x(inode_snapshot_mismatch, 196) \
- x(inode_unlinked_but_clean, 197) \
- x(inode_unlinked_but_nlink_nonzero, 198) \
- x(inode_checksum_type_invalid, 199) \
- x(inode_compression_type_invalid, 200) \
- x(inode_subvol_root_but_not_dir, 201) \
- x(inode_i_size_dirty_but_clean, 202) \
- x(inode_i_sectors_dirty_but_clean, 203) \
- x(inode_i_sectors_wrong, 204) \
- x(inode_dir_wrong_nlink, 205) \
- x(inode_dir_multiple_links, 206) \
- x(inode_multiple_links_but_nlink_0, 207) \
- x(inode_wrong_backpointer, 208) \
- x(inode_wrong_nlink, 209) \
- x(inode_unreachable, 210) \
- x(deleted_inode_but_clean, 211) \
- x(deleted_inode_missing, 212) \
- x(deleted_inode_is_dir, 213) \
- x(deleted_inode_not_unlinked, 214) \
- x(extent_overlapping, 215) \
- x(extent_in_missing_inode, 216) \
- x(extent_in_non_reg_inode, 217) \
- x(extent_past_end_of_inode, 218) \
- x(dirent_empty_name, 219) \
- x(dirent_val_too_big, 220) \
- x(dirent_name_too_long, 221) \
- x(dirent_name_embedded_nul, 222) \
- x(dirent_name_dot_or_dotdot, 223) \
- x(dirent_name_has_slash, 224) \
- x(dirent_d_type_wrong, 225) \
- x(inode_bi_parent_wrong, 226) \
- x(dirent_in_missing_dir_inode, 227) \
- x(dirent_in_non_dir_inode, 228) \
- x(dirent_to_missing_inode, 229) \
- x(dirent_to_missing_subvol, 230) \
- x(dirent_to_itself, 231) \
- x(quota_type_invalid, 232) \
- x(xattr_val_size_too_small, 233) \
- x(xattr_val_size_too_big, 234) \
- x(xattr_invalid_type, 235) \
- x(xattr_name_invalid_chars, 236) \
- x(xattr_in_missing_inode, 237) \
- x(root_subvol_missing, 238) \
- x(root_dir_missing, 239) \
- x(root_inode_not_dir, 240) \
- x(dir_loop, 241) \
- x(hash_table_key_duplicate, 242) \
- x(hash_table_key_wrong_offset, 243) \
- x(unlinked_inode_not_on_deleted_list, 244) \
- x(reflink_p_front_pad_bad, 245) \
- x(journal_entry_dup_same_device, 246) \
- x(inode_bi_subvol_missing, 247) \
- x(inode_bi_subvol_wrong, 248) \
- x(inode_points_to_missing_dirent, 249) \
- x(inode_points_to_wrong_dirent, 250) \
- x(inode_bi_parent_nonzero, 251) \
- x(dirent_to_missing_parent_subvol, 252) \
- x(dirent_not_visible_in_parent_subvol, 253) \
- x(subvol_fs_path_parent_wrong, 254) \
- x(subvol_root_fs_path_parent_nonzero, 255) \
- x(subvol_children_not_set, 256) \
- x(subvol_children_bad, 257) \
- x(subvol_loop, 258) \
- x(subvol_unreachable, 259) \
- x(btree_node_bkey_bad_u64s, 260) \
- x(btree_node_topology_empty_interior_node, 261) \
- x(btree_ptr_v2_min_key_bad, 262) \
- x(btree_root_unreadable_and_scan_found_nothing, 263) \
- x(snapshot_node_missing, 264) \
- x(dup_backpointer_to_bad_csum_extent, 265) \
- x(btree_bitmap_not_marked, 266) \
- x(sb_clean_entry_overrun, 267) \
- x(btree_ptr_v2_written_0, 268) \
- x(subvol_snapshot_bad, 269) \
- x(subvol_inode_bad, 270)
-
-enum bch_sb_error_id {
-#define x(t, n) BCH_FSCK_ERR_##t = n,
- BCH_SB_ERRS()
-#undef x
- BCH_SB_ERR_MAX
-};
-
struct bch_sb_error_entry_cpu {
u64 id:16,
nr:48;
@@ -293,4 +13,3 @@ struct bch_sb_error_entry_cpu {
typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu;
#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */
-
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
new file mode 100644
index 000000000000..e2630548c0f6
--- /dev/null
+++ b/fs/bcachefs/sb-members_format.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_MEMBERS_FORMAT_H
+#define _BCACHEFS_SB_MEMBERS_FORMAT_H
+
+/*
+ * We refer to members with bitmasks in various places - but we need to get rid
+ * of this limit:
+ */
+#define BCH_SB_MEMBERS_MAX 64
+
+#define BCH_MIN_NR_NBUCKETS (1 << 6)
+
+#define BCH_IOPS_MEASUREMENTS() \
+ x(seqread, 0) \
+ x(seqwrite, 1) \
+ x(randread, 2) \
+ x(randwrite, 3)
+
+enum bch_iops_measurement {
+#define x(t, n) BCH_IOPS_##t = n,
+ BCH_IOPS_MEASUREMENTS()
+#undef x
+ BCH_IOPS_NR
+};
+
+#define BCH_MEMBER_ERROR_TYPES() \
+ x(read, 0) \
+ x(write, 1) \
+ x(checksum, 2)
+
+enum bch_member_error_type {
+#define x(t, n) BCH_MEMBER_ERROR_##t = n,
+ BCH_MEMBER_ERROR_TYPES()
+#undef x
+ BCH_MEMBER_ERROR_NR
+};
+
+struct bch_member {
+ __uuid_t uuid;
+ __le64 nbuckets; /* device size */
+ __le16 first_bucket; /* index of first bucket used */
+ __le16 bucket_size; /* sectors */
+ __u8 btree_bitmap_shift;
+ __u8 pad[3];
+ __le64 last_mount; /* time_t */
+
+ __le64 flags;
+ __le32 iops[4];
+ __le64 errors[BCH_MEMBER_ERROR_NR];
+ __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
+ __le64 errors_reset_time;
+ __le64 seq;
+ __le64 btree_allocated_bitmap;
+ /*
+ * On recovery from a clean shutdown we don't normally read the journal,
+ * but we still want to resume writing from where we left off so we
+ * don't overwrite more than is necessary, for list journal debugging:
+ */
+ __le32 last_journal_bucket;
+ __le32 last_journal_bucket_offset;
+};
+
+/*
+ * This limit comes from the bucket_gens array - it's a single allocation, and
+ * kernel allocation are limited to INT_MAX
+ */
+#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64)
+
+#define BCH_MEMBER_V1_BYTES 56
+
+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
+/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)
+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
+LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
+ struct bch_member, flags, 30, 31)
+
+#if 0
+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
+#endif
+
+#define BCH_MEMBER_STATES() \
+ x(rw, 0) \
+ x(ro, 1) \
+ x(failed, 2) \
+ x(spare, 3)
+
+enum bch_member_state {
+#define x(t, n) BCH_MEMBER_STATE_##t = n,
+ BCH_MEMBER_STATES()
+#undef x
+ BCH_MEMBER_STATE_NR
+};
+
+struct bch_sb_field_members_v1 {
+ struct bch_sb_field field;
+ struct bch_member _members[]; //Members are now variable size
+};
+
+struct bch_sb_field_members_v2 {
+ struct bch_sb_field field;
+ __le16 member_bytes; //size of single member entry
+ u8 pad[6];
+ struct bch_member _members[];
+};
+
+#endif /* _BCACHEFS_SB_MEMBERS_FORMAT_H */
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 629900a5e641..51918acfd726 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1042,6 +1042,25 @@ err:
return ret;
}
+int bch2_check_key_has_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
+ bkey_in_missing_snapshot,
+ "key in missing snapshot %s, delete?",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node) ?: 1;
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
/*
* Mark a snapshot as deleted, for future cleanup:
*/
@@ -1351,35 +1370,39 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it
*/
-static int snapshot_delete_key(struct btree_trans *trans,
+static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
snapshot_id_list *deleted,
snapshot_id_list *equiv_seen,
struct bpos *last_pos)
{
+ int ret = bch2_check_key_has_snapshot(trans, iter, k);
+ if (ret)
+ return ret < 0 ? ret : 0;
+
struct bch_fs *c = trans->c;
u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+ if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
+ return 0;
if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0;
- *last_pos = k.k->p;
- if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
- snapshot_list_has_id(equiv_seen, equiv)) {
+ if (snapshot_list_has_id(deleted, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
- } else {
- return snapshot_list_add(c, equiv_seen, equiv);
- }
-}
-static int move_key_to_correct_snapshot(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k)
-{
- struct bch_fs *c = trans->c;
- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+ if (!bpos_eq(*last_pos, k.k->p) &&
+ snapshot_list_has_id(equiv_seen, equiv))
+ return bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node);
+
+ *last_pos = k.k->p;
+
+ ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
+ if (ret)
+ return ret;
/*
* When we have a linear chain of snapshot nodes, we consider
@@ -1389,21 +1412,20 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans,
*
* If there are multiple keys in different snapshots at the same
* position, we're only going to keep the one in the newest
- * snapshot - the rest have been overwritten and are redundant,
- * and for the key we're going to keep we need to move it to the
- * equivalance class ID if it's not there already.
+ * snapshot (we delete the others above) - the rest have been
+ * overwritten and are redundant, and for the key we're going to keep we
+ * need to move it to the equivalance class ID if it's not there
+ * already.
*/
if (equiv != k.k->p.snapshot) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
- struct btree_iter new_iter;
- int ret;
-
- ret = PTR_ERR_OR_ZERO(new);
+ int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
new->k.p.snapshot = equiv;
+ struct btree_iter new_iter;
bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
BTREE_ITER_all_snapshots|
BTREE_ITER_cached|
@@ -1538,7 +1560,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
struct btree_trans *trans;
snapshot_id_list deleted = { 0 };
snapshot_id_list deleted_interior = { 0 };
- u32 id;
int ret = 0;
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
@@ -1585,33 +1606,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (ret)
goto err;
- for (id = 0; id < BTREE_ID_NR; id++) {
+ for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
struct bpos last_pos = POS_MIN;
snapshot_id_list equiv_seen = { 0 };
struct disk_reservation res = { 0 };
- if (!btree_type_has_snapshots(id))
- continue;
-
- /*
- * deleted inodes btree is maintained by a trigger on the inodes
- * btree - no work for us to do here, and it's not safe to scan
- * it because we'll see out of date keys due to the btree write
- * buffer:
- */
- if (id == BTREE_ID_deleted_inodes)
+ if (!btree_type_has_snapshots(btree))
continue;
ret = for_each_btree_key_commit(trans, iter,
- id, POS_MIN,
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- &res, NULL, BCH_TRANS_COMMIT_no_enospc,
- snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
- for_each_btree_key_commit(trans, iter,
- id, POS_MIN,
+ btree, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
- move_key_to_correct_snapshot(trans, &iter, k));
+ delete_dead_snapshots_process_key(trans, &iter, k, &deleted,
+ &equiv_seen, &last_pos));
bch2_disk_reservation_put(c, &res);
darray_exit(&equiv_seen);
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index ab13d8f4b41e..31b0ee03e962 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -242,6 +242,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
int bch2_reconstruct_snapshots(struct bch_fs *);
+int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index f1bee6c5222d..d73a0222f709 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1132,18 +1132,12 @@ bool bch2_check_version_downgrade(struct bch_fs *c)
* c->sb will be checked before we write the superblock, so update it as
* well:
*/
- if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) {
+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
- c->sb.version_upgrade_complete = bcachefs_metadata_version_current;
- }
- if (c->sb.version > bcachefs_metadata_version_current) {
+ if (c->sb.version > bcachefs_metadata_version_current)
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
- c->sb.version = bcachefs_metadata_version_current;
- }
- if (c->sb.version_min > bcachefs_metadata_version_current) {
+ if (c->sb.version_min > bcachefs_metadata_version_current)
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
- c->sb.version_min = bcachefs_metadata_version_current;
- }
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
return ret;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 2206a8dee693..df2bea38e83f 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -564,7 +564,7 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
- EBUG_ON(percpu_u64_get(c->online_reserved));
+ EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
free_percpu(c->online_reserved);
darray_exit(&c->btree_roots_extra);
diff --git a/fs/dcache.c b/fs/dcache.c
index 1ee6404b430b..407095188f83 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2360,17 +2360,19 @@ EXPORT_SYMBOL(d_hash_and_lookup);
* - unhash this dentry and free it.
*
* Usually, we want to just turn this into
- * a negative dentry, but certain workloads can
- * generate a large number of negative dentries.
- * Therefore, it would be better to simply
- * unhash it.
+ * a negative dentry, but if anybody else is
+ * currently using the dentry or the inode
+ * we can't do that and we fall back on removing
+ * it from the hash queues and waiting for
+ * it to be deleted later when it has no users
*/
/**
* d_delete - delete a dentry
* @dentry: The dentry to delete
*
- * Remove the dentry from the hash queues so it can be deleted later.
+ * Turn the dentry into a negative dentry if possible, otherwise
+ * remove it from the hash queues so it can be deleted later
*/
void d_delete(struct dentry * dentry)
@@ -2379,8 +2381,6 @@ void d_delete(struct dentry * dentry)
spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
- __d_drop(dentry);
-
/*
* Are we the only user?
*/
@@ -2388,6 +2388,7 @@ void d_delete(struct dentry * dentry)
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
dentry_unlink_inode(dentry);
} else {
+ __d_drop(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 41c8f0c68ef5..c5802a459334 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -898,11 +898,11 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
loff_t length = iomap_length(iter);
- size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
loff_t pos = iter->pos;
ssize_t total_written = 0;
long status = 0;
struct address_space *mapping = iter->inode->i_mapping;
+ size_t chunk = mapping_max_folio_size(mapping);
unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
do {
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 1121601536d1..07bc1fd43530 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -181,7 +181,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio, *writethrough = NULL;
enum netfs_how_to_modify howto;
enum netfs_folio_trace trace;
- unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+ unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos, from, to;
size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index f516460e994e..e14cd53ac9fd 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -12,7 +12,7 @@
static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
{
struct inode *inode = wreq->inode;
- unsigned long long end = wreq->start + wreq->len;
+ unsigned long long end = wreq->start + wreq->transferred;
if (!wreq->error &&
i_size_read(inode) < end) {
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index c90d482b1650..f4a642727479 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -72,6 +72,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
}
+ atomic_inc(&ctx->io_count);
trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
@@ -124,6 +125,7 @@ static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
+ struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
@@ -142,6 +144,9 @@ static void netfs_free_request(struct work_struct *work)
}
kvfree(rreq->direct_bv);
}
+
+ if (atomic_dec_and_test(&ictx->io_count))
+ wake_up_var(&ictx->io_count);
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 60112e4b2c5e..426cf87aaf2e 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -510,7 +510,7 @@ reassess_streams:
* stream has a gap that can be jumped.
*/
if (notes & SOME_EMPTY) {
- unsigned long long jump_to = wreq->start + wreq->len;
+ unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
@@ -690,10 +690,11 @@ void netfs_write_collection_worker(struct work_struct *work)
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
if (wreq->iocb) {
- wreq->iocb->ki_pos += wreq->transferred;
+ size_t written = min(wreq->transferred, wreq->len);
+ wreq->iocb->ki_pos += written;
if (wreq->iocb->ki_complete)
wreq->iocb->ki_complete(
- wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
+ wreq->iocb, wreq->error ? wreq->error : written);
wreq->iocb = VFS_PTR_POISON;
}
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index e190043bc0da..3aa86e268f40 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -254,7 +254,7 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
stream->construct = NULL;
if (subreq->start + subreq->len > wreq->start + wreq->submitted)
- wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
+ WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
netfs_do_issue_write(stream, subreq);
}
@@ -636,7 +636,12 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
mutex_unlock(&ictx->wb_lock);
- ret = wreq->error;
+ if (wreq->iocb) {
+ ret = -EIOCBQUEUED;
+ } else {
+ wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
+ ret = wreq->error;
+ }
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
return ret;
}
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 4a5614442dbf..ec7b2da2477a 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -282,14 +282,10 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
if (IS_ERR(file)) {
put_unused_fd(ufd);
kfree(ctx);
- return ufd;
+ return PTR_ERR(file);
}
file->f_mode |= FMODE_NOWAIT;
- /*
- * When we call this, the initialization must be complete, since
- * anon_inode_getfd() will install the fd.
- */
fd_install(ufd, file);
} else {
struct fd f = fdget(ufd);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index a665aac9be9f..bb86fc0641d8 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -431,6 +431,7 @@ cifs_free_inode(struct inode *inode)
static void
cifs_evict_inode(struct inode *inode)
{
+ netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
if (inode->i_state & I_PINNING_NETFS_WB)
cifs_fscache_unuse_inode_cookie(inode, true);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2ad1ffa4ccb9..0ed47d00549b 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -636,6 +636,14 @@ static inline void eth_skb_pkt_type(struct sk_buff *skb,
}
}
+static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+
+ skb_pull_inline(skb, ETH_HLEN);
+ return eth;
+}
+
/**
* eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
* @skb: Buffer to pad
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f468763478ae..5df52e15f7d6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10308,9 +10308,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 mfrl[0x1];
u8 regs_39_to_32[0x8];
- u8 regs_31_to_10[0x16];
+ u8 regs_31_to_11[0x15];
u8 mtmp[0x1];
- u8 regs_8_to_0[0x9];
+ u8 regs_9_to_0[0xa];
};
struct mlx5_ifc_mcam_access_reg_bits1 {
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index d2d291a9cdad..3ca3906bb8da 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -68,6 +68,7 @@ struct netfs_inode {
loff_t remote_i_size; /* Size of the remote file */
loff_t zero_point; /* Size after which we assume there's no data
* on the server */
+ atomic_t io_count; /* Number of outstanding reqs */
unsigned long flags;
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
@@ -474,6 +475,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
ctx->remote_i_size = i_size_read(&ctx->inode);
ctx->zero_point = LLONG_MAX;
ctx->flags = 0;
+ atomic_set(&ctx->io_count, 0);
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
@@ -517,4 +519,20 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
#endif
}
+/**
+ * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
+ * @ctx: The netfs inode to wait on
+ *
+ * Wait for outstanding I/O requests of any type to complete. This is intended
+ * to be called from inode eviction routines. This makes sure that any
+ * resources held by those requests are cleaned up before we let the inode get
+ * cleaned up.
+ */
+static inline void netfs_wait_for_outstanding_io(struct inode *inode)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+ wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
+}
+
#endif /* _LINUX_NETFS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 3d69589c00a4..ee633712bba0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -346,6 +346,19 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
m->gfp_mask = mask;
}
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
* @mapping: The file.
@@ -372,6 +385,14 @@ static inline bool mapping_large_folio_support(struct address_space *mapping)
test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
+/* Return the maximum folio size for this pagecache mapping, in bytes. */
+static inline size_t mapping_max_folio_size(struct address_space *mapping)
+{
+ if (mapping_large_folio_support(mapping))
+ return PAGE_SIZE << MAX_PAGECACHE_ORDER;
+ return PAGE_SIZE;
+}
+
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@ -530,19 +551,6 @@ static inline void *detach_page_private(struct page *page)
return folio_detach_private(page_folio(page));
}
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size. I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER 8
-#endif
-
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
#else
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index c17e4efbb2e5..21a67dc9efe8 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -394,21 +394,6 @@ enum tpm2_object_attributes {
TPM2_OA_SIGN = BIT(18),
};
-/*
- * definitions for the canonical template. These are mandated
- * by the TCG key template documents
- */
-
-#define AES_KEY_BYTES AES_KEYSIZE_128
-#define AES_KEY_BITS (AES_KEY_BYTES*8)
-#define TPM2_OA_TMPL (TPM2_OA_NO_DA | \
- TPM2_OA_FIXED_TPM | \
- TPM2_OA_FIXED_PARENT | \
- TPM2_OA_SENSITIVE_DATA_ORIGIN | \
- TPM2_OA_USER_WITH_AUTH | \
- TPM2_OA_DECRYPT | \
- TPM2_OA_RESTRICTED)
-
enum tpm2_session_attributes {
TPM2_SA_CONTINUE_SESSION = BIT(0),
TPM2_SA_AUDIT_EXCLUSIVE = BIT(1),
@@ -437,8 +422,6 @@ u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset);
u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset);
u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset);
-u8 *tpm_buf_parameters(struct tpm_buf *buf);
-
/*
* Check if TPM device is in the firmware upgrade mode.
*/
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 6d1c8541183d..3a9001a042a5 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -24,7 +24,7 @@ struct dst_ops {
void (*destroy)(struct dst_entry *);
void (*ifdown)(struct dst_entry *,
struct net_device *dev);
- struct dst_entry * (*negative_advice)(struct dst_entry *);
+ void (*negative_advice)(struct sock *sk, struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index b088d131aeb0..7e8477057f3d 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -45,16 +45,17 @@ struct pp_alloc_cache {
/**
* struct page_pool_params - page pool parameters
+ * @fast: params accessed frequently on hotpath
* @order: 2^order pages on allocation
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
* @dev: device, for DMA pre-mapping purposes
- * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
* @napi: NAPI which is the sole consumer of pages, otherwise NULL
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
- * @netdev: corresponding &net_device for Netlink introspection
+ * @slow: params with slowpath access only (initialization and Netlink)
+ * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL
*/
struct page_pool_params {
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index d88c0dfc2d46..ebcb8896bffc 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -285,4 +285,16 @@ static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
return atomic_read(&queue->young);
}
+/* RFC 7323 2.3 Using the Window Scale Option
+ * The window field (SEG.WND) of every outgoing segment, with the
+ * exception of <SYN> segments, MUST be right-shifted by
+ * Rcv.Wind.Shift bits.
+ *
+ * This means the SEG.WND carried in SYNACK can not exceed 65535.
+ * We use this property to harden TCP stack while in NEW_SYN_RECV state.
+ */
+static inline u32 tcp_synack_window(const struct request_sock *req)
+{
+ return min(req->rsk_rcv_wnd, 65535U);
+}
#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5f4d0629348f..953c8dc4e259 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2063,17 +2063,10 @@ sk_dst_get(const struct sock *sk)
static inline void __dst_negative_advice(struct sock *sk)
{
- struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+ struct dst_entry *dst = __sk_dst_get(sk);
- if (dst && dst->ops->negative_advice) {
- ndst = dst->ops->negative_advice(dst);
-
- if (ndst != dst) {
- rcu_assign_pointer(sk->sk_dst_cache, ndst);
- sk_tx_queue_clear(sk);
- WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
- }
- }
+ if (dst && dst->ops->negative_advice)
+ dst->ops->negative_advice(sk, dst);
}
static inline void dst_negative_advice(struct sock *sk)
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index f2afb7cc4926..18e3745b86cd 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -69,8 +69,7 @@ struct proc_input {
static inline enum proc_cn_event valid_event(enum proc_cn_event ev_type)
{
- ev_type &= PROC_EVENT_ALL;
- return ev_type;
+ return (enum proc_cn_event)(ev_type & PROC_EVENT_ALL);
}
/*
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index a8188202413e..43742ac5b00d 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -148,6 +148,7 @@ enum {
NETDEV_A_QSTATS_RX_ALLOC_FAIL,
NETDEV_A_QSTATS_RX_HW_DROPS,
NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+ NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
NETDEV_A_QSTATS_RX_CSUM_NONE,
NETDEV_A_QSTATS_RX_CSUM_BAD,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 77da1f438bec..48f3a9acdef3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8882,7 +8882,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
enum bpf_attach_type eatype = env->prog->expected_attach_type;
enum bpf_prog_type type = resolve_prog_type(env->prog);
- if (func_id != BPF_FUNC_map_update_elem)
+ if (func_id != BPF_FUNC_map_update_elem &&
+ func_id != BPF_FUNC_map_delete_elem)
return false;
/* It's not possible to get access to a locked struct sock in these
@@ -8893,6 +8894,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
if (eatype == BPF_TRACE_ITER)
return true;
break;
+ case BPF_PROG_TYPE_SOCK_OPS:
+ /* map_update allowed only via dedicated helpers with event type checks */
+ if (func_id == BPF_FUNC_map_delete_elem)
+ return true;
+ break;
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
@@ -8988,7 +8994,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKMAP:
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
- func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -8998,7 +9003,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKHASH:
if (func_id != BPF_FUNC_sk_redirect_hash &&
func_id != BPF_FUNC_sock_hash_update &&
- func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem &&
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index d9abb7ab031d..753b8dd42a59 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1595,7 +1595,7 @@ int swsusp_check(bool exclusive)
put:
if (error)
- fput(hib_resume_bdev_file);
+ bdev_fput(hib_resume_bdev_file);
else
pr_debug("Image signature found, resuming\n");
} else {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f5154c051d2c..6249dac61701 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3295,7 +3295,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
struct bpf_run_ctx *old_run_ctx;
int err = 0;
- if (link->task && current != link->task)
+ if (link->task && current->mm != link->task->mm)
return 0;
if (sleepable)
@@ -3396,8 +3396,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
cnt = attr->link_create.uprobe_multi.cnt;
+ pid = attr->link_create.uprobe_multi.pid;
- if (!upath || !uoffsets || !cnt)
+ if (!upath || !uoffsets || !cnt || pid < 0)
return -EINVAL;
if (cnt > MAX_UPROBE_MULTI_CNT)
return -E2BIG;
@@ -3421,11 +3422,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error_path_put;
}
- pid = attr->link_create.uprobe_multi.pid;
if (pid) {
- rcu_read_lock();
- task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
- rcu_read_unlock();
+ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
if (!task) {
err = -ESRCH;
goto error_path_put;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 5e263c141574..39877c80d6cb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -554,6 +554,10 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
anon_offs = 0;
field = btf_find_struct_member(ctx->btf, type, fieldname,
&anon_offs);
+ if (IS_ERR(field)) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return PTR_ERR(field);
+ }
if (!field) {
trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
return -ENOENT;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8541fa1494ae..c98e3b3386ba 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -970,19 +970,17 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
static void __uprobe_trace_func(struct trace_uprobe *tu,
unsigned long func, struct pt_regs *regs,
- struct uprobe_cpu_buffer **ucbp,
+ struct uprobe_cpu_buffer *ucb,
struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
struct trace_event_buffer fbuffer;
- struct uprobe_cpu_buffer *ucb;
void *data;
int size, esize;
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
WARN_ON(call != trace_file->event_call);
- ucb = prepare_uprobe_buffer(tu, regs, ucbp);
if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
return;
@@ -1014,13 +1012,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
struct uprobe_cpu_buffer **ucbp)
{
struct event_file_link *link;
+ struct uprobe_cpu_buffer *ucb;
if (is_ret_probe(tu))
return 0;
+ ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+
rcu_read_lock();
trace_probe_for_each_link_rcu(link, &tu->tp)
- __uprobe_trace_func(tu, 0, regs, ucbp, link->file);
+ __uprobe_trace_func(tu, 0, regs, ucb, link->file);
rcu_read_unlock();
return 0;
@@ -1031,10 +1032,13 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
struct uprobe_cpu_buffer **ucbp)
{
struct event_file_link *link;
+ struct uprobe_cpu_buffer *ucb;
+
+ ucb = prepare_uprobe_buffer(tu, regs, ucbp);
rcu_read_lock();
trace_probe_for_each_link_rcu(link, &tu->tp)
- __uprobe_trace_func(tu, func, regs, ucbp, link->file);
+ __uprobe_trace_func(tu, func, regs, ucb, link->file);
rcu_read_unlock();
}
diff --git a/net/9p/client.c b/net/9p/client.c
index 00774656eeac..5cd94721d974 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -236,6 +236,8 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
+ fc->id = 0;
+ fc->tag = P9_NOTAG;
return 0;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9402889840bf..d3dbb92153f2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -423,9 +423,6 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
- if (irqs_disabled())
- return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -948,9 +945,6 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
- if (irqs_disabled())
- return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
@@ -1680,19 +1674,23 @@ void sock_map_close(struct sock *sk, long timeout)
lock_sock(sk);
rcu_read_lock();
- psock = sk_psock_get(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- saved_close = READ_ONCE(sk->sk_prot)->close;
- } else {
+ psock = sk_psock(sk);
+ if (likely(psock)) {
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ goto no_psock;
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
+ } else {
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+no_psock:
+ rcu_read_unlock();
+ release_sock(sk);
}
/* Make sure we do not recurse. This is a bug.
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 049c3adeb850..4e3651101b86 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->dev = dev;
skb_reset_mac_header(skb);
- eth = (struct ethhdr *)skb->data;
- skb_pull_inline(skb, ETH_HLEN);
-
+ eth = eth_skb_pull_mac(skb);
eth_skb_pkt_type(skb, dev);
/*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e03ba4a21c39..b24d74616637 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1532,7 +1532,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
}
NAPI_GRO_CB(skb)->flush |= flush;
- NAPI_GRO_CB(skb)->inner_network_offset = off;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
/* Note : No need to call skb_gro_postpull_rcsum() here,
* as we already checked checksum over ipv4 header was 0
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 96accde527da..f3892ee9dfb3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1887,10 +1887,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
goto done;
if (fillargs.ifindex) {
- err = -ENODEV;
dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
- if (!dev)
+ if (!dev) {
+ err = -ENODEV;
goto done;
+ }
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto done;
@@ -1902,7 +1903,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = inet_base_seq(tgt_net);
- for_each_netdev_dump(net, dev, ctx->ifindex) {
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
continue;
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 69e331799604..73e66a088e25 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
+ if (!indev)
+ return daddr;
in_dev_for_each_ifa_rcu(ifa, indev) {
if (ifa->ifa_flags & IFA_F_SECONDARY)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5fd54103174f..b3073d1c8f8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,8 @@ struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ipv4_mtu(const struct dst_entry *dst);
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+static void ipv4_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
@@ -825,22 +826,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
__ip_do_redirect(rt, skb, &fl4, true);
}
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
+static void ipv4_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
struct rtable *rt = dst_rtable(dst);
- struct dst_entry *ret = dst;
- if (rt) {
- if (dst->obsolete > 0) {
- ip_rt_put(rt);
- ret = NULL;
- } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
- rt->dst.expires) {
- ip_rt_put(rt);
- ret = NULL;
- }
- }
- return ret;
+ if ((dst->obsolete > 0) ||
+ (rt->rt_flags & RTCF_REDIRECTED) ||
+ rt->dst.expires)
+ sk_dst_reset(sk);
}
/*
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 30ef0c8f5e92..b710958393e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1144,14 +1144,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
#endif
}
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent),
0, &key,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b93619b2384b..538c06f95918 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -783,8 +783,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
- if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
+ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(skb)->end_seq,
+ tcp_rsk(req)->rcv_nxt,
+ tcp_rsk(req)->rcv_nxt +
+ tcp_synack_window(req))) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST) &&
!tcp_oow_rate_limited(sock_net(sk), skb,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index bd5aff97d8b1..9822163428b0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -236,7 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if (unlikely(!iph))
goto out;
- NAPI_GRO_CB(skb)->inner_network_offset = off;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
flush += ntohs(iph->payload_len) != skb->len - hlen;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bbc2a0dd9314..a504b88ec06b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev);
@@ -2770,24 +2771,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
}
EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
struct rt6_info *rt = dst_rt6_info(dst);
- if (rt) {
- if (rt->rt6i_flags & RTF_CACHE) {
- rcu_read_lock();
- if (rt6_check_expired(rt)) {
- rt6_remove_exception_rt(rt);
- dst = NULL;
- }
- rcu_read_unlock();
- } else {
- dst_release(dst);
- dst = NULL;
+ if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
+ if (rt6_check_expired(rt)) {
+ /* counteract the dst_release() in sk_dst_reset() */
+ dst_hold(dst);
+ sk_dst_reset(sk);
+
+ rt6_remove_exception_rt(rt);
}
+ rcu_read_unlock();
+ return;
}
- return dst;
+ sk_dst_reset(sk);
}
static void ip6_link_failure(struct sk_buff *skb)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4c3605485b68..8c577b651bfc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1272,15 +1272,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6c3f28bc59b3..54e2a1dd7f5f 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -549,6 +549,9 @@ list_set_cancel_gc(struct ip_set *set)
if (SET_WITH_TIMEOUT(set))
timer_shutdown_sync(&map->gc);
+
+ /* Flush list to drop references to other ipsets */
+ list_set_flush(set);
}
static const struct ip_set_type_variant set_variant = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 00f4bd21c59b..f1c31757e496 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head)
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
rcu);
+ rcu_read_lock();
nfqnl_flush(inst, NULL, 0);
+ rcu_read_unlock();
kfree(inst);
module_put(THIS_MODULE);
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 37cfe6dd712d..b58f62195ff3 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
- hooks = (1 << NF_INET_PRE_ROUTING);
- if (priv->flags & NFTA_FIB_F_IIF) {
- hooks |= (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_FORWARD);
- }
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 0a689c8e0295..0c43d748e23a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
int mac_off = skb_mac_header(skb) - skb->data;
u8 *vlanh, *dst_u8 = (u8 *) d;
struct vlan_ethhdr veth;
- u8 vlan_hlen = 0;
-
- if ((skb->protocol == htons(ETH_P_8021AD) ||
- skb->protocol == htons(ETH_P_8021Q)) &&
- offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
- vlan_hlen += VLAN_HLEN;
vlanh = (u8 *) &veth;
- if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+ if (offset < VLAN_ETH_HLEN) {
u8 ethlen = len;
- if (vlan_hlen &&
- skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
- return false;
- else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
+ if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
return false;
- if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
+ if (offset + len > VLAN_ETH_HLEN)
+ ethlen -= offset + len - VLAN_ETH_HLEN;
- memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
+ memcpy(dst_u8, vlanh + offset, ethlen);
len -= ethlen;
if (len == 0)
return true;
dst_u8 += ethlen;
- offset = ETH_HLEN + vlan_hlen;
+ offset = ETH_HLEN;
} else {
- offset -= VLAN_HLEN + vlan_hlen;
+ offset -= VLAN_HLEN;
}
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
@@ -154,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff;
}
-static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
+static bool nft_payload_need_vlan_adjust(u32 offset, u32 len)
{
- unsigned int len = priv->offset + priv->len;
+ unsigned int boundary = offset + len;
/* data past ether src/dst requested, copy needed */
- if (len > offsetof(struct ethhdr, h_proto))
+ if (boundary > offsetof(struct ethhdr, h_proto))
return true;
return false;
@@ -183,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;
if (skb_vlan_tag_present(skb) &&
- nft_payload_need_vlan_copy(priv)) {
+ nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
@@ -810,21 +801,79 @@ struct nft_payload_set {
u8 csum_flags;
};
+/* This is not struct vlan_hdr. */
+struct nft_payload_vlan_hdr {
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+};
+
+static bool
+nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len,
+ int *vlan_hlen)
+{
+ struct nft_payload_vlan_hdr *vlanh;
+ __be16 vlan_proto;
+ u16 vlan_tci;
+
+ if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) {
+ *vlan_hlen = VLAN_HLEN;
+ return true;
+ }
+
+ switch (offset) {
+ case offsetof(struct vlan_ethhdr, h_vlan_proto):
+ if (len == 2) {
+ vlan_proto = nft_reg_load_be16(src);
+ skb->vlan_proto = vlan_proto;
+ } else if (len == 4) {
+ vlanh = (struct nft_payload_vlan_hdr *)src;
+ __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto,
+ ntohs(vlanh->h_vlan_TCI));
+ } else {
+ return false;
+ }
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+ if (len != 2)
+ return false;
+
+ vlan_tci = ntohs(nft_reg_load_be16(src));
+ skb->vlan_tci = vlan_tci;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
static void nft_payload_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload_set *priv = nft_expr_priv(expr);
- struct sk_buff *skb = pkt->skb;
const u32 *src = &regs->data[priv->sreg];
- int offset, csum_offset;
+ int offset, csum_offset, vlan_hlen = 0;
+ struct sk_buff *skb = pkt->skb;
__wsum fsum, tsum;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
- offset = skb_mac_header(skb) - skb->data;
+
+ if (skb_vlan_tag_present(skb) &&
+ nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
+ if (!nft_payload_set_vlan(src, skb,
+ priv->offset, priv->len,
+ &vlan_hlen))
+ goto err;
+
+ if (!vlan_hlen)
+ return;
+ }
+
+ offset = skb_mac_header(skb) - skb->data - vlan_hlen;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
offset = skb_network_offset(skb);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1ab17e8a7260..937a0c513c17 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1151,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- if (!cycle) {
- NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
- return -EINVAL;
- }
-
if (cycle < 0 || cycle > INT_MAX) {
NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
return -EINVAL;
@@ -1164,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
new->cycle_time = cycle;
}
+ if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
+ return -EINVAL;
+ }
+
taprio_calculate_gate_durations(q, new);
return 0;
@@ -1848,6 +1848,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
}
q->flags = taprio_flags;
+ /* Needed for length_to_duration() during netlink attribute parsing */
+ taprio_set_picos_per_byte(dev, q);
+
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
return err;
@@ -1907,7 +1910,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
goto free_sched;
- taprio_set_picos_per_byte(dev, q);
taprio_update_queue_max_sdu(q, new_admin, stab);
if (FULL_OFFLOAD_IS_ENABLED(q->flags))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e4af6616e1df..25b49efc0926 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -731,7 +731,7 @@ static int unix_listen(struct socket *sock, int backlog)
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out; /* Only stream/seqpacket sockets accept */
err = -EINVAL;
- if (!u->addr)
+ if (!READ_ONCE(u->addr))
goto out; /* No listens on an unbound socket */
unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
@@ -1131,8 +1131,8 @@ static struct sock *unix_find_other(struct net *net,
static int unix_autobind(struct sock *sk)
{
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct unix_address *addr;
u32 lastnum, ordernum;
@@ -1155,6 +1155,7 @@ static int unix_autobind(struct sock *sk)
addr->name->sun_family = AF_UNIX;
refcount_set(&addr->refcnt, 1);
+ old_hash = sk->sk_hash;
ordernum = get_random_u32();
lastnum = ordernum & 0xFFFFF;
retry:
@@ -1195,8 +1196,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
{
umode_t mode = S_IFSOCK |
(SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct mnt_idmap *idmap;
struct unix_address *addr;
@@ -1234,6 +1235,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
if (u->addr)
goto out_unlock;
+ old_hash = sk->sk_hash;
new_hash = unix_bsd_hash(d_backing_inode(dentry));
unix_table_double_lock(net, old_hash, new_hash);
u->path.mnt = mntget(parent.mnt);
@@ -1261,8 +1263,8 @@ out:
static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
int addr_len)
{
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct unix_address *addr;
int err;
@@ -1280,6 +1282,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
goto out_mutex;
}
+ old_hash = sk->sk_hash;
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
@@ -1369,7 +1372,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
- !unix_sk(sk)->addr) {
+ !READ_ONCE(unix_sk(sk)->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1481,7 +1484,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out;
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+ !READ_ONCE(u->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1950,7 +1954,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
}
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+ !READ_ONCE(u->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 475b904fe68b..66e07de2de35 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3910,15 +3910,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
/* Impossible. Such dst must be popped before reaches point of failure. */
}
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
- if (dst) {
- if (dst->obsolete) {
- dst_release(dst);
- dst = NULL;
- }
- }
- return dst;
+ if (dst->obsolete)
+ sk_dst_reset(sk);
}
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index d9520cb826b3..af393c7dee1f 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -728,7 +728,7 @@ static int sets_patch(struct object *obj)
static int symbols_patch(struct object *obj)
{
- int err;
+ off_t err;
if (__symbols_patch(obj, &obj->structs) ||
__symbols_patch(obj, &obj->unions) ||
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index a8188202413e..43742ac5b00d 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -148,6 +148,7 @@ enum {
NETDEV_A_QSTATS_RX_ALLOC_FAIL,
NETDEV_A_QSTATS_RX_HW_DROPS,
NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+ NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
NETDEV_A_QSTATS_RX_CSUM_NONE,
NETDEV_A_QSTATS_RX_CSUM_BAD,
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index a336786a22a3..3df0125ed5fa 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -392,11 +392,40 @@ static int probe_uprobe_multi_link(int token_fd)
link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
err = -errno; /* close() can clobber errno */
+ if (link_fd >= 0 || err != -EBADF) {
+ close(link_fd);
+ close(prog_fd);
+ return 0;
+ }
+
+ /* Initial multi-uprobe support in kernel didn't handle PID filtering
+ * correctly (it was doing thread filtering, not process filtering).
+ * So now we'll detect if PID filtering logic was fixed, and, if not,
+ * we'll pretend multi-uprobes are not supported, if not.
+ * Multi-uprobes are used in USDT attachment logic, and we need to be
+ * conservative here, because multi-uprobe selection happens early at
+ * load time, while the use of PID filtering is known late at
+ * attachment time, at which point it's too late to undo multi-uprobe
+ * selection.
+ *
+ * Creating uprobe with pid == -1 for (invalid) '/' binary will fail
+ * early with -EINVAL on kernels with fixed PID filtering logic;
+ * otherwise -ESRCH would be returned if passed correct binary path
+ * (but we'll just get -BADF, of course).
+ */
+ link_opts.uprobe_multi.pid = -1; /* invalid PID */
+ link_opts.uprobe_multi.path = "/"; /* invalid path */
+ link_opts.uprobe_multi.offsets = &offset;
+ link_opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
+ err = -errno; /* close() can clobber errno */
+
if (link_fd >= 0)
close(link_fd);
close(prog_fd);
- return link_fd < 0 && err == -EBADF;
+ return link_fd < 0 && err == -EINVAL;
}
static int probe_kern_bpf_cookie(int token_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 15ee7b2fc410..b9135720024c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -73,6 +73,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
"up primary");
ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
"addr primary");
+
+ if (mode == NETKIT_L3) {
+ ASSERT_EQ(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd 2> /dev/null"), 512,
+ "set hwaddress");
+ } else {
+ ASSERT_OK(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd"),
+ "set hwaddress");
+ }
if (same_netns) {
ASSERT_OK(system("ip link set dev " netkit_peer " up"),
"up peer");
@@ -89,6 +99,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
return err;
}
+static void move_netkit(void)
+{
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+}
+
static void destroy_netkit(void)
{
ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
@@ -685,3 +705,77 @@ void serial_test_tc_netkit_neigh_links(void)
serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
}
+
+static void serial_test_tc_netkit_pkt_type_mode(int mode)
+{
+ LIBBPF_OPTS(bpf_netkit_opts, optl_nk);
+ LIBBPF_OPTS(bpf_tcx_opts, optl_tcx);
+ int err, ifindex, ifindex2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, true);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc7,
+ BPF_TCX_INGRESS), 0, "tc7_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl_nk);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc7, ifindex2, &optl_tcx);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc7 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 1);
+
+ move_netkit();
+
+ tc_skel_reset_all_seen(skel);
+ skel->bss->set_type = true;
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc7, true, "seen_tc7");
+
+ ASSERT_EQ(skel->bss->seen_host, true, "seen_host");
+ ASSERT_EQ(skel->bss->seen_mcast, true, "seen_mcast");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_pkt_type(void)
+{
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 8269cdee33ae..bf6ca8e3eb13 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>
+#include <pthread.h>
#include <test_progs.h>
#include "uprobe_multi.skel.h"
#include "uprobe_multi_bench.skel.h"
#include "uprobe_multi_usdt.skel.h"
#include "bpf/libbpf_internal.h"
#include "testing_helpers.h"
+#include "../sdt.h"
static char test_data[] = "test_data";
@@ -25,9 +27,17 @@ noinline void uprobe_multi_func_3(void)
asm volatile ("");
}
+noinline void usdt_trigger(void)
+{
+ STAP_PROBE(test, pid_filter_usdt);
+}
+
struct child {
int go[2];
+ int c2p[2]; /* child -> parent channel */
int pid;
+ int tid;
+ pthread_t thread;
};
static void release_child(struct child *child)
@@ -38,6 +48,10 @@ static void release_child(struct child *child)
return;
close(child->go[1]);
close(child->go[0]);
+ if (child->thread)
+ pthread_join(child->thread, NULL);
+ close(child->c2p[0]);
+ close(child->c2p[1]);
if (child->pid > 0)
waitpid(child->pid, &child_status, 0);
}
@@ -63,7 +77,7 @@ static struct child *spawn_child(void)
if (pipe(child.go))
return NULL;
- child.pid = fork();
+ child.pid = child.tid = fork();
if (child.pid < 0) {
release_child(&child);
errno = EINVAL;
@@ -82,6 +96,7 @@ static struct child *spawn_child(void)
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
+ usdt_trigger();
exit(errno);
}
@@ -89,6 +104,67 @@ static struct child *spawn_child(void)
return &child;
}
+static void *child_thread(void *ctx)
+{
+ struct child *child = ctx;
+ int c = 0, err;
+
+ child->tid = syscall(SYS_gettid);
+
+ /* let parent know we are ready */
+ err = write(child->c2p[1], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ /* wait for parent's kick */
+ err = read(child->go[0], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+
+ err = 0;
+ pthread_exit(&err);
+}
+
+static struct child *spawn_thread(void)
+{
+ static struct child child;
+ int c, err;
+
+ /* pipe to notify child to execute the trigger functions */
+ if (pipe(child.go))
+ return NULL;
+ /* pipe to notify parent that child thread is ready */
+ if (pipe(child.c2p)) {
+ close(child.go[0]);
+ close(child.go[1]);
+ return NULL;
+ }
+
+ child.pid = getpid();
+
+ err = pthread_create(&child.thread, NULL, child_thread, &child);
+ if (err) {
+ err = -errno;
+ close(child.go[0]);
+ close(child.go[1]);
+ close(child.c2p[0]);
+ close(child.c2p[1]);
+ errno = -err;
+ return NULL;
+ }
+
+ err = read(child.c2p[0], &c, 1);
+ if (!ASSERT_EQ(err, 1, "child_thread_ready"))
+ return NULL;
+
+ return &child;
+}
+
static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
{
skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
@@ -103,15 +179,23 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
* passed at the probe attach.
*/
skel->bss->pid = child ? 0 : getpid();
+ skel->bss->expect_pid = child ? child->pid : 0;
+
+ /* trigger all probes, if we are testing child *process*, just to make
+ * sure that PID filtering doesn't let through activations from wrong
+ * PIDs; when we test child *thread*, we don't want to do this to
+ * avoid double counting number of triggering events
+ */
+ if (!child || !child->thread) {
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+ }
if (child)
kick_child(child);
- /* trigger all probes */
- uprobe_multi_func_1();
- uprobe_multi_func_2();
- uprobe_multi_func_3();
-
/*
* There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
* function and each slepable probe (6) increments uprobe_multi_sleep_result.
@@ -126,8 +210,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
- if (child)
+ ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");
+
+ if (child) {
ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
+ }
}
static void test_skel_api(void)
@@ -190,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul
if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
goto cleanup;
+ /* Attach (uprobe-backed) USDTs */
+ skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
+ goto cleanup;
+
+ skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
+ goto cleanup;
+
uprobe_multi_test_run(skel, child);
+ ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
+ if (child) {
+ ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
+ }
cleanup:
uprobe_multi__destroy(skel);
}
@@ -210,6 +314,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
return;
__test_attach_api(binary, pattern, opts, child);
+
+ /* pid filter (thread) */
+ child = spawn_thread();
+ if (!ASSERT_OK_PTR(child, "spawn_thread"))
+ return;
+
+ __test_attach_api(binary, pattern, opts, child);
}
static void test_attach_api_pattern(void)
@@ -397,7 +508,7 @@ static void test_attach_api_fails(void)
link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
if (!ASSERT_ERR(link_fd, "link_fd"))
goto cleanup;
- ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
+ ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
cleanup:
if (link_fd >= 0)
@@ -495,6 +606,13 @@ static void test_link_api(void)
return;
__test_link_api(child);
+
+ /* pid filter (thread) */
+ child = spawn_thread();
+ if (!ASSERT_OK_PTR(child, "spawn_thread"))
+ return;
+
+ __test_link_api(child);
}
static void test_bench_attach_uprobe(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index c60db8beeb73..1c9c4ec1be11 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -67,6 +67,7 @@
#include "verifier_search_pruning.skel.h"
#include "verifier_sock.skel.h"
#include "verifier_sock_addr.skel.h"
+#include "verifier_sockmap_mutate.skel.h"
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
@@ -183,6 +184,7 @@ void test_verifier_sdiv(void) { RUN(verifier_sdiv); }
void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); }
void test_verifier_sock(void) { RUN(verifier_sock); }
void test_verifier_sock_addr(void) { RUN(verifier_sock_addr); }
+void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 992400acb957..ab3eae3d6af8 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -4,7 +4,8 @@
#include <linux/bpf.h>
#include <linux/if_ether.h>
-
+#include <linux/stddef.h>
+#include <linux/if_packet.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
@@ -16,7 +17,13 @@ bool seen_tc3;
bool seen_tc4;
bool seen_tc5;
bool seen_tc6;
+bool seen_tc7;
+
+bool set_type;
+
bool seen_eth;
+bool seen_host;
+bool seen_mcast;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
@@ -28,8 +35,16 @@ int tc1(struct __sk_buff *skb)
if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
goto out;
seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+ seen_host = skb->pkt_type == PACKET_HOST;
+ if (seen_host && set_type) {
+ eth.h_dest[0] = 4;
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
+ goto fail;
+ bpf_skb_change_type(skb, PACKET_MULTICAST);
+ }
out:
seen_tc1 = true;
+fail:
return TCX_NEXT;
}
@@ -67,3 +82,21 @@ int tc6(struct __sk_buff *skb)
seen_tc6 = true;
return TCX_PASS;
}
+
+SEC("tc/ingress")
+int tc7(struct __sk_buff *skb)
+{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ if (eth.h_dest[0] == 4 && set_type) {
+ seen_mcast = skb->pkt_type == PACKET_MULTICAST;
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+out:
+ seen_tc7 = true;
+ return TCX_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
index 419d9aa28fce..44190efcdba2 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <stdbool.h>
+#include <bpf/usdt.bpf.h>
char _license[] SEC("license") = "GPL";
@@ -22,6 +22,13 @@ __u64 uprobe_multi_sleep_result = 0;
int pid = 0;
int child_pid = 0;
+int child_tid = 0;
+int child_pid_usdt = 0;
+int child_tid_usdt = 0;
+
+int expect_pid = 0;
+bool bad_pid_seen = false;
+bool bad_pid_seen_usdt = false;
bool test_cookie = false;
void *user_ptr = 0;
@@ -36,11 +43,19 @@ static __always_inline bool verify_sleepable_user_copy(void)
static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
- child_pid = bpf_get_current_pid_tgid() >> 32;
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
- if (pid && child_pid != pid)
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
return;
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen = true;
+
+ child_pid = cur_pid_tgid >> 32;
+ child_tid = (__u32)cur_pid_tgid;
+
__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
__u64 addr = bpf_get_func_ip(ctx);
@@ -97,5 +112,32 @@ int uretprobe_sleep(struct pt_regs *ctx)
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
+ /* we need this one just to mix PID-filtered and global uprobes */
+ return 0;
+}
+
+SEC("usdt")
+int usdt_pid(struct pt_regs *ctx)
+{
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
+
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
+ return 0;
+
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen_usdt = true;
+
+ child_pid_usdt = cur_pid_tgid >> 32;
+ child_tid_usdt = (__u32)cur_pid_tgid;
+
+ return 0;
+}
+
+SEC("usdt")
+int usdt_extra(struct pt_regs *ctx)
+{
+ /* we need this one just to mix PID-filtered and global USDT probes */
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
new file mode 100644
index 000000000000..fe4b123187b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+#define __always_unused __attribute__((unused))
+
+char _license[] SEC("license") = "GPL";
+
+struct sock {
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__sockmap {
+ union {
+ struct sock *sk;
+ };
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockmap SEC(".maps");
+
+enum { CG_OK = 1 };
+
+int zero = 0;
+
+static __always_inline void test_sockmap_delete(void)
+{
+ bpf_map_delete_elem(&sockmap, &zero);
+ bpf_map_delete_elem(&sockhash, &zero);
+}
+
+static __always_inline void test_sockmap_update(void *sk)
+{
+ if (sk) {
+ bpf_map_update_elem(&sockmap, &zero, sk, BPF_ANY);
+ bpf_map_update_elem(&sockhash, &zero, sk, BPF_ANY);
+ }
+}
+
+static __always_inline void test_sockmap_lookup_and_update(void)
+{
+ struct bpf_sock *sk = bpf_map_lookup_elem(&sockmap, &zero);
+
+ if (sk) {
+ test_sockmap_update(sk);
+ bpf_sk_release(sk);
+ }
+}
+
+static __always_inline void test_sockmap_mutate(void *sk)
+{
+ test_sockmap_delete();
+ test_sockmap_update(sk);
+}
+
+static __always_inline void test_sockmap_lookup_and_mutate(void)
+{
+ test_sockmap_delete();
+ test_sockmap_lookup_and_update();
+}
+
+SEC("action")
+__success
+int test_sched_act(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("classifier")
+__success
+int test_sched_cls(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("flow_dissector")
+__success
+int test_flow_dissector_delete(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("flow_dissector")
+__failure __msg("program of this type cannot use helper bpf_sk_release")
+int test_flow_dissector_update(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_lookup_and_update(); /* no access to skb->sk */
+ return 0;
+}
+
+SEC("iter/sockmap")
+__success
+int test_trace_iter(struct bpf_iter__sockmap *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_delete(const void *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_update(const void *ctx __always_unused)
+{
+ test_sockmap_lookup_and_update();
+ return 0;
+}
+
+SEC("sk_lookup")
+__success
+int test_sk_lookup(struct bpf_sk_lookup *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("sk_reuseport")
+__success
+int test_sk_reuseport(struct sk_reuseport_md *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("socket")
+__success
+int test_socket_filter(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("sockops")
+__success
+int test_sockops_delete(struct bpf_sock_ops *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return CG_OK;
+}
+
+SEC("sockops")
+__failure __msg("cannot update sockmap in this context")
+int test_sockops_update(struct bpf_sock_ops *ctx)
+{
+ test_sockmap_update(ctx->sk);
+ return CG_OK;
+}
+
+SEC("sockops")
+__success
+int test_sockops_update_dedicated(struct bpf_sock_ops *ctx)
+{
+ bpf_sock_map_update(ctx, &sockmap, &zero, BPF_ANY);
+ bpf_sock_hash_update(ctx, &sockhash, &zero, BPF_ANY);
+ return CG_OK;
+}
+
+SEC("xdp")
+__success
+int test_xdp(struct xdp_md *ctx __always_unused)
+{
+ test_sockmap_lookup_and_mutate();
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 790294c8af83..3684b813b0f6 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -174,6 +174,8 @@ trap cleanup_all_ns EXIT
setup_hsr_interfaces 0
do_complete_ping_test
+setup_ns ns1 ns2 ns3
+
setup_hsr_interfaces 1
do_complete_ping_test
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fefa9173bdaa..2b66c5fa71eb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -261,6 +261,8 @@ reset()
TEST_NAME="${1}"
+ MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified
+
if skip_test; then
MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
last_test_ignored=1
@@ -448,7 +450,9 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
- ret=${KSFT_FAIL}
+ if ! mptcp_lib_subtest_is_flaky; then
+ ret=${KSFT_FAIL}
+ fi
if [ ${#} -gt 0 ]; then
print_fail "${@}"
@@ -3069,6 +3073,7 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
@@ -3077,6 +3082,7 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 0 0 0 1
@@ -3095,6 +3101,7 @@ fail_tests()
{
# single subflow
if reset_with_fail "Infinite map" 1; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=128 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
@@ -3103,6 +3110,7 @@ fail_tests()
# multiple subflows
if reset_with_fail "MP_FAIL MP_RST" 2; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index ad2ebda5cb64..6ffa9b7a3260 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -21,6 +21,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@@ -41,6 +42,16 @@ else
readonly MPTCP_LIB_COLOR_RESET=
fi
+# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
+# from subtests marked as flaky
+mptcp_lib_override_flaky() {
+ [ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
+}
+
+mptcp_lib_subtest_is_flaky() {
+ [ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
+}
+
# $1: color, $2: text
mptcp_lib_print_color() {
echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@@ -72,7 +83,16 @@ mptcp_lib_pr_skip() {
}
mptcp_lib_pr_fail() {
- mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+ local title cmt
+
+ if mptcp_lib_subtest_is_flaky; then
+ title="IGNO"
+ cmt=" (flaky)"
+ else
+ title="FAIL"
+ fi
+
+ mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}
mptcp_lib_pr_info() {
@@ -208,7 +228,13 @@ mptcp_lib_result_pass() {
# $1: test name
mptcp_lib_result_fail() {
- __mptcp_lib_result_add "not ok" "${1}"
+ if mptcp_lib_subtest_is_flaky; then
+ # It might sound better to use 'not ok # TODO' or 'ok # SKIP',
+ # but some CIs don't understand 'TODO' and treat SKIP as errors.
+ __mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
+ else
+ __mptcp_lib_result_add "not ok" "${1}"
+ fi
}
# $1: test name
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 4b14b4412166..f74e1c3c126d 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -244,7 +244,7 @@ run_test()
do_transfer $small $large $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
- if [ $lret -ne 0 ]; then
+ if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
fi
@@ -254,7 +254,7 @@ run_test()
do_transfer $large $small $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
- if [ $lret -ne 0 ]; then
+ if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
fi
@@ -290,7 +290,7 @@ run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
# we still need some additional infrastructure to pass the following test-cases
-run_test 10 3 0 0 "unbalanced bwidth"
+MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 12da0a939e3e..557fb074acf0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -133,6 +133,50 @@
]
},
{
+ "id": "6f62",
+ "name": "Add taprio Qdisc with too short interval",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 300 sched-entry S 02 1700 clockid CLOCK_TAI",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "831f",
+ "name": "Add taprio Qdisc with too short cycle-time",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 200000 sched-entry S 02 200000 cycle-time 100 clockid CLOCK_TAI",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
"id": "3e1e",
"name": "Add taprio Qdisc with an invalid cycle-time",
"category": [